In [None]:
# Install the fuzzy-c-means package; the `fcmeans` import in the next cell presumably comes from it.
!pip install fuzzy-c-means

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from tabulate import tabulate
from sklearn.cluster import KMeans, AffinityPropagation
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, silhouette_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from fcmeans import FCM

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the dataset CSV
# stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def get_dataset(path='/content/drive/MyDrive/Datasets/fraud_detection_bank_dataset.csv'):
  """Load the fraud-detection dataset from a CSV file.

  Args:
    path: Location of the CSV file. Defaults to the copy on the mounted
      Google Drive that this notebook reads from, so existing calls without
      arguments keep working.

  Returns:
    A pandas DataFrame with the parsed CSV contents.
  """
  return pd.read_csv(path)

In [None]:
def normalize_data(data, norm):
  """Rescale every column of ``data`` independently.

  Args:
    data: Numeric array-like; statistics are taken along axis 0.
    norm: ``'min'`` for min-max scaling, ``(x - min) / (max - min)``, or
      ``'std'`` for z-score standardization, ``(x - mean) / std``, using the
      sample standard deviation (``ddof=1``).

  Returns:
    The rescaled values with NaN entries replaced by 0. NaN shows up when a
    column is constant, because its range / standard deviation is zero.

  Raises:
    ValueError: If ``norm`` is neither ``'min'`` nor ``'std'``.
  """
  if norm == 'min':
    col_min = data.min(axis=0)
    normalized = (data - col_min) / (data.max(axis=0) - col_min)
  elif norm == 'std':
    # Center on the mean, then divide by the standard deviation. Neither term
    # is squared: `**` binds tighter than `-` and `/`, so squaring here would
    # subtract mean**2 and divide by the variance.
    normalized = (data - np.mean(data, axis=0)) / np.std(data, axis=0, ddof=1)
  else:
    raise ValueError("norm must be 'min' or 'std', got %r" % (norm,))
  # `nan` must be given by keyword: the second positional parameter of
  # np.nan_to_num is `copy`, not the replacement value.
  return np.nan_to_num(normalized, nan=0.0)

In [None]:
def fisher(g1, g2):
  """Fisher-style separation score between two groups of values.

  The score is the squared difference of the group means divided by the sum
  of the squared sample standard deviations (``ddof=1``). ``mean`` and ``std``
  are called without an axis, so for NumPy arrays each group is reduced over
  all of its elements.

  Args:
    g1: First group; must provide ``mean()`` and ``std(ddof=...)``.
    g2: Second group, same requirements.

  Returns:
    ``|mean(g1) - mean(g2)|**2 / (std(g1)**2 + std(g2)**2)``.
  """
  mean_gap = abs(g1.mean() - g2.mean())
  spread = g1.std(ddof=1) ** 2 + g2.std(ddof=1) ** 2
  return mean_gap ** 2 / spread

In [None]:
def plot_clustering(X, y_pred, centers):
  """Scatter the first two columns of the samples and of the cluster centers.

  Args:
    X: 2-D array of samples; only columns 0 and 1 are drawn.
    y_pred: Per-sample values used to color the sample points.
    centers: 2-D array of cluster centers; only columns 0 and 1 are drawn.
  """
  _, ax = plt.subplots()

  # Centers are drawn first as large red stars, the samples afterwards.
  ax.scatter(centers[:, 0], centers[:, 1], c="red", marker='*', s=150)
  ax.scatter(X[:, 0], X[:, 1], c=y_pred)
  ax.set_title("Dados associados aos clusters")

  plt.show()

In [None]:
def preprocess_data(dataset, norm):
  """Split the raw table into a normalized feature matrix and a label vector.

  Column 0 is skipped, columns 1 through 112 are taken as features and
  column 113 as the label.

  Args:
    dataset: DataFrame with at least 114 columns laid out as described above.
    norm: Normalization mode forwarded to ``normalize_data``.

  Returns:
    Tuple ``(X, y)`` of float NumPy arrays: the normalized features and the
    labels.
  """
  # The slice end is exclusive, so 1:113 covers every column between the
  # skipped column 0 and the label in column 113. Stopping at 112 would leave
  # column 112 out of both X and y.
  # NOTE(review): assumes column 112 is a regular feature - confirm against the CSV schema.
  features = dataset.iloc[:, 1:113]
  labels = dataset.iloc[:, 113]
  return normalize_data(features.to_numpy(dtype='float'), norm), labels.to_numpy(dtype='float')

In [None]:
def evaluate(X, y, predictions, centers):
  """Score binary cluster assignments against the true labels.

  Cluster ids in ``predictions`` are compared directly with the class labels
  in ``y`` (cluster 0 is treated as class 0, cluster 1 as class 1).

  Args:
    X: Sample matrix the predictions were made on.
    y: True labels (0 / 1) for the rows of ``X``.
    predictions: Predicted cluster id (0 / 1) for the rows of ``X``.
    centers: Cluster centers. Not used here; kept so existing calls still work.

  Returns:
    Tuple ``(cm, acc, tpr, tnr, sc, fs)``: the 2x2 confusion matrix, accuracy,
    true-positive rate, true-negative rate, silhouette score and the Fisher
    score between the samples assigned to cluster 0 and to cluster 1.
  """
  # Pinning the label set keeps the matrix 2x2 even when only one class shows
  # up in both `y` and `predictions`; otherwise the 4-way unpack below fails.
  cm = confusion_matrix(y, predictions, labels=[0, 1])
  tn, fp, fn, tp = cm.ravel()
  acc = (tn + tp) / (tn + fp + fn + tp)
  tpr = tp / (tp + fn)
  tnr = tn / (tn + fp)
  # NOTE(review): the silhouette is computed against the true labels `y`, so it
  # does not depend on `predictions` - confirm that this is intended.
  sc = silhouette_score(X, y)
  fs = fisher(X[predictions == 0], X[predictions == 1])
  return cm, acc, tpr, tnr, sc, fs

In [None]:
# Load the raw table, then build the feature matrix X (normalized with the
# 'min' mode of normalize_data) and the label vector y used by later cells.
dataset = get_dataset()
X, y = preprocess_data(dataset, 'min')

In [None]:
# Values passed as PCA n_components in the experiment loops below.
variances = [0.75, 0.9, 0.99]
# Display names of the clustering models being compared.
models = ['K-Means', 'Fuzzy C-Means']
# One independent, initially empty result dict per model name.
results = dict((model_name, dict()) for model_name in models)

In [None]:
# K-Means experiment: for each entry of `variances`, reduce X with PCA, split
# into train/test, fit on the train part and score the test-part predictions.
for variance in variances:
  model = KMeans(n_clusters=2, random_state=0, algorithm='elkan')
  # `variance` is handed to PCA as n_components; the number of components
  # actually kept is read back from pca.n_components_ below.
  pca = PCA(n_components=variance, svd_solver='full')
  new_data = pca.fit_transform(np.copy(X), y)
  X_train, X_test, y_train, y_test = train_test_split(new_data, y, test_size=0.33, random_state=10)

  model.fit(X_train)
  predictions = model.predict(X_test)
  centers = model.cluster_centers_
  plot_clustering(X_test, predictions, centers)
  cm, acc, tpr, tnr, sc, fs = evaluate(X_test, y_test, predictions, centers)
  # Plain assignment rather than setdefault: re-running this cell in a live
  # kernel must replace the stored entry instead of keeping a stale one.
  results['K-Means'][variance] = (pca.n_components_, cm, acc, tpr, tnr, sc, fs)

In [None]:
# Fuzzy C-Means experiment: same pipeline as the K-Means cell (PCA, split,
# fit on train, score on test), once per entry of `variances`.
for variance in variances:
  # Seeded so the run is repeatable, mirroring the seeded K-Means model.
  model = FCM(n_clusters=2, random_state=0)
  # `variance` is handed to PCA as n_components; the number of components
  # actually kept is read back from pca.n_components_ below.
  pca = PCA(n_components=variance, svd_solver='full')
  new_data = pca.fit_transform(np.copy(X), y)
  X_train, X_test, y_train, y_test = train_test_split(new_data, y, test_size=0.33, random_state=10)

  model.fit(X_train)
  predictions = model.predict(X_test)
  centers = model.centers
  plot_clustering(X_test, predictions, centers)
  cm, acc, tpr, tnr, sc, fs = evaluate(X_test, y_test, predictions, centers)
  # Plain assignment rather than setdefault: re-running this cell in a live
  # kernel must replace the stored entry instead of keeping a stale one.
  results['Fuzzy C-Means'][variance] = (pca.n_components_, cm, acc, tpr, tnr, sc, fs)

In [None]:
# Render the collected metrics as a grid: one row per model, one column per
# stored variance entry (in insertion order).
headers = ['Modelo', 'Variância 75%', 'Variância 90%', 'Variância 99%']
cell_format = 'Nº Comp.: %s\nACC: %.2f\nTPR: %.2f\nTNR: %.2f\nSC: %.2f\nCF: %.2f\nCM:\n%s'

rows = []
for model, values in results.items():
  row = [model]
  # Each stored tuple is (n_components, cm, acc, tpr, tnr, sc, fs); the
  # confusion matrix is printed last inside the cell.
  for n_componentes, cm, acc, tpr, tnr, sc, fs in values.values():
    row.append(cell_format % (n_componentes, acc, tpr, tnr, sc, fs, cm))
  rows.append(row)

table = tabulate(rows, headers=headers, tablefmt='fancy_grid')
print(table)

╒═══════════════╤═════════════════╤═════════════════╤═════════════════╕
│ Modelo        │ Variância 75%   │ Variância 90%   │ Variância 99%   │
╞═══════════════╪═════════════════╪═════════════════╪═════════════════╡
│ K-Means       │ Nº Comp.: 2     │ Nº Comp.: 4     │ Nº Comp.: 18    │
│               │ ACC: 0.73       │ ACC: 0.73       │ ACC: 0.73       │
│               │ TPR: 0.00       │ TPR: 0.00       │ TPR: 0.00       │
│               │ TNR: 1.00       │ TNR: 1.00       │ TNR: 1.00       │
│               │ SC: 0.25        │ SC: 0.11        │ SC: 0.34        │
│               │ CF: nan         │ CF: nan         │ CF: nan         │
│               │ CM:             │ CM:             │ CM:             │
│               │ [[4933    0]    │ [[4933    0]    │ [[4933    0]    │
│               │  [1822    0]]   │  [1822    0]]   │  [1822    0]]   │
├───────────────┼─────────────────┼─────────────────┼─────────────────┤
│ Fuzzy C-Means │ Nº Comp.: 2     │ Nº Comp.: 4     │ Nº Comp.: 