# The Clustering Methods
1. K-Means
2. Fuzzy C-means or EM
4. DBScan
5. Spectral

## Import the Data

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import math
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn import cluster

In [2]:
# Dataset 1
unbal_train = pd.read_json('data/unbal_train.json').replace({'human': 0, 'bot':1})
unbal_test = pd.read_json('data/unbal_test.json').replace({'human': 0, 'bot':1})
# Dataset 2
bal_train = pd.read_json('data/bal_train.json').replace({'human': 0, 'bot':1})
bal_test = pd.read_json('data/bal_test.json').replace({'human': 0, 'bot':1})
# Dataset 3
crop_unbal_train = pd.read_json('data/crop_unbal_train.json').replace({'human': 0, 'bot':1})
crop_unbal_test = pd.read_json('data/crop_unbal_test.json').replace({'human': 0, 'bot':1})
# Dataset 4
human_train = pd.read_json('data/more_human_train.json').replace({'human': 0, 'bot':1})
human_test = pd.read_json('data/more_human_test.json').replace({'human': 0, 'bot':1})

train_df = [unbal_train, bal_train, crop_unbal_train, human_train]
test_df = [unbal_test, bal_test, crop_unbal_test, human_test]


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def print_analysis(test, predictions, label):
  print("-----------------------------------")
  print('{} Accuracy score: {}'.format(label, accuracy_score(test, predictions)))
  print('{} Precision score: {}'.format(label, precision_score(test, predictions)))
  print('{} Recall score: {}'.format(label, recall_score(test, predictions)))
  print('{} F1 score: {}'.format(label, f1_score(test, predictions)))
  print("-----------------------------------\n")

In [4]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# # UNBALANCED
# x_train_u = train_u.to_numpy()[:,:-1].astype(int)   # all cols of train_u except for class label
# x_train_u_scaled = scaler.fit_transform(x_train_u)

# y_train_u = train_u.to_numpy()[:,-1].astype(int)    # only class label (last) column of train_u

# x_test_u = test_u.to_numpy()[:,:-1].astype(int)   # all cols of test_u except for class label
# x_test_u_scaled = scaler.transform(x_test_u)

# y_test_u = test_u.to_numpy()[:,-1].astype(int)    # only class label (last) column of test_u

# # BALANCED
# x_train_b = train_b.to_numpy()[:,:-1].astype(int)   # all cols of train_b except for class label
# x_train_b_scaled = scaler.fit_transform(x_train_b)

# y_train_b = train_b.to_numpy()[:,-1].astype(int)    # only class label (last) column of train_b

# x_test_b = test_b.to_numpy()[:,:-1].astype(int)   # all cols of test_b except for class label
# x_test_b_scaled = scaler.transform(x_test_b)

# y_test_b = test_b.to_numpy()[:,-1].astype(int)    # only class label (last) column of test_b

train_data = []
train_labels = []
test_data = []
test_labels = []
for i in range(len(train_df)):
  train = train_df[i]
  test = test_df[i]
  x_train = train.to_numpy()[:, :-1].astype(int)
  x_train_scaled = scaler.fit_transform(x_train)
  train_data.append(x_train_scaled)
  y_train = train.to_numpy()[:, -1].astype(int)
  train_labels.append(y_train)

  x_test = test.to_numpy()[:, :-1].astype(int)
  x_test_scaled = scaler.transform(x_test)
  test_data.append(x_test_scaled)
  y_test = test.to_numpy()[:, -1].astype(int)
  test_labels.append(y_test)

# K-Means Clustering

### K-means training and resulting centroids

In [5]:
k_means = []
for i in range(len(train_data)):
  print('Dataset '+ str(i+1))
  data = pd.DataFrame(train_data[i], columns = train_df[i].columns[:-1])
  k = cluster.KMeans(n_clusters=2, max_iter=100, random_state=1)
  k.fit(data)
  k_means.append(k)
  centroids = k.cluster_centers_
  print(pd.DataFrame(centroids, columns=data.columns))

# # K-means on Unbalanced Data
# data = pd.DataFrame(x_train_u_scaled, columns=train_u.columns[:-1])
# k_means = cluster.KMeans(n_clusters=2, max_iter=100, random_state=1)
# k_means.fit(data) 
# labels = k_means.labels_
# pd.DataFrame(labels, columns=['Cluster ID'])

Dataset 1
   verified  geo_enabled  profile_use_background_image  default_profile  \
0 -0.091565    -0.238496                      0.250715         0.341746   
1  0.654181     1.703922                     -1.791221        -2.441587   

   followers_count  friends_count  listed_count  favourites_count  \
0        -0.010240      -0.068542     -0.032056         -0.147109   
1         0.073161       0.489692      0.229025          1.051013   

   statuses_count  
0       -0.150072  
1        1.072178  
Dataset 2
   verified  geo_enabled  profile_use_background_image  default_profile  \
0 -0.159374    -0.429082                      0.390623          0.59580   
1  0.340001     0.915381                     -0.833335         -1.27105   

   followers_count  friends_count  listed_count  favourites_count  \
0        -0.051474      -0.107433     -0.067492         -0.238147   
1         0.109811       0.229191      0.143984          0.508051   

   statuses_count  
0       -0.254474  
1        0.5

### Evalute Models on Training Data

In [6]:
for i in range(len(k_means)):
  print('Dataset '+ str(i+1))
  k = k_means[i]
  labels = np.uint(k.predict(train_data[i]))
  # flip the labels since the random assignment got them backwards
  labels_flipped = np.logical_not(labels).astype(int)
  print_analysis(labels, train_labels[i], 'Training')
  print_analysis(labels_flipped, train_labels[i], 'Training Flipped')


# # Evaluate on training data
# labels = k_means.predict(x_train_u_scaled)
# labels = labels.reshape(-1, 1)
# cols = train_u.columns.tolist()
# cols.append('Cluster ID')
# check = pd.DataFrame(np.concatenate((train_u, labels), axis=1), columns=cols)
# check

Dataset 1
-----------------------------------
Training Accuracy score: 0.0808330653738653
Training Precision score: 0.02588415442150829
Training Recall score: 0.17721774193548387
Training F1 score: 0.04517073922762661
-----------------------------------

-----------------------------------
Training Flipped Accuracy score: 0.9191669346261347
Training Flipped Precision score: 0.9741158455784917
Training Flipped Recall score: 0.9326454086667231
Training Flipped F1 score: 0.9529296537420061
-----------------------------------

Dataset 2
-----------------------------------
Training Accuracy score: 0.2093148992044489
Training Precision score: 0.026062674526838348
Training Recall score: 0.04070753574024715
Training F1 score: 0.03177905986947886
-----------------------------------

-----------------------------------
Training Flipped Accuracy score: 0.7906851007955511
Training Flipped Precision score: 0.9739373254731617
Training Flipped Recall score: 0.7117913832199546
Training Flipped F1 scor

### Evaluate Model on Testing Data

In [7]:
for i in range(len(k_means)):
  print('Dataset '+ str(i+1))
  k = k_means[i]
  labels = np.uint(k.predict(test_data[i]))
  # flip the labels since the random assignment got them backwards
  labels_flipped = np.logical_not(labels).astype(int)
  print_analysis(labels, test_labels[i], 'Testing')
  print_analysis(labels_flipped, test_labels[i], 'Testing Flipped')



Dataset 1
-----------------------------------
Testing Accuracy score: 0.08795013850415513
Testing Precision score: 0.03111006363422107
Testing Recall score: 0.20935765265662173
Testing F1 score: 0.054170514004309014
-----------------------------------

-----------------------------------
Testing Flipped Accuracy score: 0.9120498614958449
Testing Flipped Precision score: 0.9688899363657789
Testing Flipped Recall score: 0.9293545834746242
Testing Flipped F1 score: 0.9487105521260023
-----------------------------------

Dataset 2
-----------------------------------
Testing Accuracy score: 0.2119246215631758
Testing Precision score: 0.02916160388821385
Testing Recall score: 0.04795204795204795
Testing F1 score: 0.03626747261050245
-----------------------------------

-----------------------------------
Testing Flipped Accuracy score: 0.7880753784368242
Testing Flipped Precision score: 0.9708383961117861
Testing Flipped Recall score: 0.7146690518783542
Testing Flipped F1 score: 0.8232869654

# Prototype-Based Model: Gaussian Mixture

### Gaussian Mixture training and resulting centroids

In [8]:
gaussian = []
for i in range(len(train_data)):
  print('Dataset '+ str(i+1))
  data = pd.DataFrame(train_data[i], columns = train_df[i].columns[:-1])
  gm = GaussianMixture(n_components=2, random_state=1).fit(data)
  gaussian.append(gm)
  centroids = gm.means_
  print(pd.DataFrame(centroids, columns=data.columns))

Dataset 1
   verified  geo_enabled  profile_use_background_image  default_profile  \
0 -0.093986    -0.318948                      0.250715         0.383728   
1  0.417597     1.417138                     -1.113970        -1.704969   

   followers_count  friends_count  listed_count  favourites_count  \
0        -0.010501      -0.085712     -0.033815         -0.177492   
1         0.046656       0.380835      0.150246          0.788625   

   statuses_count  
0       -0.170037  
1        0.755501  
Dataset 2
   verified  geo_enabled  profile_use_background_image  default_profile  \
0  0.430538     0.650834                     -1.014924        -1.065952   
1 -0.165705    -0.250492                      0.390623         0.410262   

   followers_count  friends_count  listed_count  favourites_count  \
0         0.139240       0.299022      0.183139          0.635698   
1        -0.053591      -0.115087     -0.070486         -0.244667   

   statuses_count  
0        0.664184  
1       -0.2

### Evaluate on Training Data

In [9]:
for i in range(len(gaussian)):
  print('Dataset '+ str(i+1))
  gm = gaussian[i]
  labels = np.uint(gm.predict(train_data[i]))
  # flip the labels since the random assignment got them backwards
  labels_flipped = np.logical_not(labels).astype(int)
  print_analysis(labels, train_labels[i], 'Training')
  print_analysis(labels_flipped, train_labels[i], 'Training Flipped')


Dataset 1
-----------------------------------
Training Accuracy score: 0.04707017240099928
Training Precision score: 0.04216849730557437
Training Recall score: 0.19270623065536266
Training F1 score: 0.06919545784005798
-----------------------------------

-----------------------------------
Training Flipped Accuracy score: 0.9529298275990007
Training Flipped Precision score: 0.9578315026944256
Training Flipped Recall score: 0.9857264076610703
Training Flipped F1 score: 0.9715787744373253
-----------------------------------

Dataset 2
-----------------------------------
Training Accuracy score: 0.7480497412527999
Training Precision score: 0.9722308408315234
Training Recall score: 0.6702673796791444
Training F1 score: 0.7934920232970373
-----------------------------------

-----------------------------------
Training Flipped Accuracy score: 0.2519502587472001
Training Flipped Precision score: 0.027769159168476575
Training Flipped Recall score: 0.04976369196552683
Training Flipped F1 scor

### Evaluate on Testing Data

In [10]:
for i in range(len(gaussian)):
  print('Dataset '+ str(i+1))
  gm = gaussian[i]
  labels = np.uint(gm.predict(test_data[i]))
  # flip the labels since the random assignment got them backwards
  labels_flipped = np.logical_not(labels).astype(int)
  print_analysis(labels, test_labels[i], 'Testing')
  print_analysis(labels_flipped, test_labels[i], 'Testing Flipped')

Dataset 1
-----------------------------------
Testing Accuracy score: 0.05500593589236249
Testing Precision score: 0.05102521800612774
Testing Recall score: 0.22412008281573498
Testing F1 score: 0.08312535995392589
-----------------------------------

-----------------------------------
Testing Flipped Accuracy score: 0.9449940641076375
Testing Flipped Precision score: 0.9489747819938723
Testing Flipped Recall score: 0.9849559686888454
Testing Flipped F1 score: 0.9666306565838435
-----------------------------------

Dataset 2
-----------------------------------
Testing Accuracy score: 0.7537843682421995
Testing Precision score: 0.9678007290400972
Testing Recall score: 0.6816431322207959
Testing F1 score: 0.7998995731860405
-----------------------------------

-----------------------------------
Testing Flipped Accuracy score: 0.24621563175780042
Testing Flipped Precision score: 0.03219927095990279
Testing Flipped Recall score: 0.058888888888888886
Testing Flipped F1 score: 0.0416339355

# Check for outliers

In [11]:
cols = train_df[0].columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((train_data[-1], labels), axis=1), columns=cols)
check['Cluster ID'].value_counts()

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

This appears to struggling with a single outlier so what if we remove it

In [None]:
train_u_new = train_u.drop(train_u.index[17007])
x_train_u_new = train_u_new.to_numpy()[:,:-1].astype(int)
x_train_u_new_scaled = scaler.fit_transform(x_train_u_new)
y_train_u_new = train_u_new.to_numpy()[:,-1].astype(int)

x_test_u_new_scaled = scaler.transform(x_test_u)

In [None]:
gm = GaussianMixture(n_components=2, random_state=0).fit(x_train_u_new_scaled)
gm.means_

array([[-0.09412137, -0.31914039,  0.25360375,  0.38436984, -0.01097372,
        -0.08258622, -0.03375535, -0.17752917, -0.17573083],
       [ 0.41520221,  1.4078396 , -1.1187346 , -1.69558947,  0.04840892,
         0.36431661,  0.14890661,  0.78314308,  0.77520998]])

### Evaluate on Training Data

In [None]:
labels = gm.predict(x_train_u_new_scaled)
labels = labels.reshape(-1, 1)
cols = test_u.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((train_u_new, labels), axis=1), columns=cols)
check


Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,False,True,True,1629,1644,0,76,1375,0,1
1,False,False,True,False,136,339,14,14474,7915,0,1
2,False,False,True,True,0,0,0,0,1,1,0
3,False,False,True,True,2,0,0,0,103,1,0
4,False,False,True,True,0,0,0,0,37,1,0
...,...,...,...,...,...,...,...,...,...,...,...
40423,False,False,True,True,12,116,0,109,92,1,0
40424,False,False,True,True,0,0,0,0,15,1,0
40425,False,False,True,True,0,0,0,0,59,1,0
40426,False,False,True,True,0,55,0,0,8,1,0


In [None]:
labels = np.uint(gm.predict(x_train_u_new_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_train_u_new, 'Training')

-----------------------------------
Training Accuracy score: 0.951246660730187
Training Precision score: 0.956130020021199
Training Recall score: 0.9854043392504931
Training F1 score: 0.9705464815672679
-----------------------------------



### Evaluate on Testing Data

In [None]:
labels = gm.predict(x_test_u_new_scaled)
labels = labels.reshape(-1, 1)
cols = test_u.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((test_u, labels), axis=1), columns=cols)
check

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,False,True,True,206,201,0,7,151,1,0
1,False,False,True,True,1,3,0,2,14,1,0
2,False,False,True,True,0,0,0,0,3,1,0
3,False,False,True,True,0,0,0,0,21,1,0
4,False,False,True,True,0,0,0,0,44,1,0
...,...,...,...,...,...,...,...,...,...,...,...
10103,False,False,True,True,5,62,0,9,4,1,0
10104,False,False,True,False,5524,347,131,321,8319,0,1
10105,False,False,True,True,0,0,0,0,33,1,0
10106,False,False,True,True,2,217,0,0,43,1,0


In [None]:
labels = np.uint(gm.predict(x_test_u_new_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_test_u, 'Testing')

-----------------------------------
Testing Accuracy score: 0.9531064503363672
Testing Precision score: 0.9583726415094339
Testing Recall score: 0.9853297769156159
Testing F1 score: 0.9716642754662841
-----------------------------------



### Train Model on Balanced Data

In [None]:
from sklearn.mixture import GaussianMixture

gm = GaussianMixture(n_components=2, random_state=0).fit(x_train_b_scaled)
gm.means_

array([[-2.77744956e-03, -5.71528344e-04,  4.41013628e-05,
         6.33606808e-04, -1.18661748e-02, -1.70606769e-03,
        -1.80791904e-02, -1.18010791e-04, -1.65283404e-03],
       [ 5.99049580e+00,  1.23269138e+00, -9.51192892e-02,
        -1.36658428e+00,  2.55933614e+01,  3.67970366e+00,
         3.89938004e+01,  2.54529608e-01,  3.56488756e+00]])

### Evaluate on Training Data

In [None]:
labels = gm.predict(x_train_b_scaled)
labels = labels.reshape(-1, 1)
cols = train_b.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((train_b, labels), axis=1), columns=cols)
check

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,True,True,True,122,109,0,1085,812,0,0
1,False,False,True,True,0,0,0,0,3,1,0
2,False,False,True,False,84,175,1,1561,3878,0,0
3,False,False,True,True,0,18,0,4,5,1,0
4,False,False,True,True,0,41,0,10,24,1,0
...,...,...,...,...,...,...,...,...,...,...,...
12942,False,False,True,True,0,0,0,0,27,1,0
12943,False,True,True,False,203,177,10,9129,14144,0,0
12944,False,False,True,True,153,166,0,27,13,1,0
12945,False,True,True,True,2085,1865,65,32121,40349,0,0


In [None]:
labels = np.uint(gm.predict(x_train_b_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_train_b, 'Training')

-----------------------------------
Training Accuracy score: 0.49787595581988103
Training Precision score: 1.0
Training Recall score: 0.49764314967931383
Training F1 score: 0.6645683917238533
-----------------------------------



In [None]:
check['Cluster ID'].value_counts()

0    12941
1        6
Name: Cluster ID, dtype: int64

In [None]:
check[check['Cluster ID'] == 1]

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
2539,True,False,True,False,2830107,635,31524,64348,33591,0,1
6175,True,True,True,False,1591499,350,26736,2114,202745,0,1
8093,True,True,False,False,6639843,5797,25439,6645,220668,0,1
8537,True,True,True,False,4193906,289,20713,162,10146,0,1
9403,True,True,True,False,50865898,8320,67930,376,11335,0,1
12130,True,True,True,False,4119086,93030,16676,95,62492,0,1


### Evaluate on Testing Data

In [None]:
labels = gm.predict(x_test_b_scaled)
labels = labels.reshape(-1, 1)
cols = test_b.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((test_b, labels), axis=1), columns=cols)
check

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,False,True,True,1,0,0,0,93,1,0
1,False,False,True,True,0,0,0,0,220,1,0
2,False,False,True,True,3,789,0,137,269,1,0
3,False,False,True,True,0,0,0,0,47,1,0
4,False,False,True,True,0,0,0,0,83,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3232,False,True,True,False,13145,872,1019,8607,149818,0,0
3233,False,False,True,True,42,56,0,3280,1629,0,0
3234,False,True,False,False,687,605,30,3166,3776,0,0
3235,False,True,True,False,3114,4684,138,7809,8129,0,0


In [None]:
labels = np.uint(gm.predict(x_test_b_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_test_b, 'Testing')

-----------------------------------
Testing Accuracy score: 0.5103490886623416
Testing Precision score: 1.0
Testing Recall score: 0.5103490886623416
Testing F1 score: 0.6758028226631213
-----------------------------------



Again appears to be struggling with a single outlier. Try removing

In [None]:
train_b_new = train_b.drop(train_b.index[3798])
x_train_b_new = train_b_new.to_numpy()[:,:-1].astype(int)
x_train_b_new_scaled = scaler.fit_transform(x_train_b_new)
y_train_b_new = train_b_new.to_numpy()[:,-1].astype(int)

x_test_b_new_scaled = scaler.transform(x_test_b)

In [None]:
gm = GaussianMixture(n_components=2, random_state=0).fit(x_train_b_new_scaled)
gm.means_

array([[-0.16644822, -0.16312698,  0.05457096,  0.18143088, -0.01952676,
        -0.10547728, -0.05637636, -0.21563272, -0.23475893],
       [ 0.86421617,  0.84697198, -0.28333801, -0.94200767,  0.10138493,
         0.54764881,  0.29271185,  1.11958717,  1.21889236]])

### Evaluate on Training Data

In [None]:
labels = gm.predict(x_train_b_new_scaled)
labels = labels.reshape(-1, 1)
cols = train_b_new.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((train_b_new, labels), axis=1), columns=cols)
check

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,False,True,True,0,39,0,35,45,1,0
1,False,True,False,False,231,274,0,15243,13250,0,0
2,False,False,True,True,2,0,0,0,148,1,0
3,False,True,True,False,371,347,10,36,7267,0,0
4,False,True,True,True,43,51,0,4,166,0,0
...,...,...,...,...,...,...,...,...,...,...,...
12941,False,False,True,True,0,0,0,0,1,1,0
12942,False,False,True,True,50,52,7,70,128,0,0
12943,False,True,True,False,40,222,1,335,685,0,0
12944,False,False,True,True,0,0,0,0,18,1,0


In [None]:
labels = np.uint(gm.predict(x_train_b_new_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_train_b_new, 'Training')

-----------------------------------
Training Accuracy score: 0.6599721921829136
Training Precision score: 0.9990713511840272
Training Recall score: 0.5948760482904801
Training F1 score: 0.7457255083179298
-----------------------------------



### Evaluate on Testing Data

In [None]:
labels = gm.predict(x_test_b_new_scaled)
labels = labels.reshape(-1, 1)
cols = test_b.columns.tolist()
cols.append('Cluster ID')
check = pd.DataFrame(np.concatenate((test_b, labels), axis=1), columns=cols)
check

Unnamed: 0,verified,geo_enabled,profile_use_background_image,default_profile,followers_count,friends_count,listed_count,favourites_count,statuses_count,bot,Cluster ID
0,False,True,True,False,2722,2693,135,101,7599,0,1
1,False,False,False,False,33,109,1,126,175,0,0
2,False,False,True,True,3972,3816,66,26765,11974,0,1
3,False,False,True,True,0,177,0,0,82,1,0
4,False,False,True,True,0,0,2,0,17,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3232,False,True,True,False,358,264,3,6452,6357,0,0
3233,False,False,True,False,2069,1883,58,7827,10921,0,1
3234,False,True,False,False,185,191,3,8986,22954,0,0
3235,False,False,True,True,158,154,0,75,85,1,0


In [None]:
labels = np.uint(gm.predict(x_test_b_new_scaled))
# flip the labels since the random assignment got them backwards
labels_flipped = np.logical_not(labels).astype(int)
print_analysis(labels_flipped, y_test_b, 'Testing')

-----------------------------------
Testing Accuracy score: 0.6570898980537535
Testing Precision score: 0.9993868792152054
Testing Recall score: 0.5951077035414385
Testing F1 score: 0.7459954233409611
-----------------------------------

