In [1]:
import math
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt


from sklearn.utils import shuffle
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

import os
import csv
import random

%matplotlib inline

### Step 1: Load the csv files that contain the generated features

In [2]:
features = []
labels = []

with open('./papsmear-features-normal.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:len(row)], dtype=np.float32)
        f = [filename, class_label, v]
        features.append(f)
        labels.append(class_label)

In [6]:
with open('./papsmear-features-displastic.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:len(row)], dtype=np.float32)
        f = [filename, class_label, v]
        features.append(f)
        labels.append(class_label)

### Step 2: Split the dataa into training and testing

In [7]:
feature_length = len(features[0][2])
features, labels = shuffle(features, labels, random_state=0)

In [8]:
N_train = 200
features_train = features[0:N_train]
features_test = features[N_train:len(features)]

labels_train = labels[0:N_train]
labels_test = labels[N_train:len(features)]

In [9]:
data_train = np.zeros((len(features_train), feature_length))
data_test  = np.zeros((len(features_test), feature_length))

In [10]:
for i in range(0, len(features_train)):
    data_train[i, :] = features_train[i][2]
    
for i in range(0, len(features_test)):
    data_test[i, :] = features_test[i][2]

### Step 3: Set up and train the classifier 

In [11]:
clf = GaussianMixture(n_components=2, covariance_type='full', 
                      tol=0.001, reg_covar=1e-06, 
                      max_iter=100, n_init=1, 
                      init_params='kmeans', 
                      weights_init=None, means_init=None, 
                      precisions_init=None, random_state=None, 
                      warm_start=False, verbose=0, 
                      verbose_interval=10)

In [12]:
clf.fit(data_train, np.asarray(labels_train))

GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
                means_init=None, n_components=2, n_init=1, precisions_init=None,
                random_state=None, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [13]:
prediction = clf.predict(data_test)

In [14]:
print(prediction)

[0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0
 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
 0 1 0 1 0 1 0 0 0]


In [15]:
print(np.array(labels_test, dtype=np.int))

[1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1
 0 1 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1
 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 1 1
 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1
 0 1 1 1 1 1 1 0 1]


In [16]:
print(classification_report(np.array(labels_test, dtype=np.int), prediction))

              precision    recall  f1-score   support

           0       0.18      0.95      0.31        42
           1       0.96      0.20      0.34       226

    accuracy                           0.32       268
   macro avg       0.57      0.58      0.32       268
weighted avg       0.84      0.32      0.33       268



In [17]:
print(clf.means_)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  3.31669243e-04 3.20658002e-02 2.20198542e-01 2.54748030e-01
  2.86631995e-01 1.61130055e-01 2.89589176e-02 1.59349908e-02
  1.95822749e+02 8.98797684e+00 1.56790015e-01 8.46187641e-04
  2.79485293e-02 8.82862233e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  1.71659513e-40 1.14925367e-02 2.32130093e-01 1.87230433e-01
  1.47882864e-01 1.85259894e-01 1.09315718e-01 1.26688462e-01
  7.63922567e+02 1.83177991e+01 1.03940531e-01 3.16577115e-03
  3.45294773e-02 8.20133462e-01]]


In [18]:
print(clf.covariances_)

[[[ 1.00000000e-06  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  1.00000000e-06  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  0.00000000e+00  1.00000000e-06  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.000

In [19]:
print(clf.weights_)

[0.81481273 0.18518727]
