<div class="alert alert-block alert-info">
Visit the <a href="https://github.com/engs1258/biomedical-image-analysis-notebooks/wiki">wiki</a> pages to find some additional documentation and instructions on how view an interactive verson of these notebooks using binder.</div>

---

In [1]:
import math
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt


from sklearn.utils import shuffle
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

import os
import csv
import random

%matplotlib inline

### Step 1: Load the csv files that contain the generated features

In [2]:
features = []
labels = []

with open('./papsmear-features-normal.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:len(row)], dtype=np.float32)
        f = [filename, class_label, v]
        features.append(f)
        labels.append(class_label)

In [3]:
with open('./papsmear-features-displastic.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:len(row)], dtype=np.float32)
        f = [filename, class_label, v]
        features.append(f)
        labels.append(class_label)

### Step 2: Split the dataa into training and testing

In [4]:
feature_length = len(features[0][2])
features, labels = shuffle(features, labels, random_state=0)

In [5]:
N_train = 200
features_train = features[0:N_train]
features_test = features[N_train:len(features)]

labels_train = labels[0:N_train]
labels_test = labels[N_train:len(features)]

In [6]:
data_train = np.zeros((len(features_train), feature_length))
data_test  = np.zeros((len(features_test), feature_length))

In [7]:
for i in range(0, len(features_train)):
    data_train[i, :] = features_train[i][2]
    
for i in range(0, len(features_test)):
    data_test[i, :] = features_test[i][2]

### Step 3: Set up and train the classifier 

In [8]:
clf = GaussianMixture(n_components=2, covariance_type='full', 
                      tol=0.001, reg_covar=1e-06, 
                      max_iter=100, n_init=1, 
                      init_params='kmeans', 
                      weights_init=None, means_init=None, 
                      precisions_init=None, random_state=None, 
                      warm_start=False, verbose=0, 
                      verbose_interval=10)

In [9]:
clf.fit(data_train, np.asarray(labels_train))

GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
                means_init=None, n_components=2, n_init=1, precisions_init=None,
                random_state=None, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [10]:
prediction = clf.predict(data_test)

In [11]:
print(prediction)

[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0
 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0]


In [12]:
print(np.array(labels_test, dtype=np.int))

[0 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 0 1]


In [13]:
print(classification_report(np.array(labels_test, dtype=np.int), prediction))

              precision    recall  f1-score   support

           0       0.28      1.00      0.44        17
           1       1.00      0.20      0.34        54

    accuracy                           0.39        71
   macro avg       0.64      0.60      0.39        71
weighted avg       0.83      0.39      0.36        71



In [14]:
print(clf.means_)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  4.47339517e-04 2.32957183e-02 1.95819141e-01 2.62816611e-01
  2.98331316e-01 1.73104287e-01 2.85436570e-02 1.76419304e-02
  1.91579380e+02 8.78789394e+00 1.63707329e-01 1.02947163e-03
  3.00724549e-02 8.81049601e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  5.45597234e-42 1.92254262e-02 2.59227568e-01 1.72663854e-01
  1.54146533e-01 1.68006024e-01 9.68542342e-02 1.29876364e-01
  7.56932893e+02 1.84162966e+01 1.07226077e-01 3.25062733e-03
  3.62447689e-02 8.20119651e-01]]


In [15]:
print(clf.covariances_)

[[[ 1.00000000e-06  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  1.00000000e-06  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00]
  [ 0.00000000e+00  0.00000000e+00  1.00000000e-06  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.000

In [16]:
print(clf.weights_)

[0.85876718 0.14123282]
