In [None]:
import numpy as np                                                                                                 
import matplotlib.pyplot as plt                                                                                    
import pandas as pd

import bc_utils as butils

from scipy.spatial import distance

from sklearn.model_selection import train_test_split                                                               
from sklearn.preprocessing import StandardScaler                                                                   
from sklearn.neighbors import KNeighborsClassifier 

In [None]:
# Load the iris dataset from the local CSV file into a pandas DataFrame.
# Later cells filter this frame on its 'species' column.
df = pd.read_csv("iris.csv")
# Bare expression: the notebook renders the frame as this cell's output.
df

In [None]:
# Build the per-species splits and the combined test set for iris.
names = ['setosa', 'versicolor', 'virginica']

# One butils.TestDf per species; later cells read its `.train_df` and
# `.test_df` attributes.
dataframes = {}
for name in names:
    tmp_df = df[df['species'] == name]
    dataframes[name] = butils.TestDf(tmp_df)

# DataFrame.append was removed in pandas 2.0. pd.concat stacks the
# per-species test frames in a single pass and, like the old append calls,
# keeps each row's original index label.
full_test_df = pd.concat([dataframes[name].test_df for name in names])

# classifyPoints reads the label from the LAST column; name that column
# 'cluster' so it matches the label column added to the medoid frames.
# The rename is positional, so every other column name is left untouched.
full_test_df.columns = [*full_test_df.columns[:-1], 'cluster']

In [None]:
# Run the full delta-medoids algorithm on each species' training split.
results = {}
for name in names:
    # Every column except the last one is handed to the algorithm.
    delta_df = dataframes[name].train_df.iloc[:, :-1]

    # Delta is estimated from the same rows, using cosine distance, and then
    # passed on together with the same distance function.
    estimated_delta = butils.estimate_delta(delta_df, distance.cosine)
    result = butils.delta_medoids_full(delta_df, estimated_delta, distance.cosine)

    # Tag the output with the species it was computed from.
    result['cluster'] = name
    results[name] = result

print(results)

In [None]:
# Run the one-shot delta-medoids algorithm on each species' training split.
results2 = {}
for name in names:
    # Every column except the last one is handed to the algorithm.
    delta_df = dataframes[name].train_df.iloc[:, :-1]

    # Delta is estimated from the same rows, using cosine distance, and then
    # passed on together with the same distance function.
    estimated_delta = butils.estimate_delta(delta_df, distance.cosine)
    result = butils.delta_medoids_one_shot(delta_df, estimated_delta, distance.cosine)

    # Tag the output with the species it was computed from.
    result['cluster'] = name
    results2[name] = result

print(results2)

In [None]:
# Stack the per-species results into one reference (training) frame per
# algorithm so the full and one-shot delta-medoids variants can be compared.
# DataFrame.append was removed in pandas 2.0 (and returned a fresh copy on
# every loop iteration); pd.concat builds each frame in a single pass while
# keeping the same row order and index labels.
train_delta_medoids_full = pd.concat([results[name] for name in names])
train_delta_medoids_one_shot = pd.concat([results2[name] for name in names])


In [None]:
def classifyPoints(ref_df, test_df, n_neighbors=5):
    """Classify ``test_df`` rows with k-NN against ``ref_df`` and print metrics.

    Both frames are read positionally: every column except the last is a
    feature, and the last column is the class label.

    Features are standardised with a ``StandardScaler`` fitted on the
    reference rows only; the same transform is then applied to the test rows.

    Parameters
    ----------
    ref_df : pandas.DataFrame
        Reference (training) rows.
    test_df : pandas.DataFrame
        Rows to classify; the last column holds the true labels used for the
        printed metrics.
    n_neighbors : int, default 5
        Number of neighbours for ``KNeighborsClassifier``. If the reference
        set has fewer rows than this, the value is reduced to the number of
        reference rows and a ``RuntimeWarning`` is emitted.

    Returns
    -------
    None
        The confusion matrix and the classification report are printed.
    """
    # sklearn is imported lazily inside the function (as the original cell
    # did), so the helper is self-contained.
    import warnings

    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler

    X_train = ref_df.iloc[:, :-1].values
    y_train = ref_df.iloc[:, -1].values
    X_test = test_df.iloc[:, :-1].values
    y_test = test_df.iloc[:, -1].values

    # Fit the scaler on the reference rows only, then reuse it for both sets.
    scaler = StandardScaler()
    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # k-NN cannot query more neighbours than there are reference rows
    # (scikit-learn raises at predict time). A small medoid set can easily
    # have fewer than the default 5 rows, so cap k instead of crashing.
    k = min(n_neighbors, len(X_train))
    if k < n_neighbors:
        warnings.warn(
            "n_neighbors=%d exceeds the %d reference rows; using n_neighbors=%d"
            % (n_neighbors, len(X_train), k),
            RuntimeWarning,
            stacklevel=2,
        )

    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

In [None]:
# Evaluate k-NN with the delta_medoids_full results as the reference set,
# scored against the combined per-species test rows.
classifyPoints(train_delta_medoids_full, full_test_df)

In [None]:
# Evaluate k-NN with the delta_medoids_one_shot results as the reference set,
# scored against the same combined test rows for a like-for-like comparison.
classifyPoints(train_delta_medoids_one_shot, full_test_df)