### ENES UÇAR - 21040101029
### Bilgisayar Mühendisliği
### Github: https://github.com/enesucar/MachineLearningFinalProject

#### Imports

In [91]:
import pandas as pd
from sklearn import preprocessing, svm, metrics
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import matplotlib.pyplot as plt

#### Dataset

In [92]:
headerList = [
  'pregnant', 
  'oral_glucose_tolerance_test', 
  'diastolic_blood_pressure', 
  'triceps_skinfold_thickness', 
  'serum_insulin', 
  'body_mass_index', 
  'diabetes_pedigree_function', 
  'age', 
  'class_variable']

df = pd.read_csv("data-set.txt",  sep='	', names=headerList)

### Normalize

In [93]:
min_max_scaler = preprocessing.MinMaxScaler()
normalized_features = min_max_scaler.fit_transform(df)
df = pd.DataFrame(normalized_features, index=df.index, columns=df.columns)

### X and Y Values

In [94]:
x = df[[
  'pregnant', 
  'oral_glucose_tolerance_test', 
  'diastolic_blood_pressure', 
  'triceps_skinfold_thickness', 
  'serum_insulin', 
  'body_mass_index', 
  'diabetes_pedigree_function', 
  'age']
  ].values

y = df['class_variable'].values

### Metrics

In [95]:
def get_accuracy(y_test, y_pred):
    return accuracy_score(y_test, y_pred)

def get_f1(y_test, y_pred):
    return f1_score(y_test, y_pred, average = "weighted")

def get_precision(y_test, y_pred):
    return precision_score(y_test, y_pred, average = "weighted")

def get_recall(y_test, y_pred):
    return recall_score(y_test, y_pred, average = "weighted")

def get_confusion_matrix(y_test, y_pred):
    return metrics.confusion_matrix(y_test, y_pred)

def get_metrics(y_test, y_pred):
  return { 
      "accuarcy": get_accuracy(y_test, y_pred),
      "f1": get_f1(y_test, y_pred),
      "precision": get_precision(y_test, y_pred),
      "recall": get_recall(y_test, y_pred),
      "confusion_matrix": get_confusion_matrix(y_test, y_pred)
  }

### Naive Bayes

In [96]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, stratify = y)
gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

reports = get_metrics(y_test, y_pred)
reports

{'accuarcy': 0.7272727272727273,
 'f1': 0.7260499634869146,
 'precision': 0.7250788427259015,
 'recall': 0.7272727272727273,
 'confusion_matrix': array([[120,  30],
        [ 33,  48]], dtype=int64)}

### KNN

In [97]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, stratify = y)
knn = KNeighborsClassifier()
k_values = list(range(1, 10))
param_grid = {'n_neighbors': k_values}
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train, y_train)
best_k = grid_search.best_params_['n_neighbors']
y_pred = grid_search.predict(x_test)

reports = get_metrics(y_test, y_pred)
reports

{'accuarcy': 0.7489177489177489,
 'f1': 0.7415163300372248,
 'precision': 0.7419785034242865,
 'recall': 0.7489177489177489,
 'confusion_matrix': array([[129,  21],
        [ 37,  44]], dtype=int64)}

### MLPClassifier

In [98]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, stratify = y)
clf = MLPClassifier(hidden_layer_sizes=(8),activation="logistic",random_state=1,learning_rate_init=0.01, max_iter=500)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

reports = get_metrics(y_test, y_pred)
reports

{'accuarcy': 0.8051948051948052,
 'f1': 0.803016884345034,
 'precision': 0.8024326336428248,
 'recall': 0.8051948051948052,
 'confusion_matrix': array([[131,  19],
        [ 26,  55]], dtype=int64)}

### Support Vector Machines 

In [99]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, stratify = y)
clf = svm.SVC()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

reports = get_metrics(y_test, y_pred)
reports

{'accuarcy': 0.7359307359307359,
 'f1': 0.7249037831658631,
 'precision': 0.7274246221614643,
 'recall': 0.7359307359307359,
 'confusion_matrix': array([[130,  20],
        [ 41,  40]], dtype=int64)}