# Parkinson 

##### imports 

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix
from matplotlib import patches
from matplotlib.patches import Circle, Wedge, Polygon, Ellipse
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# EDA

In [None]:
# Load the dataset from the CSV file (path is relative to the working directory).
PD = pd.read_csv(filepath_or_buffer='Parkinsson disease.csv')

In [None]:
# Rename the label column from "status" to "parkinson" - more intuitive.
PD = PD.rename(columns={'status': 'parkinson'})

In [None]:
# The name column has no classification use, so remove it.
PD = PD.drop(columns='name')

In [None]:
# Move the label to the last column: pop() removes it from the frame and the
# assignment appends it back at the end.
label_column = PD.pop('parkinson')
PD['parkinson'] = label_column

In [None]:
# Remove the NHR and MDVP:Fhi(Hz) feature columns.
PD = PD.drop(columns=['NHR', 'MDVP:Fhi(Hz)'])

In [None]:
# Overwrite MDVP:Fo(Hz) with 219 on rows where it is below 150 and the label
# is below 0.2. The write goes through a single .loc call: chained indexing
# (frame[col][mask] = value) may assign into a temporary copy, which raises
# SettingWithCopyWarning and leaves the frame untouched under Copy-on-Write.
overwrite_mask = (PD["MDVP:Fo(Hz)"] < 150) & (PD["parkinson"] < 0.2)
PD.loc[overwrite_mask, "MDVP:Fo(Hz)"] = 219

# SPLITTING THE DATA

In [None]:
# Separate the label (Y) from the features (X): Y is the 'parkinson' column,
# X is every remaining column.
Y = PD['parkinson']
X = PD.drop(columns='parkinson')

In [None]:
# Train/test split: 23% of the rows are held out for testing; the fixed
# random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.23,
    random_state=420,
)

In [None]:
# Scale the features with the MinMax scaler.
scaler = MinMaxScaler()
scaler.fit(X_train)  # Learn the scaling from the training data only
X_train = scaler.transform(X_train)  # Scale the X_train data
X_test = scaler.transform(X_test)  # Scale the X_test data with the same fitted scaler

In [None]:
# Show the type of each split to check which data might be susceptible to incorrect indexing.
list(map(type, (X_train, Y_train, X_test, Y_test)))

In [None]:
# Fix indexing accordingly: replace the index of both label sets with a fresh
# default one, discarding the old index values.
Y_train = Y_train.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)

# KNN

In [None]:
# Baseline KNN: 3 neighbours, Minkowski metric with p=2.
knn_classifier = KNeighborsClassifier(n_neighbors=3, metric='minkowski', p=2)
knn_classifier.fit(X_train, Y_train)
print(knn_classifier.score(X_test, Y_test))  # Score on the held-out test split
y_pred = knn_classifier.predict(X_test)
cm = confusion_matrix(Y_test, y_pred)
# heatmap() returns the Axes it drew on, so the figure is rendered with
# plt.show() rather than passing that return value to print().
sns.heatmap(cm, annot=True, fmt="d")
plt.show()

### Applying __K-Fold__ cross-validation to the _data_

In [None]:
from sklearn.model_selection import cross_val_score
# K-Fold cross-validation (cv=10) of the current classifier on the training data.
accuracies = cross_val_score(knn_classifier, X_train, Y_train, cv=10)

# Report the mean and spread of the per-fold scores as percentages.
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))
accuracies

#### Finding the best parameters with __Grid Search__

In [None]:
# Grid search over the KNN hyper-parameters, scored by accuracy with cv=10.
# `p` is the power parameter of the Minkowski metric only, so it is searched
# only for 'minkowski'; crossing it with 'manhattan' and 'cosine' would refit
# the same model once per `p` value without changing the score.
# The n_neighbors order is kept as-is because candidate order decides ties.
n_neighbors_options = [2, 3, 5, 7, 1, 9, 11, 13]
parameters = [
    {
        'metric': ['minkowski'],
        'n_neighbors': n_neighbors_options,
        'p': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    },
    {
        'metric': ['manhattan', 'cosine'],
        'n_neighbors': n_neighbors_options,
    },
]
grid_search = GridSearchCV(estimator=knn_classifier,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10,
                           n_jobs=-1)  # Use all available cores
grid_search.fit(X_train, Y_train)
best_accuracy = grid_search.best_score_  # Mean cross-validated accuracy of the best candidate
best_parameters = grid_search.best_params_  # Parameter combination that achieved it
print("Best Accuracy: {:.2f} %".format(best_accuracy * 100))
print("Best Parameters:", best_parameters)

### KNN after improvements

In [None]:
# KNN with 7 neighbours and the Minkowski metric with p=1.
knn_classifier = KNeighborsClassifier(n_neighbors=7, metric='minkowski', p=1)
knn_classifier.fit(X_train, Y_train)
knn_score = knn_classifier.score(X_test, Y_test)  # Score on the held-out test split
print(knn_score)
Y_pred = knn_classifier.predict(X_test)
print(Y_pred)

# Bundle the fitted model, its test score and the fitted scaler together.
knn_dict = {'model': knn_classifier, 'score': knn_score, "scaler": scaler}

# SAVING TO MongoDB

In [None]:
def save_model_to_db(dict, client, db, dbconnection, model_name):
    """Pickle a model and its scaler and insert them into a MongoDB collection.

    Args:
        dict: Mapping with the keys "model", "scaler" and "score". The model
            and scaler are pickled; the score is stored as given. The mapping
            is only read, never modified, so it can be saved again later.
            (The parameter name shadows the builtin; it is kept so existing
            callers that pass it by keyword continue to work.)
        client: Connection argument passed to ``pymongo.MongoClient``.
        db: Name of the database to write to.
        dbconnection: Name of the collection to write to.
        model_name: Stored in the document's 'name' field and also used as
            the key under which the pickled model is stored.

    Returns:
        A dict with 'inserted_id' (id of the new document), 'model_name' and
        'created_time' (the same timestamp that was stored in the document).

    Note:
        The stored bytes are pickles; only unpickle them when the database
        contents are trusted.
    """
    import pickle
    import time
    import pymongo

    # Pickle into locals only; writing the bytes back into the caller's
    # mapping would replace the live model/scaler objects with bytes.
    pickled_model = pickle.dumps(dict["model"])
    pickled_scaler = pickle.dumps(dict["scaler"])

    # One timestamp, so the stored document and the returned details agree.
    created_time = time.time()

    document = {
        model_name: pickled_model,
        'name': model_name,
        'created_time': created_time,
        "score": dict["score"],
        "scaler": pickled_scaler,
    }

    # Open the connection, insert into <db>.<dbconnection>, and always
    # close the client again, even when the insert fails.
    myclient = pymongo.MongoClient(client)
    try:
        info = myclient[db][dbconnection].insert_one(document)
    finally:
        myclient.close()
    print(info.inserted_id, ' saved with this id successfully!')

    details = {
        'inserted_id': info.inserted_id,
        'model_name': model_name,
        'created_time': created_time,
    }
    return details

In [None]:
# Save the KNN model bundle to MongoDB.
save_model_to_db(
    knn_dict,
    'mongodb://localhost:27017',
    'Parkinson_Prediction',
    'knn_model',
    'first_model',
)