In [97]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [3]:
df=pd.read_csv("KNN_Project_Data")

In [4]:
df.head()

Unnamed: 0,XVPM,GWYH,TRAT,TLLZ,IGGA,HYKR,EDFS,GUUB,MGJM,JHZC,TARGET CLASS
0,1636.670614,817.988525,2565.995189,358.347163,550.417491,1618.870897,2147.641254,330.727893,1494.878631,845.136088,0
1,1013.40276,577.587332,2644.141273,280.428203,1161.873391,2084.107872,853.404981,447.157619,1193.032521,861.081809,1
2,1300.035501,820.518697,2025.854469,525.562292,922.206261,2552.355407,818.676686,845.491492,1968.367513,1647.186291,1
3,1059.347542,1066.866418,612.000041,480.827789,419.467495,685.666983,852.86781,341.664784,1154.391368,1450.935357,0
4,1018.340526,1313.679056,950.622661,724.742174,843.065903,1370.554164,905.469453,658.118202,539.45935,1899.850792,0


### Converting dataset to scalar dataset

In [8]:
standard=StandardScaler()

In [9]:
standard.fit(df.drop("TARGET CLASS",axis=1))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [11]:
df_scalar=standard.transform(df.drop("TARGET CLASS",axis=1))

In [12]:
df_new=pd.DataFrame(data=df_scalar,columns=df.columns[0:-1])

In [14]:
X=df_new

In [15]:
y=df["TARGET CLASS"]

### Spliting the dataset into training and testing sets

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

### First lets create a decision tree model :

In [18]:
from sklearn.tree import DecisionTreeClassifier


In [19]:
dtm=DecisionTreeClassifier()

In [20]:
dtm.fit(X_train,y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [21]:
y_pred=dtm.predict(X_test)

### Model Evaluation

In [30]:
print(confusion_matrix(y_test,y_pred))
print("------------------------------")
print(classification_report(y_test,y_pred))

[[105  47]
 [ 48 100]]
------------------------------
              precision    recall  f1-score   support

           0       0.69      0.69      0.69       152
           1       0.68      0.68      0.68       148

   micro avg       0.68      0.68      0.68       300
   macro avg       0.68      0.68      0.68       300
weighted avg       0.68      0.68      0.68       300



### Lets Try to improve this Using Random FOrest Model 

In [82]:
from sklearn.ensemble import RandomForestClassifier

In [83]:
rnc=RandomForestClassifier(n_estimators=10)

In [84]:
rnc.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [85]:
y_pred_rnc=rnc.predict(X_test)

### Evaluation

In [86]:
print(confusion_matrix(y_test,y_pred_rnc))
print("------------------------------")
print(classification_report(y_test,y_pred_rnc))

[[123  29]
 [ 42 106]]
------------------------------
              precision    recall  f1-score   support

           0       0.75      0.81      0.78       152
           1       0.79      0.72      0.75       148

   micro avg       0.76      0.76      0.76       300
   macro avg       0.77      0.76      0.76       300
weighted avg       0.77      0.76      0.76       300



## AS we can see we have significantly increased the precision score from 0.69 to 0.83

# Function that runs the requested algirithm and returns y_pred , Confusion metrics and confusion report

In [87]:
def model(algo,X_train, X_test, y_train, y_test):
    
    model=algo.fit(X_train,y_train)
    y_pred=model.predict(X_test)
    con_matrix=confusion_matrix(y_test,y_pred)
    class_report=classification_report(y_test,y_pred)
    
    return y_pred,con_matrix,class_report
    
    

# Random Forest 

In [88]:
 y_pred_rf,con_matrix_rf,class_report_rf=model(RandomForestClassifier(n_estimators=10),X_train, X_test, y_train, y_test)

In [78]:

print("con_matrix_rf:" ,"\n",con_matrix_rf )
print("---------------------------")
print("Class_report_rf:",class_report_rf)

con_matrix_rf: 
 [[122  30]
 [ 40 108]]
---------------------------
Class_report_rf:               precision    recall  f1-score   support

           0       0.75      0.80      0.78       152
           1       0.78      0.73      0.76       148

   micro avg       0.77      0.77      0.77       300
   macro avg       0.77      0.77      0.77       300
weighted avg       0.77      0.77      0.77       300



# Desision Tree

In [63]:
 y_pred_,con_matrix,class_report=model(DecisionTreeClassifier(),X_train, X_test, y_train, y_test)

In [89]:

print("con_matrix_dt:" ,"\n",con_matrix )
print("---------------------------")
print("Class_report_dt:","\n",class_report)

con_matrix_dt: 
 [[128  24]
 [ 26 122]]
---------------------------
Class_report_dt: 
               precision    recall  f1-score   support

           0       0.83      0.84      0.84       152
           1       0.84      0.82      0.83       148

   micro avg       0.83      0.83      0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



# Logistic Regression

In [67]:
 y_pred_,con_matrix,class_report=model(LogisticRegression(),X_train, X_test, y_train, y_test)



In [99]:
print("con_matrix_lr:" ,"\n",con_matrix )
print("---------------------------")
print("Class_report_lr:","\n",class_report)

con_matrix_lr: 
 [[128  24]
 [ 26 122]]
---------------------------
Class_report_lr: 
               precision    recall  f1-score   support

           0       0.83      0.84      0.84       152
           1       0.84      0.82      0.83       148

   micro avg       0.83      0.83      0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



# SVM-Support vector calssifier

In [94]:
 y_pred_svc,con_matrix_svc,class_report_svc=model(SVC(),X_train, X_test, y_train, y_test)



In [98]:
print("con_matrix_lr:" ,"\n",con_matrix_svc )
print("---------------------------")
print("Class_report_lr:","\n",class_report_svc)

con_matrix_lr: 
 [[126  26]
 [ 25 123]]
---------------------------
Class_report_lr: 
               precision    recall  f1-score   support

           0       0.83      0.83      0.83       152
           1       0.83      0.83      0.83       148

   micro avg       0.83      0.83      0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



# Grid Search to OPTIMIZE SVM result 

In [100]:
param_grid={"C":[0.1,1,10,10,100],"gamma":[1,0.1,0.01,0.001,0.0001]}

In [103]:
grid=GridSearchCV(SVC(),param_grid,verbose=4)

In [104]:
grid.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5042735042735043, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5021459227467812, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5021459227467812, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8504273504273504, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8454935622317596, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8369098712446352, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.

[CV] ..... C=10, gamma=0.0001, score=0.8461538461538461, total=   0.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.8326180257510729, total=   0.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.8283261802575107, total=   0.0s
[CV] C=100, gamma=1 ..................................................
[CV] ......... C=100, gamma=1, score=0.7564102564102564, total=   0.0s
[CV] C=100, gamma=1 ..................................................
[CV] ......... C=100, gamma=1, score=0.7682403433476395, total=   0.0s
[CV] C=100, gamma=1 ..................................................
[CV] .......... C=100, gamma=1, score=0.759656652360515, total=   0.0s
[CV] C=100, gamma=0.1 ................................................
[CV] ....... C=100, gamma=0.1, score=0.7264957264957265, total=   0.0s
[CV] C=100, gamma=0.1 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    1.5s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 1, 10, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=4)

In [105]:
grid.best_params_

{'C': 0.1, 'gamma': 0.1}

In [106]:
grid.estimator

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

### Lets re-run the SVM model using best parameters stated by Grid Searh:

In [112]:
y_pred_svc,con_matrix_svg,class_report_svg=model(GridSearchCV(SVC(C=0.1,gamma=0.1),param_grid,verbose=4,cv=5),X_train, X_test, y_train, y_test)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5035460992907801, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5035460992907801, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ........................ C=0.1, gamma=1, score=0.5, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5035971223021583, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.5035971223021583, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8297872340425532, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s


[CV] ....... C=0.1, gamma=0.1, score=0.8345323741007195, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8273381294964028, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8368794326241135, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8581560283687943, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8357142857142857, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8201438848920863, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8273381294964028, total=   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] .

[CV] ..... C=10, gamma=0.0001, score=0.8214285714285714, total=   0.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.8201438848920863, total=   0.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.8201438848920863, total=   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......... C=10, gamma=1, score=0.7659574468085106, total=   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......... C=10, gamma=1, score=0.7801418439716312, total=   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] ......................... C=10, gamma=1, score=0.8, total=   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......... C=10, gamma=1, score=0.7913669064748201, total=   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .

[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed:    2.9s finished


In [113]:
print("con_matrix_lr:" ,"\n",con_matrix_svg )
print("---------------------------")
print("Class_report_lr:","\n",class_report_svg)

con_matrix_lr: 
 [[127  25]
 [ 29 119]]
---------------------------
Class_report_lr: 
               precision    recall  f1-score   support

           0       0.81      0.84      0.82       152
           1       0.83      0.80      0.82       148

   micro avg       0.82      0.82      0.82       300
   macro avg       0.82      0.82      0.82       300
weighted avg       0.82      0.82      0.82       300

