# Training and Saving Models

- Logistic regression
- Decison Tree
- Support Vector Machine

In [8]:
# imports

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

import pickle

%matplotlib inline

In [3]:
# Load train data

X_train = pd.read_csv('preprocessed_data/X_train.csv')
y_train = pd.read_csv('preprocessed_data/y_train.csv')

### Feature Scaling

In [6]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_train = pd.DataFrame(X_train_scaled)

# 1.Models

## 1.1 k-Nearest Neighbours (KNN)

In [9]:
# search for optimun parameters using gridsearch
params= {'n_neighbors': np.arange(1, 10)}
grid_search = GridSearchCV(estimator = KNeighborsClassifier(), param_grid = params, 
                           scoring = 'accuracy', cv = 10, n_jobs = -1)
knn_clf = GridSearchCV(KNeighborsClassifier(),params,cv=3, n_jobs=-1)

In [10]:
# train the model
knn_clf.fit(X_train,y_train)
knn_clf.best_params_ 

{'n_neighbors': 1}

In [11]:
# Save KNN model

filename = 'models/knn_model.sav'
pickle.dump(knn_clf, open(filename, 'wb'))

## 1.2 Decision Trees

In [12]:

dtree= DecisionTreeClassifier(random_state=7)

In [13]:
# grid search for optimum parameters
params = {'max_features': ['auto', 'sqrt', 'log2'],
          'min_samples_split': [2,3,4,5,6,7,8,9,10,11,12,13,14,15], 
          'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11]}
tree_clf = GridSearchCV(dtree, param_grid=params, n_jobs=-1)

In [14]:
# train the model
tree_clf.fit(X_train,y_train)
tree_clf.best_params_ 

{'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2}

In [15]:
# Save Desicion tree Model

filename = 'models/tree_model.sav'
pickle.dump(tree_clf, open(filename, 'wb'))

## 1.3 Support Vector Machine

In [16]:
#grid search for optimum parameters
Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]
param_grid = {'C': Cs, 'gamma' : gammas}
svm_clf = GridSearchCV(SVC(kernel='rbf', probability=True), param_grid, cv=10)

In [17]:
# train the model
svm_clf.fit(X_train,y_train)
svm_clf.best_params_ 

{'C': 10, 'gamma': 1}

In [18]:
# Save svm Model

filename = 'models/svm_model.sav'
pickle.dump(svm_clf, open(filename, 'wb'))