In [75]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.metrics import f1_score, make_scorer, accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.feature_selection import SelectFromModel
import warnings
warnings.filterwarnings('ignore')

In [76]:
data = pd.read_csv("X_train.csv")

In [77]:
data.head()

Unnamed: 0,ssc_p,hsc_p,degree_p,etest_p,mba_p,salary,gender_M,ssc_b_Others,hsc_b_Others,hsc_s_Commerce,hsc_s_Science,degree_t_Others,degree_t_Sci&Tech,workex_Yes,specialisation_Mkt&HR,status_Placed
0,0.0,2.14876,-0.727273,-0.680851,-0.385078,0.106195,1,1,1,1,0,0,1,0,1,1
1,0.816556,1.101653,1.043636,0.659574,0.515042,-0.141593,1,0,1,0,1,0,1,1,0,1
2,-0.13245,0.247934,-0.181818,0.170213,-0.505415,0.035398,1,0,0,0,0,0,0,0,0,1
3,-0.728477,-1.07438,-1.272727,-0.212766,-0.309266,-0.849558,1,0,0,0,1,0,1,0,1,0
4,1.245033,0.710744,0.663636,1.097872,-0.78219,0.654867,1,0,0,1,0,0,0,0,0,1


In [78]:
x = data.iloc[:,[0,1,2,3,4,6,7,8,9,10,11,12,13,14]]
y = data.iloc[:,[15]]

In [79]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

## Logistic Regression

In [80]:
model = LogisticRegression()
features = SelectFromModel(model)
features.fit(x_train.values, y_train.values)
selected_feat = x_train.columns[(features.get_support())]
xFeatTrain = x_train[selected_feat].values
xFeatTest = x_test[selected_feat].values
model.fit(xFeatTrain, y_train.values)
pred = model.predict(xFeatTest)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

Accuracy on testing data:	 0.697674
F-score on testing data:	 0.779661


## Gaussian NB

In [81]:
naive = GaussianNB()
scorer = make_scorer(f1_score)
param = {'var_smoothing' : [1e-9, 5e-9, 1e-8, 5e-8, 1e-7]}
model = GridSearchCV(naive, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

{'var_smoothing': 1e-09}
0.8739277766025486
Accuracy on testing data:	 0.744186
F-score on testing data:	 0.792453


## Decision Tree

In [82]:
tree = DecisionTreeClassifier()
scorer = make_scorer(f1_score)
param = {'criterion' : ['gini', 'entropy'], 'splitter': [ 'best', 'random'], 
                 'max_depth':[None,3,6,9,12], 'min_samples_split':[50,30,10,5, 2], 
                 'min_samples_leaf':[1,5,9,12], 'min_weight_fraction_leaf':[0, 0.1, 0.2, 0.5], 
                 'max_features':['auto', 'sqrt', 'log2'], 'max_leaf_nodes':[None, 50, 100, 10], 
                 'min_impurity_decrease':[0.0, 0.5, 1.0, 1.5]}

model = RandomizedSearchCV(tree, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

{'splitter': 'best', 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 2, 'min_samples_leaf': 9, 'min_impurity_decrease': 1.0, 'max_leaf_nodes': None, 'max_features': 'sqrt', 'max_depth': None, 'criterion': 'entropy'}
0.8259302552113773
Accuracy on testing data:	 0.627907
F-score on testing data:	 0.771429


In [83]:
features = SelectFromModel(model)
features.fit(x_train.values, y_train.values)
selected_feat = x_train.columns[(features.get_support())]
xFeatTrain = x_train[selected_feat].values
xFeatTest = x_test[selected_feat].values
model.fit(xFeatTrain, y_train.values)
pred = model.predict(xFeatTest)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

Accuracy on testing data:	 0.627907
F-score on testing data:	 0.771429


## Bagging Classifier

In [84]:
bagging = BaggingClassifier()
param = {'n_estimators':[10,100, 200, 300, 400], 'max_samples':[1, 10, 50, 100], 
          'max_features':[1,2], 'bootstrap':[True, False], 'bootstrap_features':[True, False], 
          'oob_score':[False, True], 'warm_start':[True, False], 'n_jobs':[None, -1, 1, 2, 4], 
          'verbose':[0,1]}
model = RandomizedSearchCV(bagging, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.5s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Don

*****************************************************
{'warm_start': False, 'verbose': 0, 'oob_score': True, 'n_jobs': 2, 'n_estimators': 10, 'max_samples': 100, 'max_features': 2, 'bootstrap_features': False, 'bootstrap': True}
0.8422944124961042
Accuracy on testing data:	 0.627907
F-score on testing data:	 0.771429
*****************************************************


[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.2s finished


## Ada Boost Classifier

In [85]:
ada = AdaBoostClassifier()
param = {'n_estimators':[10,100, 200, 300, 400], 'learning_rate':[1.0, 1.5, 2], 
            'algorithm':['SAMME', 'SAMME.R']}
model = RandomizedSearchCV(ada, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

*****************************************************
{'n_estimators': 200, 'learning_rate': 1.5, 'algorithm': 'SAMME'}
0.9201835451161597
Accuracy on testing data:	 0.790698
F-score on testing data:	 0.842105
*****************************************************


In [86]:
features = SelectFromModel(model)
features.fit(x_train.values, y_train.values)
selected_feat = x_train.columns[(features.get_support())]
xFeatTrain = x_train[selected_feat].values
xFeatTest = x_test[selected_feat].values
model.fit(xFeatTrain, y_train.values)
pred = model.predict(xFeatTest)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

Accuracy on testing data:	 0.767442
F-score on testing data:	 0.821429


## Random Forest Classifier

In [87]:
forest = RandomForestClassifier()
param = {'n_estimators':[10,100, 200, 300, 400], 'criterion':['gini', 'entropy'], 
               'max_depth':[None,3,6,9,12], 'min_samples_split':[50,30,10,5, 2],
               'min_samples_leaf':[1,5,9,12], 'min_weight_fraction_leaf':[0, 0.1, 0.2, 0.5],
               'max_features':['auto', 'sqrt', 'log2'], 'max_leaf_nodes':[None, 50, 100, 10], 
               'min_impurity_decrease':[0.0, 0.5, 1.0, 1.5], 'bootstrap':[True, False],
               'oob_score':[False, True], 'n_jobs':[None, -1, 1, 2, 4], 'verbose':[0,1], 
               'warm_start':[True, False], 'class_weight':['balanced', 'balanced_subsample'], 
               'ccp_alpha':[0.0, 0.1, 0.5, 1], 'max_samples':[1, 10, 50, 100]}
model = RandomizedSearchCV(forest, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0

*****************************************************
{'warm_start': False, 'verbose': 1, 'oob_score': True, 'n_jobs': 1, 'n_estimators': 200, 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 30, 'min_samples_leaf': 9, 'min_impurity_decrease': 1.5, 'max_samples': 1, 'max_leaf_nodes': 100, 'max_features': 'sqrt', 'max_depth': None, 'criterion': 'entropy', 'class_weight': 'balanced', 'ccp_alpha': 0.0, 'bootstrap': True}
0.8259302552113773
Accuracy on testing data:	 0.627907
F-score on testing data:	 0.771429
*****************************************************


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    0.0s finished


## Gradient Boosting Classifier

In [88]:
boosting = GradientBoostingClassifier()
param = {'loss':['deviance', 'exponential'], 'learning_rate':[0.1, 0.3, 0.6, 0.9], 
                 'n_estimators':[10, 100, 200, 300, 400], 'subsample':[0.2, 0.4, 0.8, 1.0], 
                 'criterion':['friedman_mse', 'mse', 'mae'], 'min_samples_split':[50,30,10,5, 2],
                 'min_samples_leaf':[1,5,9,12], 'min_weight_fraction_leaf':[0, 0.1, 0.2, 0.5], 
                 'max_depth':[None,3,6,9,12], 'min_impurity_decrease':[0.0, 0.5, 1.0, 1.5], 
                 'max_features':['auto', 'sqrt', 'log2'], 'verbose':[0,1], 'max_leaf_nodes':[None, 50, 100, 10],
                 'warm_start':[True, False], 'validation_fraction':[0.1, 0.4, 0.8, 1.2], 
                 'n_iter_no_change':[None, 1, 2, 3, 4], 'tol':[1e-4, 5e-4, 1e-5], 
                 'ccp_alpha':[0.0, 0.1, 0.5, 1]}
model = RandomizedSearchCV(boosting, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.9357          -0.0020            0.01s
         2           0.9101          -0.0013            0.01s
         3           0.9357          -0.0020            0.01s
         4           0.8820          -0.0130            0.01s
         5           0.8648          -0.0199            0.01s
         6           0.9377          -0.0034            0.01s
         7           0.8953          -0.0031            0.00s
         8           0.9236           0.0001            0.00s
         9           0.9233          -0.0000            0.00s
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.9490          -0.0126            0.01s
         2           0.9099           0.0002            0.01s
         3           0.8496          -0.0285            0.01s
         4           0.8631          -0.0113            0.01s
      Iter       Train Loss      OOB Improve   Remaining Time 
     

In [89]:
features = SelectFromModel(model)
features.fit(x_train.values, y_train.values)
selected_feat = x_train.columns[(features.get_support())]
xFeatTrain = x_train[selected_feat].values
xFeatTest = x_test[selected_feat].values
model.fit(xFeatTrain, y_train.values)
pred = model.predict(xFeatTest)
print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))

Accuracy on testing data:	 0.627907
F-score on testing data:	 0.771429


## K Neighbors Classifier

In [90]:
neighbors = KNeighborsClassifier()
param = {'n_neighbors':[4,5,8,10], 'weights':['uniform', 'distance'], 'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute'], 
           'leaf_size':[20, 30, 50], 'p':[1,2,3], 'n_jobs':[None, -1, 1, 2, 4]}
model = RandomizedSearchCV(neighbors, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

*****************************************************
{'weights': 'uniform', 'p': 1, 'n_neighbors': 10, 'n_jobs': None, 'leaf_size': 50, 'algorithm': 'kd_tree'}
0.8991622477083077
Accuracy on testing data:	 0.790698
F-score on testing data:	 0.852459
*****************************************************


## SGD Classifier

In [94]:
sgd = SGDClassifier()
param = {'loss':['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_loss', 
                    'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'], 
            'penalty':['l2', 'l1', 'elasticnet', 'none'], 'alpha':[0.0001, 0.001, 0.005], 
            'l1_ratio':[0, 0.15, 0.5, 0.7], 'fit_intercept':[True, False], 
            'max_iter':[500, 1000, 1500],'tol':[1e-4, 5e-4, 1e-5], 'epsilon':[0.1, 0.3,  0.5], 
            'n_jobs':[-1, 1, 2, 4], 
            'power_t':[0.5,0.8, 0.4], 'early_stopping':[True, False],'validation_fraction':[0.1, 0.4, 0.8], 
            'n_iter_no_change':[ 1, 2, 3, 4], 'warm_start':[True, False]}
model = RandomizedSearchCV(sgd, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

*****************************************************
{'warm_start': True, 'validation_fraction': 0.1, 'tol': 1e-05, 'power_t': 0.8, 'penalty': 'l2', 'n_jobs': 4, 'n_iter_no_change': 4, 'max_iter': 500, 'loss': 'hinge', 'l1_ratio': 0.15, 'fit_intercept': True, 'epsilon': 0.3, 'early_stopping': True, 'alpha': 0.001}
0.9103821127113052
Accuracy on testing data:	 0.790698
F-score on testing data:	 0.836364
*****************************************************


## Logistic Regression

In [97]:
logistic = LogisticRegression()
param = {'penalty':['l2', 'l1', 'elasticnet', 'none'], 'dual':[True, False], 'tol':[1e-4, 5e-4, 1e-5], 
                 'C':[0.5, 1.0, 1.5, 2.0], 'fit_intercept':[True, False], 'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 
                 'max_iter':[500, 1000, 1500], 'multi_class':['auto', 'ovr', 'multinomial'], 'verbose':[0,1], 
                 'warm_start':[True, False], 'n_jobs':[None, -1, 1, 2, 4]}
model = RandomizedSearchCV(logistic, param, scoring=scorer, cv=5)
model.fit(x_train.values, y_train.values)
print("*****************************************************")
print(model.best_params_)
print(model.best_score_)
model = model.best_estimator_
pred = model.predict(x_test.values)

print("Accuracy on testing data:\t {:f}".format(accuracy_score(y_test, pred)))
print("F-score on testing data:\t {:f}".format(f1_score(y_test, pred)))
print("*****************************************************")

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 |

[LibLinear][LibLinear][LibLinear][LibLinear][LibLinear][LibLinear]*****************************************************
{'warm_start': True, 'verbose': 1, 'tol': 0.0001, 'solver': 'liblinear', 'penalty': 'l2', 'n_jobs': 2, 'multi_class': 'auto', 'max_iter': 1000, 'fit_intercept': False, 'dual': True, 'C': 0.5}
0.9293374667320874
Accuracy on testing data:	 0.813953
F-score on testing data:	 0.857143
*****************************************************


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


## OUTCOMES

So, outcomes are as follows : 

Accuracy and f1-score values of different models are as follows:

> The highes the accuracy and f1-score better the model is.

1.   Logistic Regression (no params passed) : 
    *   Accuracy on testing data:	 0.697674
    *   F-score on testing data:	 0.779661
2.  Gaussian Naive Bayes :
    *   Accuracy on testing data:	 0.744186
    *   F-score on testing data:	 0.792453
    * Params : ```{'var_smoothing': 1e-09}```
3. Decision Tree (without feature selection) : 
    *   Accuracy on testing data:	 0.627907
    *   F-score on testing data:	 0.771429
    * Params : ```{'splitter': 'best', 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 2, 'min_samples_leaf': 9, 'min_impurity_decrease': 1.0, 'max_leaf_nodes': None, 'max_features': 'sqrt', 'max_depth': None, 'criterion': 'entropy'}```

4. Decision Tree (with feature selection) : 
    * Accuracy on testing data:	 0.627907
    * F-score on testing data:	 0.771429
5. Bagging Classifier : 
    * Accuracy on testing data:	 0.627907
    * F-score on testing data:	 0.771429
    * Params : ```{'warm_start': False, 'verbose': 0, 'oob_score': True, 'n_jobs': 2, 'n_estimators': 10, 'max_samples': 100, 'max_features': 2, 'bootstrap_features': False, 'bootstrap': True}```
6. AdaBoost (without feature selection) : 
    * Accuracy on testing data:	 0.790698
    * F-score on testing data:	 0.842105
    * Params : ```{'n_estimators': 200, 'learning_rate': 1.5, 'algorithm': 'SAMME'}```
7. AdaBoost (with feature selection) : 
    * Accuracy on testing data:	 0.767442
    * F-score on testing data:	 0.821429
8. Random Forest : 
    * Accuracy on testing data:	 0.627907
    * F-score on testing data:	 0.771429
    * Params : ```{'warm_start': False, 'verbose': 1, 'oob_score': True, 'n_jobs': 1, 'n_estimators': 200, 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 30, 'min_samples_leaf': 9, 'min_impurity_decrease': 1.5, 'max_samples': 1, 'max_leaf_nodes': 100, 'max_features': 'sqrt', 'max_depth': None, 'criterion': 'entropy', 'class_weight': 'balanced', 'ccp_alpha': 0.0, 'bootstrap': True}```
9. Gradient Boosting (without feature selection) : 
    * Accuracy on testing data:	 0.627907
    * F-score on testing data:	 0.771429
    * Params : ```{'warm_start': True, 'verbose': 0, 'validation_fraction': 0.8, 'tol': 0.0005, 'subsample': 0.4, 'n_iter_no_change': None, 'n_estimators': 400, 'min_weight_fraction_leaf': 0.2, 'min_samples_split': 10, 'min_samples_leaf': 12, 'min_impurity_decrease': 1.5, 'max_leaf_nodes': None, 'max_features': 'sqrt', 'max_depth': 12, 'loss': 'deviance', 'learning_rate': 0.6, 'criterion': 'mae', 'ccp_alpha': 0.1}```
10. Gradient Boosting (with feature selection) : 
    * Accuracy on testing data:	 0.627907
    * F-score on testing data:	 0.771429
11. K Neighbors : 
    * Accuracy on testing data:	 0.790698
    * F-score on testing data:	 0.852459
    * Params : ```{'weights': 'uniform', 'p': 1, 'n_neighbors': 10, 'n_jobs': None, 'leaf_size': 50, 'algorithm': 'kd_tree'}```
12. SGD : 
    * Accuracy on testing data:	 0.790698
    * F-score on testing data:	 0.836364
    * Params : ```{'warm_start': True, 'validation_fraction': 0.1, 'tol': 1e-05, 'power_t': 0.8, 'penalty': 'l2', 'n_jobs': 4, 'n_iter_no_change': 4, 'max_iter': 500, 'loss': 'hinge', 'l1_ratio': 0.15, 'fit_intercept': True, 'epsilon': 0.3, 'early_stopping': True, 'alpha': 0.001}```
13. Logistic Regression : 
    * Accuracy on testing data:	 0.813953
    * F-score on testing data:	 0.857143
    * Params : ```{'warm_start': True, 'verbose': 1, 'tol': 0.0001, 'solver': 'liblinear', 'penalty': 'l2', 'n_jobs': 2, 'multi_class': 'auto', 'max_iter': 1000, 'fit_intercept': False, 'dual': True, 'C': 0.5}```




