# Generating predictions for 5 models and exporting

**Methods:**
>1. Load data and create subset
>2. Generate SVM-poly model and export
>3. Generate GBM model
>4. Generate RF Model
>5. Generate Logistic regression model
>6. Generate kNN Model
>7. Load all models, concat, and export

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.svm as skl_svm
import sklearn.cross_validation as skl_cv
import seaborn as sns
import os
import sys

# Project root directory. Defaults to the original author's checkout; set the
# MNIST_BASE_PATH environment variable to run the notebook from another
# location. Other cells build file names as base_path + '/data/...', so the
# value works with or without a trailing slash.
base_path = os.environ.get('MNIST_BASE_PATH', '/home/lundi/Python/MNIST/')

# Add the project's libraries directory to the import search path
# (presumably where the MNIST_* helper modules imported further down live).
# The membership check keeps sys.path free of duplicates when this cell is
# re-run in the same kernel.
libraries_path = os.path.join(base_path, 'libraries')
if libraries_path not in sys.path:
    sys.path.append(libraries_path)

import time

import sklearn.linear_model as skl_lm
import sklearn.ensemble as skl_ensemble
from sklearn.grid_search import GridSearchCV

import MNIST_data_processor as mdp
import MNIST_model_functions as mmf

# Create one instance of each project helper class for use by the cells below.
# NOTE: each instance is bound to the same name as its class; the classes
# themselves remain reachable through the module aliases
# (mdp.MNIST_data_processor, mmf.MNIST_model_functions).
MNIST_data_processor = mdp.MNIST_data_processor()
MNIST_model_functions = mmf.MNIST_model_functions()

## 1. Load data and create subset

In [3]:
# Load the data subset as the pair (X, y); both are passed unchanged to every
# cross_val_predict_proba call in the model cells.
# NOTE(review): the effect of train_test=False is defined in the project's
# MNIST_data_processor module -- presumably "do not split into train/test";
# confirm against that module.
X, y = MNIST_data_processor.load_subset_data(train_test=False)

## 2. Generate SVM-poly model and export

In [None]:
# Degree-2 polynomial-kernel SVM. probability=True is requested so that class
# probabilities are available to the cross_val_predict_proba helper.
svc_poly_clf = skl_svm.SVC(
    C=2.8e-5,
    degree=2,
    gamma='auto',
    kernel='poly',
    tol=0.001,
    probability=True
)

# The timed span covers both the cross-validated prediction and the CSV export.
start_time = time.time()

# 5-fold cross-validated predictions from the project helper; the result
# exposes .to_csv (presumably a pandas DataFrame -- confirm in
# MNIST_model_functions).
svc_poly_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=svc_poly_clf,
    X=X, y=y,
    cv=5,
    model_name='SVC_Poly'
)
svc_poly_results.to_csv(base_path + '/data/prediction_results/2016.11.7-svc_results_subset.csv')
end_time = time.time()

# One pre-formatted string in parentheses: emits exactly the text the old
# Python 2 print statement produced, and is also valid under Python 3.
print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

Elapsed Time:  1.43721616666  mins


## 3. Generate GBM model

In [None]:
# Gradient boosting classifier: 1500 boosting stages at learning rate 0.01,
# each tree limited to 50 leaf nodes. verbose=True makes sklearn print the
# per-iteration progress table while fitting.
gbm_clf = skl_ensemble.GradientBoostingClassifier(
    verbose=True,
    n_estimators=1500,
    learning_rate=0.01,
    max_leaf_nodes=50
)

# The timed span covers both the cross-validated prediction and the CSV export.
start_time = time.time()

# 5-fold cross-validated predictions from the project helper; the result
# exposes .to_csv (presumably a pandas DataFrame -- confirm in
# MNIST_model_functions).
gbm_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=gbm_clf,
    X=X, y=y,
    cv=5,
    model_name='GBM'
)
gbm_results.to_csv(base_path + '/data/prediction_results/2016.11.7-gbm_results_subset.csv')
end_time = time.time()

# One pre-formatted string in parentheses: emits exactly the text the old
# Python 2 print statement produced, and is also valid under Python 3.
print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

      Iter       Train Loss   Remaining Time 
         1        8993.6978           26.85m


## 4. Generate RF Model

In [None]:
# Random forest: 1500 trees split on entropy, depth capped at 25.
# random_state pins the forest's bootstrap and feature sampling so that
# re-running this cell rebuilds the same model; without a seed the exported
# probabilities could differ from run to run.
# NOTE(review): how cross_val_predict_proba assigns folds is not visible from
# this notebook -- confirm that step is deterministic as well.
rf_clf = skl_ensemble.RandomForestClassifier(
    n_estimators=1500,
    criterion='entropy',
    max_depth=25,
    random_state=0
)

# 5-fold cross-validated predictions, written straight to the results folder.
rf_results = MNIST_model_functions.cross_val_predict_proba(rf_clf, X=X, y=y, cv=5, model_name='RF')
rf_results.to_csv(base_path + '/data/prediction_results/2016.11.7-rf_results_subset.csv')

## 5. Generate Logistic regression model

In [None]:
# L2-penalised logistic regression with C = 1.4e-6.
lr_clf = skl_lm.LogisticRegression(
    C=1.4e-6,
    penalty='l2',
)

# Run the project's 5-fold cross-validated prediction helper for this model.
lr_results = MNIST_model_functions.cross_val_predict_proba(
    lr_clf,
    X=X,
    y=y,
    cv=5,
    model_name='LR',
)

# Export the predictions next to the other models' result files.
lr_csv_path = base_path + '/data/prediction_results/2016.11.7-lr_results_subset.csv'
lr_results.to_csv(lr_csv_path)

## 6. Generate kNN Model