# Generating predictions for 5 models and exporting

**Methods:**
>1. Load data and create subset
>2. Generate SVM-poly model and export
>3. Generate GBM model
>4. Generate RF (random forest) model
>5. Generate Logistic regression model
>6. Generate kNN Model
>7. Load all models, concat, and export

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.svm as skl_svm
import sklearn.cross_validation as skl_cv
import seaborn as sns
import os
import sys

# Root directory of the MNIST project. The historical location stays the
# default, but it can be overridden with the MNIST_BASE_PATH environment
# variable so the notebook is not tied to one user's home directory.
base_path = os.environ.get('MNIST_BASE_PATH', '/home/lundi/Python/MNIST/')

# Make the project-local helper modules importable. os.path.join avoids the
# doubled separator that plain concatenation produces when base_path ends
# with a slash.
sys.path.append(os.path.join(base_path, 'libraries'))

import time

import sklearn.linear_model as skl_lm
import sklearn.ensemble as skl_ensemble
from sklearn.grid_search import GridSearchCV

import MNIST_data_processor as mdp
import MNIST_model_functions as mmf

# Build one instance of each project-local helper class. The instance names
# match the class names; later cells call methods on these objects
# (load_full_data and cross_val_predict_proba).
MNIST_data_processor = mdp.MNIST_data_processor()
MNIST_model_functions = mmf.MNIST_model_functions()

## 1. Load data and create subset

In [2]:
# Load the dataset through the project helper and unpack the result into X and y,
# which every model cell below passes to cross_val_predict_proba.
# NOTE(review): train_test=False presumably returns the full set without a
# train/test split, with X as features and y as labels — confirm in
# MNIST_data_processor.
X, y = MNIST_data_processor.load_full_data(train_test=False)

## 2. Generate SVM-poly model and export

In [4]:
# Degree-2 polynomial-kernel SVM. With probability=True scikit-learn fits the
# class probabilities using an internal, randomised cross-validation, so the
# seed is pinned to make the exported predictions repeatable between runs.
svc_poly_clf = skl_svm.SVC(
    C=2.8e-5,
    degree=2,
    gamma='auto',
    kernel='poly',
    tol=0.001,
    probability=True,
    random_state=0
)

# Time the cross-validated prediction and the CSV export together.
start_time = time.time()

svc_poly_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=svc_poly_clf,
    X=X, y=y,
    cv=5,
    model_name='SVC_Poly'
)
svc_poly_results.to_csv(base_path + '/data/prediction_results/2016.11.7-svc_results.csv')
end_time = time.time()

# Single pre-formatted string: prints the same text as the old Python 2
# print statement, and also parses under Python 3.
print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

Elapsed Time:  53.5365842183  mins


## 3. Generate GBM model

In [5]:
# Gradient boosting with many shallow-ish trees and a small learning rate.
# verbose=True prints the per-iteration training loss for every fold.
# random_state is pinned so repeated runs export the same predictions.
gbm_clf = skl_ensemble.GradientBoostingClassifier(
    verbose=True,
    n_estimators=1500,
    learning_rate=0.01,
    max_leaf_nodes=50,
    random_state=0
)

# Time the cross-validated prediction and the CSV export together.
start_time = time.time()

gbm_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=gbm_clf,
    X=X, y=y,
    cv=5,
    model_name='GBM'
)
gbm_results.to_csv(base_path + '/data/prediction_results/2016.11.7-gbm_results.csv')
end_time = time.time()

# Single pre-formatted string: prints the same text as the old Python 2
# print statement, and also parses under Python 3.
print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

      Iter       Train Loss   Remaining Time 
         1       75643.7433          275.14m
         2       74051.2971          267.62m
         3       72543.2483          267.39m
         4       71128.0655          267.59m
         5       69803.7753          269.41m
         6       68534.5349          267.77m
         7       67330.6865          267.33m
         8       66182.2154          265.83m
         9       65083.4920          265.33m
        10       64050.8950          265.18m
        20       55474.3252          256.52m
        30       49017.6927          254.11m
        40       43768.2087          251.76m
        50       39454.9186          250.07m
        60       35731.2641          250.97m
        70       32543.0560          252.21m
        80       29802.9301          252.05m
        90       27432.2246          251.18m
       100       25392.3966          250.08m
       200       13536.0581          239.40m
       300        9000.1539          221.41m
       40

## 4. Generate RF Model

In [6]:
# Random forest of 1500 entropy-criterion trees, depth-limited to 25.
# Bootstrap sampling and feature selection are random, so the seed is pinned
# to make the exported predictions repeatable between runs.
rf_clf = skl_ensemble.RandomForestClassifier(
    n_estimators=1500,
    criterion='entropy',
    max_depth=25,
    random_state=0
)

# Time the run and pass the estimator by keyword, matching the SVM and GBM cells.
start_time = time.time()

rf_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=rf_clf,
    X=X, y=y,
    cv=5,
    model_name='RF'
)
rf_results.to_csv(base_path + '/data/prediction_results/2016.11.7-rf_results.csv')
end_time = time.time()

print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

## 5. Generate Logistic regression model

In [7]:
# L2-penalised logistic regression; the very small C means strong
# regularisation.
lr_clf = skl_lm.LogisticRegression(penalty='l2', C=1.4e-6)

# Time the run and pass the estimator by keyword, matching the SVM and GBM cells.
start_time = time.time()

lr_results = MNIST_model_functions.cross_val_predict_proba(
    estimator=lr_clf,
    X=X, y=y,
    cv=5,
    model_name='LR'
)
lr_results.to_csv(base_path + '/data/prediction_results/2016.11.7-lr_results.csv')
end_time = time.time()

print('Elapsed Time:  %s  mins' % ((end_time - start_time) / 60.0))

## 6. Generate kNN Model