# Generating predictions for 5 models and exporting

**Methods:**
>1. Load data and create subset
>2. Generate SVM-poly model and export
>3. Generate GBM model
>4. Generate RF (random forest) model
>5. Generate Logistic regression model
>6. Generate kNN Model
>7. Load all models, concat, and export

In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.svm as skl_svm
import sklearn.cross_validation as skl_cv
import seaborn as sns
import os
import sys

# Root directory of the MNIST project. It can be overridden per machine with
# the MNIST_BASE_PATH environment variable; the default is the original
# hard-coded location, so existing setups behave as before. Later cells build
# paths as base_path + '/data/...', so a trailing slash here is optional.
base_path = os.environ.get('MNIST_BASE_PATH', '/home/lundi/Python/MNIST/')

# Make the project-local helper modules importable. The membership check keeps
# the cell idempotent: re-running it does not pile duplicate entries onto
# sys.path.
if base_path + '/libraries/' not in sys.path:
    sys.path.append(base_path + '/libraries/')

import time

import sklearn.linear_model as skl_lm
import sklearn.ensemble as skl_ensemble
from sklearn.grid_search import GridSearchCV

import MNIST_data_processor as mdp
import MNIST_model_functions as mmf

# Instantiate the two project helper classes imported above (as mdp / mmf).
# Each instance is bound to the same name as its class, so every later
# reference to MNIST_data_processor / MNIST_model_functions in this notebook
# is a call on these instances, not on the classes themselves.
MNIST_data_processor = mdp.MNIST_data_processor()
MNIST_model_functions = mmf.MNIST_model_functions()

## 1. Load data and create subset

In [14]:
# Load the features (X) and labels (y) through the project data processor.
# NOTE(review): train_test=False presumably asks for the subset without a
# train/test split (only one X, y pair is unpacked) — confirm in
# MNIST_data_processor.load_subset_data.
X, y = MNIST_data_processor.load_subset_data(train_test=False)

In [23]:
# Sanity check: show the dtype of every feature column in X.
X.dtypes

pixel0      int64
pixel1      int64
pixel2      int64
pixel3      int64
pixel4      int64
pixel5      int64
pixel6      int64
pixel7      int64
pixel8      int64
pixel9      int64
pixel10     int64
pixel11     int64
pixel12     int64
pixel13     int64
pixel14     int64
pixel15     int64
pixel16     int64
pixel17     int64
pixel18     int64
pixel19     int64
pixel20     int64
pixel21     int64
pixel22     int64
pixel23     int64
pixel24     int64
pixel25     int64
pixel26     int64
pixel27     int64
pixel28     int64
pixel29     int64
            ...  
pixel754    int64
pixel755    int64
pixel756    int64
pixel757    int64
pixel758    int64
pixel759    int64
pixel760    int64
pixel761    int64
pixel762    int64
pixel763    int64
pixel764    int64
pixel765    int64
pixel766    int64
pixel767    int64
pixel768    int64
pixel769    int64
pixel770    int64
pixel771    int64
pixel772    int64
pixel773    int64
pixel774    int64
pixel775    int64
pixel776    int64
pixel777    int64
pixel778  

In [24]:
# Sanity check: show the dtype of the label vector y.
y.dtype

dtype('int64')

## 2. Generate SVM-poly model and export

In [15]:
svc_poly_clf = skl_svm.SVC(
    C=2.8e-5, 
    degree=2, 
    gamma='auto', 
    kernel='poly', 
    tol=0.001,
    probability=True
)
start_time = time.time()

svc_poly_results = MNIST_model_functions.cross_val_predict_proba(
                        estimator = svc_poly_clf, 
                        X = X, y = y, 
                        cv=5, 
                        model_name = 'SVC_Poly'
                    )
svc_poly_results.to_csv(base_path + '/data/prediction_results/2016.11.7-svc_results.csv')
end_time = time.time()

print 'Elapsed Time: ', (end_time - start_time) / 60.0, ' mins'

Elapsed Time:  1.46004854838  mins


## 3. Generate GBM model

In [16]:
gbm_clf = skl_ensemble.GradientBoostingClassifier(verbose=True, n_estimators=1500, learning_rate=0.01, max_leaf_nodes=50)

start_time = time.time()

gbm_results = MNIST_model_functions.cross_val_predict_proba(
                        estimator = gbm_clf, 
                        X = X, y = y, 
                        cv=5, 
                        model_name = 'GBM'
                    )
gbm_results.to_csv(base_path + '/data/prediction_results/2016.11.7-gbm_results.csv')
end_time = time.time()

print 'Elapsed Time: ', (end_time - start_time) / 60.0, ' mins'

      Iter       Train Loss   Remaining Time 
         1        8993.6978           26.90m
         2        8794.3160           26.95m
         3        8607.8377           26.94m
         4        8432.9118           26.97m
         5        8267.6491           26.94m
         6        8111.1804           26.98m
         7        7963.6818           27.01m
         8        7822.7756           27.06m
         9        7686.8483           27.09m
        10        7556.8896           27.14m
        20        6473.7756           27.55m
        30        5661.8096           27.60m
        40        5007.4252           27.77m
        50        4466.0110           27.68m
        60        4009.3726           27.60m
        70        3622.3111           27.76m
        80        3277.9020           28.03m
        90        2968.2557           28.05m
       100        2695.6103           28.23m
       200        1196.9376           27.16m
       300         643.1813           25.28m
       40

## 4. Generate RF Model

In [18]:
# Random forest: 1500 entropy-criterion trees, each limited to depth 25.
# Forest construction is randomised (bootstrap samples and per-split feature
# sampling), so random_state is pinned to keep the exported predictions
# reproducible across runs.
rf_clf = skl_ensemble.RandomForestClassifier(
    n_estimators = 1500,
    criterion = 'entropy',
    max_depth = 25,
    random_state = 42
)

# NOTE(review): cross_val_predict_proba is a project helper; presumably it
# returns out-of-fold class probabilities for a 5-fold split as a DataFrame
# (the result is written with .to_csv) — confirm in MNIST_model_functions.
rf_results = MNIST_model_functions.cross_val_predict_proba(rf_clf, X = X, y = y, cv = 5, model_name = 'RF')
rf_results.to_csv(base_path + '/data/prediction_results/2016.11.7-rf_results.csv')

## 5. Generate Logistic regression model

In [17]:
# L2-penalised logistic regression. In scikit-learn C is the inverse of the
# regularisation strength, so C = 1.4e-6 is a heavily regularised model.
lr_clf = skl_lm.LogisticRegression(C=1.4e-6, penalty='l2')

# Run the project's 5-fold cross-validated probability helper for this model,
# then write its result to the shared prediction_results folder.
lr_results = MNIST_model_functions.cross_val_predict_proba(
    lr_clf,
    X=X,
    y=y,
    cv=5,
    model_name='LR'
)
lr_results.to_csv(
    base_path + '/data/prediction_results/2016.11.7-lr_results.csv'
)

## 6. Generate kNN Model