# Putting the final model together

In [1]:
import os.path
import pandas as pd
import numpy as np 

from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_digits
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import make_scorer, \
    hamming_loss
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn.svm import SVC
import pickle



In [2]:
# Let wide frames display every column instead of eliding the middle ones.
pd.set_option('display.max_columns', 1000)

# DATA_DIR actually holds the path of the final test CSV (not a directory);
# the name is kept so any later cell that refers to it still works.
DATA_DIR = "tree_data_FINALTEST.csv"
# The first CSV column is read as the index rather than as a data column.
tree_data_test = pd.read_csv(DATA_DIR, index_col=0)

In [3]:
# Replace the index read from the CSV with a fresh 0..n-1 RangeIndex.
# drop=True discards the old index instead of adding it back as a column.
# Rebinding the name (rather than inplace=True) keeps the cell free of
# in-place mutation and safe to re-run.
tree_data_test = tree_data_test.reset_index(drop=True)

In [4]:
# Separate the model inputs (X) from the target (y).
# X is the first 52 columns, picked by position; y is the Tree_Type column.
tree_features = tree_data_test.iloc[:, list(range(52))]
tree_class = tree_data_test.Tree_Type

# Load all my models!

In [5]:
# Model 1: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model1_svm_onevsall.pickle', mode="rb") as input_file1:
    model1 = pickle.loads(input_file1.read())

In [6]:
# Model 2: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model2_svm_onevsall.pickle', mode="rb") as input_file2:
    model2 = pickle.loads(input_file2.read())

In [7]:
# Model 3: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model3_svm_onevsall.pickle', mode="rb") as input_file3:
    model3 = pickle.loads(input_file3.read())

In [8]:
# Model 4: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model4_svm_onevsall.pickle', mode="rb") as input_file4:
    model4 = pickle.loads(input_file4.read())

In [9]:
# Model 5: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model5_svm_onevsall.pickle', mode="rb") as input_file5:
    model5 = pickle.loads(input_file5.read())

In [10]:
# Model 6: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model6_svm_onevsall.pickle', mode="rb") as input_file6:
    model6 = pickle.loads(input_file6.read())

In [11]:
# Model 7: restore the pickled classifier from disk.
# NOTE(review): unpickling runs code stored in the file -- only load trusted artifacts.
with open('model7_svm_onevsall.pickle', mode="rb") as input_file7:
    model7 = pickle.loads(input_file7.read())

# Final Model Building

In [12]:
import numpy as np

def which_tree(data, models=None):
    """Label each row with the class whose binary model is most confident.

    Every model is asked for its positive-class probability (column 1 of
    ``predict_proba``) for every row. A row receives the 1-based position
    of the model with the highest probability; ties go to the earliest
    model, as ``np.argmax`` returns the first maximum.

    Parameters
    ----------
    data : pandas.DataFrame or array-like, shape (n_rows, n_features)
        Feature rows to classify.
    models : sequence of fitted classifiers, optional
        Objects exposing ``predict_proba``. Defaults to the notebook's
        ``model1`` ... ``model7``, in that order.

    Returns
    -------
    list of int
        One label per input row, in ``1 .. len(models)``. Empty input
        yields an empty list.
    """
    if models is None:
        # Default ensemble: the seven classifiers unpickled earlier in the notebook.
        models = (model1, model2, model3, model4, model5, model6, model7)

    # np.asarray handles DataFrames and plain arrays alike; the previously used
    # DataFrame.as_matrix() is deprecated and no longer exists in pandas >= 1.0.
    features = np.asarray(data)
    if features.shape[0] == 0:
        # The per-row loop used to return [] here; estimators would raise on 0 rows.
        return []

    # One batched predict_proba call per model instead of one call per row.
    # NOTE(review): column 1 is taken to be the "is this class" probability,
    # i.e. the positive label sorts second in each model's classes_ -- confirm.
    positive_probs = np.column_stack(
        [model.predict_proba(features)[:, 1] for model in models]
    )

    # argmax gives a 0-based model position; labels are 1-based.
    return (np.argmax(positive_probs, axis=1) + 1).tolist()

In [15]:
# Run the combined model over the test features: one predicted label per row.
tree_class_predictions = which_tree(data=tree_features)

In [16]:
# Echo the full list of predicted labels as the cell output.
tree_class_predictions

[3,
 7,
 1,
 7,
 6,
 6,
 5,
 3,
 5,
 5,
 1,
 3,
 1,
 4,
 7,
 4,
 1,
 5,
 3,
 5,
 1,
 2,
 2,
 1,
 7,
 2,
 3,
 4,
 4,
 5,
 3,
 1,
 6,
 4,
 4,
 7,
 6,
 1,
 4,
 4,
 6,
 4,
 6,
 6,
 1,
 1,
 1,
 5,
 4,
 5,
 6,
 3,
 3,
 1,
 1,
 4,
 5,
 6,
 2,
 6,
 7,
 3,
 5,
 3,
 2,
 2,
 3,
 7,
 6,
 7,
 5,
 7,
 4,
 5,
 6,
 6,
 4,
 4,
 6,
 7,
 5,
 7,
 1,
 7,
 5,
 4,
 4,
 2,
 7,
 4,
 7,
 6,
 4,
 1,
 3,
 5,
 7,
 5,
 1,
 5,
 4,
 2,
 4,
 5,
 5,
 6,
 3,
 1,
 7,
 2,
 2,
 3,
 4,
 5,
 1,
 7,
 3,
 2,
 7,
 4,
 1,
 5,
 1,
 6,
 1,
 5,
 1,
 4,
 4,
 6,
 2,
 6,
 4,
 3,
 2,
 2,
 5,
 4,
 6,
 2,
 1,
 2,
 4,
 5,
 5,
 5,
 6,
 5,
 4,
 2,
 5,
 3,
 3,
 3,
 1,
 5,
 1,
 1,
 5,
 1,
 5,
 4,
 5,
 6,
 1,
 5,
 7,
 7,
 4,
 6,
 5,
 1,
 2,
 2,
 4,
 3,
 3,
 4,
 1,
 6,
 1,
 6,
 3,
 6,
 7,
 1,
 4,
 4,
 5,
 3,
 7,
 6,
 4,
 2,
 6,
 6,
 7,
 3,
 4,
 1,
 7,
 4,
 5,
 5,
 4,
 6,
 6,
 2,
 4,
 4,
 2,
 5,
 5,
 1,
 7,
 7,
 4,
 1,
 6,
 2,
 4,
 2,
 4,
 3,
 4,
 2,
 5,
 3,
 4,
 4,
 5,
 4,
 6,
 1,
 3,
 2,
 3,
 4,
 3,
 5,
 6,
 7,
 5,
 5,
 6,
 5,
 7,
 1,
 3,
 6,


1: Spruce/Fir  
2: Lodgepole Pine  
3: Ponderosa Pine  
4: Cottonwood/Willow  
5: Aspen  
6: Douglas-fir  
7: Krummholz  

In [16]:
# These are the actual (ground-truth) labels, not predictions. The Series is
# converted to a plain list so it can be passed to the metric functions below.
tree_classes = tree_class.tolist()

In [17]:
# Overall accuracy of the combined model on the test set
# (about 0.80 in the recorded run -- not too bad for a base model).
from sklearn.metrics import accuracy_score
accuracy_score(y_true=tree_classes, y_pred=tree_class_predictions)

0.79728835978835977

In [18]:
# Per-class precision, recall and F1 for the combined model's predictions.
from sklearn.metrics import classification_report
print(classification_report(y_true=tree_classes, y_pred=tree_class_predictions))

             precision    recall  f1-score   support

          1       0.70      0.68      0.69       435
          2       0.73      0.60      0.66       467
          3       0.77      0.66      0.71       394
          4       0.87      0.97      0.92       420
          5       0.82      0.94      0.88       437
          6       0.75      0.77      0.76       434
          7       0.91      0.97      0.94       437

avg / total       0.79      0.80      0.79      3024

