In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

In [2]:
train = pd.read_csv("dataset/train.csv")
test = pd.read_csv("dataset/test.csv")

In [3]:
train.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth_old,SignHeight_old,SignFacing (Target),SignWidth,SignHeight
0,2c9180975a056a64015a1e0a52e57021,Rear,195,1.02,46,45,Rear,0.714834,0.699294
1,2c9180975a056a64015a1e17b32171e4,Rear,203,1.09,59,54,Rear,0.737673,0.675158
2,2c9180975a056a64015a1de4deb16bd5,Front,26,0.96,104,108,Front,0.693642,0.72032
3,2c9180975a056a64015a1de4deb16bdd,Rear,199,0.81,38,47,Rear,0.628722,0.77763
4,2c9180975a056a64015a1de4deb16bd6,Rear,208,0.93,54,58,Rear,0.681419,0.731894


In [4]:
test.head()

Unnamed: 0,Id,DetectedCamera,AngleOfSign,SignAspectRatio,SignWidth_old,SignHeight_old,SignWidth,SignHeight
0,2c9180975a056a64015a1e10d3f270fe,Right,67,0.63,107,169,0.534933,0.844894
1,2c9180975a056a64015a1de4deb16bdc,Front,16,0.88,61,69,0.66234,0.749204
2,2c9180975a056a64015a1e0e70ea70ce,Right,44,1.15,232,202,0.754185,0.656661
3,2c9180975a056a64015a1dfed0c46ec6,Right,50,1.1,137,125,0.738719,0.674014
4,2c9180975a056a64015a1dfed0c46ec7,Front,30,0.95,99,104,0.689481,0.724304


In [6]:
train['DetectedCamera'].value_counts()

Front    10910
Right    10516
Left      9298
Rear      7761
Name: DetectedCamera, dtype: int64

In [7]:
#encode as integer
mapping = {'Front':0, 'Right':1, 'Left':2, 'Rear':3}
train = train.replace({'DetectedCamera':mapping})
test = test.replace({'DetectedCamera':mapping})

In [8]:
#renaming column
train.rename(columns = {'SignFacing (Target)': 'Target'}, inplace=True)

In [9]:
#encode Target Variable based on sample submission file
mapping = {'Front':0, 'Left':1, 'Rear':2, 'Right':3}
train = train.replace({'Target':mapping})

In [10]:
#target variable
y_train = train['Target']
test_id = test['Id']

In [11]:
#drop columns
train.drop(['Target','Id'], inplace=True, axis=1)
test.drop('Id',inplace=True,axis=1)

In [12]:
train.drop(['SignWidth_old','SignHeight_old'], inplace=True, axis=1)
test.drop(['SignWidth_old','SignHeight_old'], inplace=True, axis=1)

In [13]:
#train model
clf = RandomForestClassifier(n_estimators=60,max_features=5,min_samples_split=100,oob_score=True,n_jobs=-1)
clf.fit(train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features=5, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=100, min_weight_fraction_leaf=0.0,
            n_estimators=60, n_jobs=-1, oob_score=True, random_state=None,
            verbose=0, warm_start=False)

In [14]:
# uncomment if you want to visualize estimator tree

# from sklearn.tree import export_graphviz
# import pydot

# dot_data = StringIO()
# export_graphviz(clf.estimators_[0], out_file=dot_data, node_ids=True, feature_names=list(train), filled=True )

# # dot_format contains all the information of the tree
# dot_format = dot_data.getvalue()
# graph = pydot.graph_from_dot_data(dot_format)

# # graph is a list
# graph[0].write_png('output.png')

In [15]:
clf.score(train,y_train)

0.96380407951149794

In [16]:
#predict on test data
pred = clf.predict_proba(test)

In [18]:
#write submission file and submit
columns = ['Front','Left','Rear','Right']
sub = pd.DataFrame(data=pred, columns=columns)
sub['Id'] = test_id
sub = sub[['Id','Front','Left','Rear','Right']]
sub.to_csv("submission/sub_rf.csv", index=False)

In [19]:
from sklearn.ensemble import AdaBoostClassifier
adaboost = AdaBoostClassifier(n_estimators=400,learning_rate=.9)

In [20]:
adaboost.fit(train,y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=0.9, n_estimators=400, random_state=None)

In [21]:
#predict on test data
pred = adaboost.predict_proba(test)

In [22]:
#write submission file and submit
columns = ['Front','Left','Rear','Right']
sub = pd.DataFrame(data=pred, columns=columns)
sub['Id'] = test_id
sub = sub[['Id','Front','Left','Rear','Right']]
sub.to_csv("submission/sub_adaboost.csv", index=False)