## SVM 

In [1]:
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import make_pipeline
import pandas as pd



In [2]:
train_data = pd.read_csv('./input/train_prep.csv')
test_data = pd.read_csv('./input/test_prep.csv')

param = train_data.ix[:, :-1] # take all rows and all but last column from training data
res = train_data.ix[:, -1] # take all rows and only last column from training data
test_data = test_data.drop('ID', 1) # drop ID column from test data

In [3]:
# create output file
def create_output_file(data, file_name):
    output = []
    for pred in data:
        oi = [0] * 5
        oi[pred - 1] = 1
        output.append(oi)
    output = pd.DataFrame(output, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
    output.index.names = ['ID']
    output.index += 1
    output.to_csv('./output/Sub-' + file_name, index_label='ID')

In [5]:
clf = SVC(probability=True)
cross_validation.cross_val_score(clf, param, res, scoring="neg_log_loss")

array([-1.02285952, -1.00597704, -1.01029705])

In [6]:
clf.fit(param, res)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
pred = clf.predict(test_data)
create_output_file(pred, 'SVM.csv')
print pred

[5 1 1 ..., 5 1 5]


### Pipeline with feature selection and estimation

In [10]:
param_new = SelectKBest(chi2, k=3).fit_transform(param, res)
cross_validation.cross_val_score(clf, param_new, res)

array([ 0.59436715,  0.6037037 ,  0.60087572])

In [11]:
selector = SelectKBest(chi2, k=3)
predictor = make_pipeline(selector, clf)
predictor.fit(param, res)
predictions = predictor.predict_proba(test_data)

In [12]:
predictions = predictor.predict(test_data)
create_output_file(predictions, 'SVM-Kbest.csv')