In [35]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC # "Support vector classifier"
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, accuracy_score

In [36]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

In [37]:
import time                                                
def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print('%r  %2.2f ms' % \
                  (method.__name__, (te - ts) * 1000))
        return result
    return timed


In [38]:
def titanic(train, test):
    train_df = pd.read_csv(train)
    test_df = pd.read_csv(test)
    combine = [train_df, test_df]

    train_df = train_df.drop(['Ticket', 'Cabin'], axis=1)
    test_df = test_df.drop(['Ticket', 'Cabin'], axis=1)
    combine = [train_df, test_df]

    for dataset in combine:
        dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\.', expand=False)

    pd.crosstab(train_df['Title'], train_df['Sex'])

    for dataset in combine:
        dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col',\
        'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')

        dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')

    train_df[['Title', 'Survived']].groupby(['Title'], as_index=False).mean()

    title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
    for dataset in combine:
        dataset['Title'] = dataset['Title'].map(title_mapping)
        dataset['Title'] = dataset['Title'].fillna(0)

    train_df = train_df.drop(['Name', 'PassengerId'], axis=1)
    test_df = test_df.drop(['Name'], axis=1)
    combine = [train_df, test_df]

    for dataset in combine:
        dataset['Sex'] = dataset['Sex'].map( {'female': 1, 'male': 0} ).astype(int)

    guess_ages = np.zeros((2,3))
    guess_ages

    for dataset in combine:
        for i in range(0, 2):
            for j in range(0, 3):
                guess_df = dataset[(dataset['Sex'] == i) & \
                                      (dataset['Pclass'] == j+1)]['Age'].dropna()

                # age_mean = guess_df.mean()
                # age_std = guess_df.std()
                # age_guess = rnd.uniform(age_mean - age_std, age_mean + age_std)

                age_guess = guess_df.median()

                # Convert random age float to nearest .5 age
                guess_ages[i,j] = int( age_guess/0.5 + 0.5 ) * 0.5

        for i in range(0, 2):
            for j in range(0, 3):
                dataset.loc[ (dataset.Age.isnull()) & (dataset.Sex == i) & (dataset.Pclass == j+1),\
                        'Age'] = guess_ages[i,j]

        dataset['Age'] = dataset['Age'].astype(int)

    for dataset in combine:
        dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1



    for dataset in combine:
        dataset['IsAlone'] = 0
        dataset.loc[dataset['FamilySize'] == 1, 'IsAlone'] = 1

    combine = [train_df, test_df]

    freq_port = train_df.Embarked.dropna().mode()[0]

    for dataset in combine:
        dataset['Embarked'] = dataset['Embarked'].fillna(freq_port)

    test_df['Fare'].fillna(test_df['Fare'].dropna().median(), inplace=True)

    test_df.head(10)
    return [train_df,test_df]

In [13]:
train_t,test_t = titanic('./train.csv','./test.csv')

In [14]:
train_t = pd.get_dummies(train_t)

In [15]:
test_t = pd.get_dummies(test_t)

In [16]:
X = train_t.iloc[:,1:]

In [17]:
y = train_t['Survived']

In [18]:
X_train,X_test,y_train,y_test = train_test_split(X, y,
                            test_size=0.2, random_state=23)

In [19]:
@timeit
def train_svm(kernel='rbf', C=1.0, gamma='auto', degree=3, coef0=0.0):
    if kernel == 'linear':
        model = SVC(kernel=kernel, C=C,random_state=1)
    elif kernel == 'poly':
        model = SVC(kernel=kernel,C=C,gamma=gamma,coef0=coef0, random_state=1)
    elif kernel == 'rbf':
        model = SVC(kernel=kernel,C=C,gamma=gamma, random_state=1)
    elif kernel == 'sigmoid':
        model = SVC(kernel=kernel,C=C,gamma=gamma,coef0=coef0, random_state=1)
    return model

In [20]:
clf = train_svm('linear',C=0.4)
clf.fit(X_train,y_train)
predict = clf.predict(X_test)
accuracy_score(y_test,predict)

'train_svm'  0.05 ms


0.81005586592178769

In [21]:
clf.fit(X_train,y_train)

SVC(C=0.4, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel=u'linear',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

In [22]:
predict = clf.predict(X_test)
accuracy_score(y_test,predict)

0.81005586592178769

In [23]:
import pickle

In [24]:
def pickle_dump(data, filename):
    with open(filename, 'wb') as f:
        pickle.dump(data,f, protocol = pickle.HIGHEST_PROTOCOL)
        

In [28]:
def pickle_load(filename):
    with open(filename , 'rb') as f:
        return pickle.load(f)

In [26]:
pickle_dump(clf,'svmmodel1')

In [30]:
x = pickle_load('svmmodel1')

In [31]:
x

SVC(C=0.4, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel=u'linear',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

In [32]:
def grid_obj_creator(classifier, parameters, X, y):
    acc_scorer = make_scorer(accuracy_score)
    grid_obj = GridSearchCV(classifier, parameters, scoring=acc_scorer)
    grid_obj = grid_obj.fit(X, y)
    return grid_obj

In [33]:
params = { 'kernel': ['linear'], 'C': np.arange(0.5,1.5,0.1), 'shrinking':[True,False]}

In [34]:
grid_obj_creator(clf,params,X,y)

NameError: global name 'GridSearchCV' is not defined