In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [22]:
hurricane_df = pd.read_csv('data/pacific_cleaned.csv')
hurricane_df = hurricane_df.dropna()
hurricane_df = hurricane_df.drop(['ID', 'Name', 'Event', 'DateTime'], axis=1)
hurricane_df

Unnamed: 0,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,Low Wind NE,Low Wind SE,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
19744,TD,14.7,-107.6,25,1006.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19745,TD,15.4,-108.5,30,1006.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19746,TS,16.0,-109.1,35,1005.0,75.0,75.0,75.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19747,TS,16.6,-109.6,45,1000.0,75.0,75.0,75.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19748,TS,17.1,-109.9,50,997.0,75.0,75.0,75.0,75.0,25.0,25.0,25.0,25.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26132,LO,21.7,-109.0,35,1002.0,60.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26133,LO,22.4,-108.7,30,1007.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26134,LO,23.1,-108.3,30,1008.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26135,LO,23.5,-107.9,25,1009.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
X = hurricane_df.drop('Status', axis=1)
y = hurricane_df['Status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [31]:
model = SVC()
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf'],
    'gamma': ['auto']
}

grid_search = GridSearchCV(model, param_grid, n_jobs=-1)

grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_



KeyboardInterrupt: 

In [29]:
print('best params', grid_search.best_params_)
print('results', grid_search.cv_results_)
print('best score', grid_search.best_score_)
print('score', grid_search.score(X_train, y_train))

best params {'C': 1, 'gamma': 'auto', 'kernel': 'linear'}
results {'mean_fit_time': array([ 3.03334589,  5.08608532,  5.73345428,  6.38093147,  8.70753388,
        6.69798613, 12.40518184,  6.04988828]), 'std_fit_time': array([0.64410748, 1.20822085, 0.63762798, 1.16471496, 1.79103057,
       1.03530235, 1.96718815, 0.77648128]), 'mean_score_time': array([0.08319111, 1.34815702, 0.07573295, 1.22784872, 0.06979008,
       1.3115262 , 0.04934244, 1.0117661 ]), 'std_score_time': array([0.01660365, 0.30265806, 0.01718903, 0.21203116, 0.01332048,
       0.27370215, 0.00878361, 0.29997302]), 'param_C': masked_array(data=[0.1, 0.1, 1, 1, 10, 10, 100, 100],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_gamma': masked_array(data=['auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto'],
             mask=[False, False, False, False, False, False, False, False],
       fill_value='?',

In [30]:
svc_model = SVC(C=10, kernel='linear', gamma='auto')

svc_model.fit(X_train, y_train)

predict = svc_model.predict(X_test)

print(predict)
print('accuracy score: ', accuracy_score(y_test, predict))

[' HU' ' LO' ' HU' ... ' DB' ' HU' ' TD']
accuracy score:  0.8646322378716745
