In [1]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import os

In [2]:
weather = pd.read_csv('kyoto_weather.csv')
weather.head()

Unnamed: 0.1,Unnamed: 0,Date,Temperature(C),Max_temp,Min_temp,Dew_point,Max_dew_point,Min_dew_point,Pressure,Humidity,Wind,Clouds,Description
0,0,2017-01-01 00:00:00,6,7,4,3.6,4.6,1.6,1028,88,2,75,Clouds
1,1,2017-01-01 01:00:00,7,9,5,4.6,6.6,2.6,1028,88,1,75,Clouds
2,2,2017-01-01 02:00:00,9,10,7,2.0,3.0,0.0,1028,65,1,40,Clouds
3,3,2017-01-01 03:00:00,11,11,10,0.2,0.2,-0.8,1027,46,1,20,Clouds
4,4,2017-01-01 04:00:00,12,12,11,0.6,0.6,-0.4,1026,43,1,20,Clouds


In [3]:
# Assign X (data) and y (target)
predictors = ['Temperature(C)','Dew_point','Pressure','Wind','Clouds']
X = weather[predictors]
y = weather["Description"]

In [4]:
# Split data into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
# Create the SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [6]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [7]:
# Fit the model using the grid search estimator. 
# This will take the SVC model and try each combination of parameters
grid.fit(X_train, y_train)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] C=1, gamma=0.0001 ...............................................




[CV] ...... C=1, gamma=0.0001, score=0.8306211322484529, total=   3.7s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.0s remaining:    0.0s


[CV] ....... C=1, gamma=0.0001, score=0.832416322787712, total=   3.5s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    7.8s remaining:    0.0s


[CV] ...... C=1, gamma=0.0001, score=0.8338306173972917, total=   3.6s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ...... C=1, gamma=0.0005, score=0.8306211322484529, total=   3.9s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ....... C=1, gamma=0.0005, score=0.832416322787712, total=   3.5s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ...... C=1, gamma=0.0005, score=0.8338306173972917, total=   3.5s
[CV] C=1, gamma=0.001 ................................................
[CV] ....... C=1, gamma=0.001, score=0.8306211322484529, total=   3.8s
[CV] C=1, gamma=0.001 ................................................
[CV] ........ C=1, gamma=0.001, score=0.832416322787712, total=   3.7s
[CV] C=1, gamma=0.001 ................................................
[CV] ....... C=1, gamma=0.001, score=0.8338306173972917, total=   3.4s
[CV] C=1, gamma=0.005 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:  3.7min finished


GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1, 5, 10, 50], 'gamma': [0.0001, 0.0005, 0.001, 0.005]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [8]:
# List the best parameters for this dataset
print(grid.best_params_)

{'C': 10, 'gamma': 0.0001}


In [9]:
# List the best score
print(grid.best_score_)

0.8329001681700046


In [10]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test)

In [11]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=predictors))

                precision    recall  f1-score   support

Temperature(C)       0.99      1.00      0.99       758
     Dew_point       0.81      0.93      0.86      2598
      Pressure       0.00      0.00      0.00         1
          Wind       0.00      0.00      0.00         1
        Clouds       0.00      0.00      0.00        20

   avg / total       0.80      0.82      0.80      4361



  .format(len(labels), len(target_names))
  'precision', 'predicted', average, warn_for)
