## Support vector machine

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, accuracy_score

#### Read data

In [None]:
# Load the cleaned dataset (path is relative to this notebook's directory).
df = pd.read_csv(
    filepath_or_buffer='../../../../datasets/parte1/dataset_cleaned.csv',
)

#### X and y arrays

In [None]:
# Target vector: the 'RainTomorrow' column.
y = df['RainTomorrow']
# Feature matrix: every remaining column of the frame.
X = df.drop(columns=['RainTomorrow'])

#### Train Test Split

Now let's split the data into a training set and a testing set. We will train our model on the training set and then use the test set to evaluate the model.

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible.
# NOTE(review): the split is not stratified on y — consider `stratify=y` if the
# classes are imbalanced (confirm against the class distribution).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2023,
)

#### Training 

Using GridSearchCV to find the best hyperparameters

In [6]:
# Use GridSearchCV to find the best hyperparameters.
#
# `gamma` is the kernel coefficient for the 'rbf' and 'sigmoid' kernels only;
# SVC ignores it for 'linear'. Crossing gamma with the linear kernel would fit
# the same linear model once per gamma value, so the grid is split into two
# sub-grids: 30 -> 6 linear candidates, i.e. 120 fewer fits with cv=5, while
# every distinct configuration is still searched.
c_values = [0.01, 0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
param_grid = [
    {'kernel': ['rbf', 'sigmoid'], 'C': c_values, 'gamma': gamma_values},
    {'kernel': ['linear'], 'C': c_values},
]

model = SVC(random_state=2023)

# scoring="f1" ranks candidates by the F1 score of the positive class.
# NOTE(review): this assumes RainTomorrow is a binary 0/1 label — confirm.
# refit=True retrains the best configuration on the whole training set, so
# `grid_search.best_estimator_` is ready for prediction afterwards.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    refit=True,
    verbose=2,
    cv=5,
    scoring="f1",
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

Inspect the best parameters

In [None]:
# Last expression of the cell, so the notebook displays the hyperparameter
# combination that achieved the best cross-validated score.
grid_search.best_params_

Get the best estimator

In [None]:
# Rebind `model` (until now the unfitted SVC template) to the best estimator,
# which GridSearchCV has already refit on the training data (refit=True).
model = grid_search.best_estimator_

Get the predictions using the trained model

In [None]:
# Predict labels for the held-out test features; used by the evaluation cells below.
predictions = model.predict(X_test)

#### Model Evaluation

Classification report

In [None]:
# classification_report returns a preformatted text table, so it is printed
# rather than left as the cell's last expression (which would show raw "\n").
report = classification_report(y_test, predictions)
print(report)

Confusion Matrix

In [None]:
# Plot the confusion matrix of true vs. predicted test labels.
ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=predictions,
)
plt.show()