In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, accuracy_score

In [None]:
# Load dataset_cleaned.csv into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv('../../../../datasets/parte1/dataset_cleaned.csv')

In [None]:
# Target vector: the 'RainTomorrow' column. Everything else starts out as a feature.
y = df['RainTomorrow']
X = df.drop(columns=['RainTomorrow'])

# Optional 'weight' column: when present it is turned into a label -> weight
# mapping and removed from the features so it is not used as a predictor.
if 'weight' in df:
    # If a label occurs on several rows, the mapping keeps the weight of the
    # last such row — presumably all rows of a class share one weight; verify.
    class_weights_dict = df.set_index('RainTomorrow')['weight'].to_dict()
    X = X.drop(columns=['weight'])

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2023)

In [None]:
# Base SVC estimator. When the dataset ships a 'weight' column, the per-class
# weights derived from it are passed to the classifier; otherwise the SVC is
# built with its default (unweighted) classes.
model = (
    SVC(random_state=2023, class_weight=class_weights_dict)
    if 'weight' in df
    else SVC(random_state=2023)
)

In [6]:
# Use GridSearchCV to find the best hyperparameters.
#
# The search space is split per kernel because SVC only uses `gamma` with the
# 'rbf' and 'sigmoid' kernels here. Crossing `gamma` with 'linear' would refit
# the exact same linear model once per gamma value without changing any score.
# Consequence: when the linear kernel wins, `best_params_` has no 'gamma' key.
c_values = [0.01, 0.1, 1, 10, 100, 1000]
param_grid = [
    {
        'kernel': ['rbf', 'sigmoid'],
        'C': c_values,
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    },
    {
        'kernel': ['linear'],
        'C': c_values,
    },
]

# 5-fold cross-validated search on the training split, using all CPU cores.
# NOTE(review): scoring="f1" is the binary F1 score — this assumes
# 'RainTomorrow' is a binary target encoded so that 1 is the positive class;
# confirm against the dataset.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    refit=True,
    verbose=2,
    cv=5,
    scoring="f1",
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# refit=True retrains the best configuration on the whole training split;
# keep that estimator as the model used in the cells below.
model = grid_search.best_estimator_


In [None]:
# Display the hyperparameter combination selected by the grid search.
grid_search.best_params_

In [None]:
# Predict labels for the held-out test features with the estimator currently
# bound to `model`.
predictions = model.predict(X_test)

In [None]:
# Accuracy on the held-out test split, printed with two decimals.
test_accuracy = accuracy_score(y_test, predictions)
print("%0.2f accuracy" % test_accuracy)

In [None]:
# Text report of the per-class metrics on the test split.
report_text = classification_report(y_test, predictions)
print(report_text)

In [None]:
# Plot the confusion matrix of test-set predictions against the true labels.
confusion_display = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=predictions,
)
plt.show()