In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA

In [3]:
# Load the dataset
data = pd.read_csv('../Data/weatherAUS.csv')

In [4]:
data.dropna(inplace=True)

features = ['MinTemp', 'MaxTemp', 'Rainfall', 'WindGustSpeed', 'Humidity9am', 'Humidity3pm']

X = data[features]
y = data['RainTomorrow']

In [5]:
X['InteractionFeature'] = X['MinTemp'] * X['MaxTemp']

In [6]:
poly = PolynomialFeatures(degree=2, include_bias=False)

In [7]:
X_poly = poly.fit_transform(X)

In [8]:
# Apply PCA for dimensionality reduction
pca = PCA(n_components=5)  # Choose the number of principal components

In [9]:
X_pca = pca.fit_transform(X_poly)

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [11]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
# Create and train SVM classifier
clf = SVC(kernel='rbf', random_state=42)
clf.fit(X_train_scaled, y_train)

SVC(random_state=42)

In [13]:
# Make predictions
y_pred = clf.predict(X_test_scaled)

In [14]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(classification_report(y_test, y_pred))

Accuracy: 0.84
              precision    recall  f1-score   support

          No       0.85      0.97      0.90      8799
         Yes       0.77      0.39      0.52      2485

    accuracy                           0.84     11284
   macro avg       0.81      0.68      0.71     11284
weighted avg       0.83      0.84      0.82     11284

