In [2]:
# Required Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import ExtraTreesClassifier  # Extra Trees Classifier

# Load your dataset (ensure 'Rainfall' is the target variable)
data = pd.read_csv('Weather_data.csv')

# Create a new 'Rainfall' column from 'precip_mm' if it doesn't exist
if 'Rainfall' not in data.columns and 'precip_mm' in data.columns:
    data['Rainfall'] = data['precip_mm']
elif 'Rainfall' not in data.columns:
    raise KeyError("'Rainfall' column is missing and 'precip_mm' is also not available. Please check the dataset.")

# Assuming 'Rainfall' is the target variable and binarizing it for classification
data['Rainfall'] = data['Rainfall'].apply(lambda x: 1 if x > 0 else 0)  # Binarizing for classification

# One-hot encoding for categorical features
data = pd.get_dummies(data, drop_first=True)  # Convert categorical variables into dummy/indicator variables

# Features and target
X = data.drop(columns=['Rainfall'])  # Features
y = data['Rainfall']  # Target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Extra Trees Classifier (Extremely Randomized Trees)
print("Extra Trees Classifier:")
model_et = ExtraTreesClassifier(random_state=42)
model_et.fit(X_train, y_train)

# Make predictions
y_pred_et = model_et.predict(X_test)

# Calculate metrics
accuracy_et = accuracy_score(y_test, y_pred_et)
confusion_et = confusion_matrix(y_test, y_pred_et)

# Output results
print(f'Accuracy: {accuracy_et}')
print('Confusion Matrix:')
print(confusion_et)


Extra Trees Classifier:
Accuracy: 0.9850436227669298
Confusion Matrix:
[[5002   19]
 [  89 2111]]
