In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Train and evaluate a hard-voting ensemble (neural network + random forest +
# logistic regression) that predicts whether any rainfall was recorded.
# Expects a pandas DataFrame named `data` to already exist in the session.

if 'Rainfall' not in data.columns:
    # NOTE(review): the message mentions a 'precip_mm' fallback, but nothing in
    # this cell looks for that column -- confirm an earlier step handles it, or
    # reword the message.
    raise KeyError("'Rainfall' column is missing and 'precip_mm' is also not available. Please check the dataset.")

# Binarize the target for classification: 1 when rainfall is strictly
# positive, 0 otherwise. The comparison is vectorized, so no Python-level
# function is called per row; missing values compare as False and become 0.
data['Rainfall'] = (data['Rainfall'] > 0).astype(int)

# One-hot encode the categorical columns. drop_first=True omits the first
# level of each category so the indicator columns are not redundant.
data = pd.get_dummies(data, drop_first=True)

# Separate the predictors from the binary target.
X = data.drop(columns=['Rainfall'])
y = data['Rainfall']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only and
# then reused on the test rows, so no test-set statistics reach the models.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base estimators. The iteration caps are raised above the library defaults to
# give the optimizers room to converge.
ann_model = MLPClassifier(max_iter=1000, random_state=42)
rf_model = RandomForestClassifier(random_state=42)
lr_model = LogisticRegression(max_iter=2000, random_state=42)

# Hard voting: each estimator casts one vote and the majority class wins.
voting_model = VotingClassifier(
    estimators=[
        ('ann', ann_model),
        ('rf', rf_model),
        ('lr', lr_model),
    ],
    voting='hard',
)

# Fit the ensemble on the scaled training data.
print("Voting Classifier:")
voting_model.fit(X_train_scaled, y_train)

# Predict on the held-out rows and score the predictions.
y_pred_voting = voting_model.predict(X_test_scaled)
accuracy_voting = accuracy_score(y_test, y_pred_voting)
confusion_voting = confusion_matrix(y_test, y_pred_voting)

# Report the results.
print(f'Accuracy: {accuracy_voting}')
print('Confusion Matrix:')
print(confusion_voting)

Voting Classifier:
Accuracy: 0.989613626921479
Confusion Matrix:
[[5015    6]
 [  69 2131]]
