In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset (ensure 'Rainfall' is the target variable)
data = pd.read_csv('Weather_data.csv')

# Create a new 'Rainfall' column from 'precip_mm' if it doesn't exist
if 'Rainfall' not in data.columns and 'precip_mm' in data.columns:
    data['Rainfall'] = data['precip_mm']
elif 'Rainfall' not in data.columns:
    raise KeyError("'Rainfall' column is missing and 'precip_mm' is also not available. Please check the dataset.")

# One-hot encoding for categorical features
data = pd.get_dummies(data, drop_first=True)  # Convert categorical variables into dummy/indicator variables

# Assuming 'Rainfall' is the target variable
X = data.drop(columns=['Rainfall'])  # Features
y = data['Rainfall']  # Target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Base models (base learners)
base_learners = [
    ('rf', RandomForestRegressor(random_state=42)),  # Random Forest
    ('gb', GradientBoostingRegressor(random_state=42))  # Gradient Boosting
]

# Meta-model (Linear Regression in this case)
meta_model = LinearRegression()

# Stacking Regressor
stacking_model = StackingRegressor(
    estimators=base_learners,  # List of base learners
    final_estimator=meta_model,  # Meta-model
    cv=5  # Cross-validation for base models
)

# Train the Stacking Regressor
print("Stacking Regressor:")
stacking_model.fit(X_train, y_train)

# Make predictions
y_pred_stacking = stacking_model.predict(X_test)

# Calculate metrics
mse_stacking = mean_squared_error(y_test, y_pred_stacking)
r2_stacking = r2_score(y_test, y_pred_stacking)

# Define a custom accuracy function for regression
def calculate_accuracy(y_true, y_pred, tolerance=0.1):
    return (abs(y_true - y_pred) <= tolerance).mean()

# Calculate custom accuracy
accuracy_stacking = calculate_accuracy(y_test, y_pred_stacking)

# Output results
print(f'MSE: {mse_stacking}, R^2: {r2_stacking}, Accuracy: {accuracy_stacking}\n')

Stacking Regressor:
MSE: 0.00023500050404377335, R^2: 0.9998067737831569, Accuracy: 0.9968148455892536

