In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the preprocessed rainfall dataset from a CSV in the working directory.
file_path = 'final_processed_dataset.csv'
rainfall_data = pd.read_csv(file_path)

# Define features (X) and target (y): 'precipMM' is the regression target, and
# the 'date_time' column is excluded from the feature matrix.
# DataFrame.drop returns a new frame, so X can be modified below without
# altering rainfall_data.
X = rainfall_data.drop(columns=['precipMM', 'date_time'])
y = rainfall_data['precipMM']

# Integer-encode the calendar columns ('month', 'day', 'hour').
# A fresh LabelEncoder is fitted per column and stored in `label_encoders`, so
# every column's mapping stays available (e.g. for inverse_transform or for
# encoding new rows). Re-fitting a single shared encoder would keep only the
# mapping of the last column. After the loop `label_encoder` still refers to
# the encoder fitted on the last column, 'hour'.
categorical_columns = ['month', 'day', 'hour']
label_encoders = {}
for col in categorical_columns:
    label_encoder = LabelEncoder()
    X[col] = label_encoder.fit_transform(X[col])
    label_encoders[col] = label_encoder

# Split the dataset into training and test sets (80% train, 20% test); the
# fixed random_state makes the shuffle reproducible.
# NOTE(review): the rows appear to be time-indexed (a 'date_time' column is
# dropped above). A shuffled split can place temporally adjacent observations
# in both sets; consider a chronological split if that matters -- TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training set only and
# then applied to the test set, so test statistics do not leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Build a 100-tree random forest with a fixed seed and fit it on the scaled
# training features; fit() hands back the fitted estimator itself.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Generate predictions for the held-out test rows.
rf_predictions = rf_model.predict(X_test_scaled)

# Compute MSE, MAE and R² (in that order) against the true test targets.
rf_mse, rf_mae, rf_r2 = (
    score(y_test, rf_predictions)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report the three metrics.
print(f"Random Forest Evaluation:\nMSE: {rf_mse}\nMAE: {rf_mae}\nR²: {rf_r2}")


Random Forest Evaluation:
MSE: 0.22757278302049253
MAE: 0.1319801963147925
R²: 0.6697222605860944


In [3]:
from sklearn.svm import SVR

# Support vector regressor with the library's default hyperparameters.
svr_model = SVR()

# Fit on the standardized training features.
svr_model.fit(X_train_scaled, y_train)

# Predictions for the standardized test features.
svr_predictions = svr_model.predict(X_test_scaled)

# Error metrics on the test set, assigned in one step: MSE, MAE, R².
svr_mse, svr_mae, svr_r2 = (
    mean_squared_error(y_test, svr_predictions),
    mean_absolute_error(y_test, svr_predictions),
    r2_score(y_test, svr_predictions),
)

# Report the three metrics.
print(f"Support Vector Regressor Evaluation:\nMSE: {svr_mse}\nMAE: {svr_mae}\nR²: {svr_r2}")


Support Vector Regressor Evaluation:
MSE: 0.4491219586399516
MAE: 0.17482563152507352
R²: 0.3481866185755986


In [4]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor using the 5 closest training points, fitted
# on the scaled training features (fit() returns the estimator itself).
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict the target for every test row.
knn_predictions = knn_model.predict(X_test_scaled)

# Evaluate against the true test targets: MSE, MAE and R², in that order.
knn_mse, knn_mae, knn_r2 = (
    score(y_test, knn_predictions)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report the three metrics.
print(f"K-Nearest Neighbors Evaluation:\nMSE: {knn_mse}\nMAE: {knn_mae}\nR²: {knn_r2}")


K-Nearest Neighbors Evaluation:
MSE: 0.2446637162045807
MAE: 0.13510246254520408
R²: 0.6449180871625684


In [5]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Gradient-boosted trees: 100 boosting rounds, seeded for reproducibility.
xgb_model = XGBRegressor(n_estimators=100, random_state=42)

# Fit on the scaled training split.
xgb_model.fit(X_train_scaled, y_train)

# Score the scaled test split.
xgb_predictions = xgb_model.predict(X_test_scaled)

# Test-set metrics, assigned together: MSE, MAE, R².
xgb_mse, xgb_mae, xgb_r2 = (
    mean_squared_error(y_test, xgb_predictions),
    mean_absolute_error(y_test, xgb_predictions),
    r2_score(y_test, xgb_predictions),
)

# Report the three metrics.
print(f"XGBoost Evaluation:\nMSE: {xgb_mse}\nMAE: {xgb_mae}\nR²: {xgb_r2}")


XGBoost Evaluation:
MSE: 0.2503324617224022
MAE: 0.1558910407539692
R²: 0.6366910029300474
