In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.pipeline import make_pipeline

In [None]:
# Read the flood dataset into `df` from a CSV in the working directory
# (replace the path below with the location of your own dataset file).
df = pd.read_csv(filepath_or_buffer='flood.csv')

In [None]:
# Separate the regression target from the predictors.
# Assumes the loaded frame has a 'FloodProbability' column to use as the target.
y = df['FloodProbability']
X = df.drop(columns='FloodProbability')

In [None]:
# Registry of results: maps a model name to a dict of its test-set metrics.
performance_metrics = dict()

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def print_and_store_metrics(model_name, y_test, y_pred, tolerance=0.1):
    """Print regression error metrics for one model and record them.

    Computes MSE, RMSE, MAE and R2 of ``y_pred`` against ``y_test``, prints
    them, and merges them into the module-level ``performance_metrics`` dict
    under ``model_name``.

    Parameters
    ----------
    model_name : str
        Label used as the printed heading and as the key in
        ``performance_metrics``.
    y_test : array-like
        True target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.
    tolerance : float, default 0.1
        Currently unused; kept so existing callers that pass it keep working.

    Returns
    -------
    None
        The results are printed and stored as a side effect.
    """
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print the metrics at full precision (no rounding is applied)
    print(f'{model_name}:')
    print(f'MSE: {mse}')
    print(f'RMSE: {rmse}')
    print(f'MAE: {mae}')
    print(f'R2: {r2}')

    # Merge into any existing entry instead of replacing it: the model cells
    # store extra metrics (e.g. 'Explained Variance') under the same key before
    # calling this function, and a plain assignment would silently drop them.
    performance_metrics.setdefault(model_name, {}).update({
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
    })


In [None]:
# Linear Regression: fit on the training split, then predict the test split.
linear_model = LinearRegression().fit(X_train, y_train)
linear_pred = linear_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Linear Regression'] = {
    metric_label: scorer(y_test, linear_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}


In [None]:
# Print the Linear Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Linear Regression',
    y_test=y_test,
    y_pred=linear_pred,
)

In [None]:
# Ridge Regression (L2 penalty, alpha=1.0): fit on the training split, then
# predict the test split.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Ridge Regression'] = {
    metric_label: scorer(y_test, ridge_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}


In [None]:
# Print the Ridge Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Ridge Regression',
    y_test=y_test,
    y_pred=ridge_pred,
)

In [None]:
# Lasso Regression (L1 penalty, alpha=1.0): fit on the training split, then
# predict the test split.
# NOTE(review): if the target is on a 0-1 scale (the name 'FloodProbability'
# suggests so), alpha=1.0 may be strong enough to shrink every coefficient to
# zero - confirm against the reported R2.
lasso_model = Lasso(alpha=1.0).fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Lasso Regression'] = {
    metric_label: scorer(y_test, lasso_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}

In [None]:
# Print the Lasso Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Lasso Regression',
    y_test=y_test,
    y_pred=lasso_pred,
)

In [None]:
# Polynomial Regression: expand the features to degree-2 polynomial terms and
# fit an ordinary linear regression on the expanded features (one pipeline).
poly_model = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression(),
).fit(X_train, y_train)
poly_pred = poly_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Polynomial Regression'] = {
    metric_label: scorer(y_test, poly_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}

In [None]:
# Print the Polynomial Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Polynomial Regression',
    y_test=y_test,
    y_pred=poly_pred,
)

In [None]:
# Support Vector Regression with a linear kernel: fit on the training split,
# then predict the test split.
# NOTE(review): the features are passed in unscaled; SVR is generally
# sensitive to feature scale, and StandardScaler is imported but unused in the
# cells shown - confirm whether scaling was intended here.
svr_model = SVR(kernel='linear').fit(X_train, y_train)
svr_pred = svr_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['SVR'] = {
    metric_label: scorer(y_test, svr_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}

In [None]:
# Print the SVR test-set metrics and record them in performance_metrics.
print_and_store_metrics(
    model_name='SVR',
    y_test=y_test,
    y_pred=svr_pred,
)

In [None]:
# Decision Tree Regression
# random_state is pinned because an unseeded tree can differ between runs
# (scikit-learn randomly permutes the candidate features at each split), which
# would make the reported metrics non-reproducible. 42 is the same seed value
# used for the train/test split.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Decision Tree Regression'] = {
    'MSE': mean_squared_error(y_test, dt_pred),
    'R2': r2_score(y_test, dt_pred),
    'MAE': mean_absolute_error(y_test, dt_pred),
    'Explained Variance': explained_variance_score(y_test, dt_pred)
}

In [None]:
# Print the Decision Tree Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Decision Tree Regression',
    y_test=y_test,
    y_pred=dt_pred,
)

In [None]:
# Random Forest Regression: 100 trees with a fixed seed; fit on the training
# split, then predict the test split.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Random Forest Regression'] = {
    metric_label: scorer(y_test, rf_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}



In [None]:
# Fit a second random forest, `rfc`, on the training split. It is configured
# with the same n_estimators and random_state as `rf_model`.
# NOTE(review): nothing in the cells shown uses `rfc` - confirm a later cell
# needs it before paying for a second fit.
rfc = RandomForestRegressor(random_state=42, n_estimators=100)
rfc.fit(X=X_train, y=y_train)



In [None]:
# Two-stop palette running from a light blue to a dark blue.
colors = ["#9bb7d4", "#0f4c81"]
# Interpolate between the two stops to get a continuous colormap; the
# colormap is given an empty name.
cmap = matplotlib.colors.LinearSegmentedColormap.from_list(name="", colors=colors)


In [None]:
# Print the Random Forest Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Random Forest Regression',
    y_test=y_test,
    y_pred=rf_pred,
)

In [None]:
# Gradient Boosting Regression: 100 boosting stages of depth-3 trees with a
# learning rate of 0.1 and a fixed seed; fit on the training split, then
# predict the test split.
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Gradient Boosting Regression'] = {
    metric_label: scorer(y_test, gb_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}


In [None]:
# Print the Gradient Boosting Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Gradient Boosting Regression',
    y_test=y_test,
    y_pred=gb_pred,
)

In [None]:
# Bayesian Ridge Regression with default settings: fit on the training split,
# then predict the test split.
bayesian_model = BayesianRidge().fit(X_train, y_train)
bayesian_pred = bayesian_model.predict(X_test)

# Record this model's test-set scores (metric label -> value).
performance_metrics['Bayesian Ridge Regression'] = {
    metric_label: scorer(y_test, bayesian_pred)
    for metric_label, scorer in (
        ('MSE', mean_squared_error),
        ('R2', r2_score),
        ('MAE', mean_absolute_error),
        ('Explained Variance', explained_variance_score),
    )
}


In [None]:
# Print the Bayesian Ridge Regression test-set metrics and record them in
# performance_metrics.
print_and_store_metrics(
    model_name='Bayesian Ridge Regression',
    y_test=y_test,
    y_pred=bayesian_pred,
)

In [None]:
# Summary table of everything recorded in performance_metrics: transposed so
# that each model is a row and each metric is a column, then printed.
performance_df = pd.DataFrame(performance_metrics).transpose()
print(performance_df)