In [2]:
!pip install pandas numpy scikit-learn joblib streamlit shap matplotlib seaborn plotly


Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m92.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m91.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.1


In [1]:
!pip install -U scikit-learn




In [3]:
from pathlib import Path

from google.colab import files

# Re-running this cell would prompt for another upload even though the archive is
# already in the session, so only ask when archive.zip is missing.
if Path('archive.zip').exists():
    uploaded = {}
else:
    uploaded = files.upload()


Saving archive.zip to archive.zip


In [3]:
"""
train_model.py (Colab-Compatible)
Trains regression model on flood.csv and saves model + metrics.
"""

import zipfile
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import json

# Input: the zip archive read by the training code and the CSV member inside it.
CSV_NAME = 'flood.csv'
DATA_ZIP = Path('archive.zip')

# Outputs: the fitted pipeline (joblib) and the evaluation metrics (JSON text).
METRICS_OUT = Path('metrics.txt')
MODEL_OUT = Path('model.joblib')

def load_from_zip(zip_path: Path, csv_name: str):
    """Read one CSV member of a zip archive into a DataFrame.

    Parameters
    ----------
    zip_path : Path
        Location of the zip archive.
    csv_name : str
        Name of the CSV member. It is first tried as an exact archive path; if
        no such member exists, a member whose base file name equals
        ``csv_name`` is used instead, so archives that wrap the CSV in a
        folder still load. When several members match, the shortest path wins.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.

    Raises
    ------
    KeyError
        If the archive contains no member matching ``csv_name``.
    """
    with zipfile.ZipFile(zip_path) as archive:
        members = archive.namelist()
        if csv_name in members:
            member = csv_name
        else:
            # Fall back to a base-name match; entries ending in '/' are directories.
            candidates = [
                name for name in members
                if not name.endswith('/') and name.rsplit('/', 1)[-1] == csv_name
            ]
            if not candidates:
                raise KeyError(
                    f"{csv_name!r} not found in {zip_path}; archive contains: {members}"
                )
            member = min(candidates, key=len)

        with archive.open(member) as fh:
            return pd.read_csv(fh)

def prepare_data(df: pd.DataFrame, target: str = 'FloodProbability'):
    """Split a raw frame into a numeric feature matrix and a target vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw dataset that contains the target column.
    target : str, default 'FloodProbability'
        Name of the column to predict.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        A copy of the numeric feature columns (the target and every
        non-numeric column are left out) and the target values as an array.

    Raises
    ------
    ValueError
        If ``target`` is not a column of ``df``.
    """
    if target not in df.columns:
        raise ValueError(f"Target column '{target}' not found in dataset.")

    features = df.drop(columns=[target])
    y = df[target].values

    # Only numeric columns are kept; the copy keeps the result independent of ``df``.
    num_cols = features.select_dtypes(include=[np.number]).columns.tolist()
    X_num = features[num_cols].copy()

    return X_num, y

def rmse(y_true, y_pred):
    """Root-mean-squared error between ``y_true`` and ``y_pred``.

    The square root is applied here with NumPy on top of sklearn's MSE, so the
    ``squared`` argument of ``mean_squared_error`` is never used.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

def _fit_and_score(pipe, X_train, X_test, y_train, y_test):
    """Fit ``pipe`` on the training split and return ``(rmse, r2)`` on the test split."""
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    return rmse(y_test, y_pred), r2_score(y_test, y_pred)


def main():
    """Train Ridge and RandomForest regressors and persist the better one.

    Loads ``CSV_NAME`` from ``DATA_ZIP``, holds out 20% of the rows as a test
    split, fits both pipelines on the remaining rows, keeps the pipeline with
    the lower test RMSE (RandomForest wins ties), writes it to ``MODEL_OUT``
    with joblib and writes both models' metrics to ``METRICS_OUT`` as JSON.
    """
    print("Loading data...")
    df = load_from_zip(DATA_ZIP, CSV_NAME)
    print("Shape:", df.shape)

    X, y = prepare_data(df)
    print("Features:", X.shape[1])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Pipeline does not clone its steps, so every pipeline gets its own imputer
    # instance; a shared one would be refit (and silently replaced inside the
    # first pipeline) when the second pipeline is fitted.

    # Baseline model: Ridge
    ridge_pipe = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('ridge', Ridge())
    ])

    # Random forest pipeline
    rf_pipe = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('rf', RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1))
    ])

    print("Training baseline (Ridge)...")
    ridge_rmse, ridge_r2 = _fit_and_score(ridge_pipe, X_train, X_test, y_train, y_test)

    print("Training RandomForest...")
    rf_rmse, rf_r2 = _fit_and_score(rf_pipe, X_train, X_test, y_train, y_test)

    # NOTE(review): the winner is picked on the same test split whose score is
    # reported, so the reported score of the chosen model is optimistic; consider
    # a separate validation split or cross-validation for the selection step.
    # NOTE(review): the recorded run shows Ridge with R2 of about 1.0, which
    # suggests the target is (almost) an exact linear function of the features;
    # confirm this is a property of the dataset and not leakage.
    if rf_rmse <= ridge_rmse:
        best = rf_pipe
        chosen = "RandomForest"
        best_rmse, best_r2 = rf_rmse, rf_r2
    else:
        best = ridge_pipe
        chosen = "Ridge"
        best_rmse, best_r2 = ridge_rmse, ridge_r2

    print(f"Chosen model: {chosen}, RMSE={best_rmse:.4f}, R2={best_r2:.4f}")

    joblib.dump(best, MODEL_OUT)
    print("Saved model to", MODEL_OUT)

    # Cast to plain floats so the values are JSON-serialisable.
    metrics = {
        'ridge': {'rmse': float(ridge_rmse), 'r2': float(ridge_r2)},
        'random_forest': {'rmse': float(rf_rmse), 'r2': float(rf_r2)},
        'chosen': {'model': chosen, 'rmse': float(best_rmse), 'r2': float(best_r2)}
    }

    with open(METRICS_OUT, 'w') as f:
        json.dump(metrics, f, indent=2)
    print("Saved metrics to", METRICS_OUT)

if __name__ == '__main__':
    main()





Loading data...
Shape: (50000, 21)
Features: 20
Training baseline (Ridge)...
Training RandomForest...
Chosen model: Ridge, RMSE=0.0000, R2=1.0000
Saved model to model.joblib
Saved metrics to metrics.txt


In [4]:
import json

# Read back the JSON written by the training cell; the bare name on the last
# line makes the dict this cell's displayed output.
with open("metrics.txt") as metrics_file:
    metrics = json.loads(metrics_file.read())
metrics


{'ridge': {'rmse': 1.2443422668261952e-06, 'r2': 0.9999999993783039},
 'random_forest': {'rmse': 0.02581144784054158, 'r2': 0.7325003400116332},
 'chosen': {'model': 'Ridge',
  'rmse': 1.2443422668261952e-06,
  'r2': 0.9999999993783039}}

In [5]:
import joblib
import pandas as pd

model = joblib.load("model.joblib")

# By default the final pipeline step is fed the NumPy array produced by the
# preceding transformers, so it does not record feature_names_in_ itself.
# Recover the column names from the preprocessing part of the pipeline instead.
feature_names = model[:-1].get_feature_names_out()

if "rf" in model.named_steps:
    rf = model.named_steps['rf']
    importances = pd.DataFrame({"feature": feature_names, "importance": rf.feature_importances_})
    display(importances.sort_values("importance", ascending=False).head(10))
elif "ridge" in model.named_steps:
    # The Ridge step sits behind a StandardScaler, so these coefficients are on
    # the standardised feature scale; rank them by absolute size.
    ridge = model.named_steps['ridge']
    coefficients = pd.DataFrame({"feature": feature_names, "coefficient": ridge.coef_})
    display(coefficients.sort_values("coefficient", key=abs, ascending=False).head(10))


In [21]:
!ngrok config add-authtoken 32fbTlqM5dc091ViFiP8xmBr6g1_3g9a5pDhUVhSXwZfc4cg3


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [6]:
!pip install streamlit pyngrok


Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [23]:
!pip install streamlit pyngrok -q

In [24]:
# Print the version of the installed Streamlit command-line tool.
!streamlit --version


Streamlit, version 1.49.1


In [33]:
%%writefile app.py
import streamlit as st
import joblib
import numpy as np

# Load your trained model
model = joblib.load("model.joblib")
st.title("Flood Risk Prediction")

# Try to get feature names from the model
try:
    feature_names = model.feature_names_in_
except AttributeError:
    st.warning("Model does not have stored feature names. Using generic feature names.")
    feature_names = [f"feature_{i+1}" for i in range(20)]

# Create input fields dynamically
input_values = []
st.subheader("Enter feature values:")
for name in feature_names:
    val = st.number_input(f"{name}:", value=0.0)
    input_values.append(val)

# Predict button
if st.button("Predict"):
    X = np.array([input_values])
    try:
        risk = model.predict(X)[0]
        st.success(f"Flood Risk Prediction: {risk}")
    except Exception as e:
        st.error(f"Error during prediction: {e}")


Overwriting app.py


In [36]:
!pip install streamlit pyngrok -q

from pyngrok import ngrok
import os
import time

port = 8501

# Start Streamlit in background
os.system(f"streamlit run app.py --server.port {port} &")

# Wait a few seconds to ensure Streamlit starts
time.sleep(5)

# Open ngrok tunnel
public_url = ngrok.connect(port)
print(f" Your Streamlit app is live here: {public_url}")


 Your Streamlit app is live here: NgrokTunnel: "https://4bf54d280271.ngrok-free.app" -> "http://localhost:8501"
