In [None]:
# 🔹 STEP 0: Install requirements (if needed)
!pip install pandas scikit-learn joblib

: 

In [8]:
# 🔹 STEP 1: Upload CSV
# Colab-only helper module; it is also used at the end of the notebook to download the model.
from google.colab import files
# Opens the upload widget. The keys of `uploaded` are used as file names by the
# loading cell below (presumably the values are the raw file bytes — TODO confirm).
uploaded = files.upload()

Saving ecostat_sample_dataset.csv to ecostat_sample_dataset.csv


In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [10]:
# Load dataset
# `uploaded` is empty when no file was selected in the upload cell; fail with a
# clear message instead of an IndexError from indexing an empty list.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select a CSV file.")

# Take the first uploaded file name and read it as CSV.
filename = next(iter(uploaded))
df = pd.read_csv(filename)

In [11]:
# 🔹 STEP 2: Data Cleaning
# Encode the boolean-like 'rainwater_harvesting' column as 0/1 integers.
# '1'/'0' are accepted as well so that re-running this cell on an already
# cleaned frame is a no-op instead of an error.
flag_codes = {'true': 1, 'false': 0, '1': 1, '0': 0}
rainwater_flags = (
    df['rainwater_harvesting']
    .astype(str)
    .str.strip()
    .str.lower()
    .map(flag_codes)
)

# Anything outside the mapping (missing values, typos, ...) becomes NaN after
# .map(); report the offending values instead of failing inside astype(int).
unmapped = rainwater_flags.isna()
if unmapped.any():
    bad_values = sorted(df.loc[unmapped, 'rainwater_harvesting'].astype(str).unique())
    raise ValueError(f"Unexpected rainwater_harvesting values: {bad_values}")

# Build the cleaned frame without in-place mutation. errors='ignore' lets the
# cell be re-run after the identifier columns have already been dropped.
df = (
    df.assign(rainwater_harvesting=rainwater_flags.astype(int))
      .rename(columns={'tree_coverage': 'green_cover'})
      .drop(columns=['community_name', 'location', 'population'], errors='ignore')
)

In [12]:
# 🔹 STEP 3: Dummy Score Generation
# Min-max scale every column of df so the indicators can be combined on a
# common scale in the next cell.
scaler = MinMaxScaler()
normalized = scaler.fit_transform(df)
# fit_transform returns a bare array; restore the column labels and reuse df's
# index so that columns derived from df_norm align row-for-row when assigned to df.
df_norm = pd.DataFrame(normalized, columns=df.columns, index=df.index)

In [13]:
# Equal-weight blend of the five normalized indicators, multiplied by 10.
# Energy usage and pollution level are inverted (1 - value) so that lower
# readings contribute a higher score.
indicator_weight = 0.2
score_terms = [
    1 - df_norm['energy_usage'],
    1 - df_norm['pollution_level'],
    df_norm['recycling_rate'],
    df_norm['green_cover'],
    df_norm['rainwater_harvesting'],
]

# Accumulate strictly left to right, the same order as a plain a + b + c + ... sum.
weighted_sum = score_terms[0] * indicator_weight
for term in score_terms[1:]:
    weighted_sum = weighted_sum + term * indicator_weight

# Scale and keep two decimals.
df['sustainability_score'] = (weighted_sum * 10).round(2)

In [14]:
# 🔹 STEP 4: Model Training
# The generated score is the regression target; every other column is a feature.
target_column = 'sustainability_score'
y = df[target_column]
X = df.drop(columns=[target_column])

In [15]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Random forest regressor with a fixed seed so repeated runs build the same forest.
model = RandomForestRegressor(random_state=42)
# Trained on the raw (unscaled) feature columns of the training split.
model.fit(X_train, y_train)

In [17]:
# Predict on the held-out split and report each metric on its own line.
y_pred = model.predict(X_test)
for label, metric in (("MSE:", mean_squared_error), ("R² Score:", r2_score)):
    print(label, metric(y_test, y_pred))

MSE: 0.5994051519999992
R² Score: 0.7298906088900108


In [18]:
# 🔹 STEP 5: Save the model
# Serialize the fitted estimator to disk, then hand the file to the browser.
model_path = "sustainability_model.pkl"
joblib.dump(model, model_path)
files.download(model_path)  # ⬅️ This lets you download the model

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
# # Load the saved model (no scaler is saved: the model was trained on the raw, unscaled feature columns)
# loaded_model = joblib.load("sustainability_model.pkl")

In [35]:
# # 🔸 Example input (replace values as needed)
# sample_input = pd.DataFrame([{
#     'energy_usage': 65.0,
#     'pollution_level': 30.0,
#     'recycling_rate': 70.0,
#     'green_cover': 25.0,
#     'rainwater_harvesting': 1
# }])

In [36]:
# predicted_score = loaded_model.predict(sample_input)[0]
# print(f"Predicted Sustainability Score: {round(predicted_score, 2)} / 10")

Predicted Sustainability Score: 7.44 / 10
