In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Location of the source workbook (absolute path, as used when this notebook was run).
DATASET_PATH = r'/content/SIH-FINAL-DATASET.xlsx'
# Load the workbook into a DataFrame.
mine = pd.read_excel(DATASET_PATH)

In [None]:
# Show the loaded frame to inspect its columns and values.
mine

Unnamed: 0,Material,Amount (tons),Route,Energy Source,Transport Distance (km),Carbon Footprint (kg CO2/kg),Circularity Score (0-100)
0,Aluminium,163,Both,Both,313,8.54,51
1,Aluminium,178,Ore,Electricity,133,8.57,10
2,Aluminium,127,Both,Coal,330,8.63,52
3,Aluminium,182,Recycled,Coal,412,2.21,93
4,Aluminium,107,Ore,Electricity,212,8.61,10
5,Aluminium,185,Ore,Coal,496,16.95,10
6,Aluminium,199,Recycled,Coal,193,1.99,94
7,Aluminium,174,Ore,Coal,373,16.87,10
8,Aluminium,134,Recycled,Coal,347,2.15,94
9,Aluminium,161,Recycled,Electricity,88,0.89,90


In [None]:
# One-hot encode the categorical columns.
# `dtype=int` makes only the newly created indicator columns 0/1 integers and
# leaves the numeric columns untouched. Casting the whole frame with
# `.astype(int)` would truncate the fractional 'Carbon Footprint (kg CO2/kg)'
# target (e.g. 8.54 -> 8, 0.89 -> 0) before it is used for training.
mine_encoded = pd.get_dummies(
    mine,
    columns=['Material', 'Route', 'Energy Source'],
    dtype=int,
)

In [None]:
# Show the encoded frame to check the indicator columns produced by get_dummies.
mine_encoded

Unnamed: 0,Amount (tons),Transport Distance (km),Carbon Footprint (kg CO2/kg),Circularity Score (0-100),Material_Aluminium,Material_Copper,Route_Both,Route_Ore,Route_Recycled,Energy Source_Both,Energy Source_Coal,Energy Source_Electricity
0,163,313,8,51,1,0,1,0,0,1,0,0
1,178,133,8,10,1,0,0,1,0,0,0,1
2,127,330,8,52,1,0,1,0,0,0,1,0
3,182,412,2,93,1,0,0,0,1,0,1,0
4,107,212,8,10,1,0,0,1,0,0,0,1
5,185,496,16,10,1,0,0,1,0,0,1,0
6,199,193,1,94,1,0,0,0,1,0,1,0
7,174,373,16,10,1,0,0,1,0,0,1,0
8,134,347,2,94,1,0,0,0,1,0,1,0
9,161,88,0,90,1,0,0,0,1,0,0,1


In [None]:
# The two quantities to predict; every other column of the encoded frame is a feature.
target_columns = ['Carbon Footprint (kg CO2/kg)', 'Circularity Score (0-100)']
y = mine_encoded[target_columns]
x = mine_encoded.drop(columns=target_columns)

In [None]:
# Fit a min-max scaler on the targets and scale them. The fitted scaler is kept
# so that predictions can later be mapped back with inverse_transform.
scaler_y = MinMaxScaler().fit(y)
y_scaled = scaler_y.transform(y)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train_scaled, y_test_scaled = train_test_split(
    x,
    y_scaled,
    test_size=0.2,
    random_state=55,
)

In [None]:
# Base learner: a 100-tree random forest with a fixed seed.
model = RandomForestRegressor(random_state=42, n_estimators=100)
# Wrap the forest so it can be fitted against both (scaled) target columns.
multi_model = MultiOutputRegressor(estimator=model)
multi_model.fit(x_train, y_train_scaled)

In [None]:
# Undo the target scaling on the held-out truth so it is back in original units.
y_test_orig = scaler_y.inverse_transform(y_test_scaled)
# Predict in scaled space, then map the predictions back the same way.
y_pred_scaled = multi_model.predict(x_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

In [None]:
# Column 0 of the target arrays is the carbon footprint.
cf_actual = y_test_orig[:, 0]
cf_predicted = y_pred[:, 0]
mse_cf = mean_squared_error(cf_actual, cf_predicted)
r2_cf = r2_score(cf_actual, cf_predicted)

In [None]:
# Column 1 of the target arrays is the circularity score.
cs_actual = y_test_orig[:, 1]
cs_predicted = y_pred[:, 1]
mse_cs = mean_squared_error(cs_actual, cs_predicted)
r2_cs = r2_score(cs_actual, cs_predicted)

In [None]:
# Report MSE and R2 on the held-out rows for each target, computed in original (unscaled) units.
print(f'Carbon Footprint -> MSE: {mse_cf:.2f}, R2: {r2_cf:.2f}')
print(f'Circularity Score -> MSE: {mse_cs:.2f}, R2: {r2_cs:.2f}')

Carbon Footprint -> MSE: 0.36, R2: 0.99
Circularity Score -> MSE: 0.38, R2: 1.00


In [None]:
# A single hand-encoded record to score: aluminium, route "Both",
# energy source "Electricity" (exactly one indicator set per category).
new_data = pd.DataFrame([
    {
        'Amount (tons)': 150,
        'Transport Distance (km)': 200,
        'Material_Aluminium': 1,
        'Material_Copper': 0,
        'Route_Ore': 0,
        'Route_Recycled': 0,
        'Route_Both': 1,
        'Energy Source_Coal': 0,
        'Energy Source_Electricity': 1,
        'Energy Source_Both': 0,
    }
])

In [None]:
# Align the record with the training feature columns (same names, same order).
# Training columns absent from new_data are filled with 0; columns not seen in
# training are dropped.
new_data = new_data.reindex(
    labels=x_train.columns,
    axis='columns',
    fill_value=0,
)

In [None]:
# Predict for the new record in scaled target space, then convert the result
# back to original units with the same scaler that was fitted on the targets.
new_pred_scaled = multi_model.predict(new_data)
new_pred = scaler_y.inverse_transform(new_pred_scaled)

In [None]:
# new_pred has one row; column 0 is the carbon footprint, column 1 the circularity score.
print(f'Predicted Carbon Footprint: {new_pred[0][0]:.2f} kg CO2/kg')
print(f'Predicted Circularity Score: {new_pred[0][1]:.2f}')

Predicted Carbon Footprint: 8.00 kg CO2/kg
Predicted Circularity Score: 49.21


In [None]:
import joblib

In [None]:
# Persist the fitted multi-output model to disk.
joblib.dump(multi_model, 'SIH_predict.pkl')

# Persist the fitted target scaler as well: predictions come out in scaled
# space and need its inverse_transform to be converted back to original units.
joblib.dump(scaler_y, 'predict_scaler.pkl')

['predict_scaler.pkl']