In [7]:
!pip install pygad
!pip install streamlit
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import xgboost as xgb
import pygad
import streamlit as st

# Load Dataset
dataset_path = 'expanded_carbon_dataset.csv'  # Replace with your actual dataset path
df = pd.read_csv(dataset_path)

# Preprocessing: Handling missing values, normalization
scaler = MinMaxScaler()

# Convert columns to numeric if possible before filling NaN
for col in df.columns:
    if df[col].dtype == 'object':  # Check if the column is of object type
        try:
            df[col] = pd.to_numeric(df[col], errors='coerce')  # Convert to numeric, invalid parsing will be set as NaN
        except:
            pass  # Skip if column cannot be converted

df.fillna(df.mean(), inplace=True)  # Now fill NaN after attempting conversion

scaled_features = scaler.fit_transform(df.drop(columns=['id', 'timestamp', 'user_type', 'country', 'sector', 'diet_impact']))

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(scaled_features, df['emissions_tons'], test_size=0.2,
                                                    random_state=42)

# LSTM Model for Time-Series Data
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(50),
    Dense(1)
])
lstm_model.compile(loss='mse', optimizer='adam')
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# ANN Model for Non-Linear Data
ann_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)
])
ann_model.compile(loss='mse', optimizer='adam')
ann_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# XGBoost Model for Enhanced Predictions
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model.fit(X_train, y_train)

# PyGAD Optimization to Suggest Reduction Strategies
def fitness_function(ga_instance, solution, solution_idx):
    """
    Calculates the fitness of a solution.

    Args:
        ga_instance: The instance of the pygad.GA class.
        solution: The solution to evaluate.
        solution_idx: The index of the solution within the population.

         Returns:
        The fitness value of the solution (negative prediction to minimize emissions).
    """
    prediction = xgb_model.predict([solution])[0]
    return -prediction  # Minimizing emissions


ga_instance = pygad.GA(num_generations=10, num_parents_mating=5, fitness_func=fitness_function,
                        sol_per_pop=10, num_genes=X_train.shape[1])
ga_instance.run()

# Final Model Evaluation
final_prediction = xgb_model.predict(X_test)
mse = np.mean((final_prediction - y_test) ** 2)
print(f"Final Model MSE: {mse}")




  super().__init__(**kwargs)


Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 1469.0291 - val_loss: 830.9280
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - loss: 832.6765 - val_loss: 830.8151
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step - loss: 825.4932 - val_loss: 71.2400
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 7ms/step - loss: 36.5412 - val_loss: 3.4308
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 10.6980 - val_loss: 1.7979
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step - loss: 7.7518 - val_loss: 1.4021
Epoch 7/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 6.0187 - val_loss: 0.9178
Epoch 8/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - loss: 5.0461 - val_loss: 1.1157
Ep

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 715.6452 - val_loss: 0.9653
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 19.9543 - val_loss: 0.1566
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 15.4448 - val_loss: 0.4812
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 12.3216 - val_loss: 0.0941
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 9.7897 - val_loss: 0.2369
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 7.4803 - val_loss: 0.3188
Epoch 7/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 5.5722 - val_loss: 0.2285
Epoch 8/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 3.9060 - val_loss: 0.1666
Epoch 9/10
[1m2500/2500[0

If you do not want to mutate any gene, please set mutation_type=None.


In [8]:
print("Expected features:", scaler.n_features_in_)


Expected features: 6


In [9]:

!pip install streamlit pyngrok --quiet

In [10]:
from pyngrok import ngrok

In [14]:
%%writefile app.py
import streamlit as st
import numpy as np
import joblib
import shap
import matplotlib.pyplot as plt
import pandas as pd
import google.generativeai as genai
import os

# ✅ Set Gemini API Key (Replace with your actual key)
os.environ["GEMINI_API_KEY"] = "AIzaSyCYF1gM_i6H9Gt9YuNsG1Vc-PlNRINQfe8"

# ✅ Configure Gemini API
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# ✅ Load Model & Scaler
try:
    scaler = joblib.load("scaler.pkl")
    xgb_model = joblib.load("xgb_model.pkl")
except Exception as e:
    st.error(f"Error loading models: {e}")

# ✅ Define feature names
feature_names = ["Energy Consumption (kWh)", "Transport Distance (km)", "Waste Generated (kg)",
                 "Diet Impact Score", "Sector Impact Score", "Industrial Energy Usage (kWh)"]

# ✅ Streamlit UI Configuration
st.set_page_config(page_title="🌍 Carbon Footprint Tracker & Sustainability Chatbot", layout="centered")

# ✅ **App Title**
st.title("🌍 Carbon Footprint Tracker")

# ✅ **User Instructions**
st.markdown("""
### ℹ️ **How to Use This App**
1. **Enter your details** such as energy usage, transport distance, and waste generation.
2. **Understand the scores:**
   - **Diet Impact Score** 🥗: Measures the environmental impact of your diet (0 = low impact, 1 = high impact).
   - **Sector Impact Score** 🏭: Reflects emissions based on your industry or business sector.
3. Click on **🔍 Predict Carbon Footprint** to get your estimated CO2 emissions.
4. Explore **📊 SHAP Analysis** to see which factors contribute most to your footprint.
5. Use **🚀 Reduction Strategies** to lower your emissions.
6. Chat with **💬 Gemini AI** for sustainability tips!
""")

# ✅ **User Inputs**
st.sidebar.header("🌱 **Enter Your Details**")

user_type = st.sidebar.selectbox("Select User Type", ["Individual", "Business"])
energy_usage = st.sidebar.number_input("⚡ Energy Consumption (kWh)", min_value=0.0)
transport_distance = st.sidebar.number_input("🚗 Transport Distance (km)", min_value=0.0)
waste_generated = st.sidebar.number_input("♻️ Waste Generated (kg)", min_value=0.0)
diet_impact = st.sidebar.slider("🥗 Diet Impact Score (0 = Low, 1 = High)", min_value=0.0, max_value=1.0, step=0.1)
sector_impact = st.sidebar.slider("🏭 Sector Impact Score (0 = Low, 1 = High)", min_value=0.0, max_value=1.0, step=0.1)
industrial_usage = st.sidebar.number_input("⚙️ Industrial Energy Usage (kWh)", min_value=0.0)

# ✅ **Carbon Footprint Prediction**
if st.sidebar.button("🔍 Predict Carbon Footprint"):
    try:
        # Prepare input data
        user_input = np.array([[energy_usage, transport_distance, waste_generated, diet_impact, sector_impact, industrial_usage]])
        user_input_scaled = scaler.transform(user_input)

        # Convert to DataFrame for SHAP compatibility
        user_input_df = pd.DataFrame(user_input_scaled, columns=feature_names)

        # Make prediction
        prediction = xgb_model.predict(user_input_scaled)
        st.subheader(f"🌱 **Estimated Carbon Footprint: {prediction[0]:.2f} tons CO2/year**")

        # Generate SHAP Explainer
        X_train_sample = pd.DataFrame(np.random.randn(10, len(feature_names)), columns=feature_names)
        explainer = shap.Explainer(xgb_model, X_train_sample)
        shap_values = explainer(user_input_df)

        # ✅ **SHAP Waterfall Plot**
        st.subheader("📊 SHAP Waterfall Plot - Feature Contribution")
        fig, ax = plt.subplots(figsize=(8, 6))
        shap.waterfall_plot(shap_values[0], show=False)
        st.pyplot(fig)

        # ✅ **SHAP Beeswarm Plot**
        st.subheader("📊 SHAP Beeswarm Plot - Feature Importance")
        fig, ax = plt.subplots(figsize=(8, 6))
        shap.summary_plot(shap_values, user_input_df, feature_names=feature_names, show=False)
        st.pyplot(fig)

        # ✅ **Personalized Reduction Strategies**
        st.subheader("🚀 Reduction Strategies")
        strategies = []

        if energy_usage > 300:
            strategies.append("🔋 Reduce energy consumption by switching to renewable sources like solar or wind.")
        if transport_distance > 200:
            strategies.append("🚲 Use public transport, carpool, or switch to electric vehicles.")
        if waste_generated > 50:
            strategies.append("♻️ Reduce waste by recycling, composting, and minimizing single-use plastics.")
        if diet_impact > 0.6:
            strategies.append("🥗 Consider a plant-based diet or reducing meat consumption to lower emissions.")
        if industrial_usage > 1000:
            strategies.append("🏭 Optimize industrial energy use with smart grid technology and energy-efficient machinery.")

        if strategies:
            for strategy in strategies:
                st.write(strategy)
        else:
            st.write("✅ Your carbon footprint is already low! Keep up the sustainable practices. 🎉")

    except Exception as e:
        st.error(f"Error during prediction: {e}")

# ✅ **Chatbot Section with Session State Fix**
with st.expander("💬 **Ask Gemini AI About Sustainability**"):
    st.write("Ask anything related to sustainability, climate change, and carbon footprint reduction.")

    # ✅ Initialize chat history in session state
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    user_query = st.text_input("Type your question:")

    if st.button("Ask Gemini"):
        if user_query:
            def gemini_chatbot(question):
                try:
                    model = genai.GenerativeModel("gemini-pro")
                    response = model.generate_content(question)
                    return response.text if hasattr(response, "text") else "Sorry, I couldn't generate a response."
                except Exception as e:
                    return f"Error: {e}"

            # ✅ Get AI response
            response = gemini_chatbot(user_query)

            # ✅ Store conversation in session state
            st.session_state.chat_history.append(("You: " + user_query, "🤖 Gemini AI: " + response))

    # ✅ Display chat history
    for user_msg, ai_msg in st.session_state.chat_history:
        st.write(user_msg)
        st.write(ai_msg)

Overwriting app.py


In [8]:
%%writefile app.py
import streamlit as st
import numpy as np
import joblib
import shap
import matplotlib.pyplot as plt
import pandas as pd
import google.generativeai as genai
import os

# ✅ Set Gemini API Key (Replace with your actual key)
os.environ["GEMINI_API_KEY"] = "AIzaSyCYF1gM_i6H9Gt9YuNsG1Vc-PlNRINQfe8"

# ✅ Configure Gemini API
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# ✅ Load Model & Scaler
try:
    scaler = joblib.load("scaler.pkl")
    xgb_model = joblib.load("xgb_model.pkl")
except Exception as e:
    st.error(f"Error loading models: {e}")

# ✅ Define feature names
feature_names = ["Energy Consumption (kWh)", "Transport Distance (km)", "Waste Generated (kg)",
                 "Diet Impact Score", "Sector Impact Score", "Industrial Energy Usage (kWh)"]

# ✅ Streamlit UI
st.set_page_config(page_title="🌍 Carbon Footprint Tracker & Sustainability Chatbot", layout="centered")

st.title("🌍 Carbon Footprint Tracker")

user_type = st.selectbox("Select User Type", ["Individual", "Business"])
energy_usage = st.number_input("⚡ Energy Consumption (kWh)", min_value=0.0)
transport_distance = st.number_input("🚗 Transport Distance (km)", min_value=0.0)
waste_generated = st.number_input("♻️ Waste Generated (kg)", min_value=0.0)
diet_impact = st.number_input("🥗 Diet Impact Score", min_value=0.0)
sector_impact = st.number_input("🏭 Sector Impact Score", min_value=0.0)
industrial_usage = st.number_input("⚙️ Industrial Energy Usage (kWh)", min_value=0.0)

if st.button("🔍 Predict Carbon Footprint"):
    try:
        # Prepare input data
        user_input = np.array([[energy_usage, transport_distance, waste_generated, diet_impact, sector_impact, industrial_usage]])
        user_input_scaled = scaler.transform(user_input)

        # Convert to DataFrame for SHAP compatibility
        user_input_df = pd.DataFrame(user_input_scaled, columns=feature_names)

        # Make prediction
        prediction = xgb_model.predict(user_input_scaled)
        st.write(f"🌱 **Estimated Carbon Footprint: {prediction[0]:.2f} tons CO2/year**")

        # Generate SHAP Explainer
        X_train_sample = pd.DataFrame(np.random.randn(10, len(feature_names)), columns=feature_names)
        explainer = shap.Explainer(xgb_model, X_train_sample)
        shap_values = explainer(user_input_df)

        # SHAP Waterfall Plot
        st.subheader("📊 SHAP Waterfall Plot - Feature Contribution")
        fig, ax = plt.subplots(figsize=(8, 6))
        shap.waterfall_plot(shap_values[0], show=False)
        st.pyplot(fig)

        # SHAP Beeswarm Plot
        st.subheader("📊 SHAP Beeswarm Plot - Feature Importance")
        fig, ax = plt.subplots(figsize=(8, 6))
        shap.summary_plot(shap_values, user_input_df, feature_names=feature_names, show=False)
        st.pyplot(fig)

        # Provide Personalized Reduction Strategies
        st.subheader("🚀 Reduction Strategies")
        strategies = []

        if energy_usage > 300:
            strategies.append("🔋 Reduce energy consumption by switching to renewable sources like solar or wind.")
        if transport_distance > 200:
            strategies.append("🚲 Use public transport, carpool, or switch to electric vehicles.")
        if waste_generated > 50:
            strategies.append("♻️ Reduce waste by recycling, composting, and minimizing single-use plastics.")
        if diet_impact > 0.6:
            strategies.append("🥗 Consider a plant-based diet or reducing meat consumption to lower emissions.")
        if industrial_usage > 1000:
            strategies.append("🏭 Optimize industrial energy use with smart grid technology and energy-efficient machinery.")

        if strategies:
            for strategy in strategies:
                st.write(strategy)
        else:
            st.write("✅ Your carbon footprint is already low! Keep up the sustainable practices. 🎉")

    except Exception as e:
        st.error(f"Error during prediction: {e}")

# ✅ Chatbot Section with Session State Fix
with st.expander("💬 **Ask Gemini AI About Sustainability**"):
    st.write("Ask anything related to sustainability, climate change, and carbon footprint reduction.")

    # ✅ Initialize chat history in session state
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    user_query = st.text_input("Type your question:")

    if st.button("Ask Gemini"):
        if user_query:
            def gemini_chatbot(question):
                try:
                    model = genai.GenerativeModel("gemini-pro")
                    response = model.generate_content(question)
                    return response.text if hasattr(response, "text") else "Sorry, I couldn't generate a response."
                except Exception as e:
                    return f"Error: {e}"

            # ✅ Get AI response
            response = gemini_chatbot(user_query)

            # ✅ Store conversation in session state
            st.session_state.chat_history.append(("You: " + user_query, "🤖 Gemini AI: " + response))

    # ✅ Display chat history
    for user_msg, ai_msg in st.session_state.chat_history:
        st.write(user_msg)
        st.write(ai_msg)

Overwriting app.py


In [15]:

import joblib
joblib.dump(scaler, "scaler.pkl")
joblib.dump(xgb_model, "xgb_model.pkl")

['xgb_model.pkl']

In [16]:
!streamlit run app.py & npx localtunnel --port 8501




Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.231.7:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://five-chairs-begin.loca.lt
[34m  Stopping...[0m
E0000 00:00:1740283004.546871   12165 init.cc:232] grpc_wait_for_shutdown_with_timeout() timed out.
^C
