In [1]:
!pip install streamlit



In [2]:


%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

# -----------------------------
# Load Data
# -----------------------------
@st.cache_data
def load_data():
    df = pd.read_csv("combined_output.csv")
    df['datetime'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
    df['PM2.5'].fillna(df['PM2.5'].median(), inplace=True)
    features = ['TEMP', 'PRES', 'DEWP', 'WSPM', 'SO2', 'NO2', 'CO', 'O3']
    df[features] = df[features].fillna(df[features].median())
    df = df[df['PM2.5'] < 500]
    return df

df = load_data()
# ==========================
# 📄 Overview Page
# ==========================
def show_overview(df):
    st.title("📄 Dataset Overview")
    st.subheader("🧾 Sample Data")
    st.dataframe(df.head(), use_container_width=True)

    st.subheader("📏 Dataset Shape")
    st.info(f"✅ {df.shape[0]} Rows & {df.shape[1]} Columns")

    st.subheader("❗️ Missing Values")
    missing = df.isnull().sum()
    st.bar_chart(missing[missing > 0])

# -----------------------------
# Train the model
# -----------------------------
features = ['TEMP', 'PRES', 'DEWP', 'WSPM', 'SO2', 'NO2', 'CO', 'O3']
X = df[features]
y = df['PM2.5']
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# -----------------------------
# App Layout
# -----------------------------
st.title("🌫️ PM2.5 Predictor & Air Quality Visual Dashboard")
st.markdown("**📍 Use sliders to predict PM2.5 and explore trends with charts.**")

# -----------------------------
# User Input
# -----------------------------
st.sidebar.header("🧪 Input Values")

temp = st.sidebar.slider("Temperature (°C)", -8.0, 50.0, 20.0)
pres = st.sidebar.slider("Pressure (hPa)", 980.0, 1040.0, 1010.0)
dewp = st.sidebar.slider("Dew Point", -40.0, 30.0, 5.0)
wspm = st.sidebar.slider("Wind Speed", 0.0, 10.0, 2.0)
so2 = st.sidebar.slider("SO₂", 0.0, 100.0, 10.0)
no2 = st.sidebar.slider("NO₂", 0.0, 150.0, 30.0)
co = st.sidebar.slider("CO", 0.0, 5000.0, 900.0)
o3 = st.sidebar.slider("O₃", 0.0, 300.0, 50.0)

input_data = np.array([[temp, pres, dewp, wspm, so2, no2, co, o3]])
predicted_pm25 = model.predict(input_data)[0]

# -----------------------------
# Prediction Output
# -----------------------------
st.subheader("🔮 Predicted PM2.5 Level:")
st.success(f"{predicted_pm25:.2f} µg/m³")

# -----------------------------
# 📊 Section: Charts & Visuals
# -----------------------------
st.header("📈 Data Visualisations")

# Line Chart – Monthly PM2.5 Trend
st.markdown("### 📅 Monthly PM2.5 Trend")
df.set_index("datetime", inplace=True)
monthly_pm = df['PM2.5'].resample('M').mean()

st.line_chart(monthly_pm)

# Histogram
st.markdown("### 📊 PM2.5 Distribution Histogram")
fig1, ax1 = plt.subplots()
sns.histplot(df['PM2.5'], bins=50, kde=True, ax=ax1, color='skyblue')
plt.xlabel("PM2.5 (µg/m³)")
plt.ylabel("Frequency")
plt.title("Distribution of PM2.5")
st.pyplot(fig1)

# Heatmap
st.markdown("### 🔥 Correlation Heatmap")
fig2, ax2 = plt.subplots(figsize=(10, 8))
sns.heatmap(df[features + ['PM2.5']].corr(), annot=True, fmt=".2f", cmap='coolwarm', ax=ax2)
plt.title("Correlation Matrix")
st.pyplot(fig2)
#ddddddddddddddfdfdddddddddddddddd

st.title("EDA")


st.header("Correlation Heatmap")
fig2, ax2 = plt.subplots(figsize=(10, 6))
selected_cols = ['PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3', 'TEMP', 'PRES', 'DEWP', 'WSPM']
corr = df[selected_cols].corr()
sns.heatmap(corr, annot=True, cmap="YlOrRd", ax=ax2, linewidths=0.5)
st.pyplot(fig2)

st.header("Trend of PM2.5")
df["date"] = pd.to_datetime(df.index, errors='coerce')
df = df.set_index("date")
monthly_avg = df["PM2.5"].resample("M").mean()
fig3, ax3 = plt.subplots(figsize=(10, 4))
monthly_avg.plot(ax=ax3, color="pink", marker="o", linestyle="--")
ax3.set_title("Monthly Average PM2.5 (µg/m³)", fontsize=14)
ax3.set_ylabel("PM2.5 (µg/m³)")
st.pyplot(fig3)

st.header("Dynamic Pollutant Trends")
pollutant = st.selectbox("Select a pollutant to visualize its trend", selected_cols[1:])
if pollutant:
    fig4, ax4 = plt.subplots(figsize=(10, 4))
    df[pollutant].resample("M").mean().plot(ax=ax4, color="teal", linestyle="-", marker=".")
    ax4.set_title(f"Monthly Average {pollutant}", fontsize=14)
    ax4.set_ylabel(f"{pollutant} (µg/m³ or mg/m³)")
    st.pyplot(fig4)





Overwriting app.py


In [3]:
!wget -q -O - ipv4.icanhazip.com

34.48.151.15


In [None]:

!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.48.151.15:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0Kyour url is: https://forty-weeks-search.loca.lt
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['PM2.5'].fillna(df['PM2.5'].median(), inplace=True)
  monthly_pm = df['PM2.5'].resample('M').mean()
  monthly_avg = df["PM2.5"].resample("M").mean(