<a href="https://colab.research.google.com/github/bhuguvi26/Census-Data-Standardization-and-Analysis-Pipeline/blob/main/Census_Data_Standardization_and_Analysis_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:

# =========================
# Colab Single-Cell Runner
# =========================

# 1) Install dependencies (Cloudflared avoids ngrok auth token requirement)
!pip -q install pandas requests python-docx pymongo streamlit openpyxl
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared
!chmod +x cloudflared

# 2) MongoDB Atlas credentials (provided)
# NOTE(review): this URI embeds a username and password in plain text, and
# MONGO_URI is assigned again in the CONFIGURATION section further down.
# Consider rotating the password and supplying the URI outside the notebook.
MONGO_URI = "mongodb+srv://bhuvan:12345@cluster0.obh30fm.mongodb.net/censusdb?retryWrites=true&w=majority"

# 3) Write full pipeline + Streamlit dashboard to app.py
#    IMPORTANT: use r''' ... ''' for a raw, triple-quoted multi-line string
import os
import sqlite3
from io import BytesIO

import numpy as np
import pandas as pd
import requests
import streamlit as st
from docx import Document
from pymongo import MongoClient

# =============================
# CONFIGURATION
# =============================
# Source documents: the census workbook and the list of Telangana districts.
EXCEL_URL = "https://raw.githubusercontent.com/bhuguvi26/Census-Data-Standardization-and-Analysis-Pipeline/main/census_2011%20(1).xlsx"
TELANGANA_DOCX_URL = "https://raw.githubusercontent.com/bhuguvi26/Census-Data-Standardization-and-Analysis-Pipeline/main/Telangana%20(1).docx"

# MongoDB credentials
# The connection string can be supplied through the MONGO_URI environment
# variable so the password does not have to live in the source. The literal is
# kept only as a backward-compatible default.
# NOTE(review): the default below contains a real-looking username/password;
# rotate it and drop the fallback once the environment variable is in use.
MONGO_URI = os.environ.get(
    "MONGO_URI",
    "mongodb+srv://bhuvan:12345@cluster0.obh30fm.mongodb.net/censusdb?retryWrites=true&w=majority",
)
MONGO_DB = "censusdb"
MONGO_COLLECTION = "census"

# SQLite database file and the name of the table holding the cleaned rows
SQLITE_DB = "census.db"
FACTS_TABLE = "census_clean"

# =============================
# UTILITY FUNCTIONS
# =============================
def fetch_bytes(url: str) -> BytesIO:
    """Download ``url`` and return the response body as an in-memory stream.

    The request uses a 60 second timeout, and ``raise_for_status`` is called
    on the response before its content is wrapped.
    """
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    payload = response.content
    return BytesIO(payload)

def format_state_name(name: str) -> str:
    """Return ``name`` with "&" spelled out and each word capitalised.

    The word "and" is always written in lower case, runs of whitespace
    collapse to a single space, and missing values are returned unchanged.
    """
    if pd.isna(name):
        return name

    formatted = []
    for word in str(name).replace("&", "and").split():
        if word.lower() == "and":
            formatted.append("and")
        else:
            formatted.append(word.capitalize())
    return " ".join(formatted)

def safe_rename_columns(df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    """Rename the columns listed in ``mapping`` and cap names at 60 characters.

    Args:
        df: Frame whose columns should be renamed; it is not modified.
        mapping: ``{old_name: new_name}``. Keys that are not columns of ``df``
            are ignored.

    Returns:
        A new frame with the applicable renames applied and every string
        column name truncated to at most 60 characters.
    """
    applicable = {old: new for old, new in mapping.items() if old in df.columns}
    renamed = df.rename(columns=applicable)
    # Only string labels can be sliced; numeric or datetime headers (which a
    # spreadsheet can produce) are kept as they are instead of raising TypeError.
    renamed.columns = [
        label[:60] if isinstance(label, str) else label for label in renamed.columns
    ]
    return renamed

def compute_missing_pct(df: pd.DataFrame) -> pd.Series:
    """Return the percentage of missing cells per column, rounded to 2 decimals."""
    missing_fraction = df.isna().mean()
    return (missing_fraction * 100).round(2)

def fill_from_sum(df: pd.DataFrame, target: str, parts: list):
    """Fill gaps in ``df[target]`` in place with the row-wise sum of ``parts``.

    Nothing happens when ``target`` or any column in ``parts`` is absent.
    A row only gets a value when every part is present for that row
    (``min_count=len(parts)``); existing ``target`` values are never replaced.
    """
    columns = df.columns
    if target in columns and set(parts).issubset(columns):
        row_totals = df[parts].sum(axis=1, min_count=len(parts))
        df[target] = df[target].fillna(row_totals)

# =============================
# DATA INGESTION
# =============================
# Download both source documents before any parsing begins.
excel_bytes = fetch_bytes(EXCEL_URL)
docx_bytes  = fetch_bytes(TELANGANA_DOCX_URL)

# The census table comes from the workbook.
df = pd.read_excel(excel_bytes, engine="openpyxl")

# Every non-blank paragraph of the Word document is taken as one district name.
telangana_doc = Document(docx_bytes)
telangana_districts = list(
    filter(None, (para.text.strip() for para in telangana_doc.paragraphs))
)

# =============================
# TASK 1: Rename Column Names
# =============================
# Source header -> standardized column name. Headers that are not present in
# the workbook are skipped by safe_rename_columns.
rename_map = {
    # Geography identifiers
    "State name": "State_UT",
    "District name": "District",
    # Literacy counts
    "Male_Literate": "Literate_Male",
    "Female_Literate": "Literate_Female",
    # Household counts
    "Rural_Households": "Households_Rural",
    "Urban_Households": "Households_Urban",
    "Urban_ Households": "Households_Urban",  # handle stray space
    # Age bands
    "Age_Group_0_29": "Young_and_Adult",
    "Age_Group_30_49": "Middle_Aged",
    "Age_Group_50": "Senior_Citizen",
    "Age not stated": "Age_Not_Stated",
}
df = safe_rename_columns(df, mapping=rename_map)

# =============================
# TASK 2: Rename State/UT Names
# =============================
# Normalise every state/UT name when the column is available.
if "State_UT" in df.columns:
    df["State_UT"] = df["State_UT"].map(format_state_name)

# =============================
# TASK 3: New State/UT Formation
# =============================
# Telangana from Andhra Pradesh (2014)
ladakh_districts = ["Leh", "Kargil"]
# NOTE(review): district names are matched exactly; presumably the workbook
# spells Leh as "Leh" — verify, some Census 2011 tables use "Leh(Ladakh)".

# Both reassignments need the District and State_UT columns, so both sit behind
# the same guard (previously only the Telangana update was protected and the
# Ladakh update raised KeyError when a column was missing).
if {"District","State_UT"}.issubset(df.columns):
    # Compare states on a trimmed, lower-cased copy so spacing/case differences
    # in the source do not prevent a match.
    _state_key = df["State_UT"].str.strip().str.lower()

    # Telangana from Andhra Pradesh (2014)
    df.loc[df["District"].isin(telangana_districts) & (_state_key == "andhra pradesh"), "State_UT"] = "Telangana"

    # Ladakh from Jammu and Kashmir (2019)
    df.loc[df["District"].isin(ladakh_districts) & (_state_key == "jammu and kashmir"), "State_UT"] = "Ladakh"

# =============================
# TASK 4: Missing Data Handling
# =============================
missing_before = compute_missing_pct(df)

# Applied in this order: fill_from_sum only fills values that are still
# missing, so for Population the Male+Female total takes precedence over the
# age-group total.
for _target, _parts in (
    ("Population", ["Male", "Female"]),
    ("Literate", ["Literate_Male", "Literate_Female"]),
    ("Population", ["Young_and_Adult", "Middle_Aged", "Senior_Citizen", "Age_Not_Stated"]),
    ("Households", ["Households_Rural", "Households_Urban"]),
):
    fill_from_sum(df, _target, _parts)

missing_after = compute_missing_pct(df)

# Report the effect of the fills and show a preview of the cleaned frame.
print("== Missing data BEFORE (%) ==", missing_before, sep="\n")
print("\n== Missing data AFTER (%) ==", missing_after, sep="\n")
print("\n== Sample of processed data ==", df.head(), sep="\n")

# =============================
# TASK 5: Save Data to MongoDB
# =============================
# Best-effort upload: a failure is reported but does not stop the pipeline.
mongo_client = None
try:
    mongo_client = MongoClient(MONGO_URI)
    mongo_db = mongo_client[MONGO_DB]
    mongo_coll = mongo_db[MONGO_COLLECTION]
    # Deterministic _id built from state + district (row index as a fallback).
    if {"State_UT","District"}.issubset(df.columns):
        _doc_ids = df["State_UT"].astype(str).str.strip() + "||" + df["District"].astype(str).str.strip()
    else:
        _doc_ids = df.index.astype(str)
    # The _id is attached to a copy so the helper column does not end up in
    # `df`, which is written to SQLite afterwards.
    _records = df.assign(_id=_doc_ids).to_dict(orient="records")
    mongo_coll.delete_many({})
    if _records:  # insert_many rejects an empty list
        mongo_coll.insert_many(_records)
    print("‚úÖ Data uploaded to MongoDB successfully.")
except Exception as e:
    print("‚ùå MongoDB upload failed:", e)
finally:
    # Release the client's sockets and background threads.
    if mongo_client is not None:
        mongo_client.close()

# =============================
# TASK 6: Upload to SQLite with constraints
# =============================
conn = sqlite3.connect(SQLITE_DB)
try:
    conn.execute("PRAGMA foreign_keys = ON;")
    df.to_sql(FACTS_TABLE, conn, if_exists="replace", index=False)

    # Dimension tables and the uniqueness index are best-effort: a failure is
    # reported as a warning and the facts table written above is kept.
    try:
        # Start from empty dimension tables (child table first because of its
        # foreign key) and then *append* the rows. Loading with
        # if_exists="replace" would drop the tables and recreate them without
        # the PRIMARY KEY / FOREIGN KEY constraints declared here.
        conn.execute("DROP TABLE IF EXISTS districts;")
        conn.execute("DROP TABLE IF EXISTS states;")
        # States dimension
        conn.execute("""CREATE TABLE states (State_UT TEXT PRIMARY KEY);""")
        # Districts dim with FK
        conn.execute("""
            CREATE TABLE districts (
                District TEXT NOT NULL,
                State_UT TEXT NOT NULL,
                PRIMARY KEY(State_UT, District),
                FOREIGN KEY(State_UT) REFERENCES states(State_UT)
            );
        """)
        states = (pd.DataFrame({"State_UT": sorted(df["State_UT"].dropna().unique())})
                  if "State_UT" in df.columns else pd.DataFrame({"State_UT": []}))
        states.to_sql("states", conn, if_exists="append", index=False)
        # Rows missing either key cannot satisfy the NOT NULL columns.
        districts = (df[["State_UT","District"]].dropna().drop_duplicates()
                     if {"State_UT","District"}.issubset(df.columns)
                     else pd.DataFrame(columns=["State_UT","District"]))
        districts.to_sql("districts", conn, if_exists="append", index=False)

        # Enforce uniqueness on facts
        conn.execute(f"CREATE UNIQUE INDEX IF NOT EXISTS idx_{FACTS_TABLE}_state_district ON {FACTS_TABLE}(State_UT, District);")
        conn.commit()
        print("‚úÖ Data uploaded to SQLite with dimensions and constraints.")
    except Exception as e:
        print("‚ö†Ô∏è SQLite constraint setup warning:", e)
finally:
    # The dashboard opens its own connection; this one is no longer needed.
    conn.close()

# =============================
# TASK 7: Streamlit Dashboard — ALL QUERIES
# =============================
def get_available_columns(conn, table_name):
    """Return the set of column names SQLite reports for ``table_name``."""
    schema = pd.read_sql_query(f"PRAGMA table_info({table_name});", conn)
    return {column for column in schema["name"]}

def choose_sum_cols(available, candidates, alias_map=None):
    """Build ``SUM(COALESCE(col,0)) AS alias`` expressions for existing columns.

    Candidates missing from ``available`` are skipped and the order of
    ``candidates`` is kept. ``alias_map`` optionally overrides the output
    name of individual columns; other columns keep their own name.
    """
    aliases = alias_map or {}
    return [
        f"SUM(COALESCE({column},0)) AS {aliases.get(column, column)}"
        for column in candidates
        if column in available
    ]

# SQL fragments shared by several queries. Each prefers the reported total and
# falls back to the sum of its parts when the total is NULL.
_POPULATION_EXPR = "COALESCE(Population, COALESCE(Male,0)+COALESCE(Female,0))"
_HOUSEHOLDS_EXPR = "COALESCE(Households, COALESCE(Households_Rural,0)+COALESCE(Households_Urban,0))"
_LITERATE_EXPR = "COALESCE(Literate, COALESCE(Literate_Male,0)+COALESCE(Literate_Female,0))"

_WORKER_COLUMNS = ["Workers_Male", "Workers_Female"]
_MARRIED_COUPLE_COLUMNS = [
    "Married_Couples_Size_1",
    "Married_Couples_Size_2",
    "Married_Couples_Size_3_to_5",
    "Married_Couples_Size_6_plus",
]

# Entries of the query selector, in display order.
_QUERY_LABELS = [
    "Total population of each district",
    "Literate males and females in each district",
    "Percentage of workers (male + female) in each district",
    "Households with LPG/PNG as cooking fuel in each district",
    "Religious composition of each district",
    "Households with internet access in each district",
    "Educational attainment distribution in each district",
    "Households with transport/media assets (bicycle, car, radio, TV, etc.) in each district",
    "Condition of occupied census houses (kitchen, bath, latrine, dilapidated) in each district",
    "Household size distribution in each district",
    "Total number of households in each state",
    "Households with latrine facility within premises in each state",
    "Average household size in each state",
    "Owned vs rented households in each state",
    "Distribution of latrine types in each state",
    "Households with drinking water sources near premises in each state",
    "Average household income distribution (Power Parity) in each state",
    "Percentage of married couples by household size in each state",
    "Households below the poverty line in each state (Power Parity/BPL)",
    "Overall literacy rate (percentage of literate population) in each state",
]

# Queries that simply total a set of optional columns per district or state:
# label -> (group column, candidate columns, alias overrides, placeholder
# expressions used when none of the candidates exist in the facts table).
_SUM_QUERIES = {
    "Literate males and females in each district": (
        "District",
        ["Literate_Male", "Literate_Female"],
        None,
        ["SUM(0) AS Literate_Male", "SUM(0) AS Literate_Female"],
    ),
    "Households with LPG/PNG as cooking fuel in each district": (
        "District",
        ["Households_LPG_PNG", "Households_Cooking_LPG_PNG"],
        {"Households_Cooking_LPG_PNG": "Households_LPG_PNG"},
        ["SUM(0) AS Households_LPG_PNG"],
    ),
    "Religious composition of each district": (
        "District",
        ["Hindus", "Muslims", "Christians", "Sikhs", "Buddhists", "Jains", "Others_Religions", "Religion_Not_Stated"],
        None,
        ["SUM(0) AS No_Religion_Columns"],
    ),
    "Households with internet access in each district": (
        "District",
        ["Households_Internet", "Households_Internet_Access"],
        None,
        ["SUM(0) AS Households_Internet"],
    ),
    "Educational attainment distribution in each district": (
        "District",
        ["Below_Primary", "Primary", "Middle", "Secondary", "Higher_Secondary", "Graduate", "Post_Graduate", "Literates", "Illiterates"],
        None,
        ["SUM(0) AS Education_Data"],
    ),
    "Households with transport/media assets (bicycle, car, radio, TV, etc.) in each district": (
        "District",
        ["Households_Bicycle", "Households_Car", "Households_Radio", "Households_Television", "Households_Two_Wheeler", "Households_Computer", "Households_Mobile"],
        None,
        ["SUM(0) AS Transport_Media"],
    ),
    "Condition of occupied census houses (kitchen, bath, latrine, dilapidated) in each district": (
        "District",
        ["Houses_Dilapidated", "Houses_Separate_Kitchen", "Houses_Bathing_Facility", "Houses_Latrine_Facility"],
        None,
        ["SUM(0) AS House_Conditions"],
    ),
    "Household size distribution in each district": (
        "District",
        ["HH_Size_1", "HH_Size_2", "HH_Size_3_to_5", "HH_Size_6_to_8", "HH_Size_9_plus"],
        None,
        ["SUM(0) AS HH_Size"],
    ),
    "Households with latrine facility within premises in each state": (
        "State_UT",
        ["Households_Latrine_Within", "Households_Latrine_Within_Premises"],
        {"Households_Latrine_Within_Premises": "Households_Latrine_Within"},
        ["SUM(0) AS Households_Latrine_Within"],
    ),
    "Owned vs rented households in each state": (
        "State_UT",
        ["Households_Owned", "Households_Rented"],
        None,
        ["SUM(0) AS Owned", "SUM(0) AS Rented"],
    ),
    "Distribution of latrine types in each state": (
        "State_UT",
        ["Latrine_Pit", "Latrine_Flush", "Latrine_Others"],
        None,
        ["SUM(0) AS Latrine_Types"],
    ),
    "Households with drinking water sources near premises in each state": (
        "State_UT",
        ["Households_Water_Near", "Households_Drinking_Water_Near"],
        None,
        ["SUM(0) AS Water_Near"],
    ),
    "Average household income distribution (Power Parity) in each state": (
        "State_UT",
        [
            "Power_Parity_Rs_330000_425000",
            "Power_Parity_Rs_425000_545000",
            "Power_Parity_Rs_330000_545000",
            "Power_Parity_Above_Rs_545000",
            "Total_Power_Parity",
        ],
        None,
        ["SUM(0) AS Power_Parity"],
    ),
    "Households below the poverty line in each state (Power Parity/BPL)": (
        "State_UT",
        ["BPL_Households", "Households_Below_Poverty_Line"],
        None,
        ["SUM(0) AS BPL_Households"],
    ),
}


def _grouped_select(group_col, select_exprs, where_sql):
    """Return a SELECT over the facts table grouped and ordered by ``group_col``."""
    select_sql = ", ".join([group_col] + list(select_exprs))
    return f"SELECT {select_sql} FROM {FACTS_TABLE}{where_sql} GROUP BY {group_col} ORDER BY {group_col};"


def _ratio_expr(numerator, denominator, scale, alias):
    """Return ``ROUND(scale * SUM(numerator) / SUM(denominator), 2) AS alias``.

    The expression yields NULL when the denominator total is not positive.
    """
    return (
        f"CASE WHEN SUM({denominator})>0 "
        f"THEN ROUND({scale} * SUM({numerator}) / SUM({denominator}), 2) "
        f"ELSE NULL END AS {alias}"
    )


def _read_filters(conn, available):
    """Render the filter widgets and return ``(where_sql, params)`` for them."""
    with st.expander("üîé Filters"):
        # Optional dropdown populated from DB; NULL states are left out because
        # they cannot be used as a text filter.
        if "State_UT" in available:
            states_list = pd.read_sql_query(
                f"SELECT DISTINCT State_UT FROM {FACTS_TABLE} ORDER BY State_UT;", conn
            )["State_UT"].dropna().tolist()
        else:
            states_list = []
        state_filter = st.selectbox("Filter by State/UT", [""] + states_list, index=0)
        district_filter = st.text_input("Filter by District (optional)")

    clauses, params = [], []
    if state_filter.strip():
        clauses.append("State_UT = ?")
        params.append(state_filter.strip())
    if district_filter.strip():
        clauses.append("District = ?")
        params.append(district_filter.strip())
    where_sql = (" WHERE " + " AND ".join(clauses)) if clauses else ""
    return where_sql, params


def _build_query(selected, available, where_sql):
    """Return the SQL text for the query labelled ``selected``, or None if unknown."""
    if selected in _SUM_QUERIES:
        group_col, columns, aliases, placeholders = _SUM_QUERIES[selected]
        exprs = choose_sum_cols(available, columns, alias_map=aliases) or placeholders
        return _grouped_select(group_col, exprs, where_sql)

    if selected == "Total population of each district":
        return _grouped_select("District", [f"SUM({_POPULATION_EXPR}) AS Total_Population"], where_sql)

    if selected == "Percentage of workers (male + female) in each district":
        # With no worker columns the numerator is NULL, so the percentage is NULL.
        workers_expr = "+".join(f"COALESCE({c},0)" for c in _WORKER_COLUMNS if c in available) or "NULL"
        return _grouped_select(
            "District",
            [_ratio_expr(workers_expr, _POPULATION_EXPR, "100.0", "Workers_Percentage")],
            where_sql,
        )

    if selected == "Total number of households in each state":
        return _grouped_select("State_UT", [f"SUM({_HOUSEHOLDS_EXPR}) AS Total_Households"], where_sql)

    if selected == "Average household size in each state":
        return _grouped_select(
            "State_UT",
            [_ratio_expr(_POPULATION_EXPR, _HOUSEHOLDS_EXPR, "1.0", "Avg_HH_Size")],
            where_sql,
        )

    if selected == "Percentage of married couples by household size in each state":
        couples_cols = [c for c in _MARRIED_COUPLE_COLUMNS if c in available]
        if not couples_cols:
            return _grouped_select("State_UT", ["NULL AS Married_Couples_Pct"], where_sql)
        # Each size bucket is expressed as a share of all buckets combined.
        base_sum = "+".join(f"COALESCE({c},0)" for c in couples_cols)
        return _grouped_select(
            "State_UT",
            [_ratio_expr(f"COALESCE({c},0)", base_sum, "100.0", f"{c}_Pct") for c in couples_cols],
            where_sql,
        )

    if selected == "Overall literacy rate (percentage of literate population) in each state":
        return _grouped_select(
            "State_UT",
            [_ratio_expr(_LITERATE_EXPR, _POPULATION_EXPR, "100.0", "Literacy_Rate_Percent")],
            where_sql,
        )

    return None


def run_dashboard():
    """Render the Streamlit dashboard: filters, query selector, results, CSV export.

    A SQLite connection to ``SQLITE_DB`` is opened for the duration of the
    query and always closed afterwards, so Streamlit reruns do not accumulate
    open connections. A query that fails in SQLite (for example because a
    referenced column is absent) is reported on the page instead of raising.
    """
    st.title("üß≠ Census Data Analysis Dashboard (Colab)")

    conn = sqlite3.connect(SQLITE_DB)
    try:
        available = get_available_columns(conn, FACTS_TABLE)
        where_sql, params = _read_filters(conn, available)
        selected = st.selectbox("üìå Select a query", _QUERY_LABELS)

        df_res = pd.DataFrame()
        q = _build_query(selected, available, where_sql)
        if q is not None:
            try:
                df_res = pd.read_sql_query(q, conn, params=params)
            except (sqlite3.Error, pd.errors.DatabaseError) as exc:
                st.error(f"Query failed: {exc}")
    finally:
        conn.close()

    st.subheader("Results")
    if df_res.empty:
        st.info("No results or columns missing for this query.")
        return

    st.dataframe(df_res)
    st.download_button(
        "Download results as CSV",
        df_res.to_csv(index=False),
        file_name=f"{selected.replace(' ','_')}.csv",
        mime="text/csv"
    )

# Run dashboard when script executed by Streamlit
# Render the dashboard; this call is what draws the page when the code runs
# under `streamlit run`.
run_dashboard()

# Write the Streamlit application source to app.py for the launcher below.
# NOTE(review): `app_code` is never assigned in the visible source, so this
# write raises NameError unless the name already exists in the session. The
# step-3 comment suggests the pipeline above was meant to be wrapped in a raw
# triple-quoted string assigned to `app_code` — confirm and restore.
with open("app.py", "w") as f:
    f.write(app_code)

# 4) Start Streamlit in the background (port 8501)
import os
import re
import subprocess
import time

# Stop servers left over from an earlier run of this cell; otherwise the old
# Streamlit process keeps port 8501 and the old tunnel keeps running.
for _stale_proc in (globals().get("streamlit_proc"), globals().get("cloudflared_proc")):
    if _stale_proc is not None and _stale_proc.poll() is None:
        _stale_proc.terminate()
        try:
            _stale_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            _stale_proc.kill()

os.environ["STREAMLIT_SERVER_PORT"] = "8501"
# Child output goes to log files rather than pipes: nothing drains the pipes
# continuously, so a chatty child would block once the OS pipe buffer filled.
# The parent's handle can be closed right away; the child keeps its own copy.
with open("streamlit.log", "w") as _streamlit_log:
    streamlit_proc = subprocess.Popen(["streamlit", "run", "app.py"],
                                      stdout=_streamlit_log, stderr=subprocess.STDOUT)

# 5) Start Cloudflared tunnel to expose the Streamlit server
with open("cloudflared.log", "w") as _cloudflared_log:
    cloudflared_proc = subprocess.Popen(["./cloudflared", "tunnel", "--url", "http://localhost:8501", "--no-autoupdate"],
                                        stdout=_cloudflared_log, stderr=subprocess.STDOUT)

print("‚è≥ Starting Streamlit & Cloudflared...")
public_url = None
_url_pattern = re.compile(r'https://[-\w\.]+\.trycloudflare\.com')
_deadline = time.monotonic() + 60  # wait up to 60 seconds
# Poll the log file instead of blocking on readline(), so the deadline is
# honoured even when cloudflared prints nothing.
while public_url is None and time.monotonic() < _deadline:
    # Check liveness *before* reading so that, after an exit, the log is read
    # one final time in full.
    _tunnel_exited = cloudflared_proc.poll() is not None
    with open("cloudflared.log", encoding="utf-8", errors="replace") as _log:
        m = _url_pattern.search(_log.read())
    if m:
        public_url = m.group(0)
    elif _tunnel_exited:
        break
    else:
        time.sleep(0.5)

if public_url:
    print("üåê Public URL:", public_url)
    print("‚úÖ Open this URL to view the Streamlit dashboard.")
else:
    print("‚ùå Could not obtain public URL from Cloudflared logs. Scroll up to see log output.")
    print("Tip: Re-run the cell; sometimes Cloudflared takes a moment to provision the URL.")
    print("Logs are written to cloudflared.log and streamlit.log.")

print("‚ÑπÔ∏è To stop: Runtime ‚Üí Interrupt execution, or run:")
print("    streamlit_proc.terminate(); cloudflared_proc.terminate()")


cloudflared: Text file busy
== Missing data BEFORE (%) ==
District code                    0.00
State_UT                         0.00
District                         0.00
Population                       4.69
Male                             4.69
                                 ... 
Power_Parity_Rs_330000_425000    5.16
Power_Parity_Rs_425000_545000    4.69
Power_Parity_Rs_330000_545000    3.59
Power_Parity_Above_Rs_545000     4.69
Total_Power_Parity               5.00
Length: 118, dtype: float64

== Missing data AFTER (%) ==
District code                    0.00
State_UT                         0.00
District                         0.00
Population                       0.16
Male                             4.69
                                 ... 
Power_Parity_Rs_330000_425000    5.16
Power_Parity_Rs_425000_545000    4.69
Power_Parity_Rs_330000_545000    3.59
Power_Parity_Above_Rs_545000     4.69
Total_Power_Parity               5.00
Length: 118, dtype: float64

== Sample of proces



‚úÖ Data uploaded to MongoDB successfully.
‚úÖ Data uploaded to SQLite with dimensions and constraints.


2025-11-30 22:31:37.306 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-11-30 22:31:37.322 Session state does not function when running a script without `streamlit run`


‚è≥ Starting Streamlit & Cloudflared...
üåê Public URL: https://boys-specials-photographer-creature.trycloudflare.com
‚úÖ Open this URL to view the Streamlit dashboard.
‚ÑπÔ∏è To stop: Runtime ‚Üí Interrupt execution, or run:
    streamlit_proc.terminate(); cloudflared_proc.terminate()
