## Connecting to the database

In [12]:
# Connect to the database and create a cursor
import os

import numpy as np
import pandas as pd
import psycopg2

# Connection settings. Each value can be overridden through the matching
# PG* environment variable so credentials do not have to live in the notebook;
# when a variable is unset the original local-development value is used.
db_params = {
    "dbname": os.environ.get("PGDATABASE", "nsf_cosea_local"),
    "user": os.environ.get("PGUSER", "linnerlek"),
    "password": os.environ.get("PGPASSWORD", ""),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": os.environ.get("PGPORT", "5432"),
}

# Open the connection and a cursor on it.
conn = psycopg2.connect(**db_params)
cur = conn.cursor()


## Calculating Weighted Median Income

In [None]:
# One row per block-group/school assignment: the block group's population and
# its SE_A14024_001 value (aliased to median_income). Rows where either value
# is NULL are filtered out in SQL.
query = """
SELECT 
    cbg."GEOID", 
    cbg."UNIQUESCHOOLID", 
    cbg."Total_Population", 
    income."SE_A14024_001" AS median_income
FROM "2024".tbl_cbg_finalassignment cbg
JOIN census.household_income income
ON cbg."GEOID" = income."Geo_FIPS" 
WHERE cbg."Total_Population" IS NOT NULL AND income."SE_A14024_001" IS NOT NULL;
"""


# Load data into DataFrame. The connection is closed in `finally` so it is
# released even when the query itself fails.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

# Ensure numeric types; values that cannot be parsed become NaN
df["Total_Population"] = pd.to_numeric(df["Total_Population"], errors="coerce")
df["median_income"] = pd.to_numeric(df["median_income"], errors="coerce")

# Drop any rows with missing values after conversion
df = df.dropna()

# Compute Weighted Median Income per School
def weighted_median_income(group):
    """Return the population-weighted, linearly interpolated median income.

    Rows are sorted by ``median_income`` and the running total of
    ``Total_Population`` is computed. The result is interpolated between the
    last row whose cumulative population is at or below 50% of the total and
    the first row whose cumulative population exceeds it.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``median_income`` and ``Total_Population``.

    Returns
    -------
    float
        The interpolated weighted median. If the lowest-income row alone
        already exceeds half of the population (always the case for a
        single-row group), that row's income is returned. ``nan`` is returned
        for an empty group or one whose total population is not positive.
    """
    ordered = group.sort_values("median_income")
    incomes = ordered["median_income"].to_numpy(dtype=float)
    cumulative = ordered["Total_Population"].cumsum().to_numpy(dtype=float)
    total_population = float(ordered["Total_Population"].sum())

    # No population means no median can be defined.
    if cumulative.size == 0 or not total_population > 0:
        return float("nan")

    # Find where cumulative population crosses 50% of total
    median_threshold = 0.5 * total_population
    at_or_below = (cumulative <= median_threshold).nonzero()[0]
    above = (cumulative > median_threshold).nonzero()[0]
    if above.size == 0:
        # Only reachable with inconsistent (e.g. negative) populations.
        return float("nan")
    high = above[0]

    # Nothing lies at or below the threshold: the first row already contains
    # the median, so there is no lower bracket to interpolate from.
    if at_or_below.size == 0:
        return float(incomes[high])
    low = at_or_below[-1]

    # Compute interpolated WMI
    fraction = (median_threshold - cumulative[low]) / (cumulative[high] - cumulative[low])
    return float(incomes[low] + fraction * (incomes[high] - incomes[low]))

# Compute metrics per school.
# The value columns are selected explicitly so that `apply` receives only the
# data the helpers read and never operates on the grouping column itself.
school_wmi = (
    df.groupby("UNIQUESCHOOLID")[["median_income", "Total_Population"]]
    .apply(weighted_median_income)
    .reset_index()
)
school_wmi.columns = ["UNIQUESCHOOLID", "Weighed_Median_Income"]


def _weighted_mean_income(group):
    """Population-weighted mean of ``median_income``; NaN if the weights sum to zero."""
    weights = group["Total_Population"]
    # np.average raises ZeroDivisionError when the weights sum to zero.
    if weights.sum() == 0:
        return np.nan
    return np.average(group["median_income"], weights=weights)


# Compute average median income per school (weighted mean)
school_avg_income = (
    df.groupby("UNIQUESCHOOLID")[["median_income", "Total_Population"]]
    .apply(_weighted_mean_income)
    .reset_index()
)
school_avg_income.columns = ["UNIQUESCHOOLID", "Avg_median_income"]

# Compute standard deviation per school (weighted std deviation)
def weighted_std(group):
    """Return the population-weighted standard deviation of ``median_income``.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``median_income`` and ``Total_Population``;
        ``Total_Population`` is used as the weight of each row.

    Returns
    -------
    float
        Square root of the weighted mean squared deviation from the weighted
        mean. NaN when the weights sum to zero (including an empty group),
        where a weighted average is undefined.
    """
    incomes = group["median_income"]
    weights = group["Total_Population"]

    # np.average raises ZeroDivisionError when the weights sum to zero.
    if weights.sum() == 0:
        return np.nan

    mean = np.average(incomes, weights=weights)
    variance = np.average((incomes - mean) ** 2, weights=weights)
    return np.sqrt(variance)

# The value columns are selected explicitly so that `apply` receives only the
# data weighted_std reads and never operates on the grouping column itself.
school_std_income = (
    df.groupby("UNIQUESCHOOLID")[["median_income", "Total_Population"]]
    .apply(weighted_std)
    .reset_index()
)
school_std_income.columns = ["UNIQUESCHOOLID", "StdDev_median_income"]

# Merge results: one row per school with all three metrics
final_results = (
    school_wmi
    .merge(school_avg_income, on="UNIQUESCHOOLID")
    .merge(school_std_income, on="UNIQUESCHOOLID")
)

# Print the results for review
print(final_results)


  df = pd.read_sql(query, conn)


    UNIQUESCHOOLID  Weighed_Median_Income  Avg_median_income  \
0         06010103           20248.000000       23408.360197   
1         06020103           23811.928962       22692.115728   
2         06030302           19980.000000       21573.481766   
3         06050189           26905.974232       31702.210411   
4         06060199           24248.049027       29014.584508   
..             ...                    ...                ...   
358       07910301           19760.636964       20551.129335   
359       07920273           26388.678523       27652.278570   
360       78200108           33927.928335       36331.002979   
361       78200613           26537.000000       34655.016984   
362       78200618           27767.776971       32763.911208   

     StdDev_median_income  
0             7591.176540  
1             4048.253395  
2             9649.968596  
3            27886.299641  
4             8895.236542  
..                    ...  
358           8579.610815  
359    

  school_wmi = df.groupby("UNIQUESCHOOLID").apply(weighted_median_income).reset_index()
  school_avg_income = df.groupby("UNIQUESCHOOLID").apply(lambda g: np.average(g["median_income"], weights=g["Total_Population"])).reset_index()
  school_std_income = df.groupby("UNIQUESCHOOLID").apply(weighted_std).reset_index()


In [None]:

# Reconnect to database for inserting results
conn = psycopg2.connect(**db_params)

update_sql = """
        UPDATE "2024".tbl_approvedschools
        SET 
            "Weighed_Median_Income" = %s,
            "Avg_median_income" = %s,
            "StdDev_median_income" = %s
        WHERE "UNIQUESCHOOLID" = %s;
    """

# One parameter tuple per school, in the same order as the placeholders above.
update_params = list(
    final_results[
        ["Weighed_Median_Income", "Avg_median_income", "StdDev_median_income", "UNIQUESCHOOLID"]
    ].itertuples(index=False, name=None)
)

# Update tbl_approvedschools with computed values.
# `with conn` commits when the block succeeds and rolls back if it raises;
# the cursor context manager closes the cursor. Neither closes the connection,
# so that is done in `finally` to release it on failure as well.
try:
    with conn, conn.cursor() as cursor:
        cursor.executemany(update_sql, update_params)
finally:
    conn.close()

print("✅ Weighted Median Income calculations complete and stored in tbl_approvedschools.")