In [9]:
import pandas as pd


In [10]:
# Load the averaged EJScreen indicators. ID is read as text so that leading
# zeros in the geographic code are preserved.
# NOTE(review): the printed samples show several IDs collapsing to the same
# TRACT, so this file appears to hold more than one row per tract
# (presumably one per block group) -- confirm before treating rows as tracts.
avg_df = pd.read_csv("final_avg_EJ_LA_2018_2022.csv", dtype={"ID": str})

# Load the ZIP <-> tract crosswalk, again keeping the codes as text.
crosswalk = pd.read_csv("LA_ZIP_TRACT_2022.csv", dtype={"TRACT": str, "ZIP": str})

# Clean the raw keys BEFORE deriving anything from them. Stripping after the
# slice (as was done previously) would let a trailing space absorb the
# "drop last character" step and leave the full, unmatched ID in TRACT.
avg_df["ID"] = avg_df["ID"].astype(str).str.strip()
crosswalk["TRACT"] = crosswalk["TRACT"].str.strip()
crosswalk["ZIP"] = crosswalk["ZIP"].str.strip()

# The tract code is the ID without its final character.
avg_df["TRACT"] = avg_df["ID"].str[:-1]

# Sanity check: how many tract codes do the two tables have in common?
print("EJScreen TRACT sample:", avg_df['TRACT'].head(3).tolist())
print("Crosswalk TRACT sample:", crosswalk['TRACT'].head(3).tolist())
common = set(avg_df['TRACT']) & set(crosswalk['TRACT'])
print(f"Common tracts: {len(common)}")

EJScreen TRACT sample: ['06037101110', '06037101110', '06037101110']
Crosswalk TRACT sample: ['06037191720', '06037192300', '06037192001']
Common tracts: 690


In [11]:
# Attach ZIP information to every EJScreen row; rows whose tract is absent
# from the crosswalk are kept with missing ZIP values (left join).
merged = pd.merge(avg_df, crosswalk, how="left", on="TRACT")

# Boolean mask reused by all diagnostics below.
has_zip = merged["ZIP"].notna()

# Merge diagnostics
print(f"\nMerged shape: {merged.shape}")
print(f"Rows with valid ZIP: {has_zip.sum()}")
print(f"Rows with missing ZIP: {(~has_zip).sum()}")

# Preview rows that found a ZIP
print("\nSample of rows WITH ZIP codes:")
print(merged.loc[has_zip].head())

# Preview rows that did not find a ZIP
print("\nSample of rows WITHOUT ZIP codes:")
print(merged.loc[~has_zip, ['ID', 'TRACT']].head())


Merged shape: (8897, 9)
Rows with valid ZIP: 3063
Rows with missing ZIP: 5834

Sample of rows WITH ZIP codes:
                ID  avg_traffic_pct  avg_diesel_pm  avg_cancer_risk  \
987   060371397021        53.437251       0.514058        38.788648   
988   060371397022        52.375992       0.514058        38.788648   
989   060371397023        54.382954       0.514058        38.788648   
990   060371397024        35.863009       0.514058        38.788648   
1025  060371415001        92.437016       0.656463        38.528720   

      avg_resp_hazard  avg_ej_index        TRACT    ZIP  RES_RATIO  
987          0.871111      0.161290  06037139702  90049   0.000401  
988          0.871111      0.091209  06037139702  90049   0.000401  
989          0.871111      0.099937  06037139702  90049   0.000401  
990          0.871111      0.127068  06037139702  90049   0.000401  
1025         0.881323      0.109297  06037141500  90049   0.000301  

Sample of rows WITHOUT ZIP codes:
             

In [12]:
# Indicator columns that will be aggregated to ZIP level
env_vars = [
    "avg_traffic_pct",
    "avg_diesel_pm",
    "avg_cancer_risk",
    "avg_resp_hazard",
    "avg_ej_index",
]

# Keep only the rows that were matched to a ZIP code
merged_valid = merged.dropna(subset=["ZIP"])

# Add one "<indicator>_weighted" column per indicator, each being the
# indicator scaled row-wise by RES_RATIO.
merged_valid = merged_valid.join(
    merged_valid[env_vars]
    .mul(merged_valid["RES_RATIO"], axis=0)
    .add_suffix("_weighted")
)


In [13]:
 # Sum of weighted variables per ZIP
weighted_sum = merged_valid.groupby("ZIP")[[f"{col}_weighted" for col in env_vars]].sum()

# Sum of weights per ZIP over all matched rows (kept under its original name
# for inspection; it is no longer used as the denominator, see below).
weight_sum = merged_valid.groupby("ZIP")["RES_RATIO"].sum()

# Per-indicator denominators. groupby().sum() silently skips rows where an
# indicator is missing, so the matching RES_RATIO must be left out of the
# denominator as well; otherwise missing values pull the average toward zero
# and a ZIP with no observations at all would report 0 instead of NaN.
weight_sum_by_var = (
    merged_valid[env_vars]
    .notna()                                   # True where the indicator is observed
    .mul(merged_valid["RES_RATIO"], axis=0)    # weight if observed, 0 otherwise
    .add_suffix("_weighted")                   # align column names with weighted_sum
    .groupby(merged_valid["ZIP"])
    .sum()
)

# Weighted mean = sum(value * weight) / sum(weight over observed values).
# A ZIP whose usable weight is 0 yields NaN (0 / 0) rather than a fake 0.
zip_results = weighted_sum.div(weight_sum_by_var)
print(f"\nFinal ZIP-level results: {len(zip_results)} ZIP codes")
print(zip_results.head())


Final ZIP-level results: 88 ZIP codes
       avg_traffic_pct_weighted  avg_diesel_pm_weighted  \
ZIP                                                       
90001                 73.045085                0.929436   
90002                 82.047265                0.942855   
90003                 89.026533                1.014228   
90004                 83.429716                0.848405   
90005                 73.108820                0.911116   

       avg_cancer_risk_weighted  avg_resp_hazard_weighted  \
ZIP                                                         
90001                 44.828290                  1.200063   
90002                 44.056525                  1.233882   
90003                 44.795534                  1.245435   
90004                 46.020603                  1.073375   
90005                 47.856590                  1.152458   

       avg_ej_index_weighted  
ZIP                           
90001               0.724996  
90002               0.7426

In [14]:
# Write the ZIP-level table (ZIP index included) and echo the file name.
output_path = "cleaned_ZIP_level_EJ_Weighted.csv"
zip_results.to_csv(output_path)
print(output_path)

cleaned_ZIP_level_EJ_Weighted.csv
