# Census aggregation race data
By: Sandhya Kambhampati

In [None]:
#load libraries
import pandas as pd

In [None]:
# Read the tract-to-neighborhood lookup table; the tract column is parsed as str.
shapes = pd.read_csv(
    "../data/output/la_tract_to_neighborhood_portions.csv",
    dtype=dict(tract=str),
)

In [None]:
# Number of rows in the lookup table.
shapes.shape[0]

In [None]:
# Peek at the first lookup row.
shapes.iloc[:1]

In [None]:
# Read the tract-level race table for LA; the tract column is parsed as str.
race = pd.read_csv(
    "../data/input/acs5_2022_race_la_tracts.csv",
    dtype=dict(tract=str),
)

In [None]:
# Total of the universe column before any merging, for later comparison.
race["universe"].sum()

In [None]:
# Peek at the first row of the race table.
race.iloc[:1]

In [None]:
# Display the column labels of the race table.
race.columns

In [None]:
# Count columns handled together below: the universe total followed by each group's count.
col = [
    "universe",
    "white_alone",
    "black_alone",
    "american_indian_and_alaska_native",
    "asian_alone",
    "native_hawaiian_and_pacific_islander",
    "some_other_race",
    "two_or_more_races",
    "latino_alone",
]

In [None]:
# Left-join the race counts onto the lookup by tract, keeping every lookup row.
n = shapes.merge(race, how="left", on="tract")

In [None]:
# Lookup row count again, to compare against the merged frame.
shapes.shape[0]

In [None]:
# Universe total across merged rows (not yet scaled by tract_percent).
n["universe"].sum()

In [None]:
# Distinct tracts in the merged frame.
n["tract"].nunique()

In [None]:
# Distinct tracts in the race table.
race["tract"].nunique()

In [None]:
# Distinct tracts in the lookup table.
shapes["tract"].nunique()

In [None]:
# The left join should yield exactly one merged row per lookup row; a mismatch
# means some tract matched more than one row of the race table.
assert len(n) == len(shapes), (
    f"merge changed the row count: {len(n)} merged rows vs {len(shapes)} lookup rows"
)

In [None]:
def by_percent(row, columns=None):
    """Return a copy of ``row`` with its count columns scaled by ``tract_percent``.

    The incoming row is left untouched: pandas does not support functions
    passed to ``DataFrame.apply`` that mutate the object they receive.

    Args:
        row: A Series-like row (as passed by ``DataFrame.apply(..., axis=1)``)
            holding a ``"tract_percent"`` entry plus every column to scale.
        columns: Labels of the entries to scale. Defaults to the
            module-level ``col`` list when omitted.

    Returns:
        A new row in which each listed entry equals the original value
        multiplied by ``row["tract_percent"]``; all other entries are unchanged.
    """
    if columns is None:
        columns = col
    weight = row["tract_percent"]
    scaled = row.copy()
    for name in columns:
        scaled[name] = row[name] * weight
    return scaled
    

In [None]:
# Scale every count column by its row's tract_percent in one vectorized step.
# This is the same arithmetic as applying by_percent to each row, without a
# Python-level call per row.
race_cleaned = n.copy()
race_cleaned[col] = n[col].mul(n["tract_percent"], axis=0)

In [None]:
# Sum only the scaled count columns within each neighborhood. Selecting the
# columns explicitly keeps non-count columns (e.g. the string tract IDs and
# tract_percent itself) out of the aggregation, so the result does not depend
# on how the installed pandas version treats non-numeric columns in sum().
race_cleaned = race_cleaned.groupby("neighborhood")[col].sum().reset_index()

In [None]:
# Show the aggregated universe total next to each neighborhood name.
race_cleaned.loc[:, ["neighborhood", "universe"]]

In [None]:
# Display the full neighborhood-level frame.
race_cleaned

In [None]:
# Universe total after aggregating to neighborhoods.
race_cleaned["universe"].sum()

In [None]:
# Share of each group within the neighborhood: group count divided by universe.
race_cleaned["white_percent"] = race_cleaned["white_alone"].div(race_cleaned["universe"])
race_cleaned["black_percent"] = race_cleaned["black_alone"].div(race_cleaned["universe"])
race_cleaned["american_indian_percent"] = race_cleaned["american_indian_and_alaska_native"].div(
    race_cleaned["universe"]
)
race_cleaned["asian_percent"] = race_cleaned["asian_alone"].div(race_cleaned["universe"])
race_cleaned["native_hawaiian_percent"] = race_cleaned["native_hawaiian_and_pacific_islander"].div(
    race_cleaned["universe"]
)
race_cleaned["other_percent"] = race_cleaned["some_other_race"].div(race_cleaned["universe"])
race_cleaned["two_or_more_races_percent"] = race_cleaned["two_or_more_races"].div(
    race_cleaned["universe"]
)
race_cleaned["latino_percent"] = race_cleaned["latino_alone"].div(race_cleaned["universe"])
# Complement of the Latino share.
race_cleaned["non_latino_percent"] = 1 - race_cleaned["latino_percent"]

In [None]:
# Everyone not counted in white_percent.
race_cleaned["nonwhite_percent"] = 1 - race_cleaned["white_percent"]

In [None]:
# Boolean flags: True where the given share is strictly above one half.
race_cleaned["majority_nonwhite"] = race_cleaned["nonwhite_percent"].gt(0.5)
race_cleaned["majority_white"] = race_cleaned["white_percent"].gt(0.5)
race_cleaned["majority_black"] = race_cleaned["black_percent"].gt(0.5)
race_cleaned["majority_asian"] = race_cleaned["asian_percent"].gt(0.5)
race_cleaned["majority_latino"] = race_cleaned["latino_percent"].gt(0.5)

In [None]:
# add in diversity index from https://www.usatoday.com/story/news/nation/2014/10/21/diversity-index-data-how-we-did-report/17432103/ & https://github.com/censusreporter/nicar16-diversity-index/blob/master/Walkthrough.ipynb
# Index = 1 - (sum of squared race shares) * (sum of squared Latino / non-Latino shares).
# The race term uses five shares; other_percent and two_or_more_races_percent are not part of it.
# NOTE(review): multiplying a race term by an ethnicity term presumes the race shares
# count people regardless of Latino origin -- confirm how the input columns are defined.
race_cleaned["diversity_index"] = 1 - (
    (
        race_cleaned["white_percent"].pow(2)
        + race_cleaned["black_percent"].pow(2)
        + race_cleaned["american_indian_percent"].pow(2)
        + race_cleaned["asian_percent"].pow(2)
        + race_cleaned["native_hawaiian_percent"].pow(2)
    )
    * (
        race_cleaned["latino_percent"].pow(2)
        + race_cleaned["non_latino_percent"].pow(2)
    )
)

In [None]:
# Number of neighborhoods in the aggregated frame.
race_cleaned.shape[0]

In [None]:
# Column labels as a plain list.
race_cleaned.columns.tolist()

In [None]:
# Keep only the columns destined for the output file, in output order.
race_neighborhoods = race_cleaned[
    # identifier and total
    ["neighborhood", "universe"]
    # group counts
    + [
        "white_alone",
        "black_alone",
        "american_indian_and_alaska_native",
        "asian_alone",
        "native_hawaiian_and_pacific_islander",
        "some_other_race",
        "two_or_more_races",
        "latino_alone",
    ]
    # group shares
    + [
        "white_percent",
        "black_percent",
        "american_indian_percent",
        "asian_percent",
        "native_hawaiian_percent",
        "other_percent",
        "two_or_more_races_percent",
        "latino_percent",
        "nonwhite_percent",
    ]
    # majority flags
    + [
        "majority_nonwhite",
        "majority_white",
        "majority_black",
        "majority_asian",
        "majority_latino",
    ]
    # remaining derived measures
    + ["non_latino_percent", "diversity_index"]
]

In [None]:
# Rename universe to total_population for the output file. The result is
# reassigned rather than renamed in place: race_neighborhoods is a column
# subset of another frame, and in-place modification of such a subset can
# raise pandas' SettingWithCopyWarning.
race_neighborhoods = race_neighborhoods.rename(columns={"universe": "total_population"})

In [None]:
# Write the neighborhood-level table to CSV without the index column.
race_neighborhoods.to_csv(
    path_or_buf="../data/output/race_neighborhoods.csv",
    index=False,
)

## Get top neighborhoods

In [None]:
# Number of neighborhoods in the output table.
race_neighborhoods.shape[0]

In [None]:
# Display the column labels of the output table.
race_neighborhoods.columns

### Plurality Black neighborhoods

In [None]:
# Rows where the Black share is strictly greater than the white, Latino and
# Asian shares (the other groups are not compared).
race_neighborhoods[
    race_neighborhoods["black_percent"].gt(race_neighborhoods["white_percent"])
    & race_neighborhoods["black_percent"].gt(race_neighborhoods["latino_percent"])
    & race_neighborhoods["black_percent"].gt(race_neighborhoods["asian_percent"])
]

In [None]:
# Names of the neighborhoods where the Black share is strictly greater than
# the white, Latino and Asian shares.
race_neighborhoods.loc[
    race_neighborhoods["black_percent"].gt(race_neighborhoods["white_percent"])
    & race_neighborhoods["black_percent"].gt(race_neighborhoods["latino_percent"])
    & race_neighborhoods["black_percent"].gt(race_neighborhoods["asian_percent"]),
    "neighborhood",
].tolist()

### Majority Latino neighborhoods

In [None]:
# How many neighborhoods carry the majority_latino flag.
len(race_neighborhoods[race_neighborhoods["majority_latino"]])

In [None]:
# Names of the neighborhoods flagged majority_latino.
race_neighborhoods.loc[race_neighborhoods["majority_latino"], "neighborhood"].tolist()

### Majority white neighborhoods

In [None]:
# How many neighborhoods carry the majority_white flag.
len(race_neighborhoods[race_neighborhoods["majority_white"]])

In [None]:
# Names of the neighborhoods flagged majority_white.
race_neighborhoods.loc[race_neighborhoods["majority_white"], "neighborhood"].tolist()