In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import locale
from sklearn.cluster import KMeans

Assumptions / Simplifications:
1. Census data from 2016 (for Race and Ethnicity)
2. Income data from 2018
3. When a neighborhood spans multiple zip codes, the income and race data of those zip codes are averaged to get a single value per neighborhood.
4. COVID data valid as of 2/10/2021


End Goal is to have a dataset that looks like this :

Neighborhood/City, %Native American, %Asian, %Black, %Hispanic, %Pacific Islanders, %Other, %2 or More, %White, Income, adj COVID Cases, adj COVID Deaths

# Import Data

### 1. Import Census Data (https://usc.data.socrata.com/Los-Angeles/Race-Ethnicity-LA-/jxw5-xxv5)

In [2]:
# Load the race/ethnicity census export and preview its first rows.
# The locale is switched to en_US so that locale.atoi (used in the pre-processing
# cell) can parse counts written with thousands separators, e.g. '1,234'.
locale.setlocale( locale.LC_ALL, 'en_US.UTF-8' ) # Lets locale.atoi strip the commas from numbers
# NOTE(review): read_csv documents `dtype` as a single type or a dict of
# column -> type, not an array with one entry per column. Presumably this array is
# reduced to its own (object) dtype, so every column is read as text -- the later
# astype(float) / locale.atoi conversions suggest exactly that. TODO confirm.
dtypes = np.array([int, str, int, float, str])
Race = pd.read_csv('Race___Ethnicity__LA_.csv', dtype = dtypes)
Race.head()

Unnamed: 0,Year,Variable,Count,Percent,Neighborhood
0,2010,American Indian/Native Population,0,0.0,Tujunga
1,2010,American Indian/Native Population,31,0.83043128,Tujunga
2,2010,American Indian/Native Population,17,0.92341119,Shadow Hills
3,2010,American Indian/Native Population,0,0.0,Shadow Hills
4,2010,American Indian/Native Population,0,0.0,Sun Valley


### 2. Import Income Data (http://www.laalmanac.com/employment/em12c.php)

# Pre-Processing

### 1. Census Data

Next, we convert the numerical columns to ints or floats. The for loop at the end strips the thousands-separator commas from the "Count" column and converts each string to an int.

In [3]:
# Work on a plain NumPy copy of the table.
# Column layout: [Year, Race, Count, Percent, Neighborhood]
Data = Race.to_numpy()

# Year (column 0) and Percent (column 3) become floats.
temp = Data[:, [0, 3]].astype(float)
Data[:, 0], Data[:, 3] = temp[:, 0], temp[:, 1]

# Count (column 2) is text such as '1,234'; locale.atoi parses it into an int.
Data[:, 2] = [locale.atoi(count_text) for count_text in Data[:, 2]]
    

Extracting 2016 Census Data

In [4]:
# 0/1 flag per row: 1 where the Year column (column 0) equals 2016.
Year_2016 = (Data[:, 0] == 2016).astype(int)

In [5]:
# Keep only the 2016 rows that report a non-zero count.
# A boolean mask selects all matching rows in one pass. Growing the array with
# np.append inside a loop recopies it on every iteration, needs the column count
# hard-coded for the final reshape, and leaves a plain list (no .shape) when
# nothing matches.
is_2016 = np.asarray(Data[:, 0] == 2016, dtype=bool)    # column 0: Year
has_people = np.asarray(Data[:, 2] != 0, dtype=bool)    # column 2: Count
Data_2016 = Data[is_2016 & has_people]
print(Data_2016.shape)

(13231, 5)


In [6]:
# Distinct labels found in the filtered 2016 rows; np.unique returns them sorted.
Races = np.unique(Data_2016[:, 1])            # column 1: race label
Neighborhoods = np.unique(Data_2016[:, 4])    # column 4: neighborhood / city name

# 8 race labels, with the number of 2016 rows carrying each one
# (the row counts add up to the 13231 rows of Data_2016):
#   American Indian/Native Population (745)
#   Asian Population (2205)
#   Black Population (2161)
#   Hispanic Population (2322)
#   Native Hawaiian/Other Pacific Islander Population (576)
#   Other Race Population (884)
#   Population of Two or More Races (2032)
#   White Population (2306)
#
# 260 neighborhoods / cities

In [7]:
def _race_percentages(rows, neighborhoods, races):
    """Return each neighborhood's racial composition as percentages.

    Parameters
    ----------
    rows : array-like
        Records laid out as [year, race, count, percent, neighborhood].
        Only the race (position 1), count (position 2) and neighborhood
        (position 4) entries are read.
    neighborhoods : sequence
        Unique neighborhood names; entry k labels row k of the result.
    races : sequence
        Unique race labels; entry k labels column k of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape (len(neighborhoods), len(races)). Entry [i, j] is
        the share (in %) of race j in the total count of neighborhood i, so
        every row adds up to 100.

    Raises
    ------
    KeyError
        If a record names a race or neighborhood missing from the given labels.
    ZeroDivisionError
        If a neighborhood has a total count of zero.
    """
    row_of = {name: k for k, name in enumerate(neighborhoods)}
    col_of = {name: k for k, name in enumerate(races)}

    # One pass over the data: add every record's count to its
    # (neighborhood, race) cell instead of rescanning the whole dataset
    # once per neighborhood.
    counts = np.zeros((len(neighborhoods), len(races)))
    for record in rows:
        counts[row_of[record[4]], col_of[record[1]]] += record[2]

    # Every record lands in exactly one cell, so a row sum is that
    # neighborhood's total population.
    totals = counts.sum(axis=1, keepdims=True)
    if np.any(totals == 0):
        raise ZeroDivisionError('a neighborhood has a total population of zero')

    return counts / totals * 100


# Temp[i, j]: percentage of Races[j] among the 2016 population of Neighborhoods[i]
Temp = _race_percentages(Data_2016, Neighborhoods, Races)
    

Check results for some specific neighborhoods

In [11]:
# Spot check: print the racial composition computed for one neighborhood.
neighborhood = 'Santa Fe Springs'
matches, = np.where(Neighborhoods == neighborhood)
index = matches[0]    # position of that neighborhood in Neighborhoods / Temp

shares = Temp[index]  # one percentage per entry of Races
for column, race_label in enumerate(Races):
    print('Percentage of', race_label, 'in', Neighborhoods[index], 'is ', np.round(shares[column], 3), '%')



Percentage of American Indian/Native Population in Santa Fe Springs is  0.317 %
Percentage of Asian Population in Santa Fe Springs is  7.275 %
Percentage of Black Population in Santa Fe Springs is  3.516 %
Percentage of Hispanic Population in Santa Fe Springs is  79.388 %
Percentage of Native Hawaiian/Other Pacific Islander Population in Santa Fe Springs is  0.0 %
Percentage of Other Race Population in Santa Fe Springs is  0.915 %
Percentage of Population of Two or More Races in Santa Fe Springs is  0.454 %
Percentage of White Population in Santa Fe Springs is  8.134 %


### 2. Income Data