In [58]:
import pandas as pd
import numpy as np
import re

In [59]:
# Survey-year ranges. np.arange excludes its stop value, so each range ends at 2022.
all_years = np.arange(2007, 2023)
years = np.arange(2014, 2023)
# Presumably the years in which the veteran / race / age breakdowns are
# reported in the source files — TODO confirm against the raw CSVs.
homelessVets = np.arange(2011, 2023)
homelessRace = np.arange(2015, 2023)
homelessAge = np.arange(2014, 2023)

# Base column names (without the ", <year>" suffix) to keep from each yearly file.
# edit_colNames() treats the first two entries as identifier columns that carry
# no year suffix, so they must stay at the front.
colNames = (
    # Identifiers
    ['CoC Number', 'CoC Name']
    # Headline count
    + ['Overall Homeless']
    # Age breakdown
    + [
        'Overall Homeless - Under 18',
        'Overall Homeless - Age 18 to 24',
        'Overall Homeless - Over 24',
    ]
    # Ethnicity breakdown
    + [
        'Overall Homeless - Non-Hispanic/Non-Latino',
        'Overall Homeless - Hispanic/Latino',
    ]
    # Race breakdown
    + [
        'Overall Homeless - White',
        'Overall Homeless - Black, African American, or African',
        'Overall Homeless - Asian or Asian American',
        'Overall Homeless - American Indian, Alaska Native, or Indigenous',
        'Overall Homeless - Native Hawaiian or Other Pacific Islander',
        'Overall Homeless - Multiple Races',
    ]
    # Shelter status and veterans
    + [
        'Sheltered Total Homeless',
        'Unsheltered Homeless',
        'Overall Homeless Veterans',
    ]
)

In [60]:
# Older race-column headers mapped to the current names listed in colNames.
# Keys and values are base names; the ", <year>" suffix is added per year elsewhere.
corrected_columns = dict([
    (
        'Overall Homeless - Black or African American',
        'Overall Homeless - Black, African American, or African',
    ),
    (
        'Overall Homeless - Asian',
        'Overall Homeless - Asian or Asian American',
    ),
    (
        'Overall Homeless - American Indian or Alaska Native',
        'Overall Homeless - American Indian, Alaska Native, or Indigenous',
    ),
])

In [61]:
def curr_corrected_columns(year):
    """Return the header-correction mapping with a year suffix on both sides.

    Each key and value of the module-level ``corrected_columns`` dict gets
    ``', <year>'`` appended, so the result can be passed to ``DataFrame.rename``
    for that year's file.
    """
    suffix = f', {year}'
    return {old + suffix: new + suffix for old, new in corrected_columns.items()}

In [62]:
def edit_colNames(colNames, year):
    """Return a new list of column names with ``', <year>'`` appended.

    The first two entries (positions 0 and 1) are copied unchanged; every
    later entry gets the year suffix. The input sequence is not modified.
    """
    suffix = f', {year}'
    return [
        name if position <= 1 else name + suffix
        for position, name in enumerate(colNames)
    ]

In [63]:
# Build one cleaned, California-only frame per survey year; the frames are
# collected in `data` so they can be concatenated afterwards.
id_columns = ('CoC Number', 'CoC Name')
data = []
for y in all_years:
    # Drop the last four rows of each yearly file (presumably trailing
    # total/footnote rows — TODO confirm against the raw CSVs).
    original = pd.read_csv(f'data/{y}.csv')[:-4]
    edited_columns = edit_colNames(colNames, y)

    # Keep rows whose CoC Number contains 'CA'. na=False treats a missing
    # CoC Number as "no match" instead of yielding an NA mask that cannot be
    # used for boolean indexing.
    curr = original[original['CoC Number'].str.contains('CA', na=False)]

    # Bring older race-column headers in line with the names in colNames, then
    # keep only the wanted columns that this year's file actually provides.
    curr_corrected_cols = curr_corrected_columns(y)
    curr = curr.rename(curr_corrected_cols, axis=1)
    curr = curr.loc[:, curr.columns.isin(edited_columns)].reset_index(drop=True)

    # Remove comma separators so the counts can be cast to int. The replace is
    # applied to every column, so commas inside 'CoC Name' are removed as well.
    curr = curr.replace(',', '', regex=True)
    # Cast by column name rather than by position so the result does not
    # depend on the identifier columns being the first two in the file.
    curr = curr.astype({col: int for col in curr.columns if col not in id_columns})

    # Strip only a trailing ", <4-digit year>" suffix; names without that exact
    # suffix are left untouched.
    curr.columns = [re.sub(r', \d{4}$', '', col) for col in curr.columns]
    curr['Year'] = y
    data.append(curr)


In [64]:
# Stack the per-year frames row-wise. Each frame keeps its own row labels, and
# the index is not written to the CSV.
final_data = pd.concat(objs=data, axis=0)
final_data.to_csv(path_or_buf='data/homeless_ca.csv', index=False)

In [65]:
# Spot-check a single survey year.
final_data.loc[final_data['Year'] == 2015]

Unnamed: 0,CoC Number,CoC Name,Overall Homeless,Sheltered Total Homeless,Unsheltered Homeless,Year,Overall Homeless Veterans,Overall Homeless - Under 18,Overall Homeless - Age 18 to 24,Overall Homeless - Over 24,Overall Homeless - Non-Hispanic/Non-Latino,Overall Homeless - Hispanic/Latino,Overall Homeless - White,"Overall Homeless - Black, African American, or African",Overall Homeless - Asian or Asian American,"Overall Homeless - American Indian, Alaska Native, or Indigenous",Overall Homeless - Native Hawaiian or Other Pacific Islander,Overall Homeless - Multiple Races
0,CA-500,San Jose/Santa Clara City & County CoC,6556,1929,4627,2015,703.0,587.0,897.0,5072.0,3907.0,2649.0,2889.0,1172.0,176.0,451.0,84.0,1784.0
1,CA-501,San Francisco CoC,6775,2417,4358,2015,557.0,444.0,1389.0,4942.0,5525.0,1250.0,2639.0,2232.0,189.0,273.0,133.0,1309.0
2,CA-502,Oakland Berkeley/Alameda County CoC,4040,1643,2397,2015,388.0,541.0,414.0,3085.0,3280.0,760.0,1279.0,2143.0,106.0,104.0,43.0,365.0
3,CA-503,Sacramento City & County CoC,2659,1711,948,2015,313.0,452.0,303.0,1904.0,2208.0,451.0,1485.0,921.0,22.0,56.0,34.0,141.0
4,CA-504,Santa Rosa Petaluma/Sonoma County CoC,3097,1037,2060,2015,215.0,263.0,647.0,2187.0,2498.0,599.0,2048.0,199.0,42.0,227.0,68.0,513.0
5,CA-505,Richmond/Contra Costa County CoC,2031,704,1327,2015,122.0,249.0,173.0,1609.0,1604.0,427.0,1042.0,659.0,37.0,148.0,24.0,121.0
6,CA-506,Salinas/Monterey San Benito Counties CoC,2959,852,2107,2015,176.0,322.0,310.0,2327.0,1883.0,1076.0,1643.0,374.0,42.0,52.0,42.0,806.0
7,CA-507,Marin County CoC,1318,483,835,2015,66.0,144.0,377.0,797.0,1091.0,227.0,795.0,256.0,20.0,65.0,9.0,173.0
8,CA-508,Watsonville/Santa Cruz City & County CoC,1964,610,1354,2015,151.0,297.0,274.0,1393.0,1219.0,745.0,1253.0,115.0,48.0,142.0,24.0,382.0
9,CA-509,Mendocino County CoC,947,152,795,2015,39.0,63.0,102.0,782.0,820.0,127.0,718.0,12.0,1.0,103.0,14.0,99.0


In [310]:
# Yearly totals across all rows of final_data. numeric_only=True is passed
# explicitly so the text columns ('CoC Number', 'CoC Name') are excluded from
# the sum instead of relying on the implicit default.
final_data.groupby('Year').sum(numeric_only=True)

  final_data.groupby('Year').sum()


Unnamed: 0_level_0,Overall Homeless,Sheltered Total Homeless,Unsheltered Homeless,Overall Homeless Veterans,Overall Homeless - Under 18,Overall Homeless - Age 18 to 24,Overall Homeless - Over 24,Overall Homeless - Non-Hispanic/Non-Latino,Overall Homeless - Hispanic/Latino,Overall Homeless - White,Overall Homeless - Native Hawaiian or Other Pacific Islander,Overall Homeless - Multiple Races,"Overall Homeless - Black, African American, or African",Overall Homeless - Asian or Asian American,"Overall Homeless - American Indian, Alaska Native, or Indigenous"
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2007,138986,48511,90475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2008,136531,46965,89566,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009,123678,50777,72901,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010,123480,50899,72581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,125128,50691,74437,16783.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2012,120098,45890,74208,14611.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013,118552,45554,72998,12895.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014,113952,42515,71437,12096.0,15057.0,13853.0,85042.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015,115738,42039,73699,11311.0,13657.0,11645.0,90436.0,84183.0,31555.0,64340.0,1225.0,11003.0,0.0,0.0,0.0
2016,118142,39752,78390,9612.0,12828.0,12160.0,93154.0,85437.0,32705.0,67669.0,1284.0,10740.0,0.0,0.0,0.0


In [311]:
# Distinct CoC names in ascending order, displayed as the cell output.
counties = np.sort(final_data['CoC Name'].unique())
counties

array(['Alpine Inyo Mono Counties CoC',
       'Amador Calaveras Mariposa Tuolumne Counties CoC',
       'Bakersfield/Kern County CoC', 'Chico Paradise/Butte County CoC',
       'Colusa Glenn Trinity Counties CoC', 'Daly/San Mateo County CoC',
       'Davis Woodland/Yolo County CoC', 'Del Norte County CoC',
       'El Dorado County CoC', 'Fresno City & County/Madera County CoC',
       'Glendale CoC', 'Humboldt County CoC', 'Imperial County CoC',
       'Lake County CoC', 'Long Beach CoC',
       'Los Angeles City & County CoC', 'Marin County CoC',
       'Mendocino County CoC', 'Merced City & County CoC',
       'Napa City & County CoC', 'Nevada County CoC',
       'Oakland Berkeley/Alameda County CoC',
       'Oxnard San Buenaventura/Ventura County CoC', 'Pasadena CoC',
       'Redding/Shasta Siskiyou Lassen Plumas Del Norte Modoc Sierra Counties CoC',
       'Richmond/Contra Costa County CoC', 'Riverside City & County CoC',
       'Roseville Rocklin/Placer County CoC',
       'Sacra