In [23]:
# Dependencies and setup
import pandas as pd
import numpy as np
import requests
from pathlib import Path
import matplotlib.pyplot as plt

from api_keys import census_key

In [26]:
# Import unemployment rate csv file, create DataFrame, and set index to Area Type
unemployment_data = Path('resources/Local_Area_Unemployment_Statistics__LAUS___Annual_Average.csv')
unemployment_df = pd.read_csv(unemployment_data)
unemployment_df = unemployment_df.set_index('Area Type')
unemployment_df

# Filter Area Type to only include County and State data and reduce columns
area_type_df = unemployment_df.loc[['State','County'],
                                   ['Area Name', 'Year', 'Labor Force', 'Employment', 'Unemployment','Unemployment Rate']]
area_type_df

# Group by Year and calculate averages
year_grouped = area_type_df.groupby(['Year', 'Area Name']).mean()
year_grouped

# Reduce to only 2017-2022
recent = year_grouped.loc[[2017, 2018, 2019, 2020, 2021, 2022]]
recent

Unnamed: 0_level_0,Unnamed: 1_level_0,Labor Force,Employment,Unemployment,Unemployment Rate
Year,Area Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,Alameda County,838200.0,807100.0,31100.0,3.7
2017,Alpine County,550.0,510.0,30.0,6.2
2017,Amador County,14690.0,13960.0,740.0,5.0
2017,Butte County,102000.0,96100.0,5900.0,5.8
2017,Calaveras County,21020.0,20010.0,1000.0,4.8
...,...,...,...,...,...
2022,Tulare County,207500.0,190500.0,16900.0,8.2
2022,Tuolumne County,19880.0,18990.0,890.0,4.5
2022,Ventura County,413600.0,398400.0,15200.0,3.7
2022,Yolo County,109000.0,104600.0,4400.0,4.0


In [3]:
# Import age csv file and create DataFrame
age_data = Path('resources/experiencing_homelessness_age_demographics.csv')
age_df = pd.read_csv(age_data)
age_df

Unnamed: 0,CALENDAR_YEAR,COC_ID,COC_NAME,AGE_GROUP_PUBLIC,EXPERIENCING_HOMELESSNESS_CNT
0,2017,All,California,18-24,15862
1,2017,All,California,25-34,28339
2,2017,All,California,35-44,25556
3,2017,All,California,45-54,27441
4,2017,All,California,55-64,23220
...,...,...,...,...,...
2136,2022,CA-614,San Luis Obispo County CoC,45-54,389
2137,2022,CA-614,San Luis Obispo County CoC,55-64,360
2138,2022,CA-614,San Luis Obispo County CoC,65+,174
2139,2022,CA-614,San Luis Obispo County CoC,Under 18,742


In [4]:
# Create clean DataFrame by dropping rows with '*' in count column
clean_age_df = age_df.loc[age_df['EXPERIENCING_HOMELESSNESS_CNT']!='*']
clean_age_df

Unnamed: 0,CALENDAR_YEAR,COC_ID,COC_NAME,AGE_GROUP_PUBLIC,EXPERIENCING_HOMELESSNESS_CNT
0,2017,All,California,18-24,15862
1,2017,All,California,25-34,28339
2,2017,All,California,35-44,25556
3,2017,All,California,45-54,27441
4,2017,All,California,55-64,23220
...,...,...,...,...,...
2136,2022,CA-614,San Luis Obispo County CoC,45-54,389
2137,2022,CA-614,San Luis Obispo County CoC,55-64,360
2138,2022,CA-614,San Luis Obispo County CoC,65+,174
2139,2022,CA-614,San Luis Obispo County CoC,Under 18,742


In [5]:
# Change homeless count data type to integer
clean_age_df['EXPERIENCING_HOMELESSNESS_CNT'] = clean_age_df["EXPERIENCING_HOMELESSNESS_CNT"].astype(int)

# Group by year and COC Name and sum homeless counts
total_homeless_count = clean_age_df.groupby(['CALENDAR_YEAR', 'COC_NAME'])['EXPERIENCING_HOMELESSNESS_CNT'].sum().reset_index()
total_homeless_count

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_age_df['EXPERIENCING_HOMELESSNESS_CNT'] = clean_age_df["EXPERIENCING_HOMELESSNESS_CNT"].astype(int)


Unnamed: 0,CALENDAR_YEAR,COC_NAME,EXPERIENCING_HOMELESSNESS_CNT
0,2017,Alameda County CoC,6713
1,2017,"Alpine, Inyo, Mono Counties CoC",151
2,2017,"Amador, Calaveras, Mariposa, Tuolumne Counties...",368
3,2017,Butte County CoC,1400
4,2017,California,184234
...,...,...,...
263,2022,Stanislaus County CoC,6482
264,2022,Tehama County CoC,670
265,2022,Ventura County CoC,2919
266,2022,Yolo County CoC,1419


In [6]:
# Find all Continuums of Care in Los Angeles County
la_county_coc = total_homeless_count.loc[total_homeless_count["COC_NAME"].str.contains("Los Angeles County", case = False)]
la_county_coc

Unnamed: 0,CALENDAR_YEAR,COC_NAME,EXPERIENCING_HOMELESSNESS_CNT
9,2017,Glendale CoC (Los Angeles County),341
15,2017,Long Beach CoC (Los Angeles County),2754
16,2017,Los Angeles County CoC,57002
24,2017,Pasadena CoC (Los Angeles County),1328
53,2018,Glendale CoC (Los Angeles County),362
59,2018,Long Beach CoC (Los Angeles County),2746
60,2018,Los Angeles County CoC,76037
68,2018,Pasadena CoC (Los Angeles County),1352
97,2019,Glendale CoC (Los Angeles County),431
103,2019,Long Beach CoC (Los Angeles County),3265


In [7]:
# Use replace to group all of the Los Angeles County CoCs
cocs_to_replace = ["Glendale CoC (Los Angeles County)",
                   "Pasadena CoC (Los Angeles County)", 
                   "Long Beach CoC (Los Angeles County)"
                  ]
total_homeless_count['COC_NAME'] = total_homeless_count['COC_NAME'].replace(cocs_to_replace, "Los Angeles County CoC")
total_homeless_count

Unnamed: 0,CALENDAR_YEAR,COC_NAME,EXPERIENCING_HOMELESSNESS_CNT
0,2017,Alameda County CoC,6713
1,2017,"Alpine, Inyo, Mono Counties CoC",151
2,2017,"Amador, Calaveras, Mariposa, Tuolumne Counties...",368
3,2017,Butte County CoC,1400
4,2017,California,184234
...,...,...,...
263,2022,Stanislaus County CoC,6482
264,2022,Tehama County CoC,670
265,2022,Ventura County CoC,2919
266,2022,Yolo County CoC,1419


In [8]:
# Use Groupby again to group new Los Angeles County CoC rows
total_homeless_count = total_homeless_count.groupby(['CALENDAR_YEAR','COC_NAME'])['EXPERIENCING_HOMELESSNESS_CNT'].sum().reset_index()
total_homeless_count

Unnamed: 0,CALENDAR_YEAR,COC_NAME,EXPERIENCING_HOMELESSNESS_CNT
0,2017,Alameda County CoC,6713
1,2017,"Alpine, Inyo, Mono Counties CoC",151
2,2017,"Amador, Calaveras, Mariposa, Tuolumne Counties...",368
3,2017,Butte County CoC,1400
4,2017,California,184234
...,...,...,...
245,2022,Stanislaus County CoC,6482
246,2022,Tehama County CoC,670
247,2022,Ventura County CoC,2919
248,2022,Yolo County CoC,1419


In [9]:
# Import california population csv file and create DataFrame
population_data = Path('resources/california_population.csv')
population_df = pd.read_csv(population_data)
population_df

Unnamed: 0,COUNTY,Year,Total Population,Total Housing Units,Single Detached,Single Attached,Two to Four,Five Plus,Mobile Homes,Occupied,Vacancy Rate,Persons per Household
0,Alameda,2017,1644303,596937,315395,46333,65721,161630,7858,564303,5.50%,2.84
1,Alpine,2017,1161,1780,1052,18,45,631,34,483,72.90%,2.35
2,Amador,2017,36900,18189,14905,570,605,690,1419,15140,16.80%,2.18
3,Butte,2017,225468,98871,61689,3112,8976,10855,14239,90791,8.20%,2.42
4,Calaveras,2017,45170,27908,24256,366,676,392,2218,18371,34.20%,2.43
...,...,...,...,...,...,...,...,...,...,...,...,...
349,Tuolumne,2022,54465,31522,25225,311,1211,1437,3338,23014,27.00%,2.23
350,Ventura,2022,831533,294989,187223,33143,16048,47700,10875,281120,4.70%,2.91
351,Yolo,2022,221639,81945,49312,5107,6572,17390,3564,78135,4.60%,2.62
352,Yuba,2022,82193,30214,21764,570,2640,2356,2884,27775,8.10%,2.88


In [10]:
# Reduce columns to just county, year, and total population
population_df = population_df[['COUNTY','Year','Total Population']]
population_df

Unnamed: 0,COUNTY,Year,Total Population
0,Alameda,2017,1644303
1,Alpine,2017,1161
2,Amador,2017,36900
3,Butte,2017,225468
4,Calaveras,2017,45170
...,...,...,...
349,Tuolumne,2022,54465
350,Ventura,2022,831533
351,Yolo,2022,221639
352,Yuba,2022,82193


In [11]:
# Rename COUNTY column
population_df = population_df.rename(columns = {'COUNTY': 'County'})
population_df

Unnamed: 0,County,Year,Total Population
0,Alameda,2017,1644303
1,Alpine,2017,1161
2,Amador,2017,36900
3,Butte,2017,225468
4,Calaveras,2017,45170
...,...,...,...
349,Tuolumne,2022,54465
350,Ventura,2022,831533
351,Yolo,2022,221639
352,Yuba,2022,82193


In [29]:
#add 'County' to the county names
population_df['County'] = population_df['County'].astpye(str) + str(' County')
population_df

KeyError: 'County'