## Homeless Encampments in LA

##### Spring 2025
##### Amanda Alonzo
##### I513 Usable AI
##### Indiana University, Bloomington
 


## Add LA County Parks Data 
Source: https://geohub.lacity.org/datasets/840b3da17e844486b3bafaae6eda87d4_0/about

Feature engineer to add distance from homeless encampments to parks using latitude and longitude

In [62]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import re
import numpy as np
import math

In [63]:
park_data_path = "../Datafiles/"

generated_data_path = "../Datagen/"

In [64]:
#this is generated at enf of 1_eda.ipynb
# it has some features added for dates and close time in days
#write to file with new features added for next notebook: visualization
df = pd.read_csv(generated_data_path+'homeless_camp_closed_all_ftr_eng_1.csv', low_memory=False)
df.head(1)

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CD,CDMember,NC,NCName,PolicePrecinct,ClosedDate_DT,CreatedDate_DT,close_time,close_time_days,CreatedDate_YR
0,1-1523590871,12/31/2019 11:26:00 PM,01/14/2020 07:52:00 AM,SR Created,BOS,Homeless Encampment,Closed,Mobile App,Self Service,iOS,...,3.0,Bob Blumenfield,13.0,CANOGA PARK NC,TOPANGA,2020-01-14 07:51:00,2019-12-31 11:26:00,13 days 20:25:00,13.0,2019


In [65]:
df_parks = pd.read_csv(park_data_path  + "Countywide_Parks_and_Open_Space_(Public_-_Hosted).csv")

In [66]:
df_parks.head(1)

Unnamed: 0,OBJECTID,UNIT_ID,LMS_ID,PARK_NAME,PARK_LBL,ACCESS_TYP,RPT_ACRES,GIS_ACRES,AGNCY_NAME,AGNCY_LEV,...,CENTER_LAT,CENTER_LON,ADDRESS,CITY,ZIP,HOURS,PHONES,IS_COUNTY,Shape__Area,Shape__Length
0,1,30.0,23603.0,El Barrio Park,El Barrio Park,Open Access,2.093,2.092992,"Claremont, City of",City,...,34.097081,-117.704423,400 Claremont Blvd,Claremont,91711.0,,,No,91170.362305,1451.547906


In [67]:
# calculate distance between lat and long park and homeless camp
# main idea captured from Bing search engine results 

def calculate_distance (lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    r = 6371  # Radius of Earth in kilometers. Use 3956 for miles.

    # Calculate the distance
    distance = r * c

    return distance



In [68]:

# Example usage
lat1, lon1 = 34.0522, -118.2437  # Los Angeles, CA
lat2, lon2 = 36.1699, -115.1398  # Las Vegas, NV

distance = calculate_distance(lat1, lon1, lat2, lon2)
print(f"The distance between the points is {distance:.2f} kilometers.")


The distance between the points is 367.61 kilometers.


In [69]:
df_parks.shape

(3012, 52)

In [70]:
df_parks.columns

Index(['OBJECTID', 'UNIT_ID', 'LMS_ID', 'PARK_NAME', 'PARK_LBL', 'ACCESS_TYP',
       'RPT_ACRES', 'GIS_ACRES', 'AGNCY_NAME', 'AGNCY_LEV', 'AGNCY_TYP',
       'AGNCY_WEB', 'MNG_AGENCY', 'COGP_TYP', 'NDS_AN_TYP', 'NEEDS_ANLZ',
       'TKIT_SUM', 'AMEN_RPT', 'PRKINF_CND', 'AM_OPNSP', 'AM_TRLS', 'TRLS_MI',
       'TENIS', 'BSKTB', 'BASEB', 'SOCCR', 'MPFLD', 'FITZN', 'SK8PK', 'PCNIC',
       'PLGND', 'POOLS', 'SPPAD', 'DGPRK', 'GYM', 'COMCT', 'SNRCT', 'RSTRM',
       'TOTAL_GOOD', 'TOTAL_FAIR', 'TOTAL_POOR', 'TYPE', 'CENTER_LAT',
       'CENTER_LON', 'ADDRESS', 'CITY', 'ZIP', 'HOURS', 'PHONES', 'IS_COUNTY',
       'Shape__Area', 'Shape__Length'],
      dtype='object')

In [71]:
df.columns

Index(['SRNumber', 'CreatedDate', 'UpdatedDate', 'ActionTaken', 'Owner',
       'RequestType', 'Status', 'RequestSource', 'CreatedByUserOrganization',
       'MobileOS', 'Anonymous', 'AssignTo', 'ServiceDate', 'ClosedDate',
       'AddressVerified', 'ApproximateAddress', 'Address', 'HouseNumber',
       'Direction', 'StreetName', 'Suffix', 'ZipCode', 'Latitude', 'Longitude',
       'Location', 'TBMPage', 'TBMColumn', 'TBMRow', 'APC', 'CD', 'CDMember',
       'NC', 'NCName', 'PolicePrecinct', 'ClosedDate_DT', 'CreatedDate_DT',
       'close_time', 'close_time_days', 'CreatedDate_YR'],
      dtype='object')

In [72]:
df.ZipCode.unique()

array(['91303.0', '91307.0', '91367.0', '91364.0', '90004.0', '90007.0',
       '91345.0', '91356.0', '90068.0', '91602.0', '91352.0', '90043.0',
       '90064.0', '91405.0', '90029.0', '90014.0', '90230.0', '90048.0',
       '91403.0', '90025.0', '91604.0', '90047.0', '90033.0', '91316.0',
       '91342.0', '90291.0', '91344.0', '91605.0', '91335.0', '90744.0',
       '90057.0', '90015.0', '91505.0', '90016.0', '90028.0', '90063.0',
       '91411.0', nan, '90036.0', '90038.0', '91402.0', '91331.0',
       '90019.0', '90003.0', '90011.0', '90039.0', '90008.0', '90018.0',
       '90034.0', '91606.0', '90010.0', '90020.0', '90005.0', '90046.0',
       '90032.0', '91601.0', '90710.0', '90021.0', '91040.0', '91607.0',
       '90023.0', '90026.0', '91306.0', '91406.0', '90037.0', '90066.0',
       '90012.0', '91423.0', '90041.0', '90002.0', '90027.0', '90006.0',
       '90059.0', '91343.0', '90044.0', '90062.0', '90058.0', '90035.0',
       '90013.0', '90731.0', '92605.0', '91401.0', '90094

In [73]:
df_parks.ZIP.unique()

array([91711., 93535., 90022., 91307., 90230., 90265., 90068., 90032.,
       90011., 90065., 90012., 90039., 91750., 91316., 91401., 90026.,
       90033., 91042., 91352., 91741., 90255., 91203., 90703., 91604.,
       90241., 90016., 91311., 91767., 90042., 91770., 91755., 91506.,
       93536., 91702., 91205., 91748., 90242., 91344., 91803., 90640.,
       90040., 90210., 91201., 90605., 90631., 91768., 90036., 91040.,
       90005., 91405., 90047., 91754., 91706., 90027., 91301., 93551.,
       91436., 91722., 90028., 90038., 90024., 90007., 91789., 90405.,
       91331., 91321., 91746., 90064., 90232., 93543., 93534., 91406.,
       90201., 91303., 90650., 91342., 91724., 91723., 90211., 91744.,
       91302., 90057., 91304., 91384., 90670., 91390., 91732., 90041.,
       93544., 90272., 91340.,    nan, 90008., 90029., 90018., 91326.,
       90031., 91387., 91364., 91773., 90019., 91745., 91740., 90262.,
       91766., 93591., 90712., 90066., 90062., 90270., 91504., 90606.,
      

In [74]:
df_parks[df_parks.ZIP==90638].head(5)

Unnamed: 0,OBJECTID,UNIT_ID,LMS_ID,PARK_NAME,PARK_LBL,ACCESS_TYP,RPT_ACRES,GIS_ACRES,AGNCY_NAME,AGNCY_LEV,...,CENTER_LAT,CENTER_LON,ADDRESS,CITY,ZIP,HOURS,PHONES,IS_COUNTY,Shape__Area,Shape__Length
881,882,6758.0,23302.0,Creek Park,Creek Park,Open Access,15.948,15.949055,"La Mirada, City of",City,...,33.920901,-117.998104,15101-,La Mirada,90638.0,,,No,694738.1,6411.736832
882,883,6759.0,,Frontier Park (City of La Mirada),Frontier Park,Open Access,4.167,4.167478,"La Mirada, City of",City,...,33.910834,-118.037415,13212 Marquardt Ave,La Mirada,90638.0,,,No,181534.6,1972.680031
883,884,6760.0,23500.0,Neff Park,Neff Park,Open Access,9.263,9.263671,"La Mirada, City of",City,...,33.898147,-118.025861,14300 San Cristobal Dr,La Mirada,90638.0,,,No,403523.9,2565.194944
886,887,6765.0,23286.0,Windermere Park,Windermere Park,Open Access,3.516,3.515921,"La Mirada, City of",City,...,33.891153,-118.006551,15261 Cheshire St,La Mirada,90638.0,,,No,153152.9,2138.232883
889,890,6769.0,23291.0,Behringer Park,Behringer Park,Restricted Access,26.694,26.695708,"La Mirada, City of",City,...,33.902452,-117.987304,15900 Alicante Rd,La Mirada,90638.0,,,No,1162860.0,6658.234183


In [75]:
df[df.ZipCode.isin(['90019.0'])].head(1)

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CD,CDMember,NC,NCName,PolicePrecinct,ClosedDate_DT,CreatedDate_DT,close_time,close_time_days,CreatedDate_YR
88,1-1523242081,12/31/2019 12:10:00 PM,01/04/2020 08:27:00 AM,SR Created,BOS,Homeless Encampment,Closed,Call,ITA,,...,4.0,David Ryu,119.0,GREATER WILSHIRE NC,WILSHIRE,2020-01-04 08:24:00,2019-12-31 12:10:00,3 days 20:14:00,3.0,2019


In [76]:
df['ZipCodeFmt'] = df['ZipCode'].apply(lambda x: str(x)[:5] )

In [77]:
df[df['ZipCodeFmt']=='VE'].head()

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CDMember,NC,NCName,PolicePrecinct,ClosedDate_DT,CreatedDate_DT,close_time,close_time_days,CreatedDate_YR,ZipCodeFmt
204638,1-2778903201,10/31/2022 10:12:01 AM,01/24/2023 02:45:34 PM,SR Created,LASAN,Homeless Encampment,Closed,Mobile App,Self Service,Android,...,Curren D. Price Jr.,87.0,Empowerment Congress Southeast,SOUTHEAST,2023-01-24 02:44:22,2022-10-31 10:12:01,84 days 16:32:21,84.0,2022,VE


In [78]:
df_parks['ZipCodeFmt'] = df_parks['ZIP'].apply(lambda x: str(x)[:5] )

In [79]:
df_parks['ZipCodeFmt'].value_counts()

ZipCodeFmt
90265    120
90290     83
91302     63
91301     54
91702     38
        ... 
90071      1
90048      1
90010      1
91759      1
90056      1
Name: count, Length: 279, dtype: int64

In [80]:
df_merge = pd.merge(df, df_parks, how = "left",  on="ZipCodeFmt") 

In [81]:
df_merge.head(1)

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CENTER_LAT,CENTER_LON,ADDRESS,CITY,ZIP,HOURS,PHONES,IS_COUNTY,Shape__Area,Shape__Length
0,1-1523590871,12/31/2019 11:26:00 PM,01/14/2020 07:52:00 AM,SR Created,BOS,Homeless Encampment,Closed,Mobile App,Self Service,iOS,...,34.19838,-118.587826,7008 De Soto Ave,Canoga Park,91303.0,,(818) 883-6641,No,185566.538086,1858.825878


In [82]:
df_merge [ df_merge["SRNumber"]=='1-1523590871' ]['ZipCodeFmt'].head()

0    91303
1    91303
Name: ZipCodeFmt, dtype: object

In [58]:
df_merge.columns

Index(['SRNumber', 'CreatedDate', 'UpdatedDate', 'ActionTaken', 'Owner',
       'RequestType', 'Status', 'RequestSource', 'CreatedByUserOrganization',
       'MobileOS', 'Anonymous', 'AssignTo', 'ServiceDate', 'ClosedDate',
       'AddressVerified', 'ApproximateAddress', 'Address', 'HouseNumber',
       'Direction', 'StreetName', 'Suffix', 'ZipCode', 'Latitude', 'Longitude',
       'Location', 'TBMPage', 'TBMColumn', 'TBMRow', 'APC', 'CD', 'CDMember',
       'NC', 'NCName', 'PolicePrecinct', 'ClosedDate_DT', 'CreatedDate_DT',
       'close_time', 'close_time_days', 'CreatedDate_YR', 'ZipCodeFmt',
       'OBJECTID', 'UNIT_ID', 'LMS_ID', 'PARK_NAME', 'PARK_LBL', 'ACCESS_TYP',
       'RPT_ACRES', 'GIS_ACRES', 'AGNCY_NAME', 'AGNCY_LEV', 'AGNCY_TYP',
       'AGNCY_WEB', 'MNG_AGENCY', 'COGP_TYP', 'NDS_AN_TYP', 'NEEDS_ANLZ',
       'TKIT_SUM', 'AMEN_RPT', 'PRKINF_CND', 'AM_OPNSP', 'AM_TRLS', 'TRLS_MI',
       'TENIS', 'BSKTB', 'BASEB', 'SOCCR', 'MPFLD', 'FITZN', 'SK8PK', 'PCNIC',
       'PLGN

In [87]:
# latitude, longitude to CENTER_LAT, CENTER_LON
# calculate distance to each park in same zip code
# crashed on full data set, so narrow down to year

df_merge_yr = df_merge[ df_merge["CreatedDate_YR"]==2023 ].copy()




In [88]:
df_merge_yr.size

43316544

In [90]:
df_merge_yr["park_distance"] = df_merge_yr.apply(lambda row: calculate_distance ( row["Latitude"], row["Longitude"], row["CENTER_LAT"], row["CENTER_LON"] ), axis=1 )


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merge_yr["park_distance"] = df_merge_yr.apply(lambda row: calculate_distance ( row["Latitude"], row["Longitude"], row["CENTER_LAT"], row["CENTER_LON"] ), axis=1 )


In [93]:
df_merge_yr.park_distance.max()

np.float64(100.11088974523992)

In [94]:
df_merge_yr[ df_merge_yr["park_distance"] == 100.11088974523992 ]


Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CENTER_LON,ADDRESS,CITY,ZIP,HOURS,PHONES,IS_COUNTY,Shape__Area,Shape__Length,park_distance
1722413,1-3513861371,02/09/2023 10:27:10 AM,10/31/2023 12:20:09 PM,SR Created,LASAN,Homeless Encampment,Closed,Mobile App,Self Service,Android,...,-118.325638,Avalon Ave,,,,,No,49300.750977,3045.071778,100.11089


In [96]:
df_merge_yr[ df_merge_yr["park_distance"] == df_merge_yr.park_distance.min() ]


Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,ActionTaken,Owner,RequestType,Status,RequestSource,CreatedByUserOrganization,MobileOS,...,CENTER_LON,ADDRESS,CITY,ZIP,HOURS,PHONES,IS_COUNTY,Shape__Area,Shape__Length,park_distance
1978662,1-4449339521,08/30/2023 11:06:32 AM,09/09/2023 08:56:01 AM,SR Created,LASAN,Homeless Encampment,Closed,Self Service,Self Service,,...,-118.229515,Los Angeles,Los Angeles,90031.0,,,No,9163.552734,580.48664,0.004306
