In [1]:
import requests
import pandas as pd
import json
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
# Query the data.nashville.gov resource for incidents reported between
# 2022-01-01 and 2022-09-30 whose offense_nibrs code is '220'.
# NOTE(review): '220' is presumably the NIBRS burglary code - confirm.
endpoint = "https://data.nashville.gov/resource/2u6v-ujjs.json"

params = {
    '$where' : "incident_reported between '2022-01-01T00:00:00' and '2022-09-30T23:59:59'",
    'offense_nibrs' : '220',
    # NOTE(review): if more than 5000 rows match, the response is presumably
    # truncated at this limit - confirm the row count stays below it.
    '$limit' : 5000
}

response = requests.get(endpoint, params = params)

# Stop here on a 4xx/5xx reply instead of building a DataFrame from an error payload.
response.raise_for_status()

res = response.json()
all_burglary = pd.DataFrame(res)

In [3]:
# Keep only incidents whose offense description mentions AGGRAVATED
# (case-insensitive via upper-casing). na = False treats a missing description
# as "no match"; without it the mask contains NaN and .loc raises.
agg_burglary = all_burglary.loc[
    all_burglary['offense_description'].str.upper().str.contains('AGGRAVATED', na = False)
].copy()

# Build a point geometry for each incident from its longitude (x) and latitude (y).
agg_burglary['geometry'] = gpd.points_from_xy(
    agg_burglary['longitude'],
    agg_burglary['latitude']
)

In [4]:
# Read the Census API key from a JSON file kept one directory above the notebook.
# A forward slash is used so the path has no invalid '\c' escape and resolves on
# every platform, matching the '../data/...' path used for the shapefile.
with open('../census_api.json') as fi:
    credentials = json.load(fi)

api_key = credentials['api_key']

In [5]:
# Request variable B01001_001E from the 2020 ACS 5-year endpoint for every
# tract in state 47, county 037.
endpoint = f'https://api.census.gov/data/2020/acs/acs5?get=NAME,B01001_001E&for=tract:*&in=state:47&in=county:037&key={api_key}'

response = requests.get(endpoint)

# Stop here on a 4xx/5xx reply instead of trying to parse an error body.
response.raise_for_status()

res = response.json()
# Raw frame: the header names arrive as the first row and are promoted later.
population = pd.DataFrame(res)

In [6]:
# The first row of the raw frame carries the column names. Promote it to the
# header, drop it from the data, then give the columns readable names.
# The guard makes the cell safe to re-run: once 'population' is a column the
# frame has already been cleaned, and running again would discard a data row.
if 'population' not in population.columns:
    population = (
        population
        .rename(columns = population.iloc[0])
        .drop(population.index[0])
        .rename(columns = {'NAME' : 'name', 'B01001_001E' : 'population'})
    )

In [7]:
# Request variable S1901_C01_012E from the 2020 ACS 5-year subject tables for
# every tract in state 47, county 037.
endpoint = f'https://api.census.gov/data/2020/acs/acs5/subject?get=NAME,S1901_C01_012E&for=tract:*&in=state:47&in=county:037&key={api_key}'

response = requests.get(endpoint)

# Stop here on a 4xx/5xx reply instead of trying to parse an error body.
response.raise_for_status()

res = response.json()
# Raw frame: the header names arrive as the first row and are promoted later.
median_income = pd.DataFrame(res)

In [8]:
# The first row of the raw frame carries the column names. Promote it to the
# header, drop it from the data, then give the columns readable names.
# The guard makes the cell safe to re-run: once 'median_income' is a column the
# frame has already been cleaned, and running again would discard a data row.
if 'median_income' not in median_income.columns:
    median_income = (
        median_income
        .rename(columns = median_income.iloc[0])
        .drop(median_income.index[0])
        .rename(columns = {'NAME' : 'name', 'S1901_C01_012E' : 'median_income'})
    )

In [9]:
# Load the census tract polygons from the local shapefile.
# NOTE(review): judging by the file name this is the 2020 TIGER/Line tract file
# for state 47, i.e. it presumably covers the whole state - confirm.
census_tract = gpd.read_file('../data/tl_2020_47_tract/tl_2020_47_tract.shp')

In [10]:
# Turn the filtered incidents into a GeoDataFrame that carries the same CRS
# label as the tract polygons, so the two layers can be spatially joined.
# NOTE(review): this labels the points with the tract CRS rather than
# reprojecting them - confirm the coordinates are already in that CRS.
agg_burg_geo = gpd.GeoDataFrame(
    agg_burglary,
    geometry = agg_burglary['geometry'],
    crs = census_tract.crs
)

# Keep a single row per incident number.
agg_burg_geo = agg_burg_geo.drop_duplicates(subset = 'incident_number')

In [11]:
# Attach each incident point to the tract polygon that contains it.
# how = 'right' keeps every tract, so a tract with no incident still
# contributes one row whose incident columns are empty.
agg_burg_tract = (
    gpd.sjoin(agg_burg_geo, census_tract, how = 'right', predicate = 'within')
).rename(columns = {'TRACTCE' : 'tract'})

# Join the census tables on state + county + tract rather than tract alone.
# A tract code is only unique within a county, so matching on the code alone
# would also keep same-numbered tracts from other counties in the shapefile.
agg_burg_tract = agg_burg_tract.merge(
    population,
    how = 'inner',
    left_on = ['STATEFP', 'COUNTYFP', 'tract'],
    right_on = ['state', 'county', 'tract']
)

# 'state' and 'county' now exist on the left side, so they can be shared keys.
agg_burg_tract = agg_burg_tract.merge(
    median_income,
    how = 'inner',
    on = ['state', 'county', 'tract']
)

# Cast the census values (received as text) to numbers and drop unusable tracts.
# Chaining assign/loc yields a fresh frame at every step, which avoids writing a
# column into a filtered slice (SettingWithCopyWarning).
agg_burg_tract = (
    agg_burg_tract
    .assign(median_income = lambda df: df['median_income'].astype('float'))
    # NOTE(review): negative incomes are presumably the Census "not available"
    # sentinel values - confirm.
    .loc[lambda df: df['median_income'] >= 0]
    .assign(population = lambda df: df['population'].astype('int'))
    # A zero-population tract cannot have a per-resident rate.
    .loc[lambda df: df['population'] != 0]
)

Which census tract had the highest number of aggravated burglaries?

In [12]:
# Tract with the most incidents. Rows without an incident_number are the
# placeholder rows the right spatial join keeps for tracts with no incident;
# they are dropped first so that rows, not incidents, are never what gets counted.
(agg_burg_tract
 .dropna(subset = ['incident_number'])['tract']
 .value_counts()
 .nlargest(1)
)

014300    41
Name: tract, dtype: int64

Which census tract had the highest number of aggravated burglaries per 1,000 residents?

In [13]:
# Count the non-missing incident numbers in each tract. Population is part of
# the grouping key only so that it is carried into the result.
rate = (
    agg_burg_tract
    .groupby(['tract', 'population'])['incident_number']
    .count()
    .reset_index()
)

# Incidents per 1,000 residents.
rate = rate.assign(
    rate = rate['incident_number'] / (rate['population'] / 1000)
)

# Show the tract with the highest rate.
rate.sort_values('rate', ascending = False).head(1)

Unnamed: 0,tract,population,incident_number,rate
53,14300,1617,41,25.355597


In [15]:
# Per-tract population expressed in thousands, computed row by row.
# NOTE(review): this rebinds `rate` to a Series of population / 1000, replacing
# the DataFrame of the same name built in the previous cell - confirm intended.
rate = (
    agg_burg_tract
    .groupby(['tract', 'population'])['incident_number']
    .count()
    .reset_index()
    .apply(lambda row: row['population'] / 1000, axis = 1)
)