In [3]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange

storage = "/Volumes/easystore/Drones/"
calls_for_service = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/calls-for-service-with-census-data.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
flight_data = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/gpx-with-census-data.csv", chunksize=100000, dtype=str
            ),
            desc="Loading data",
        )
    ]
)

# flight_data = pd.read_csv("/Volumes/easystore/Drones/compiled-flight-data-gpx.csv",dtype=str)
# flight_data.head()

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

In [4]:
shp_file = gpd.read_file(
    "/Volumes/easystore/Drones/census/tl_2022_06_tabblock20/tl_2022_06_tabblock20.shp"
)

In [5]:
san_deigo_county = shp_file[shp_file["GEOID20"].str.contains("06073")]

In [6]:
call_for_service_block_count = calls_for_service.groupby(["Full FIPS (block)"]).count()
call_for_service_block_count = call_for_service_block_count.reset_index()[
    ["Full FIPS (block)", "Incident No."]
]
call_for_service_block_count.columns = ["GEOID20", "call_count"]
call_for_service_block_count.head()

Unnamed: 0,GEOID20,call_count
0,60250102002026,1
1,60650306022000,2
2,60650445182017,1
3,60650451232003,1
4,60650453021004,2


In [7]:
call_for_service_block_count_with_shps = pd.merge(
    san_deigo_county, call_for_service_block_count, how="left", on=["GEOID20"]
)

In [8]:
call_for_service_block_count_with_shps["GEOID20"].apply(
    lambda x: len(x)
).drop_duplicates()


0    15
Name: GEOID20, dtype: int64

In [9]:
call_for_service_block_count_with_shps[
    "call_count"
] = call_for_service_block_count_with_shps["call_count"].replace(np.nan, 0)



In [10]:
flight_data["geoid"] = flight_data["geoid"].astype(str)
flight_data["len"] = flight_data["geoid"].apply(lambda x: len(x))
flight_data.loc[flight_data["len"] == 14, "geoid"] = "0" + flight_data["geoid"]

In [11]:
flight_data.head()

Unnamed: 0,id,type,incident_id,address_map,sequence,longitude,latitude,altitude,success,geoid,block,block_group,tract,county,state,county_name,state_name,population,housing,len
0,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:58+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
1,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:59+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
2,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:44:00+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
3,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.2684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
4,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.3684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14


In [None]:
# for flight_id in tqdm(flight_data['id'].drop_duplicates().values,total=len(flight_data['id'].drop_duplicates().values)):
#     df = flight_data[flight_data['id']==flight_id]
#     times = pd.to_datetime(df["sequence"])
#     grouped_by_block_and_second = flight_data.groupby(
#         [df["geoid"], times.dt.hour, times.dt.minute, times.dt.second]
#     ).count()
#     grouped_by_block_and_second.index.names = ["geoid", "hour", "minute", "second"]
#     grouped_by_block_and_second = grouped_by_block_and_second.reset_index()[
#         ["geoid", "hour", "minute", "second", "id"]
#     ]
#     grouped_by_block_and_second.columns = ["geoid", "hour", "minute", "second", "count"]


In [12]:
times = pd.to_datetime(flight_data["sequence"])
grouped_by_block_and_second = flight_data.groupby(
    ["id","geoid", times.dt.hour, times.dt.minute, times.dt.second]
).count()
grouped_by_block_and_second.index.names = ["id","geoid", "hour", "minute", "second"]
grouped_by_block_and_second = grouped_by_block_and_second.reset_index()[
    ["id","geoid", "hour", "minute", "second", "type"]
]
grouped_by_block_and_second.columns = ["id","geoid", "hour", "minute", "second", "count"]


In [38]:
unique_seconds_in_block = grouped_by_block_and_second.groupby(["geoid"]).count()
unique_seconds_in_block = unique_seconds_in_block.reset_index()[["geoid", "hour"]]
unique_seconds_in_block.columns = ["GEOID20", "seconds"]
unique_seconds_in_block.head()

Unnamed: 0,GEOID20,seconds
0,60070001021001,403
1,60070001021002,1659
2,60070001021006,113
3,60070001021007,191
4,60070001021008,3


In [16]:
unique_seconds_in_block_with_shps = pd.merge(
    san_deigo_county, unique_seconds_in_block, how="left", on=["GEOID20"]
)
unique_seconds_in_block_with_shps["seconds"] = unique_seconds_in_block_with_shps[
    "seconds"
].replace(np.nan, 0)

In [17]:
unique_seconds_in_block_with_shps["weight"] = (
    unique_seconds_in_block_with_shps["seconds"]
    / unique_seconds_in_block_with_shps["ALAND20"]
)

In [21]:
call_for_service_block_count_with_shps['weight'] = call_for_service_block_count_with_shps['call_count']/call_for_service_block_count_with_shps['POP20']

In [22]:
call_for_service_block_count_with_shps.to_csv('../data/outputs_cfs-block-count.csv',index=False)

In [41]:
fm = unique_seconds_in_block_with_shps[unique_seconds_in_block_with_shps['seconds']>0]
fm.to_csv('../data/outputs_drone-seconds-block-count.csv',index=False)

In [24]:
unique_seconds_in_block_with_shps

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,geometry,seconds,weight
0,06,073,016615,2007,060730166152007,Block 2007,G5040,U,78661,U,S,491755,0,+32.8416420,-116.9740851,6,1200,"POLYGON ((-116.97932 32.83933, -116.97930 32.8...",0.0,0.0
1,06,073,016503,1003,060730165031003,Block 1003,G5040,U,78661,U,S,15125,0,+32.8224139,-116.9599247,64,183,"POLYGON ((-116.96076 32.82294, -116.96045 32.8...",0.0,0.0
2,06,073,021001,3138,060730210013138,Block 3138,G5040,R,,,S,1011607,0,+33.0940003,-116.0887615,0,0,"POLYGON ((-116.10349 33.09636, -116.10278 33.0...",0.0,0.0
3,06,073,018700,1245,060730187001245,Block 1245,G5040,U,78661,U,S,48666,0,+33.2564482,-117.4134796,105,176,"POLYGON ((-117.41531 33.25678, -117.41528 33.2...",0.0,0.0
4,06,073,018700,1308,060730187001308,Block 1308,G5040,R,,,S,76722,0,+33.2224261,-117.3972530,0,0,"POLYGON ((-117.39944 33.22227, -117.39895 33.2...",0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28687,06,113,010607,3002,061130106073002,Block 3002,G5040,U,22420,U,S,55919,0,+38.5406704,-121.7003191,46,114,"POLYGON ((-121.70190 38.54086, -121.70142 38.5...",0.0,0.0
28688,06,097,150607,3014,060971506073014,Block 3014,G5040,U,68887,U,S,8270,0,+38.2607100,-122.6205559,12,38,"POLYGON ((-122.62126 38.26090, -122.62115 38.2...",0.0,0.0
28689,06,097,150607,3004,060971506073004,Block 3004,G5040,U,68887,U,S,16724,0,+38.2592186,-122.6182938,17,47,"POLYGON ((-122.61906 38.25888, -122.61906 38.2...",0.0,0.0
28690,06,059,110607,3003,060591106073003,Block 3003,G5040,U,51445,U,S,3497,0,+33.8827190,-117.9933140,0,0,"POLYGON ((-117.99419 33.88347, -117.99411 33.8...",0.0,0.0


In [14]:
shp_file = gpd.read_file(
    "/Volumes/easystore/Drones/census/tl_2022_06_tabblock20/tl_2022_06_tabblock20.shp"
)

In [18]:
calls_for_service = calls_for_service[
    [
        "Incident No.",
        "Date",
        "Description",
        "Priority",
        "Disposition",
        "Call Source",
        "Address",
        "Full FIPS (block)",
    ]
]

In [22]:
calls_for_service["GEOID20"] = calls_for_service["Full FIPS (block)"]

# a = pd.merge(calls_for_service, shp_file, on=["GEOID20"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calls_for_service['GEOID20'] = calls_for_service['Full FIPS (block)']


In [26]:
grouped_calls_for_service = (
    calls_for_service.groupby("GEOID20").count().reset_index()[["GEOID20", "Date"]]
)

In [None]:
grouped_calls_for_service_with_shapes = pd.merge(
    grouped_calls_for_service, shp_file, on=["GEOID20"]
)
grouped_calls_for_service_with_shapes.columns

Index(['GEOID20', 'Date', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLOCKCE20',
       'NAME20', 'MTFCC20', 'UR20', 'UACE20', 'UATYPE20', 'FUNCSTAT20',
       'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20', 'HOUSING20', 'POP20',
       'geometry'],
      dtype='object')

In [34]:
grouped_calls_for_service_with_shapes["count"] = grouped_calls_for_service_with_shapes[
    "Date"
]
grouped_calls_for_service_with_shapes["cpc"] = (
    grouped_calls_for_service_with_shapes["count"]
    / grouped_calls_for_service_with_shapes["POP20"]
)

In [41]:
flight_data["dt"] = pd.to_datetime(flight_data["sequence"])

In [42]:
times = pd.to_datetime(flight_data['sequence'])
a = row.groupby([flight_data["geoid"], times.dt.hour, times.dt.minute, times.dt.second])

Unnamed: 0,id,type,incident_id,address_map,sequence,longitude,latitude,altitude,success,geoid,...,block_group,tract,county,state,county_name,state_name,population,housing,Full FIPS (block),dt
0,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:58+00:00,-117.0827,32.64,22.1684303,True,60730123021013,...,1,12302,73,6,San Diego County,California,306,130,060730123021013,2021-04-30 21:43:58+00:00
1,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:59+00:00,-117.0827,32.64,22.1684303,True,60730123021013,...,1,12302,73,6,San Diego County,California,306,130,060730123021013,2021-04-30 21:43:59+00:00
2,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:44:00+00:00,-117.0827,32.64,22.1684303,True,60730123021013,...,1,12302,73,6,San Diego County,California,306,130,060730123021013,2021-04-30 21:44:00+00:00
3,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.2684303,True,60730123021013,...,1,12302,73,6,San Diego County,California,306,130,060730123021013,2021-04-30 21:59:34+00:00
4,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.3684303,True,60730123021013,...,1,12302,73,6,San Diego County,California,306,130,060730123021013,2021-04-30 21:59:34+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20936023,d4f4383cdf1780a348affea6b1a57176,Outside assist - missing person,2307030008,Forest Ranch,2023-07-03 08:43:37+00:00,-121.6514,39.8626,752.2800293,True,60070016011081,...,1,1601,7,6,Butte County,California,44,35,060070016011081,2023-07-03 08:43:37+00:00
20936024,d4f4383cdf1780a348affea6b1a57176,Outside assist - missing person,2307030008,Forest Ranch,2023-07-03 08:43:37+00:00,-121.6514,39.8626,752.1800293,True,60070016011081,...,1,1601,7,6,Butte County,California,44,35,060070016011081,2023-07-03 08:43:37+00:00
20936025,d4f4383cdf1780a348affea6b1a57176,Outside assist - missing person,2307030008,Forest Ranch,2023-07-03 08:43:37+00:00,-121.6514,39.8626,752.0800293,True,60070016011081,...,1,1601,7,6,Butte County,California,44,35,060070016011081,2023-07-03 08:43:37+00:00
20936026,d4f4383cdf1780a348affea6b1a57176,Outside assist - missing person,2307030008,Forest Ranch,2023-07-03 08:43:38+00:00,-121.6514,39.8626,752.0800293,True,60070016011081,...,1,1601,7,6,Butte County,California,44,35,060070016011081,2023-07-03 08:43:38+00:00


TypeError: string indices must be integers

In [22]:
calls_for_service

Unnamed: 0,Incident No.,Date,Description,Priority,Disposition,Call Source,Zip Code,Block Location,Case No,Address,...,Census Block Group,Full FIPS (block),Full FIPS (tract),Metro/Micro Statistical Area Name,Metro/Micro Statistical Area Code,Metro/Micro Statistical Area Type,Combined Statistical Area Name,Combined Statistical Area Code,Metropolitan Division Area Name,Metropolitan Division Area Code
0,210701L00054165,2021-07-01 00:17:06.233,Extra Patrol,5,HANDLED NO REPORT,officer,91910,300 BLOCK MOON VIEW DR,,"300 BLOCK MOON VIEW DR, Chula Vista, CA, 91910",...,1,060730134121000,06073013412,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
1,210707L00056325,2021-07-07 16:18:56.693,Narcotics,3,HANDLED NO REPORT,citizen,91910,300 BLOCK MOON VIEW DR,,"300 BLOCK MOON VIEW DR, Chula Vista, CA, 91910",...,1,060730134121000,06073013412,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
2,210715L00058869,2021-07-15 22:49:08.137,Pedestrian Stop,5,CANNIBIS RELATED,officer,91910,300 BLOCK MOON VIEW DR,,"300 BLOCK MOON VIEW DR, Chula Vista, CA, 91910",...,1,060730134121000,06073013412,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
3,210806L00065252,2021-08-06 04:12:02.110,Disturbance - General,3,HANDLED NO REPORT,citizen,91910,300 BLOCK MOON VIEW DR,,"300 BLOCK MOON VIEW DR, Chula Vista, CA, 91910",...,1,060730134121000,06073013412,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
4,210904L00074465,2021-09-04 20:27:35.460,Extra Patrol,5,HANDLED NO REPORT,officer,91910,300 BLOCK MOON VIEW DR,,"300 BLOCK MOON VIEW DR, Chula Vista, CA, 91910",...,1,060730134121000,06073013412,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184649,230901L00074883,2023-09-01 04:31:34.893,Arrest Felony,2,ASSISTED,citizen,91910,W MOUNTAIN VIEW DR / D ST,2311846.0,"W MOUNTAIN VIEW DR / D ST, Chula Vista, CA, 91910",...,2,060730123032008,06073012303,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
184650,230901L00075001,2023-09-01 13:25:04.573,Reckless Driver,3,HANDLED NO REPORT,citizen,91913,OLYMPIC PW & SR125,,"OLYMPIC PW & SR125, Chula Vista, CA, 91913",...,1,060730133171001,06073013317,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
184651,230901L00075082,2023-09-01 17:37:37.240,Disabled Vehicle,2,HANDLED NO REPORT,citizen,91913,OLYMPIC PW & SR125,,"OLYMPIC PW & SR125, Chula Vista, CA, 91913",...,1,060730133171001,06073013317,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
184652,230901L00075022,2023-09-01 14:35:15.460,Disturbance - General,2,HANDLED NO REPORT,citizen,91910,BRIGHTWOOD AV & I ST,,"BRIGHTWOOD AV & I ST, Chula Vista, CA, 91910",...,1,060730130001009,06073013000,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,


In [None]:
t = merged.sample(4000)
times = pd.to_datetime(t.sequence)
t.groupby([times.dt.hour, times.dt.minute]).value_col.sum()

In [17]:
shp_file = gpd.read_file(
    "/Volumes/easystore/Drones/census/tl_2022_06_tabblock20/tl_2022_06_tabblock20.shp"
)

In [18]:
shp_file["GEOID20"].sample(3)

367334    060375300072013
247244    060650412012004
121163    060710035032003
Name: GEOID20, dtype: object

In [22]:
merged["GEOID20"] = "0" + merged["geoid"].astype(str)
grouped = merged.groupby("GEOID20").count()

In [24]:
grouped = grouped.reset_index()[["GEOID20", "sequence"]]
grouped.columns = ["GEOID20", "count"]

In [30]:
merged.columns

Index(['sequence', 'longitude_x', 'latitude_x', 'altitude', 'id', 'type',
       'incident_id', 'address_map', 'rounded-latitude', 'rounded-longitude',
       'success', 'latitude_y', 'longitude_y', 'geoid', 'block', 'block_group',
       'tract', 'county', 'state', 'county_name', 'state_name', 'population',
       'housing', 'GEOID20'],
      dtype='object')

In [29]:
chula = shp_file[shp_file["GEOID20"].isin(grouped["GEOID20"])]
a = pd.merge(grouped, chula, on=["GEOID20"])
grouped.columns

Index(['GEOID20', 'count'], dtype='object')

In [34]:
a["cpa"] = a["count"] / a["ALAND20"]

In [35]:
a.to_csv("./test.csv")

In [10]:
chula = shp_file[shp_file["GEOID20"].isin(days_on_block["GEOID20"])]
a - pd.merge(days_on_block, chula, on=["GEOID20"])

In [45]:
geocoded_flights = pd.merge(geocoded_flights, days_on_block, on="Full FIPS (block)")
geocoded_flights.shape[0]

11530

In [46]:
days_on_block_group = (
    geocoded_flights.groupby(["date", "block-group"])
    .count()
    .reset_index()[["date", "block-group"]]
    .drop_duplicates()
)
days_on_block_group = days_on_block_group.groupby("block-group").count().reset_index()
days_on_block_group.columns = ["block-group", "days on block-group"]

days_on_block_group.head()

Unnamed: 0,block-group,days on block-group
0,60050003011,1
1,60070001021,4
2,60070001023,2
3,60070001032,5
4,60070001041,2


In [41]:
geocoded_flights = pd.merge(geocoded_flights, days_on_block_group, on="block-group")
geocoded_flights.shape[0]

11530

In [58]:
demographic_coi = [
    "block-group",
    "ACS Demographics/Population by age range/Total/Value",
    "ACS Demographics/Race and ethnicity/Hispanic or Latino/Value",
    "ACS Demographics/Race and ethnicity/Not Hispanic or Latino: Black or African American alone/Value",
    "ACS Economics/Median household income/Total/Value",
    "dept",
]
cols = [item for sublist in [demographic_coi, ["lat"]] for item in sublist]
block_groups = geocoded_flights.groupby(demographic_coi).count().reset_index()[cols]
block_groups.columns = [
    "block-group",
    "total-pop",
    "hispanic",
    "black",
    "household-income",
    "dept",
    "count",
]
block_groups

Unnamed: 0,block-group,total-pop,hispanic,black,household-income,dept,count
0,060050003011,951,180,0,63250,spduas,1
1,060070001021,1500,469,4,63875,chicopd,5
2,060070001023,1659,59,0,39667,chicopd,8
3,060070001032,3202,395,26,107462,chicopd,6
4,060070001041,1711,0,0,84485,chicopd,5
...,...,...,...,...,...,...,...
521,530610519371,1291,119,0,205919,bothellpd,1
522,530610519372,1625,140,0,113636,bothellpd,4
523,530610519381,1266,158,0,102470,bothellpd,2
524,530610519383,1334,40,87,124145,bothellpd,3


In [60]:
dfs = []
for d in block_groups["dept"].drop_duplicates().values:
    tdf = block_groups[block_groups["dept"] == d].copy()
    tdf["bin"] = pd.qcut(
        tdf["count"].astype(int), 3, labels=["low", "medium", "high"], duplicates="drop"
    )
    dfs.append(tdf)
assigned_and_grouped = pd.concat(dfs)

Unnamed: 0,block-group,total-pop,hispanic,black,household-income,dept,count,bin
0,060050003011,951,180,0,63250,spduas,1,low
67,060610206012,2479,154,0,156938,spduas,1,low
68,060610211291,2082,231,114,77857,spduas,4,medium
70,060670001001,2128,327,1,116544,spduas,1,low
71,060670003003,1468,144,30,108056,spduas,2,low
...,...,...,...,...,...,...,...,...
521,530610519371,1291,119,0,205919,bothellpd,1,low
522,530610519372,1625,140,0,113636,bothellpd,4,medium
523,530610519381,1266,158,0,102470,bothellpd,2,low
524,530610519383,1334,40,87,124145,bothellpd,3,medium


In [62]:
assigned_and_grouped["total-pop"] = assigned_and_grouped["total-pop"].astype(int)
assigned_and_grouped["hispanic"] = assigned_and_grouped["hispanic"].astype(int)
assigned_and_grouped["black"] = assigned_and_grouped["black"].astype(int)

assigned_and_grouped["household-income"] = assigned_and_grouped[
    "household-income"
].astype(int)

assigned_and_grouped["count"] = assigned_and_grouped["count"].astype(int)
assigned_and_grouped["black-latino"] = (
    assigned_and_grouped["hispanic"] + assigned_and_grouped["black"]
) / assigned_and_grouped["total-pop"]

In [63]:
means = assigned_and_grouped.groupby(["dept", "bin"]).mean().reset_index()
means.loc[means["dept"] == "bothellpd", "jurisdiction_black-latino"] = 12
means.loc[means["dept"] == "chicopd", "jurisdiction_black-latino"] = 21
means.loc[means["dept"] == "spduas", "jurisdiction_black-latino"] = 40
means.loc[means["dept"] == "cvpd", "jurisdiction_black-latino"] = 68

In [64]:
means.loc[means["dept"] == "bothellpd", "jurisdiction_household-income"] = 116578
means.loc[means["dept"] == "chicopd", "jurisdiction_household-income"] = 61850
means.loc[means["dept"] == "spduas", "jurisdiction_household-income"] = 75311
means.loc[means["dept"] == "cvpd", "jurisdiction_household-income"] = 91949

In [65]:
means[
    [
        "dept",
        "bin",
        "black-latino",
        "jurisdiction_black-latino",
        "household-income",
        "jurisdiction_household-income",
    ]
]

Unnamed: 0,dept,bin,black-latino,jurisdiction_black-latino,household-income,jurisdiction_household-income
0,bothellpd,low,0.071634,12.0,121155.111111,116578.0
1,bothellpd,medium,0.078377,12.0,118282.2,116578.0
2,bothellpd,high,0.126055,12.0,122796.857143,116578.0
3,chicopd,low,0.205096,21.0,54695.807692,61850.0
4,chicopd,medium,0.164034,21.0,58201.454545,61850.0
5,chicopd,high,0.199248,21.0,51289.388889,61850.0
6,cvpd,low,0.547859,68.0,86152.341772,91949.0
7,cvpd,medium,0.559954,68.0,93191.789474,91949.0
8,cvpd,high,0.743648,68.0,72085.461538,91949.0
9,spduas,low,0.406947,40.0,68147.069307,75311.0


In [66]:
assigned_and_grouped[assigned_and_grouped["count"] >= 100]

Unnamed: 0,block-group,total-pop,hispanic,black,household-income,dept,count,bin,black-latino
291,60730053012,464,206,68,0,cvpd,114,high,0.590517
345,60730123021,1550,1029,58,34735,cvpd,629,high,0.70129
350,60730124011,974,569,0,0,cvpd,159,high,0.584189
351,60730124012,1933,1365,133,79904,cvpd,109,high,0.774961
352,60730124021,1502,1180,191,41574,cvpd,128,high,0.912783
354,60730124023,2712,2294,49,55078,cvpd,167,high,0.863938
355,60730125011,2178,1715,88,31179,cvpd,143,high,0.827824
356,60730125012,1576,1484,0,56447,cvpd,220,high,0.941624
357,60730125021,1752,1522,84,54291,cvpd,103,high,0.916667
358,60730125022,1611,1367,31,34306,cvpd,183,high,0.867784


In [67]:
flights

Unnamed: 0.1,Unnamed: 0,bg,id,lat,lng,address,type,date,time,dept,Full FIPS (block),days on block
0,0,060730133011,8f0f4842f6dbf20b94ed2df3021b5ee5,32.6224144,-117.0504452,Inkopah St/ Monserate Ave,Traffic Collision,4-30-21,5:34pm,cvpd,060730133011014,2
1,1,060730133011,a2f351edd3ccd5c8b18529466bc88f5d,32.6235983,-117.0489902,200 E. Moss St.,Fight,5-26-23,7:35pm,cvpd,060730133011014,2
2,2,060730124012,c1795c8bb73272d54f70df7f7e228004,32.6501786,-117.0879108,20 Fourth Ave,Subject down,4-30-21,2:43pm,cvpd,060730124012000,29
3,3,060730124012,78fe86c377fbb8aab922ce4f69c47cba,32.6499964,-117.0873044,16 3RD,TRAFFIC HAZARD,4-03-21,11:42am,cvpd,060730124012000,29
4,4,060730124012,a8712fdeedee83fcae3a7b11f9087569,32.6499964,-117.0873044,16 3RD,TRAFFIC HAZARD,4-03-21,11:12am,cvpd,060730124012000,29
...,...,...,...,...,...,...,...,...,...,...,...,...
11525,11525,060070009031,c159c37e26acf5f147cc8509189a72d3,39.7136532504949,-121.788719324023,Skyway near Benatar,Suspicious subject,7-03-23,9:51pm,chicopd,060070009031014,1
11526,11526,060070016011,c5db9b97f8ef81953ee936138c13983b,39.873843,-121.6810778,Forest Ranch,Outside assist - missing person,7-03-23,2:03am,chicopd,060070016011073,1
11527,11527,060070016011,54d661d6397d991e091b4eeb2c3cd9dc,39.873843,-121.6810778,Forest Ranch,Outside assist - missing person,7-03-23,1:54am,chicopd,060070016011073,1
11528,11528,060070016011,0032a33d547b1b30a15c41c9af8bce48,39.873843,-121.6810778,Forest Ranch,Outside assist - missing person,7-03-23,1:44am,chicopd,060070016011073,1


In [72]:
gf = geocoded_flights[geocoded_flights["hid"].notna()]
gf[gf["hid"].str.contains("L056993")]

Unnamed: 0.1,Unnamed: 0,lat,lng,time,id,date,time_s,address,hid,type,...,"ACS Economics/Household income/$125,000 to $149,999/Margin of error","ACS Economics/Household income/$125,000 to $149,999/Percentage","ACS Economics/Household income/$150,000 to $199,999/Value","ACS Economics/Household income/$150,000 to $199,999/Margin of error","ACS Economics/Household income/$150,000 to $199,999/Percentage","ACS Economics/Household income/$200,000 or more/Value","ACS Economics/Household income/$200,000 or more/Margin of error","ACS Economics/Household income/$200,000 or more/Percentage",block-group,days on block
9016,383,32.6148494,-117.078613,2:17pm,9429819440354b55047665dd8da222e5,7-06-23,2:17pm,500 Moss Street,L056993,Restraining Order Violation,...,13,0,14,15,0.074,0,13,0,60730131042,6
