In [3]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange

storage = "/Volumes/easystore/Drones/"
calls_for_service = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/calls-for-service-with-census-data.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
flight_data = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/gpx-with-census-data.csv", chunksize=100000, dtype=str
            ),
            desc="Loading data",
        )
    ]
)

# flight_data = pd.read_csv("/Volumes/easystore/Drones/compiled-flight-data-gpx.csv",dtype=str)
# flight_data.head()

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

In [4]:
shp_file = gpd.read_file(
    "/Volumes/easystore/Drones/census/tl_2022_06_tabblock20/tl_2022_06_tabblock20.shp"
)

In [5]:
san_deigo_county = shp_file[shp_file["GEOID20"].str.contains("06073")]

In [6]:
call_for_service_block_count = calls_for_service.groupby(["Full FIPS (block)"]).count()
call_for_service_block_count = call_for_service_block_count.reset_index()[
    ["Full FIPS (block)", "Incident No."]
]
call_for_service_block_count.columns = ["GEOID20", "call_count"]
call_for_service_block_count.head()

Unnamed: 0,GEOID20,call_count
0,60250102002026,1
1,60650306022000,2
2,60650445182017,1
3,60650451232003,1
4,60650453021004,2


In [7]:
call_for_service_block_count_with_shps = pd.merge(
    san_deigo_county, call_for_service_block_count, how="left", on=["GEOID20"]
)

In [8]:
call_for_service_block_count_with_shps["GEOID20"].apply(
    lambda x: len(x)
).drop_duplicates()


0    15
Name: GEOID20, dtype: int64

In [9]:
call_for_service_block_count_with_shps[
    "call_count"
] = call_for_service_block_count_with_shps["call_count"].replace(np.nan, 0)



In [10]:
flight_data["geoid"] = flight_data["geoid"].astype(str)
flight_data["len"] = flight_data["geoid"].apply(lambda x: len(x))
flight_data.loc[flight_data["len"] == 14, "geoid"] = "0" + flight_data["geoid"]

In [11]:
flight_data.head()

Unnamed: 0,id,type,incident_id,address_map,sequence,longitude,latitude,altitude,success,geoid,block,block_group,tract,county,state,county_name,state_name,population,housing,len
0,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:58+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
1,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:43:59+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
2,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:44:00+00:00,-117.0827,32.64,22.1684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
3,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.2684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14
4,c1795c8bb73272d54f70df7f7e228004,Subject down,CVL035567,20 Fourth Ave,2021-04-30 21:59:34+00:00,-117.0827,32.64,22.3684303,True,60730123021013,1013,1,12302,73,6,San Diego County,California,306,130,14


In [None]:
# for flight_id in tqdm(flight_data['id'].drop_duplicates().values,total=len(flight_data['id'].drop_duplicates().values)):
#     df = flight_data[flight_data['id']==flight_id]
#     times = pd.to_datetime(df["sequence"])
#     grouped_by_block_and_second = flight_data.groupby(
#         [df["geoid"], times.dt.hour, times.dt.minute, times.dt.second]
#     ).count()
#     grouped_by_block_and_second.index.names = ["geoid", "hour", "minute", "second"]
#     grouped_by_block_and_second = grouped_by_block_and_second.reset_index()[
#         ["geoid", "hour", "minute", "second", "id"]
#     ]
#     grouped_by_block_and_second.columns = ["geoid", "hour", "minute", "second", "count"]


In [12]:
times = pd.to_datetime(flight_data["sequence"])
grouped_by_block_and_second = flight_data.groupby(
    ["id","geoid", times.dt.hour, times.dt.minute, times.dt.second]
).count()
grouped_by_block_and_second.index.names = ["id","geoid", "hour", "minute", "second"]
grouped_by_block_and_second = grouped_by_block_and_second.reset_index()[
    ["id","geoid", "hour", "minute", "second", "type"]
]
grouped_by_block_and_second.columns = ["id","geoid", "hour", "minute", "second", "count"]


In [38]:
unique_seconds_in_block = grouped_by_block_and_second.groupby(["geoid"]).count()
unique_seconds_in_block = unique_seconds_in_block.reset_index()[["geoid", "hour"]]
unique_seconds_in_block.columns = ["GEOID20", "seconds"]
unique_seconds_in_block.head()

Unnamed: 0,GEOID20,seconds
0,60070001021001,403
1,60070001021002,1659
2,60070001021006,113
3,60070001021007,191
4,60070001021008,3


In [16]:
unique_seconds_in_block_with_shps = pd.merge(
    san_deigo_county, unique_seconds_in_block, how="left", on=["GEOID20"]
)
unique_seconds_in_block_with_shps["seconds"] = unique_seconds_in_block_with_shps[
    "seconds"
].replace(np.nan, 0)

In [17]:
unique_seconds_in_block_with_shps["weight"] = (
    unique_seconds_in_block_with_shps["seconds"]
    / unique_seconds_in_block_with_shps["ALAND20"]
)

In [21]:
call_for_service_block_count_with_shps['weight'] = call_for_service_block_count_with_shps['call_count']/call_for_service_block_count_with_shps['POP20']

In [22]:
call_for_service_block_count_with_shps.to_csv('../data/outputs_cfs-block-count.csv',index=False)

In [41]:
fm = unique_seconds_in_block_with_shps[unique_seconds_in_block_with_shps['seconds']>0]
fm.to_csv('../data/outputs_drone-seconds-block-count.csv',index=False)