# Build Analysis Data

In [16]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange
from datetime import date
from multiprocess.pool import Pool


storage = "/Volumes/easystore/Drones/"
calls_for_service = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/calls-for-service-with-census-data.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
flight_details = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"{storage}/gpx-with-census-data.csv", 
                chunksize=100000, 
                dtype=str
            ),
            desc="Loading data",
        )
    ]
)
flights = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"../../data/all-flights-manifest.csv", 
                chunksize=100000, 
                dtype=str
            ),
            desc="Loading data",
        )
    ]
)

cv_blocks = pd.read_csv('../../data/outputs/outputs_cv-blocks-geometry.csv',dtype='str')

# flight_data = pd.read_csv("/Volumes/easystore/Drones/compiled-flight-data-gpx.csv",dtype=str)
# flight_data.head()

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

In [17]:
calls_for_service.shape[0]

184654

In [18]:
cv_blocks.shape[0]

1959

### Flights and Flight Details

In [19]:
cvpd_flights = flights[flights['department'] == 'cvpd'].copy()
cvpd_flights["date"] = pd.to_datetime(cvpd_flights["date"])

mask = (cvpd_flights['date'] >= '2021-07-01 00:00') & (cvpd_flights['date'] <= '2023-09-02 00:00')

cvpd_flights = cvpd_flights[mask].sort_values('date').copy()
cvpd_flights['clean-type'] = cvpd_flights['type'].str.lower()
cvpd_flights['clean-type'] = cvpd_flights['clean-type'].apply(lambda x: str(x).replace('  ','').strip())
cvpd_flights["match"] = cvpd_flights["incident_id"].astype(str).apply(lambda x: x.strip().replace(' ','').replace('L','0').replace('l','0')[-5:])

cvpd_flights.to_csv('../../data/outputs/outputs_cv-flight-manifest.csv',index=False)

  cvpd_flights["date"] = pd.to_datetime(cvpd_flights["date"])


In [20]:
flight_details["GEOID20"] = flight_details["geoid"].astype(str)
flight_details["len"] = flight_details["GEOID20"].apply(lambda x: len(x))
flight_details.loc[flight_details["len"] == 14, "GEOID20"] = "0" + flight_details["GEOID20"]
flight_details["len"] = flight_details["GEOID20"].apply(lambda x: len(x))


### Calls for Service

In [21]:
calls_for_service['len'] = calls_for_service['Full FIPS (block)'].astype(str).apply(lambda x: len(x))

calls_for_service['GEOID20'] = calls_for_service['Full FIPS (block)']
calls_for_service["match"] = (
    calls_for_service["Incident No."].astype(str).apply(lambda x: x.strip().replace(' ','').replace('l','0').replace('L','0')[-5:])
)
calls_for_service['clean-type'] = calls_for_service['Description'].str.lower()
calls_for_service['clean-type'] = calls_for_service['clean-type'].apply(lambda x: str(x).replace('  ','').strip())
calls_for_service.to_csv('../../data/outputs/outputs_cfs-data.csv',index=False)

In [22]:
calls_for_service.shape[0]

184654

## Group by Block

### Call for Service

In [27]:
call_for_service_block_count = calls_for_service.groupby(["GEOID20"]).count()
call_for_service_block_count = call_for_service_block_count.reset_index()[
    ["GEOID20", "Incident No."]
]
call_for_service_block_count.columns = ["GEOID20", "call_count"]
call_for_service_block_count.head()

Unnamed: 0,GEOID20,call_count
0,60250102002026,1
1,60650306022000,2
2,60650445182017,1
3,60650451232003,1
4,60650453021004,2


In [28]:
call_for_service_block_count_with_shps = pd.merge(
    cv_blocks, call_for_service_block_count, how="left", on=["GEOID20"]
)

In [29]:
call_for_service_block_count_with_shps["GEOID20"].apply(
    lambda x: len(x)
).drop_duplicates()


0    15
Name: GEOID20, dtype: int64

In [30]:
call_for_service_block_count_with_shps[
    "call_count"
] = call_for_service_block_count_with_shps["call_count"].replace(np.nan, 0)

call_for_service_block_count_with_shps['weight'] = call_for_service_block_count_with_shps['call_count'].astype(int)/call_for_service_block_count_with_shps['POP20'].astype(int)

call_for_service_block_count_with_shps[
    "weight"
] = call_for_service_block_count_with_shps["weight"].replace(np.nan, 0)
call_for_service_block_count_with_shps[
    "weight"
] = call_for_service_block_count_with_shps["weight"].replace(np.inf, 0)


In [31]:
call_for_service_block_count_with_shps.to_csv('../../data/outputs/outputs_cfs-data-with-shps.csv',index=False)

### Drone Seconds in Block

In [51]:
flight_details = flight_details[flight_details['id'].isin(cvpd_flights['id'])].copy()

In [3]:
# Strip the millisecond component from the 'sequence' timestamps
flight_details['sequence'] = pd.to_datetime(flight_details['sequence'])
flight_details['sequence_seconds'] = flight_details['sequence'].dt.floor('S')

# Now count the number of unique seconds in each block
unique_seconds_per_block_seconds = flight_details.groupby('GEOID20')['sequence_seconds'].nunique()

unique_seconds_per_block_seconds.head()


KeyboardInterrupt: 

In [40]:
unique_seconds_per_block_seconds = pd.DataFrame(unique_seconds_per_block_seconds).reset_index()

In [41]:
unique_seconds_per_block_seconds.columns = ['GEOID20','seconds']

In [42]:
unique_seconds_per_block_seconds

Unnamed: 0,GEOID20,seconds
0,060730032041009,32
1,060730032041012,35
2,060730032041013,5525
3,060730032041014,2675
4,060730032041015,217
...,...,...
1610,060730219001089,225
1611,060730219001090,26
1612,060730219001091,607
1613,060730219001092,4223


In [44]:
unique_seconds_in_block_with_shps = pd.merge(
    cv_blocks, unique_seconds_per_block_seconds, how="left", on=["GEOID20"]
)
unique_seconds_in_block_with_shps["seconds"] = unique_seconds_in_block_with_shps[
    "seconds"
].replace(np.nan, 0)


In [45]:
unique_seconds_in_block_with_shps['ALAND20'] = unique_seconds_in_block_with_shps['ALAND20'].astype(int)

In [46]:
unique_seconds_in_block_with_shps["weight"] = (
    unique_seconds_in_block_with_shps["seconds"]
    / unique_seconds_in_block_with_shps["ALAND20"]
)

In [47]:

unique_seconds_in_block_with_shps = pd.merge(unique_seconds_in_block_with_shps,call_for_service_block_count_with_shps[['GEOID20','call_count']],how='left',on='GEOID20')

In [52]:
unique_seconds_in_block_with_shps.to_csv('../../data/outputs/outputs_drone-seconds-block-count.csv',index=False)

### Drone Days / CFS in Block
- 792 days in analysis

In [38]:
from datetime import date
 
def numOfDays(date1, date2):
    if date2 > date1:   
        return (date2-date1).days
    else:
        return (date1-date2).days

start= date(2021, 7, 1)
end = date(2023, 9, 1)

days_in_analysis = numOfDays(start,end)

In [49]:
def calculate_summary(index):
    block = cv_blocks.iloc[index]
    drones_on_block = flight_details[flight_details['GEOID20']==block['GEOID20']] 
    calls_to_block = calls_for_service[calls_for_service['GEOID20']==block['GEOID20']] 

    block['num_flights'] = drones_on_block['id'].drop_duplicates().shape[0]
    block['flights_per_day'] = block['num_flights']/days_in_analysis
    block['num_calls'] = calls_to_block.shape[0]
    block['calls_per_day'] = block['num_calls']/days_in_analysis
    return block

In [55]:
from multiprocess.pool import Pool

with Pool(10) as pool:

    drones_cfs_pdpb = list(
        tqdm(pool.imap(calculate_summary, range(0, cv_blocks.shape[0])), total=cv_blocks.shape[0])
    )
    drones_cfs_pdpb = pd.DataFrame(drones_cfs_pdpb)

  0%|          | 0/1959 [00:00<?, ?it/s]

In [58]:
drones_cfs_pdpb.to_csv('../../data/outputs/output_cv-blocks-drones-calls.csv',index=False)

In [60]:
drones_cfs_pdpb.sort_values('num_flights').tail()

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,...,INTPTLAT20,INTPTLON20,HOUSING20,POP20,geometry,len,num_flights,flights_per_day,num_calls,calls_per_day
1920,6,73,12700,3000,60730127003000,Block 3000,G5040,U,78661.0,U,...,32.6382813,-117.0855165,363,837,"POLYGON ((-117.088058 32.639014, -117.087241 3...",15,1784,2.252525,2384,3.010101
174,6,73,12302,1012,60730123021012,Block 1012,G5040,U,78661.0,U,...,32.6392976,-117.0811867,94,216,"POLYGON ((-117.083646 32.639302, -117.083247 3...",15,1937,2.445707,346,0.436869
387,6,73,13102,2001,60730131022001,Block 2001,G5040,U,78661.0,U,...,32.61484,-117.0718154,516,1466,"POLYGON ((-117.074527 32.615993, -117.074092 3...",15,2408,3.040404,422,0.532828
1223,6,73,13102,2000,60730131022000,Block 2000,G5040,U,78661.0,U,...,32.6182878,-117.0731641,326,862,"POLYGON ((-117.075881 32.619439, -117.075064 3...",15,2960,3.737374,417,0.526515
1787,6,73,12302,1013,60730123021013,Block 1013,G5040,U,78661.0,U,...,32.6399935,-117.0828161,130,307,"POLYGON ((-117.083982 32.640144, -117.082001 3...",15,3519,4.443182,65,0.082071


In [24]:
unique_days_in_block

Unnamed: 0,GEOID20,days
0,060730032041009,1
1,060730032041012,2
2,060730032041013,35
3,060730032041014,17
4,060730032041015,2
...,...,...
1610,060730219001089,4
1611,060730219001090,5
1612,060730219001091,8
1613,060730219001092,36
