# Transparency

tktk

## Load Data

In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange
from loguru import logger
import datetime
import geopy.distance
import fiona

fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'
import warnings
warnings.simplefilter(action='ignore')

storage = "/Volumes/easystore/Drones/"

cv_flights = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"../../data/outputs/outputs_cv-flight-manifest.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
per_second = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"../../data/outputs/outputs_drone-seconds-block-count.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
cfs = pd.concat(
    [
        chunk
        for chunk in tqdm(
            pd.read_csv(
                f"../../data/outputs/outputs_cfs-data.csv",
                chunksize=100000,
                dtype=str,
            ),
            desc="Loading data",
        )
    ]
)
cv_flights["drone-date"] =  pd.to_datetime(cv_flights['date'] + " " + cv_flights['time_s'])
cfs["cfs-date"] = pd.to_datetime(cfs["Date"])

# flight_data = pd.read_csv("/Volumes/easystore/Drones/compiled-flight-data-gpx.csv",dtype=str)
# flight_data.head()

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

In [2]:
logger.info(f"Flights from: {cv_flights.sort_values('drone-date').head()['drone-date'].values[-1]} until {cv_flights.sort_values('drone-date').tail()['drone-date'].values[-1]}")
logger.info(f"Calls from: {cfs.sort_values('cfs-date').head()['cfs-date'].values[-1]} until {cfs.sort_values('cfs-date').tail()['cfs-date'].values[-1]}")

[32m2024-05-20 12:46:02.764[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mFlights from: 2021-07-01T12:09:00.000000000 until 2023-09-02T23:27:00.000000000[0m
[32m2024-05-20 12:46:02.918[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mCalls from: 2021-07-01T00:40:26.430000000 until 2023-09-01T23:35:39.883000000[0m


## Flights not Connected to a Call for Service

### Matching Incident IDs

In [3]:
cv_flights["match"] = cv_flights["incident_id"].astype(str).apply(lambda x: x.strip().replace(' ','').replace('L','0').replace('l','0')[-5:])

cfs["match"] = (
    cfs["Incident No."].astype(str).apply(lambda x: x.strip().replace(' ','').replace('l','0').replace('L','0')[-5:])
)

matches = cv_flights[cv_flights["match"].isin(cfs["match"])]
matches_joined_cfs = pd.merge(matches, cfs, how="left", on="match",suffixes=['-drones','-cfs'])

matches_joined_cfs = matches_joined_cfs[
    matches_joined_cfs["drone-date"].dt.date == matches_joined_cfs["cfs-date"].dt.date
]
flights_with_cfs  = matches_joined_cfs.copy()

no_matching_cfs = cv_flights[~cv_flights['id'].isin(flights_with_cfs['id'])].copy()

logger.info(f"Total Flights: {cv_flights.shape[0]}")
logger.info(f"Flights with NaN Incident IDS: {no_matching_cfs[no_matching_cfs['incident_id'].astype(str).str.contains('nan',case=False)].shape[0]}")
logger.info(f"Total unmatched flights: {no_matching_cfs.shape[0]}")

[32m2024-05-20 12:46:12.628[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTotal Flights: 9559[0m
[32m2024-05-20 12:46:12.630[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mFlights with NaN Incident IDS: 898[0m
[32m2024-05-20 12:46:12.630[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mTotal unmatched flights: 1871[0m


In [132]:
898/9559

0.09394288105450362

In [126]:
nan_flights = no_matching_cfs[no_matching_cfs['incident_id'].astype(str).str.contains('nan',case=False)].copy()

### Matching by Path and CFS Location

Find calls that occured within 90 minutes of a drone flight and record the top five blocks where they spent time

In [127]:
orphan_flight_blocks = flight_blocks[flight_blocks['id'].isin(nan_flights['id'])]
orphan_flight_blocks['seconds'] = orphan_flight_blocks['seconds'].astype(int)

possible_cfs = []
for _, row in tqdm(no_matching_cfs[no_matching_cfs['incident_id'].astype(str).str.contains('nan',case=False)].sample(frac=1).iterrows()):
    ceiling = row['drone-date'] + datetime.timedelta(minutes = 1)
    floor = row['drone-date'] - datetime.timedelta(minutes = 90)

    blocks = orphan_flight_blocks[orphan_flight_blocks['id']==row['id']]
    lingering_blocks = ','.join(blocks.sort_values('seconds').tail(5)['GEOID20'].values)
    
    mask = (cfs['cfs-date'] > floor) & (cfs['cfs-date'] < ceiling)
    possible_calls = cfs.loc[mask].copy()
    possible_calls['possible-drone-id'] = row['id']
    possible_calls['drone-incident-id'] = row['incident_id']
    possible_calls['drone-date'] = row['drone-date']
    possible_calls['drone-type'] = row['type']
    possible_calls['drone-type'] = row['type']
    possible_calls['drone-address'] = row['address_map']
    possible_calls['drone-FIPS'] = lingering_blocks

    # possible_calls = possible_calls[possible_calls['Full FIPS (block)']==row['Full FIPS (block)']].copy()
    
    possible_cfs.append(possible_calls)
possible_cfs = pd.concat(possible_cfs)  

0it [00:00, ?it/s]

#### Was one the drone, at any point, less than half a mile away from a call

In [134]:
possible_cfs_by_distance = []
for _, flight in tqdm(nan_flights.iterrows()):
#   find possible calls by time
    possible_cfs_for_flight = possible_cfs[possible_cfs['possible-drone-id']==flight['id']]
    
    orphan_flight = orphan_flight_blocks[orphan_flight_blocks['id']==flight['id']].copy()
    orphan_flight['seconds'] = orphan_flight['seconds'].astype(int)

    lingering_blocks = orphan_flight
    
    if lingering_blocks.shape[0]>0:
        for _, pcfs in possible_cfs_for_flight.iterrows():
            for _, block in lingering_blocks.iterrows():
                if str(block['INTPTLAT20']) != "nan":
                    try:
                        distance = geopy.distance.geodesic((pcfs['Latitude'],pcfs['Longitude']),(block['INTPTLAT20'],block['INTPTLON20'])).miles
                        if distance <= .5:
                            pcfs["lingering-block"] = block['GEOID20']
                            pcfs["distance"] = distance
                            possible_cfs_by_distance.append(pcfs)
                    except:
                        logger.error(f"{(pcfs['Latitude'],pcfs['Longitude']),(block['INTPTLAT20'],block['INTPTLON20'])}")

possible_cfs_by_distance = pd.DataFrame(possible_cfs_by_distance)         

0it [00:00, ?it/s]

In [136]:
nan_flights[~nan_flights['id'].isin(possible_cfs_by_distance['possible-drone-id'])]

Unnamed: 0,lat_map,lon_map,time,id,date,time_s,address_map,incident_id,type,department,clean-type,match,drone-date
89,32.6408719,-117.0989442,5:19pm,56cc862e1e47b73ed2a423685a0d781e,2021-07-07,5:19pm,,,,cvpd,,,2021-07-07 17:19:00
120,32.61739667,-117.07165328,6:18pm,d6fa0dbda343a1ed169f0999fa0ad40a,2021-07-09,6:18pm,330 Moss St,,,cvpd,,,2021-07-09 18:18:00
181,32.596614020211,-117.021563539355,2:46pm,f4861ddf2a3e4e2b698aeaf5c130747e,2021-07-13,2:46pm,739 Design Ct,,,cvpd,,,2021-07-13 14:46:00
183,32.5977906002576,-117.02117189926,3:21pm,cb6ac060e8a888984ec6f734a515abd4,2021-07-13,3:21pm,1800 Maxwell Rd,,,cvpd,,,2021-07-13 15:21:00
186,32.5978288675502,-117.021179795161,3:37pm,281e49b6f1d692c095a5ccde46af43d6,2021-07-13,3:37pm,1800 Maxwell Rd,,,cvpd,,,2021-07-13 15:37:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9017,32.6402645315427,-117.081045997832,10:31pm,65c3e998986414faaff0c76f4ecd6f2c,2023-08-02,10:31pm,332 F St,,,cvpd,,,2023-08-02 22:31:00
9031,32.645054433498,-116.995017439052,10:56am,a504dd706091aac3dff979c8f3d63e2d,2023-08-02,10:56am,1616 Columbia St,,,cvpd,,,2023-08-02 10:56:00
9042,32.63996862,-117.08271187,6:29pm,1ea1b7f6e87976c33c021c43f69dd8ad,2023-08-03,6:29pm,315 4th Ave,,,cvpd,,,2023-08-03 18:29:00
9112,32.6174121141753,-117.071669084666,8:50am,45e87cbd3073fcb14a8b2394f3f09836,2023-08-06,8:50am,330 Moss St,,,cvpd,,,2023-08-06 08:50:00


## Spit out mystery flights

In [129]:
compiled_flights =[] 

for _, row in tqdm(nan_flights[~nan_flights['id'].isin(possible_cfs_by_distance['possible-drone-id'])].iterrows()):
    
    d = gpd.read_file(f"/Volumes/easystore/Drones/flights/kml/{row['id']}.kml",driver='KML')
    d['drone-id'] = row['id']
    d['address_map'] = row['address_map']
    d['drone-date'] = row['drone-date']
    d['incident_id'] = row['incident_id']
    d['type']=row['type']
     
    compiled_flights.append(d)
    

0it [00:00, ?it/s]

In [133]:
nan_flights.to_csv('./nan-flights.csv',index=False)