# Flight Stats

Our analysis found that, on average, each drone flight passes above 12 census blocks and potentially exposes approximately 4,700 of the residents below to the drone’s camera.


## Import Data

In [6]:
import os

import fiona
import geopandas as gpd
import gpxpy
import numpy as np
import pandas as pd
from multiprocess.pool import Pool
from tqdm.notebook import tqdm, trange
import warnings
import geopy.distance
from loguru import logger 

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation/dtype warnings - consider
# narrowing the filter to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

# Enable the KML driver in fiona (both spellings) for reading and writing.
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Directory holding the large GPX-derived inputs. The default is the original
# external-drive location; set DRONES_STORAGE to run the notebook elsewhere.
storage = os.environ.get("DRONES_STORAGE", "/Volumes/easystore/Drones/")

# Directory holding the outputs produced by the earlier build steps.
OUTPUTS_DIR = "../../data/outputs"

# Number of rows read per chunk while loading.
CHUNK_SIZE = 100000


def load_csv(path, desc="Loading data"):
    """Read a CSV in chunks behind a progress bar and return one DataFrame.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    desc : str
        Label shown on the progress bar.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated in file order. Every column is read as ``str``
        so identifiers such as ``geoid`` are not coerced to numbers.
    """
    chunks = pd.read_csv(path, chunksize=CHUNK_SIZE, dtype=str)
    return pd.concat(list(tqdm(chunks, desc=desc)))


# One row per recorded GPS point, already joined to census block attributes.
flight_details = load_csv(os.path.join(storage, "gpx-with-census-data.csv"))

# Manifest of the flights that belong to the Chula Vista analysis.
cv_flights = load_csv(os.path.join(OUTPUTS_DIR, "outputs_cv-flight-manifest.csv"))

# Census block geometry for Chula Vista.
cv_blocks = load_csv(os.path.join(OUTPUTS_DIR, "outputs_cv-blocks-geometry.csv"))

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

## Limit Analysis

Limit the flight details we analyze to the flights listed in the Chula Vista analysis manifest. In build data you can see which flights we include. The rules are:

- Limit the total collected data to CVPD only: `cvpd_flights = flights[flights['department'] == 'cvpd'].copy()`
- Limit to this date range: `mask = (cvpd_flights['date'] >= '2021-07-01 00:00') & (cvpd_flights['date'] <= '2023-09-02 00:00')`

In [7]:
# Unique flight ids found in the GPS data, restricted to the analysis manifest.
# The ids are de-duplicated *before* shuffling so only the distinct ids (not
# every GPS row) are permuted, and the shuffle is seeded so positional
# look-ups such as exposure(21) return the same flight after a kernel restart.
flight_ids = (
    flight_details['id']
    .drop_duplicates()
    .sample(frac=1, random_state=42)
    .loc[lambda ids: ids.isin(cv_flights['id'])]
)


## Calculate Flight Stats

A function to calculate, for a single flight, the number of census blocks it travelled through as well as the number of households and people living in those blocks. This does not calculate the number of exposures; that's in the next notebook.

In [8]:

def exposure(index):
    """Summarise the census blocks that a single flight passed over.

    Looks up the flight at position ``index`` of the notebook-level
    ``flight_ids`` series, selects its rows from ``flight_details`` and
    collapses them to one row per distinct (id, geoid, population, housing)
    combination before totalling.

    Parameters
    ----------
    index : int
        Zero-based position into ``flight_ids``.

    Returns
    -------
    dict
        ``id``: the flight id;
        ``population_exposed``: sum of ``population`` over the distinct rows;
        ``households_exposed``: sum of ``housing`` over the distinct rows;
        ``total_blocks``: number of distinct rows;
        ``date``: calendar date of the ``sequence`` value on the flight's first row.

    Raises
    ------
    IndexError
        If ``index`` is out of range or the flight has no rows.
    """
    flight_id = flight_ids.iloc[index]
    points = flight_details[flight_details['id'] == flight_id]

    # Only the first timestamp is needed, so parse that single value instead
    # of converting the whole column on every call.
    # NOTE(review): the sample rows carry a +00:00 offset, so this looks like
    # a UTC calendar date rather than a local one - confirm that is intended.
    launch_date = pd.to_datetime(points['sequence'].iloc[0]).date()

    # The census columns repeat on every GPS point; keep one row per distinct
    # block record so each block is counted once.
    # NOTE(review): a point with a missing geoid would survive as one extra
    # row and be counted in total_blocks - confirm upstream never emits one.
    blocks = points[['id', 'geoid', 'population', 'housing']].drop_duplicates()

    return {
        'id': flight_id,
        'population_exposed': blocks['population'].astype(float).sum(),
        'households_exposed': blocks['housing'].astype(float).sum(),
        'total_blocks': blocks.shape[0],
        'date': launch_date,
    }

In [9]:
# Spot-check the function on a single flight: the one at position 21 of flight_ids.
exposure(21)

{'id': 'e586794645c566caffc7bc6244054e90',
 'population_exposed': 5219.0,
 'households_exposed': 1450.0,
 'total_blocks': 28,
 'date': datetime.date(2022, 3, 3)}

In [10]:
# Walk through the exposure calculation by hand for one known flight.
flight_id = "bb7d42abac90b498ceac2cf15b642458"
flight = flight_details.loc[flight_details['id'].eq(flight_id)]

# Calendar date taken from the sequence value on the flight's first row.
launch_date = pd.to_datetime(flight['sequence']).dt.date.values[0]

# The census columns repeat on every GPS point, so reduce the flight to one
# row per distinct block record before totalling.
flight = flight[['id', 'geoid', 'population', 'housing']].drop_duplicates()

flight_exposure = {
    'id': flight_id,
    'population_exposed': flight['population'].astype(float).sum(),
    'households_exposed': flight['housing'].astype(float).sum(),
    'total_blocks': flight.shape[0],
    'date': launch_date,
}

flight_exposure


{'id': 'bb7d42abac90b498ceac2cf15b642458',
 'population_exposed': 2498.0,
 'households_exposed': 860.0,
 'total_blocks': 11,
 'date': datetime.date(2023, 7, 10)}

In [11]:
# Every raw GPS row recorded for the flight selected by flight_id.
flight_details.loc[flight_details['id'].eq(flight_id)]

Unnamed: 0,id,type,incident_id,address_map,sequence,longitude,latitude,altitude,success,geoid,block,block_group,tract,county,state,county_name,state_name,population,housing
18714193,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 02:39:37+00:00,-117.0717,32.6174,34.9496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18714194,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 02:39:38+00:00,-117.0717,32.6174,34.9496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18714195,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 02:55:08+00:00,-117.0717,32.6175,35.1496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18714196,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 02:55:08+00:00,-117.0717,32.6175,35.2496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18714197,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 02:55:09+00:00,-117.0717,32.6175,35.3496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18717107,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 03:17:52+00:00,-117.0717,32.6174,34.7496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18717108,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 03:17:52+00:00,-117.0717,32.6174,34.8496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18717109,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 03:17:53+00:00,-117.0717,32.6174,34.8496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326
18717110,bb7d42abac90b498ceac2cf15b642458,Noise Complaint,L057992,00 K St,2023-07-10 03:17:54+00:00,-117.0717,32.6174,34.8496422,True,60730131022000,2000,2,13102,73,6,San Diego County,California,861,326


### Run the Code 

Run in parallel across a pool of worker processes to speed up the calculations. I'm certain there's a better way to do this, but it's not worth figuring out right now. It takes about 15 minutes to produce the exposure dataframe. Get a coffee.

In [12]:
# Number of worker processes used for the per-flight pass.
N_WORKERS = 10

# flight_ids is built in the "Limit Analysis" section; it is reused here
# instead of being re-derived (and re-shuffled) so that both sections work on
# exactly the same series of flights.
n_flights = flight_ids.shape[0]

# exposure() takes a position into flight_ids, so the workers are fed the
# positions 0..n_flights-1. imap yields results in submission order.
with Pool(N_WORKERS) as pool:
    exposure_records = list(
        tqdm(pool.imap(exposure, range(n_flights)), total=n_flights)
    )

# One row per flight: id, population_exposed, households_exposed,
# total_blocks and date.
exposures = pd.DataFrame(exposure_records)

  0%|          | 0/8742 [00:00<?, ?it/s]

In [15]:
# Attach the per-flight exposure figures to the manifest (a left join, so every
# manifest row is kept) and write the result to ../viz/scatter.csv.
scatter_data = cv_flights.merge(exposures, on='id', how='left')
scatter_data.to_csv('../viz/scatter.csv', index=False)

In [4]:
# Mean of population_exposed across all flights in the exposures frame.
mean_population_exposed = exposures['population_exposed'].mean()
logger.info(f"On average each flight exposes {mean_population_exposed} people to a police drone")

NameError: name 'exposures' is not defined

In [5]:
# Mean of total_blocks across all flights in the exposures frame.
mean_total_blocks = exposures['total_blocks'].mean()
logger.info(f"On average each flight passes through {mean_total_blocks} blocks")

NameError: name 'exposures' is not defined