# Finding Anomalies

## Setup

### Import Data

In [1]:
import os

import fiona
import geopandas as gpd
import gpxpy
import numpy as np
import pandas as pd
from multiprocess.pool import Pool
from tqdm.notebook import tqdm, trange
import warnings
import geopy.distance

# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Register the KML driver with fiona under both spellings so KML files can be
# read and written.
for kml_driver in ('kml', 'KML'):
    fiona.drvsupport.supported_drivers[kml_driver] = 'rw'

# Root folder of the external drive that holds every input file.
storage = "/Volumes/easystore/Drones/"

# Stream the census-joined GPX table in 100,000-row chunks (all columns as
# strings) so tqdm can report progress, then stitch the chunks together.
census_chunks = pd.read_csv(
    f"{storage}/gpx-with-census-data.csv", chunksize=100000, dtype=str
)
flight_details = pd.concat(list(tqdm(census_chunks, desc="Loading data")))

Loading data: 0it [00:00, ?it/s]

In [2]:
# Load the Geocodio-enriched flight manifest the same way as the GPX table:
# read 100,000-row chunks as strings under a progress bar, then concatenate.
manifest_chunks = pd.read_csv(
    f"{storage}/geocodio/all-flights-manifest_geocodio.csv",
    chunksize=100000,
    dtype=str,
)
geocodio_flights = pd.concat(list(tqdm(manifest_chunks, desc="Loading data")))

Loading data: 0it [00:00, ?it/s]

In [41]:
# Show manifest rows whose call type mentions "transient" (case-insensitive).
# Casting to str first turns missing types into the text 'nan', which never matches.
mentions_transient = geocodio_flights['type'].astype(str).str.contains('transient', case=False)
geocodio_flights[mentions_transient]


Unnamed: 0,lat_map,lon_map,time,id,date,time_s,address_map,incident_id,type,department,...,Census Block Group,Full FIPS (block),Full FIPS (tract),Metro/Micro Statistical Area Name,Metro/Micro Statistical Area Code,Metro/Micro Statistical Area Type,Combined Statistical Area Name,Combined Statistical Area Code,Metropolitan Division Area Name,Metropolitan Division Area Code
429,32.6426023,-117.081293,10:52am,3ccf2a53828e1338bdd335ee5e60b67c,2021-04-03,10:52am,251 LANDIS,L27099,NAKED TRANSIENT,cvpd,...,1,60730123021005,6073012302,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
553,32.64015576,-116.99921456,9:20am,f6111833fdeada6b0dc59e5cac3a93a6,2021-05-27,9:20am,"Racquetball courts, 900 Otay Lakes Rd",CVL043701,TRANSIENT DISTURBANCE,cvpd,...,1,60730134151000,6073013415,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
681,32.6378566,-117.092163,12:52pm,cf2796701c8aa22febd7e5120ef85e6b,2021-05-16,12:52pm,300 Broadway,CVL040456,Disturbance-Transient,cvpd,...,2,60730127002000,6073012700,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
708,32.6447203,-117.0812257,11:52am,e821d939b251cb266c74ba8028dce8c7,2021-05-14,11:52am,200 block of Third Ave.,L039788,disturbance of transient sleeping near front d...,cvpd,...,1,60730123021001,6073012302,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
916,32.6474041,-117.0648355,12:30pm,0acb4bbed89ead37f8e05e0242e22f42,2021-05-01,12:30pm,88 Bonita Rd,CVL035883,Transient Causing Disturbance,cvpd,...,2,60730123042002,6073012304,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
917,32.6048519,-117.0790391,11:11am,4cda8e4a33d600ecfb2a2bdd9887d8b6,2021-05-01,11:11am,1255 Broadway,CVL035878,Transient Causing Disturbance,cvpd,...,2,60730132062000,6073013206,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
1420,32.6928306,-117.1272633,10:15am,bb238996189303c894ec6f33f48f1990,2021-07-29,10:15am,3100 MAIN,L063009,TRANSIENT CAUSING PROBLEMS,cvpd,...,3,60730039023017,6073003902,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
1432,32.6097324,-117.0805137,5:03pm,c2d0438498be9d3428076197a85df4ac,2021-07-28,5:03pm,1111 BROADWAY,L062748,TRANSIENT CAUSING PROBLEMS,cvpd,...,3,60730131043001,6073013104,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
1437,32.6119528,-117.0821423,3:56pm,d229776d7d8f37a07d5703a8b564ae74,2021-07-28,3:56pm,600 CRESTED BUTTE,L062764,TRANSIENT CAUSING PROBLEMS,cvpd,...,2,60730131032010,6073013103,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,
1447,32.6463843,-117.0905977,11:15am,c166aaa405f3d14ba05b18013ea51ecc,2021-07-28,11:15am,00 5TH,L062678,TRANSIENT CAUSING PROBLEMS,cvpd,...,2,60730124012001,6073012401,"San Diego-Chula Vista-Carlsbad, CA",41740,metropolitan,,,,


In [3]:
# Normalise geoid to text and record each value's length in a helper column.
geoid_text = flight_details["geoid"].astype(str)
flight_details["geoid"] = geoid_text
flight_details["len"] = geoid_text.map(len)

# 14-character geoids get a "0" prepended so they become 15 characters long.
# NOTE(review): presumably these lost a leading zero upstream - confirm.
is_short_geoid = flight_details["len"] == 14
flight_details.loc[is_short_geoid, "geoid"] = "0" + flight_details["geoid"]


In [4]:
# Parse the manifest's date strings and keep only the calendar date
# (datetime.date objects, no time component).
parsed_dates = pd.to_datetime(geocodio_flights["date"])
geocodio_flights["date"] = parsed_dates.dt.date


In [5]:
# The CSV was loaded with dtype=str, so altitude must be made numeric before
# it can be averaged or compared against a threshold.
flight_details['altitude'] = flight_details['altitude'].astype('float64')

In [6]:
# Mean altitude over every row of flight_details, shown as a baseline for the
# low-altitude threshold applied later.
flight_details['altitude'].mean()

130.50396990973857

In [27]:
# Rows at or below this altitude are treated as candidate low flights.
# NOTE(review): the unit is whatever the `altitude` column uses - confirm.
LOW_ALTITUDE_MAX = 12
# Fixed seed so the shuffle below gives the same order on every
# Restart & Run All.
SHUFFLE_SEED = 42

# Candidate low-altitude rows from the full GPX table.
potentially_low_boys = flight_details[flight_details['altitude'] <= LOW_ALTITUDE_MAX]

# Restrict the manifest to flights flown by the 'cvpd' department.
geocodio_flights = geocodio_flights[geocodio_flights['department'] == 'cvpd']

# Unique ids of low-altitude flights, in a reproducible shuffled order,
# kept only when the id also appears in the cvpd manifest.
flight_ids = (
    potentially_low_boys[['id']]
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .drop_duplicates()
)
flight_ids = flight_ids[flight_ids['id'].isin(geocodio_flights['id'])]['id']


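### Launch Sites

Census block GEOIDs of the drone launch sites. They are listed here without the leading `0`; the code compares against the 15-character, zero-padded form (e.g. `060730123021013`).

- Bayview Hospital = 60730131022000
- CVPD = 60730123021013
- Sharp Chula Vista Medical Center = 60730133273000
- Southwestern College = 60730134151000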


In [33]:
# Keep only low-altitude rows whose flight id matched the cvpd manifest.
potentially_low_boys = potentially_low_boys[potentially_low_boys['id'].isin(flight_ids)]

# The five geoids with the most non-null `id` rows. Despite the name,
# `bad_ids` holds geoids, not flight ids.
rows_per_geoid = potentially_low_boys.groupby('geoid').count()
busiest_geoids = rows_per_geoid.sort_values('id').tail(5).reset_index()
bad_ids = busiest_geoids[['geoid']]

# Preview distinct (id, type) pairs that fall outside those five geoids,
# ordered by call type.
outside_busiest = ~potentially_low_boys['geoid'].isin(bad_ids['geoid'])
(
    potentially_low_boys.loc[outside_busiest, ['id', 'type']]
    .drop_duplicates()
    .sort_values('type')
    .head(30)
)

Unnamed: 0,id,type
311625,c05f388ec44bf6c1c5a1d166436e7a80,415 Subject
4569140,6857da3156777fbdf6e01add0f6a93c5,Assist Hot Team
100441,b1610ced3d0f7e2315e2f9880867fa08,DUI Checkpoint
1463550,d9dd948fc30c1551fc4ca1a1cda05548,DUI Checkpoint
3784914,344689fff6665350cf6326ff2c06a57d,Domestic Violence
285742,3568e1f67005c9422221f4db425cd078,Homeless Outreach
286742,30308672f8dcdbbafb721303f83c132d,Homeless Outreach
289375,35de6467bec612fb498eb60aa43ca7b6,Homeless Outreach
290216,5d653410cd752592043ef2281db14028,Homeless Outreach
292235,1dda6c8eb1304675ac3a4f397eb943d1,Homeless Outreach


In [93]:
# Zero-padded GEOID of the CVPD launch site (listed under "Launch Sites"
# without its leading zero).
CVPD_LAUNCH_GEOID = '060730123021013'

# Build a new frame rather than assigning into the filtered slice: writing a
# column onto a boolean-filtered DataFrame is the chained-assignment pattern
# pandas warns about, and that warning is silenced in this notebook.
potentially_low_boys = (
    potentially_low_boys
    # Round altitude to the nearest whole number.
    .assign(altitude=lambda frame: frame['altitude'].round())
    # Drop rows recorded in the CVPD launch-site block.
    .loc[lambda frame: frame['geoid'] != CVPD_LAUNCH_GEOID]
)

In [103]:
# Keep only rows whose `sequence` text contains "2023".
# NOTE(review): presumably `sequence` is a timestamp and this selects the year;
# the substring would also match "2023" anywhere else in the value - confirm.
sequence_text = potentially_low_boys['sequence'].astype(str)
potentially_low_boys = potentially_low_boys[sequence_text.str.contains('2023')]

In [110]:
# Export the de-duplicated low-altitude rows (selected columns only, no index).
low_boy_columns = ['id','type','incident_id','address_map','latitude','longitude','altitude','sequence']
low_boy_rows = potentially_low_boys[low_boy_columns].drop_duplicates()
low_boy_rows.to_csv('./low-boys.csv', index=False)

In [104]:
# Folder of per-flight KML files, derived from `storage` so that relocating
# the data only requires changing that one variable.
kml_dir = os.path.join(storage, "flights", "kml")

# Read one KML file per distinct low-altitude flight id and tag every feature
# with the id it came from. Reading with driver='KML' relies on the KML driver
# having been enabled in fiona during setup.
compiled_flights = []
for r_id in tqdm(potentially_low_boys['id'].unique(), desc="Reading KML"):
    flight_features = gpd.read_file(os.path.join(kml_dir, f"{r_id}.kml"), driver='KML')
    flight_features['id'] = r_id
    compiled_flights.append(flight_features)


In [109]:
# Stack the features from every KML file into a single frame with a fresh index.
all_features = pd.concat(compiled_flights, ignore_index=True)

# Drop features named "Home", keeping the remaining (flight path) features.
# NOTE(review): presumably "Home" marks the take-off point - confirm.
paths = all_features[all_features['Name'] != "Home"]

# Write the distinct (geometry, id) pairs to CSV without the index.
distinct_paths = paths[['geometry', 'id']].drop_duplicates()
distinct_paths.to_csv('./low-paths.csv', index=False)

In [111]:
# Display the combined frame of KML features that are not named "Home".
paths

Unnamed: 0,Name,Description,geometry,id
1,Airdata.com,,"LINESTRING Z (-117.07164 32.61740 34.86653, -1...",d2bdc8877bdb078651008ce731d44a5a
3,Airdata.com,,"LINESTRING Z (-117.07164 32.61740 34.86653, -1...",8a33d73561e523eedeae76178b0e5229
5,Airdata.com,,"LINESTRING Z (-117.07166 32.61738 34.98082, -1...",6285b1e8aaf7455e76174c9335e6f56b
7,Airdata.com,,"LINESTRING Z (-117.07542 32.59255 11.13193, -1...",0d627ef435c0242351203e7fa247970f
9,Airdata.com,,"LINESTRING Z (-117.07168 32.61742 34.89764, -1...",6e36b515b6162150bbc19d1b65027d98
...,...,...,...,...
2799,Airdata.com,,"LINESTRING Z (-117.07167 32.61743 34.87211, -1...",6d2a6c98567541788cf6d5effd7e9cc2
2801,Airdata.com,,"LINESTRING Z (-117.07168 32.61744 34.99027, -1...",c9d3f44b57795f6babda502e37762090
2803,Airdata.com,,"LINESTRING Z (-117.07167 32.61743 34.87211, -1...",4019ff5b921f49e5cca46120cf7ba79f
2805,Airdata.com,,"LINESTRING Z (-117.07167 32.61744 34.98535, -1...",c3c86e9941fa457323fba31583193c77
