# Finding Anomalies

## Setup

### Import Data

In [116]:
import os

import fiona
import geopandas as gpd
import gpxpy
import numpy as np
import pandas as pd
from multiprocess.pool import Pool
from tqdm.notebook import tqdm, trange
import warnings
import geopy.distance

# Suppress every warning for the rest of the notebook session.
warnings.filterwarnings('ignore')

# Mark both spellings of the KML driver as read/write in fiona's driver table.
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Base directory for the large CSV inputs loaded in later cells. The default is
# the original external-drive mount point; set DRONES_STORAGE to run the
# notebook on a machine where the data lives elsewhere.
storage = os.environ.get("DRONES_STORAGE", "/Volumes/easystore/Drones/")

# Flights previously flagged as lingering anomalies, plus the county geometry table.
lingering_anomolies = gpd.read_file("../../data/anomolies/lingering-one-mile-from-destination.geojson")
sd_county = pd.read_csv("../../data/mapping/sd_county-geometry.csv")

In [117]:
# Keep only the county rows whose ALAND20 value is below 100000
# (presumably land area per the Census column naming; confirm units),
# then persist that subset for reuse.
medium_sized_blocks = sd_county.loc[sd_county['ALAND20'].lt(100000)]
medium_sized_blocks.to_csv(
    "../../data/mapping/md-size_county-geometry.csv",
    index=False,
)

In [118]:
# Read the GPX/census CSV in 100000-row chunks (every column as str) with a
# progress bar, then stitch the chunks back into a single frame.
flight_details = pd.concat(
    list(
        tqdm(
            pd.read_csv(
                f"{storage}/gpx-with-census-data.csv",
                dtype=str,
                chunksize=100000,
            ),
            desc="Loading data",
        )
    )
)

Loading data: 0it [00:00, ?it/s]

In [119]:
# Restrict to the flagged flights, record each geoid's string length in `len`,
# and restore a leading "0" on geoids that are exactly 14 characters long.
flight_details = (
    flight_details.loc[flight_details["id"].isin(lingering_anomolies["id"])]
    .assign(geoid=lambda df: df["geoid"].astype(str))
    .assign(len=lambda df: df["geoid"].str.len())
    .assign(
        geoid=lambda df: df["geoid"].where(df["len"] != 14, "0" + df["geoid"])
    )
)


In [120]:
# Read the geocodio flight manifest in 100000-row chunks (every column as str)
# with a progress bar, then stitch the chunks back into a single frame.
geocodio_flights = pd.concat(
    list(
        tqdm(
            pd.read_csv(
                f"{storage}/geocodio/all-flights-manifest-extra-col_geocodio.csv",
                dtype=str,
                chunksize=100000,
            ),
            desc="Loading data",
        )
    )
)

Loading data: 0it [00:00, ?it/s]

In [121]:
def _geodesic_miles(lat_map, lon_map, latitude, longitude):
    """Return the geodesic distance in miles between (lat_map, lon_map) and (latitude, longitude)."""
    return geopy.distance.geodesic((lat_map, lon_map), (latitude, longitude)).miles


def calc_dist(flight_id):
    """Return the distance in miles between a flight's two recorded points.

    The flight is looked up in the notebook-level `geocodio_flights` frame;
    the first row whose `id` equals `flight_id` supplies the `lat_map`/`lon_map`
    point and the `Latitude`/`Longitude` point.

    Raises:
        IndexError: if no row has the given id.
    """
    flight = geocodio_flights[geocodio_flights['id'] == flight_id]
    return _geodesic_miles(
        flight['lat_map'].values[0],
        flight['lon_map'].values[0],
        flight['Latitude'].values[0],
        flight['Longitude'].values[0],
    )


# Calling calc_dist for every row re-scans the whole frame each time (quadratic).
# Instead, compute the distance once per distinct id from that id's first row
# (the same row calc_dist would pick) and map the result back onto every row.
_first_row_per_id = geocodio_flights.drop_duplicates(subset='id')
_miles_by_id = {
    flight_id: _geodesic_miles(lat_map, lon_map, latitude, longitude)
    for flight_id, lat_map, lon_map, latitude, longitude in zip(
        _first_row_per_id['id'],
        _first_row_per_id['lat_map'],
        _first_row_per_id['lon_map'],
        _first_row_per_id['Latitude'],
        _first_row_per_id['Longitude'],
    )
}
geocodio_flights['map-delta'] = geocodio_flights['id'].map(_miles_by_id)

In [122]:
# Number of distinct rows whose map-delta (miles) is at most 0.16.
len(
    geocodio_flights
    .loc[lambda df: df['map-delta'] <= .16]
    .drop_duplicates()
)

7890

In [123]:
# Keep only the flagged flights and reduce `date` to a plain calendar date.
geocodio_flights = (
    geocodio_flights.loc[geocodio_flights['id'].isin(lingering_anomolies['id'])]
    .assign(date=lambda df: pd.to_datetime(df["date"]).dt.date)
)


In [124]:
# Normalise dtypes: incident type as text, the three block identifiers as
# integers (via float, so values written like "123.0" parse cleanly).
lingering_anomolies['incident_type'] = lingering_anomolies['incident_type'].astype(str)
for block_col in ('destination_block', 'starting_block', 'lingering_block'):
    lingering_anomolies[block_col] = lingering_anomolies[block_col].astype(float).astype(int)

# Drop flights that start in their destination block, and flights whose
# lingering block is the destination block itself.
lingering_anomolies = lingering_anomolies[
    (lingering_anomolies['destination_block'] != lingering_anomolies['starting_block'])
    & (lingering_anomolies['lingering_block'] != lingering_anomolies['destination_block'])
]

# Distinct flight ids remaining.
len(lingering_anomolies['id'].drop_duplicates())


628

In [125]:
# Keep only flights whose map-delta in geocodio_flights is at most 0.25.
lingering_anomolies = lingering_anomolies[
    lingering_anomolies['id'].isin(
        geocodio_flights.loc[geocodio_flights['map-delta'] <= .25, 'id']
    )
]

# Distinct flight ids remaining.
len(lingering_anomolies['id'].drop_duplicates())

90

In [126]:
# Keep only flights whose lingering block is one of the medium-sized blocks.
lingering_anomolies = lingering_anomolies[
    lingering_anomolies['lingering_block'].isin(medium_sized_blocks['GEOID20'])
]

# Distinct flight ids remaining.
len(lingering_anomolies['id'].drop_duplicates())

32

In [127]:
# Keep only flights whose `distance` is at most 5
# (units are not shown in this notebook; confirm against the source data).
lingering_anomolies = lingering_anomolies[
    lingering_anomolies['distance'].astype(float).le(5)
]

# Distinct flight ids remaining.
len(lingering_anomolies['id'].drop_duplicates())

31

In [128]:
# Restrict to flights whose lingering block is a medium-sized block.
# NOTE(review): this is the same membership test applied a few cells earlier
# and looks like a no-op at this point (the id count is unchanged); confirm
# whether the cell is still needed.
lingering_anomolies = lingering_anomolies[
    lingering_anomolies['lingering_block'].isin(medium_sized_blocks['GEOID20'])
]

# Distinct flight ids remaining.
len(lingering_anomolies['id'].drop_duplicates())

31

In [130]:
# Write the surviving flights, limited to the columns needed downstream, as GeoJSON.
lingering_anomolies[
    [
        'id',
        'geometry',
        'incident_type',
        'distance',
        'lingering_block',
        'destination_block',
        'longitude',
        'latitude',
        'address_map',
        'starting_block',
    ]
].to_file(
    "../../data/anomolies/lingering-one-mile-from-destination-stable-flights.geojson",
    driver='GeoJSON',
)
