In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import json
import os
import geocoder 


# https://www.bingmapsportal.com; https://geocoder.readthedocs.io/providers/Bing.html
global BING_MAPS_KEY
BING_MAPS_KEY = os.environ.get('BING_MAPS')

def getLatLon(location):
    try:
        g = geocoder.bing(location, key=BING_MAPS_KEY)
        results = g.json
        return (results['lng'], results['lat'])
    except Exception as e:
        print('Error: {}'.format(e))
        return 'Location Not Found'

def evalCoord(coord):
    return eval(coord)


def toMinutes(t):
    tl = t.split(' ')
    unit = tl[-1]
    try:
        if unit == 'minutes':
            return float(tl[0])
        elif unit == 'seconds':
            return float(tl[0]) / 60
        elif unit == 'hours':
            return float(tl[0]) * 60
    except:
        print(tl)
        return t

def cleanTable(file):
    """
    Clean dataframe of UFO sightings
    """
    # read data
    df = pd.read_csv(
        file, 
        parse_dates=['Date / Time', 'Posted'],
        dtype={
            'Country':str, 
            'State':str, 
            'City':str,
            'Shape':str, 
            'Duration':str,
            'Summary':str,
            'Images':str
            })
    # filter columns; rename
    df = df.rename(columns={'Date / Time': 'Timestamp'})
    # filter to USA (excluding minor outlying islands)
    df = df.loc[df.Country == 'USA'].reset_index(
        drop=True).drop(columns='Country')
    # read json duration data
    with open('../src/data/durations.json') as f:
        durations = json.loads(f.read())
        f.close()
    # map to `Updated_Duration`; impute null with median
    df["Updated_Duration"] = df.Duration.map(durations).replace({'nan': np.nan})
    df.loc[~df.Updated_Duration.isna(), 'Updated_Duration'] = df.loc[~df.Updated_Duration.isna()].Updated_Duration.apply(toMinutes)
    df.loc[df.Updated_Duration.isna(), 'Updated_Duration'] = np.median(df.Updated_Duration.values)
    # get lat lon
    if os.path.exists('data/locations.csv'):
        locations = pd.read_csv('data/locations.csv')
        df['Coords'] = locations.Coords.apply(evalCoord)
        pass
    else:
        df['Coords'] = (df.City + ', ' + df.State).apply(getLatLon)
        df[['City', 'State', 'Coords']].to_csv('data/locations.csv', index=False)
    return df


date_start = '202107'
date_end = '202206'
file = "../src/data/UFOs{}-{}.csv".format(date_start, date_end)
df = cleanTable(file=file)

In [3]:
from shapely.ops import unary_union
import pickle
import geopandas as gpd

states = gpd.read_file('../src/data/state.shp')
# filter states to main USA body
states = states.loc[~states.STATE.isin([
    'United States Virgin Islands', 
    'Alaska', 
    'Hawaii', 
    'Commonwealth of the Northern Mariana Islands', 
    'Guam'])]

if os.path.exists('data/US_body'):
    # Load US_body from disc
    with open('data/US_body', "rb") as poly_file:
        US_body = pickle.load(poly_file)
else:   
    US_body = unary_union(states.geometry)
    # Save polygon to disc
    with open('data/US_body', "wb") as poly_file:
        pickle.dump(US_body, poly_file, pickle.HIGHEST_PROTOCOL)

In [5]:
import numpy as np
from inpoly import inpoly2

coords = np.array(df.Coords.values.tolist())

array([[-122.15556335,   38.05392456],
       [-121.98082733,   37.55020142],
       [-120.84700775,   38.68222427],
       ...,
       [ -85.37796783,   32.65088272],
       [ -93.44268799,   45.20758438],
       [ -74.91532898,   41.47310257]])

In [6]:
from shapely.geometry import Point
df.loc[df.Coords.apply(Point).isin(US_body)]

  values = _ensure_arraylike(list(values))
  values = _ensure_arraylike(list(values))
Exception ignored in: <function BaseGeometry.__del__ at 0x00000271E1921630>
Traceback (most recent call last):
  File "c:\Users\bento\OneDrive\code_and_data\ufo-from-vector-space\env\lib\site-packages\shapely\geometry\base.py", line 209, in __del__
    self._empty(val=None)
  File "c:\Users\bento\OneDrive\code_and_data\ufo-from-vector-space\env\lib\site-packages\shapely\geometry\base.py", line 191, in _empty
    def _empty(self, val=EMPTY):
KeyboardInterrupt: 


In [None]:
from shapely.geometry import Point
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt

geometry = [Point(c) for c in df.Coords]
gdf = GeoDataFrame(df, geometry=geometry)
gdf.plot(ax=states.plot(figsize=(15,15)), marker='o', color='red', markersize=15)
plt.show()