In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import json


def toMinutes(t):
    """
    Convert a duration string such as ``'30 seconds'`` to a number of minutes.

    The first whitespace-separated token is read as the amount and the last
    token as the unit. Recognised units (case-insensitive, singular or plural)
    are seconds, minutes and hours.

    Parameters
    ----------
    t : str
        Duration text, e.g. ``'5 minutes'``, ``'2 hours'``, ``'30 seconds'``.

    Returns
    -------
    float
        The duration in minutes, or NaN when ``t`` is not a string, is blank,
        has an unrecognised unit, or has an amount that is not a number.
        Always returning a float keeps a column built from this function
        numeric, so it can be aggregated without mixed-type errors.
    """
    nan = float('nan')
    # Missing values (None / NaN) have no `.split`; treat them as unknown.
    if not isinstance(t, str):
        return nan
    # split() without an argument tolerates leading/trailing/repeated spaces.
    tl = t.split()
    if not tl:
        return nan
    unit = tl[-1].lower()
    if unit not in ('second', 'seconds', 'minute', 'minutes', 'hour', 'hours'):
        return nan
    try:
        amount = float(tl[0])
    except ValueError:
        # Known unit but non-numeric amount (e.g. 'a few minutes'): show the
        # offending tokens so the upstream mapping can be corrected.
        print(tl)
        return nan
    if unit.startswith('second'):
        return amount / 60
    if unit.startswith('hour'):
        return amount * 60
    return amount

def cleanTable(file: str, durations_file: str = '../src/data/durations.json') -> pd.DataFrame:
    """
    Clean dataframe of UFO sightings

    Reads the sightings CSV, keeps only the rows whose ``Country`` is
    ``'USA'``, and adds a numeric ``Updated_Duration`` column holding the
    sighting duration in minutes.

    Parameters
    ----------
    file : str
        Path to the sightings CSV. It must contain at least the columns
        ``Date / Time``, ``Posted``, ``Country`` and ``Duration``.
    durations_file : str, optional
        Path to a JSON object that maps raw ``Duration`` strings to
        normalised duration strings understood by ``toMinutes``; the literal
        value ``'nan'`` marks a duration as unknown. Defaults to the path the
        notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with ``Date / Time`` renamed to ``Timestamp``,
        ``Country`` dropped, a fresh integer index, and ``Updated_Duration``
        added. Unknown durations are imputed with the median of the known
        ones.
    """
    # read data
    df = pd.read_csv(
        file,
        parse_dates=['Date / Time', 'Posted'],
        dtype={
            'Country': str,
            'State': str,
            'City': str,
            'Shape': str,
            'Duration': str,
            'Summary': str,
            'Images': str,
        },
    )
    # rename the timestamp column
    df = df.rename(columns={'Date / Time': 'Timestamp'})
    # filter to rows labelled 'USA' (presumably minor outlying islands carry a
    # different Country label -- TODO confirm against the data)
    df = (
        df.loc[df.Country == 'USA']
        .reset_index(drop=True)
        .drop(columns='Country')
    )
    # read json duration data; the `with` block closes the file on exit
    with open(durations_file) as f:
        durations = json.load(f)
    # map raw durations to their normalised strings; 'nan' means "unknown"
    normalised = df.Duration.map(durations).replace({'nan': np.nan})
    # convert to minutes, leaving missing entries untouched; anything that is
    # still not a number afterwards is coerced to NaN so the column is float
    minutes = pd.to_numeric(
        normalised.map(toMinutes, na_action='ignore'), errors='coerce')
    # impute null with median. Series.median() skips NaN; np.median over the
    # raw values does not, so it cannot be used while gaps are still present.
    df["Updated_Duration"] = minutes.fillna(minutes.median())
    return df


# First and last month (YYYYMM) encoded in the name of the CSV to load.
date_start = '202107'
date_end = '202206'
file = f"../src/data/UFOs{date_start}-{date_end}.csv"
# Load and clean the sightings for that window.
df = cleanTable(file=file)

In [3]:
# Display the cleaned sightings table; the rendered output below elides the middle rows.
df

Unnamed: 0,Timestamp,City,State,Shape,Duration,Summary,Posted,Images,Updated_Duration
0,2021-07-31 22:00:00,Benicia,CA,Sphere,1 hour,Experienced a UFO drive-by until it came down ...,2021-12-19,,60.0
1,2021-07-31 21:30:00,Fremont,CA,Oval,30,Blue lights and sharp maneuvers.,2021-08-16,,1.0
2,2021-07-31 21:15:00,El Dorado,CA,Light,15 min,"Unexplained lights in the sky, moving erratica...",2021-08-16,,15.0
3,2021-07-31 20:45:00,Windsor,VA,Circle,Only observed for a few m,Exceptionally big and bright star/planet-like ...,2021-08-16,,3.0
4,2021-07-31 18:30:00,Staton Island,NY,Other,30 Seconds,Black Icosahedron Approximately 10ft in Diameter,2022-03-04,Yes,0.5
...,...,...,...,...,...,...,...,...,...
3214,2022-06-01 09:08:00,Kansas City,MO,,,MADAR Node 40,2022-06-22,,1.0
3215,2022-06-01 04:49:00,Jacksonville,FL,Circle,2 hours,Witness three objects hovering in the sky for ...,2022-06-22,,120.0
3216,2022-06-01 03:40:00,Opelika,AL,Formation,2-3 seconds,Three orbs rotating flying through the sky,2022-06-22,,0.033333
3217,2022-06-01 03:36:00,Dayton,MN,Light,a few seconds,"Pulsing bright blue light flashing across sky,...",2022-06-22,,0.05


In [6]:
import os
import geocoder 


# Bing Maps API key, read from the BING_MAPS environment variable
# (None when the variable is not set). See https://www.bingmapsportal.com
# No `global` statement is needed: a module-level assignment is already global.
BING_MAPS_KEY = os.environ.get('BING_MAPS')

def getLatLon(location):
    """
    Look up the coordinates of a free-text location with the Bing geocoder.

    Parameters
    ----------
    location : str
        Query string, e.g. ``'City, ST'``.

    Returns
    -------
    tuple or str
        ``(lat, lng)`` taken from the geocoder's JSON result, or the string
        ``'Location Not Found'`` when the input is missing or blank, or when
        the lookup does not yield coordinates.
    """
    # Missing values (None / NaN) and blank strings cannot be geocoded; return
    # the sentinel directly instead of sending a meaningless query.
    if not isinstance(location, str) or not location.strip():
        return 'Location Not Found'
    try:
        g = geocoder.bing(location, key=BING_MAPS_KEY)
        results = g.json
        return (results['lat'], results['lng'])
    except Exception:
        # Best-effort lookup: any failure maps to the sentinel. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate, so a long batch of lookups can still be interrupted.
        return 'Location Not Found'

# Geocode every sighting from its "City, State" string; as the cell's last
# expression, the resulting Series is what gets displayed.
(df['City'] + ', ' + df['State']).map(getLatLon)