In [1]:
import gmaps
import gmaps.datasets
import gmaps.geojson_geometries
import json
from matplotlib.cm import viridis, plasma
from matplotlib.colors import to_hex
import matplotlib.pyplot as plt
import pandas as pd
from config import census_key, g_key
# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Configure gmaps
gmaps.configure(api_key=g_key)

# Load the GeoJSON feature collection used later for the map layer.
# The context manager guarantees the file handle is closed after parsing
# (a bare json.load(open(...)) leaves it open until garbage collection).
with open("data/us_states.json") as geojson_file:
    ngeo = json.load(geojson_file)

# Quick sanity check: number of features and the top-level keys.
print(len(ngeo['features']))
print(ngeo.keys())
print()

52
dict_keys(['type', 'features'])



In [2]:
# Load the raw UFO sightings CSV.
# low_memory=False makes pandas read the file in a single pass rather than
# chunk by chunk, so each column's dtype is inferred from all of its values.
ufo_data = pd.read_csv(filepath_or_buffer='data/ufo_data.csv', low_memory=False)

# Preview the first six rows.
ufo_data.head(n=6)

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
5,10/10/1961 19:00,bristol,tn,us,sphere,300,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889


In [3]:
# Rename the two-letter 'state' column to 'abv' so it can serve as the join
# key against the state-code lookup table.
ufo_data = ufo_data.rename(columns={'state': 'abv'})

ufo_data

Unnamed: 0,datetime,city,abv,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
...,...,...,...,...,...,...,...,...,...,...,...
80327,9/9/2013 21:15,nashville,tn,us,light,600,10 minutes,Round from the distance/slowly changing colors...,9/30/2013,36.1658333,-86.784444
80328,9/9/2013 22:00,boise,id,us,circle,1200,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,9/30/2013,43.6136111,-116.202500
80329,9/9/2013 22:00,napa,ca,us,other,1200,hour,Napa UFO&#44,9/30/2013,38.2972222,-122.284444
80330,9/9/2013 22:20,vienna,va,us,circle,5,5 seconds,Saw a five gold lit cicular craft moving fastl...,9/30/2013,38.9011111,-77.265556


In [4]:
# Read the state lookup table and expose its 'Code' column under the name
# 'abv', matching the key column name used in ufo_data.
state_codes = pd.read_excel('data/state_codes.xlsx').rename(columns={'Code': 'abv'})


In [5]:
# Attach the lookup table's columns to every sighting. The default (inner)
# join keeps only rows whose 'abv' value appears in both frames.
ufo_data = ufo_data.merge(state_codes, on='abv')

# Keep and order the columns used by the rest of the notebook.
# NOTE(review): 'longitude ' carries a trailing space -- presumably it matches
# the header in the source CSV; confirm before "fixing" it.
ordered_columns = [
    'datetime', 'country', 'State', 'city', 'abv', 'shape',
    'duration (seconds)', 'duration (hours/min)', 'comments', 'date posted',
    'latitude', 'longitude ',
]
ufo_data = ufo_data[ordered_columns]

# Persist the enriched table without writing the row index.
ufo_data.to_csv('data/ufo_data_with_state_names.csv', index=None)

In [6]:
# Number of sightings per state: count the non-null 'datetime' entries in
# each 'State' group and expose the result as a two-column frame.
sightings_by_state = (
    ufo_data.groupby('State')['datetime']
    .count()
    .rename('Sightings')
    .reset_index()
)
sightings_by_state

Unnamed: 0,State,Sightings
0,Alabama,691
1,Alaska,354
2,Arizona,2689
3,Arkansas,666
4,California,9655
5,Colorado,1505
6,Connecticut,968
7,D.C.,99
8,Delaware,183
9,Florida,4200


In [7]:
# Convert durations to numbers; entries that cannot be parsed become NaN
# (errors='coerce') instead of raising.
ufo_data['duration (seconds)'] = pd.to_numeric(ufo_data['duration (seconds)'], errors='coerce')

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
ufo_data.info()

# Rows whose duration was parsed successfully.
ufo_data[ufo_data['duration (seconds)'].notna()]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 70905 entries, 0 to 70904
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   datetime              70905 non-null  object 
 1   country               65100 non-null  object 
 2   State                 70905 non-null  object 
 3   city                  70905 non-null  object 
 4   abv                   70905 non-null  object 
 5   shape                 69175 non-null  object 
 6   duration (seconds)    70903 non-null  float64
 7   duration (hours/min)  70905 non-null  object 
 8   comments              70892 non-null  object 
 9   date posted           70905 non-null  object 
 10  latitude              70905 non-null  object 
 11  longitude             70905 non-null  float64
dtypes: float64(2), object(10)
memory usage: 7.0+ MB
None


Unnamed: 0,datetime,country,State,city,abv,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,us,Texas,san marcos,tx,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,,Texas,lackland afb,tx,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1956 21:00,us,Texas,edna,tx,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
3,10/10/1977 12:00,us,Texas,san antonio,tx,other,30.0,30 seconds,i was about six or seven and my family and me ...,2/24/2005,29.4238889,-98.493333
4,10/10/1980 19:00,us,Texas,houston,tx,sphere,180.0,3 min,Sphere&#44 No lights&#44 moving through neigh...,4/16/2005,29.7630556,-95.363056
...,...,...,...,...,...,...,...,...,...,...,...,...
70900,9/11/2006 01:00,,D.C.,washington&#44 d.c.,dc,diamond,720.0,12 minutes,((HOAX??)) It was a very shiny diamond-shaped ...,12/14/2006,38.907231,-77.036464
70901,9/23/2008 21:00,,D.C.,washington&#44 d.c.,dc,light,3360.0,56 minutes,3 lights moving very smoothley at the capital,10/31/2008,38.907231,-77.036464
70902,9/29/2000 16:35,,D.C.,washington&#44 d.c.,dc,circle,900.0,approx. 15 minutes,Saw dancing metallic balls high in the sky ove...,9/6/2002,38.907231,-77.036464
70903,9/29/2004 02:30,,D.C.,washington&#44 d.c.,dc,circle,600.0,10 minutes,circular disc coming to a slight point-like th...,7/13/2005,38.907231,-77.036464


In [8]:
# Mean sighting duration (in seconds) for each state, as a two-column frame.
duration_by_state = (
    ufo_data.groupby('State')['duration (seconds)']
    .mean()
    .rename('Duration')
    .reset_index()
)
duration_by_state

Unnamed: 0,State,Duration
0,Alabama,1393.408828
1,Alaska,4231.830508
2,Arizona,5949.009338
3,Arkansas,100867.138889
4,California,3928.781072
5,Colorado,3024.394751
6,Connecticut,13089.214928
7,D.C.,1161.224545
8,Delaware,868.904372
9,Florida,13504.459262


In [9]:
# Frequency of each reported shape, most common first; missing values are
# not counted (both are the value_counts defaults, spelled out here).
ufo_data['shape'].value_counts(sort=True, dropna=True)

light        14647
triangle      7083
circle        6601
fireball      5564
unknown       4984
other         4898
sphere        4738
disk          4539
oval          3293
formation     2169
cigar         1797
changing      1738
flash         1167
rectangle     1161
cylinder      1134
diamond       1019
chevron        871
egg            645
teardrop       644
cone           266
cross          202
delta            6
crescent         2
round            2
hexagon          1
dome             1
flare            1
changed          1
pyramid          1
Name: shape, dtype: int64

In [10]:
# Fold the very rare shape labels into the existing 'other' bucket.
# Missing values are not in the list, so they stay missing.
rare_shapes = ('delta', 'crescent', 'round', 'pyramid', 'changed', 'hexagon', 'flare', 'dome')
is_rare = ufo_data['shape'].isin(rare_shapes)
ufo_data['shape'] = ufo_data['shape'].mask(is_rare, 'other')

ufo_data['shape'].value_counts()


light        14647
triangle      7083
circle        6601
fireball      5564
unknown       4984
other         4913
sphere        4738
disk          4539
oval          3293
formation     2169
cigar         1797
changing      1738
flash         1167
rectangle     1161
cylinder      1134
diamond       1019
chevron        871
egg            645
teardrop       644
cone           266
cross          202
Name: shape, dtype: int64

In [11]:
# Count of Shapes by State

# Count the non-null entries of every remaining column for each
# (State, shape) pair. The result has two-level columns:
# (counted column, shape).
shape_counts = pd.pivot_table(ufo_data, index='State', aggfunc='count', columns='shape')

# Select the per-shape counts by label rather than by a positional slice and
# a hard-coded header list: 'abv' is the merge key and is non-null on every
# row, so its counts equal the number of sightings per (State, shape).
# Deriving the headers from the actual columns means a new or missing shape
# can no longer silently mislabel the table.
shape_counts_by_state = (
    shape_counts['abv']
    .reset_index()                 # 'State' becomes a regular column
    .rename(columns=str.title)     # 'light' -> 'Light'; 'State' is unchanged
    .rename_axis(columns=None)     # drop the leftover 'shape' columns label
)

shape_counts_by_state

Unnamed: 0,State,Changing,Chevron,Cigar,Circle,Cone,Cross,Cylinder,Diamond,Disk,Egg,Fireball,Flash,Formation,Light,Other,Oval,Rectangle,Sphere,Teardrop,Triangle,Unknown
0,Alabama,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
1,Alaska,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,Arizona,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0
3,Arkansas,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,California,290.0,158.0,216.0,899.0,31.0,32.0,134.0,155.0,693.0,95.0,744.0,154.0,318.0,1983.0,677.0,420.0,159.0,674.0,79.0,891.0,603.0
5,Colorado,38.0,32.0,35.0,123.0,3.0,3.0,31.0,15.0,99.0,12.0,99.0,37.0,45.0,308.0,115.0,72.0,26.0,110.0,12.0,172.0,89.0
6,Connecticut,14.0,13.0,34.0,101.0,5.0,5.0,20.0,16.0,61.0,8.0,97.0,14.0,18.0,172.0,66.0,49.0,23.0,50.0,14.0,86.0,73.0
7,D.C.,4.0,1.0,3.0,5.0,1.0,,7.0,2.0,9.0,1.0,8.0,1.0,3.0,16.0,10.0,3.0,1.0,14.0,,6.0,1.0
8,Delaware,4.0,2.0,,24.0,,,1.0,3.0,13.0,3.0,10.0,3.0,6.0,33.0,11.0,3.0,2.0,18.0,2.0,30.0,14.0
9,Florida,113.0,51.0,89.0,387.0,13.0,13.0,72.0,56.0,242.0,37.0,452.0,66.0,126.0,821.0,280.0,214.0,69.0,292.0,38.0,385.0,293.0


In [12]:
# Combine the per-state summaries into one table keyed on 'State':
# sighting counts + mean duration first, then the per-shape counts.
df = sightings_by_state.merge(duration_by_state, on='State')
ufo_state_stats = df.merge(shape_counts_by_state, on='State')

# Persist the combined statistics without writing the row index.
ufo_state_stats.to_csv('data/ufo_state_stats.csv', index=None)

ufo_state_stats

Unnamed: 0,State,Sightings,Duration,Changing,Chevron,Cigar,Circle,Cone,Cross,Cylinder,Diamond,Disk,Egg,Fireball,Flash,Formation,Light,Other,Oval,Rectangle,Sphere,Teardrop,Triangle,Unknown
0,Alabama,691,1393.408828,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
1,Alaska,354,4231.830508,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,Arizona,2689,5949.009338,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0
3,Arkansas,666,100867.138889,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,California,9655,3928.781072,290.0,158.0,216.0,899.0,31.0,32.0,134.0,155.0,693.0,95.0,744.0,154.0,318.0,1983.0,677.0,420.0,159.0,674.0,79.0,891.0,603.0
5,Colorado,1505,3024.394751,38.0,32.0,35.0,123.0,3.0,3.0,31.0,15.0,99.0,12.0,99.0,37.0,45.0,308.0,115.0,72.0,26.0,110.0,12.0,172.0,89.0
6,Connecticut,968,13089.214928,14.0,13.0,34.0,101.0,5.0,5.0,20.0,16.0,61.0,8.0,97.0,14.0,18.0,172.0,66.0,49.0,23.0,50.0,14.0,86.0,73.0
7,D.C.,99,1161.224545,4.0,1.0,3.0,5.0,1.0,,7.0,2.0,9.0,1.0,8.0,1.0,3.0,16.0,10.0,3.0,1.0,14.0,,6.0,1.0
8,Delaware,183,868.904372,4.0,2.0,,24.0,,,1.0,3.0,13.0,3.0,10.0,3.0,6.0,33.0,11.0,3.0,2.0,18.0,2.0,30.0,14.0
9,Florida,4200,13504.459262,113.0,51.0,89.0,387.0,13.0,13.0,72.0,56.0,242.0,37.0,452.0,66.0,126.0,821.0,280.0,214.0,69.0,292.0,38.0,385.0,293.0


In [13]:
# Map each state name to its sighting count.
data_dict = sightings_by_state.set_index('State')['Sightings'].to_dict()

# Extremes of the counts and the spread between them.
rate_min, rate_max = min(data_dict.values()), max(data_dict.values())
nh_range = rate_max - rate_min

In [14]:
#Scale the states values to lie between 0 and 1
min_nh = min(data_dict.values())
max_nh = max(data_dict.values())
nh_range = max_nh - min_nh

# Color used for map features that have no entry in data_dict.
DEFAULT_COLOR = (0, 0, 0, 0.3)


def calculate_color(state):
    """Convert a state's sighting count to a CSS hex color.

    Parameters
    ----------
    state : number
        Sighting count for one state (a value from ``data_dict``).

    Returns
    -------
    str
        Hex color string produced by ``to_hex`` from the ``plasma`` colormap.
    """
    if nh_range == 0:
        # Every state has the same count: avoid dividing by zero and place
        # all of them at the same end of the scale.
        normalized_nh = 0.0
    else:
        # Rescale the count to a number between 0 and 1.
        normalized_nh = (state - min_nh) / nh_range
    # Invert so that a higher count renders a darker color.
    inverse_nh = 1.0 - normalized_nh
    # Look up the matplotlib color, then convert it to a CSS color without
    # the alpha channel.
    mpl_color = plasma(inverse_nh)
    return to_hex(mpl_color, keep_alpha=False)


# One color per GeoJSON feature, in feature order. Features whose NAME has no
# sightings entry get the default color. The explicit membership test keeps
# the fallback from hiding a KeyError raised anywhere else.
colors = [
    calculate_color(data_dict[state_name]) if state_name in data_dict else DEFAULT_COLOR
    for state_name in (feature['properties']['NAME'] for feature in ngeo['features'])
]

In [15]:
# Render the GeoJSON features on a map, using each feature's color for both
# its fill and its outline.
fig = gmaps.figure()

layer_style = {
    'fill_color': colors,
    'stroke_color': colors,
    'fill_opacity': 0.8,
}
nh_layer = gmaps.geojson_layer(ngeo, **layer_style)

fig.add_layer(nh_layer)
fig

Figure(layout=FigureLayout(height='420px'))