In [2]:
import gmaps
import gmaps.datasets
import gmaps.geojson_geometries
import json
from matplotlib.cm import viridis, plasma
from matplotlib.colors import to_hex
import matplotlib.pyplot as plt
import pandas as pd
#from config import census_key, g_key
pd.set_option('display.max_columns', None)
#gmaps.configure(api_key=g_key)

## Create State Info Table

In [3]:


# Build the state lookup table: state code, name, census area and abbreviation

states_json = pd.read_json('data/us_states.json')

# Each GeoJSON feature keeps the state's attributes under its 'properties' key.
state_properties = [feature['properties'] for feature in states_json.features.to_list()]

state_codes = pd.DataFrame(
    {'State Codes': [props['STATE'] for props in state_properties],
     'State Names': [props['NAME'] for props in state_properties],
     'State Census Area': [props['CENSUSAREA'] for props in state_properties]
    })

# Import state abbreviations

# 'D.C.' is rewritten so that row can match on the name-based merge key below
# (presumably the GeoJSON spells it 'District of Columbia' -- TODO confirm).
state_abbreviations = (
    pd.read_csv('data/state_abbreviations.csv')
    .replace('D.C.', 'District of Columbia')
    .rename(columns={'Abbreviation': 'State Abbreviations'})
)

# Outer join keeps states that appear in only one of the two sources.
state_info = pd.merge(
    state_codes, state_abbreviations,
    left_on='State Names', right_on='State', how='outer'
).drop(columns='State')

# Written to its own file: 'data/county_info.csv' is produced by the county
# cell, which would otherwise overwrite the state table.
state_info.to_csv('data/state_info.csv', index=False)

state_info

Unnamed: 0,State Codes,State Names,State Census Area,State Abbreviations
0,23,Maine,30842.923,me
1,25,Massachusetts,7800.058,ma
2,26,Michigan,56538.901,mi
3,30,Montana,145545.801,mt
4,32,Nevada,109781.18,nv
5,34,New Jersey,7354.22,nj
6,36,New York,47126.399,ny
7,37,North Carolina,48617.905,nc
8,39,Ohio,40860.694,oh
9,42,Pennsylvania,44742.703,pa


## Create County Info Table

In [4]:
# Build the county lookup table: state code, county code, name and census area

counties_json = pd.read_json('data/us_counties.json', encoding='latin1')

# Each GeoJSON feature keeps the county's attributes under its 'properties' key.
# (The loop variable is deliberately not called `object`, which would shadow
# the builtin for every later cell.)
county_properties = [feature['properties'] for feature in counties_json.features.to_list()]

county_info = pd.DataFrame(
    {'State Codes': [props['STATE'] for props in county_properties],
     'County Codes': [props['COUNTY'] for props in county_properties],
     'County Names': [props['NAME'] for props in county_properties],
     'County Census Area': [props['CENSUSAREA'] for props in county_properties]
    })

county_info.to_csv('data/county_info.csv', index=False)

county_info

Unnamed: 0,State Codes,County Codes,County Names,County Census Area
0,01,029,Cleburne,560.100
1,01,031,Coffee,678.972
2,01,037,Coosa,650.926
3,01,039,Covington,1030.456
4,01,041,Crenshaw,608.840
...,...,...,...,...
3216,72,131,San Sebastián,70.423
3217,72,133,Santa Isabel,34.023
3218,72,137,Toa Baja,23.241
3219,72,139,Trujillo Alto,20.764


## UFO Data

In [5]:
# Load the raw UFO sighting reports; the 'state' column also holds Canadian province codes

ufo_data = pd.read_csv(
    'data/ufo_data.csv',
    low_memory=False,
)


In [6]:
ufo_data.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611


In [7]:
# Number of reports per state code (counts the non-null 'datetime' entries in each group)

sightings_by_state = (
    ufo_data.groupby('state')['datetime']
    .count()
    .reset_index()
    .rename(columns={'state': 'State', 'datetime': 'State Sightings'})
)
sightings_by_state

Unnamed: 0,State,State Sightings
0,ab,333
1,ak,354
2,al,691
3,ar,666
4,az,2689
...,...,...
62,wi,1333
63,wv,486
64,wy,205
65,yk,7


In [8]:
# Convert durations to numbers; entries that cannot be parsed become NaN.
ufo_data['duration (seconds)'] = pd.to_numeric(ufo_data['duration (seconds)'], errors='coerce')

# info() prints its report itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
ufo_data.info()

# Rows whose duration was parsed successfully.
ufo_data[ufo_data['duration (seconds)'].notna()]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80332 entries, 0 to 80331
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   datetime              80332 non-null  object 
 1   city                  80332 non-null  object 
 2   state                 74535 non-null  object 
 3   country               70662 non-null  object 
 4   shape                 78400 non-null  object 
 5   duration (seconds)    80329 non-null  float64
 6   duration (hours/min)  80332 non-null  object 
 7   comments              80317 non-null  object 
 8   date posted           80332 non-null  object 
 9   latitude              80332 non-null  object 
 10  longitude             80332 non-null  float64
dtypes: float64(2), object(9)
memory usage: 6.7+ MB
None


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
...,...,...,...,...,...,...,...,...,...,...,...
80327,9/9/2013 21:15,nashville,tn,us,light,600.0,10 minutes,Round from the distance/slowly changing colors...,9/30/2013,36.1658333,-86.784444
80328,9/9/2013 22:00,boise,id,us,circle,1200.0,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,9/30/2013,43.6136111,-116.202500
80329,9/9/2013 22:00,napa,ca,us,other,1200.0,hour,Napa UFO&#44,9/30/2013,38.2972222,-122.284444
80330,9/9/2013 22:20,vienna,va,us,circle,5.0,5 seconds,Saw a five gold lit cicular craft moving fastl...,9/30/2013,38.9011111,-77.265556


In [9]:
# Mean reported duration (seconds) per state code; NaN durations are skipped by mean()

duration_by_state = (
    ufo_data.groupby('state')['duration (seconds)']
    .mean()
    .reset_index()
    .rename(columns={'state': 'State', 'duration (seconds)': 'Duration'})
)
duration_by_state

Unnamed: 0,State,Duration
0,ab,1773.351351
1,ak,4231.830508
2,al,1393.408828
3,ar,100867.138889
4,az,5949.009338
...,...,...
62,wi,2047.964966
63,wv,6239.355967
64,wy,1841.629273
65,yk,1459.714286


### State Sightings and Duration

In [10]:
# Combine the per-state report counts with the per-state mean durations
state_sightings = sightings_by_state.merge(duration_by_state, on='State')

# Persist the combined table without the positional index
state_sightings.to_csv('data/state_sightings.csv', index=False)

state_sightings

Unnamed: 0,State,State Sightings,Duration
0,ab,333,1773.351351
1,ak,354,4231.830508
2,al,691,1393.408828
3,ar,666,100867.138889
4,az,2689,5949.009338
...,...,...,...
62,wi,1333,2047.964966
63,wv,486,6239.355967
64,wy,205,1841.629273
65,yk,7,1459.714286


In [11]:
ufo_data['shape'].value_counts()

light        16565
triangle      7865
circle        7608
fireball      6208
other         5649
unknown       5584
sphere        5387
disk          5213
oval          3733
formation     2457
cigar         2057
changing      1962
flash         1328
rectangle     1297
cylinder      1283
diamond       1178
chevron        952
egg            759
teardrop       750
cone           316
cross          233
delta            7
crescent         2
round            2
dome             1
pyramid          1
changed          1
hexagon          1
flare            1
Name: shape, dtype: int64

In [12]:
# Fold the rarely used shape labels into the existing 'other' category
rare_shapes = ['delta', 'crescent', 'round', 'pyramid', 'changed', 'hexagon', 'flare', 'dome']
is_rare = ufo_data['shape'].isin(rare_shapes)
ufo_data['shape'] = ufo_data['shape'].where(~is_rare, 'other')
ufo_data['shape'].value_counts()

light        16565
triangle      7865
circle        7608
fireball      6208
other         5665
unknown       5584
sphere        5387
disk          5213
oval          3733
formation     2457
cigar         2057
changing      1962
flash         1328
rectangle     1297
cylinder      1283
diamond       1178
chevron        952
egg            759
teardrop       750
cone           316
cross          233
Name: shape, dtype: int64

### Shape Counts by State

In [13]:
# Count of sightings per shape for each state

# Count a single, explicitly named column instead of pivoting every column and
# slicing the first 21 result columns by position. 'city' is the column that
# positional slice selected (the pivot sorts its value columns alphabetically),
# so the counts are the same, but the result no longer depends on the number
# of shapes or on column order.
shape_counts = pd.pivot_table(
    ufo_data, index='state', columns='shape', values='city', aggfunc='count'
)

# Headers are derived from the data ('state' -> 'State', 'changing' ->
# 'Changing', ...) rather than from a hard-coded list that must be kept in sync.
shape_counts_by_state = (
    shape_counts
    .reset_index()
    .rename_axis(columns=None)
    .rename(columns=str.title)
)

shape_counts_by_state.to_csv('data/state_shape_counts.csv', index=False)

In [14]:
shape_counts_by_state.head()

Unnamed: 0,State,Changing,Chevron,Cigar,Circle,Cone,Cross,Cylinder,Diamond,Disk,Egg,Fireball,Flash,Formation,Light,Other,Oval,Rectangle,Sphere,Teardrop,Triangle,Unknown
0,ab,4.0,2.0,10.0,29.0,4.0,1.0,6.0,9.0,21.0,3.0,12.0,7.0,17.0,66.0,28.0,13.0,5.0,22.0,3.0,34.0,32.0
1,ak,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,al,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
3,ar,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,az,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0


In [15]:
# Add the per-shape counts to the per-state sighting summary; the join key is
# listed once (the original passed "State" twice).
data_complete = pd.merge(state_sightings, shape_counts_by_state, how="left", on="State")

In [16]:
data_complete.head()

Unnamed: 0,State,State Sightings,Duration,Changing,Chevron,Cigar,Circle,Cone,Cross,Cylinder,Diamond,Disk,Egg,Fireball,Flash,Formation,Light,Other,Oval,Rectangle,Sphere,Teardrop,Triangle,Unknown
0,ab,333,1773.351351,4.0,2.0,10.0,29.0,4.0,1.0,6.0,9.0,21.0,3.0,12.0,7.0,17.0,66.0,28.0,13.0,5.0,22.0,3.0,34.0,32.0
1,ak,354,4231.830508,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,al,691,1393.408828,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
3,ar,666,100867.138889,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,az,2689,5949.009338,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0


In [18]:
# Column dtypes of the combined table (restored so the saved output below is reproducible)
data_complete.dtypes

State               object
State Sightings      int64
Duration           float64
Changing           float64
Chevron            float64
Cigar              float64
Circle             float64
Cone               float64
Cross              float64
Cylinder           float64
Diamond            float64
Disk               float64
Egg                float64
Fireball           float64
Flash              float64
Formation          float64
Light              float64
Other              float64
Oval               float64
Rectangle          float64
Sphere             float64
Teardrop           float64
Triangle           float64
Unknown            float64
dtype: object

In [19]:
# Attach the sighting statistics to every state row, matching the state
# abbreviation against the state code used in the UFO data.
c = state_info.merge(
    data_complete,
    how='left',
    left_on='State Abbreviations',
    right_on='State',
)

In [20]:
# The merge key from the state table is no longer needed; 'State' carries the matched code
new = c.drop('State Abbreviations', axis=1)

In [21]:
new

Unnamed: 0,State Codes,State Names,State Census Area,State,State Sightings,Duration,Changing,Chevron,Cigar,Circle,Cone,Cross,Cylinder,Diamond,Disk,Egg,Fireball,Flash,Formation,Light,Other,Oval,Rectangle,Sphere,Teardrop,Triangle,Unknown
0,23,Maine,30842.923,me,633.0,3066.431122,9.0,3.0,18.0,75.0,,5.0,10.0,10.0,36.0,2.0,50.0,11.0,12.0,139.0,38.0,35.0,8.0,51.0,6.0,65.0,33.0
1,25,Massachusetts,7800.058,ma,1358.0,1312.187776,30.0,16.0,31.0,127.0,6.0,2.0,24.0,25.0,89.0,14.0,97.0,20.0,33.0,255.0,103.0,86.0,24.0,100.0,13.0,139.0,104.0
2,26,Michigan,56538.901,mi,2071.0,3885.088653,45.0,11.0,47.0,170.0,10.0,4.0,28.0,32.0,149.0,17.0,188.0,28.0,64.0,427.0,130.0,96.0,39.0,132.0,19.0,239.0,137.0
3,30,Montana,145545.801,mt,510.0,2148.723529,12.0,8.0,11.0,33.0,1.0,2.0,3.0,10.0,32.0,4.0,57.0,11.0,15.0,115.0,27.0,28.0,5.0,28.0,3.0,38.0,49.0
4,32,Nevada,109781.18,nv,905.0,2926.016519,23.0,15.0,25.0,73.0,3.0,1.0,18.0,10.0,56.0,5.0,51.0,15.0,38.0,183.0,78.0,55.0,13.0,65.0,9.0,65.0,75.0
5,34,New Jersey,7354.22,nj,1512.0,5299.992341,27.0,16.0,37.0,151.0,8.0,4.0,23.0,21.0,140.0,11.0,126.0,27.0,39.0,274.0,120.0,83.0,28.0,99.0,13.0,136.0,104.0
6,36,New York,47126.399,ny,3219.0,2914.711572,73.0,47.0,91.0,331.0,10.0,11.0,44.0,51.0,239.0,42.0,240.0,59.0,93.0,622.0,215.0,177.0,53.0,191.0,36.0,314.0,211.0
7,37,North Carolina,48617.905,nc,1869.0,1281.831648,42.0,28.0,50.0,192.0,8.0,6.0,30.0,25.0,117.0,15.0,135.0,29.0,63.0,392.0,117.0,80.0,32.0,136.0,16.0,192.0,143.0
8,39,Ohio,40860.694,oh,2425.0,1576.187773,57.0,18.0,61.0,230.0,13.0,9.0,42.0,40.0,157.0,23.0,238.0,30.0,75.0,472.0,150.0,114.0,35.0,190.0,21.0,267.0,157.0
9,42,Pennsylvania,44742.703,pa,2582.0,3990.113091,43.0,17.0,79.0,247.0,16.0,7.0,34.0,36.0,175.0,26.0,234.0,31.0,90.0,514.0,178.0,134.0,42.0,154.0,25.0,250.0,190.0


In [22]:
# Switch every column header to snake_case for the exported aggregate table.
state_level_columns = ['State Codes', 'State Names', 'State Census Area',
                       'State Sightings', 'Duration']
shape_columns = ['Changing', 'Chevron', 'Cigar', 'Circle', 'Cone', 'Cross',
                 'Cylinder', 'Diamond', 'Disk', 'Egg', 'Fireball', 'Flash',
                 'Formation', 'Light', 'Other', 'Oval', 'Rectangle', 'Sphere',
                 'Teardrop', 'Triangle', 'Unknown']

snake_case_names = {
    header: header.lower().replace(' ', '_')
    for header in state_level_columns + shape_columns
}
# 'State' holds the abbreviation, so it gets an explicit name rather than plain 'state'.
snake_case_names['State'] = 'state_abb'

final = new.rename(columns=snake_case_names)

In [23]:
final

Unnamed: 0,state_codes,state_names,state_census_area,state_abb,state_sightings,duration,changing,chevron,cigar,circle,cone,cross,cylinder,diamond,disk,egg,fireball,flash,formation,light,other,oval,rectangle,sphere,teardrop,triangle,unknown
0,23,Maine,30842.923,me,633.0,3066.431122,9.0,3.0,18.0,75.0,,5.0,10.0,10.0,36.0,2.0,50.0,11.0,12.0,139.0,38.0,35.0,8.0,51.0,6.0,65.0,33.0
1,25,Massachusetts,7800.058,ma,1358.0,1312.187776,30.0,16.0,31.0,127.0,6.0,2.0,24.0,25.0,89.0,14.0,97.0,20.0,33.0,255.0,103.0,86.0,24.0,100.0,13.0,139.0,104.0
2,26,Michigan,56538.901,mi,2071.0,3885.088653,45.0,11.0,47.0,170.0,10.0,4.0,28.0,32.0,149.0,17.0,188.0,28.0,64.0,427.0,130.0,96.0,39.0,132.0,19.0,239.0,137.0
3,30,Montana,145545.801,mt,510.0,2148.723529,12.0,8.0,11.0,33.0,1.0,2.0,3.0,10.0,32.0,4.0,57.0,11.0,15.0,115.0,27.0,28.0,5.0,28.0,3.0,38.0,49.0
4,32,Nevada,109781.18,nv,905.0,2926.016519,23.0,15.0,25.0,73.0,3.0,1.0,18.0,10.0,56.0,5.0,51.0,15.0,38.0,183.0,78.0,55.0,13.0,65.0,9.0,65.0,75.0
5,34,New Jersey,7354.22,nj,1512.0,5299.992341,27.0,16.0,37.0,151.0,8.0,4.0,23.0,21.0,140.0,11.0,126.0,27.0,39.0,274.0,120.0,83.0,28.0,99.0,13.0,136.0,104.0
6,36,New York,47126.399,ny,3219.0,2914.711572,73.0,47.0,91.0,331.0,10.0,11.0,44.0,51.0,239.0,42.0,240.0,59.0,93.0,622.0,215.0,177.0,53.0,191.0,36.0,314.0,211.0
7,37,North Carolina,48617.905,nc,1869.0,1281.831648,42.0,28.0,50.0,192.0,8.0,6.0,30.0,25.0,117.0,15.0,135.0,29.0,63.0,392.0,117.0,80.0,32.0,136.0,16.0,192.0,143.0
8,39,Ohio,40860.694,oh,2425.0,1576.187773,57.0,18.0,61.0,230.0,13.0,9.0,42.0,40.0,157.0,23.0,238.0,30.0,75.0,472.0,150.0,114.0,35.0,190.0,21.0,267.0,157.0
9,42,Pennsylvania,44742.703,pa,2582.0,3990.113091,43.0,17.0,79.0,247.0,16.0,7.0,34.0,36.0,175.0,26.0,234.0,31.0,90.0,514.0,178.0,134.0,42.0,154.0,25.0,250.0,190.0


In [24]:
# Save the per-state UFO aggregate without the positional index
final.to_csv('data/eric_aggregate.csv', index=False)

In [25]:
# Data source for drug overdose mortality by state (CDC/NCHS):
# https://www.cdc.gov/nchs/pressroom/sosmap/drug_poisoning_mortality/drug_poisoning.htm

In [49]:
# Load the drug overdose mortality table.
# thousands=',' lets counts written like "1,670" parse as numbers. Without it
# DEATHS loads as object dtype, and a later to_numeric(errors='coerce') turns
# every value >= 1000 into NaN.
# NOTE(review): assumes the raw file uses comma thousands separators -- confirm
# against the CSV; the option is harmless if it does not.
drug_df = pd.read_csv('data/Drug Overdose Mortality by State.csv', thousands=',')

In [50]:
drug_df

Unnamed: 0,YEAR,STATE,RATE,DEATHS,URL
0,2018.0,AL,16.6,775,/nchs/pressroom/states/alabama/al.htm
1,2018.0,AK,14.6,110,/nchs/pressroom/states/alaska/ak.htm
2,2018.0,AZ,23.8,1670,/nchs/pressroom/states/arizona/az.htm
3,2018.0,AR,15.7,444,/nchs/pressroom/states/arkansas/ar.htm
4,2018.0,CA,12.8,5348,/nchs/pressroom/states/california/ca.htm
...,...,...,...,...,...
346,1999.0,WA,9.3,555,/nchs/pressroom/states/washington/washington.htm
347,1999.0,WV,4.1,75,/nchs/pressroom/states/westvirginia/westvirgin...
348,1999.0,WI,4.0,212,/nchs/pressroom/states/wisconsin/wisconsin.htm
349,1999.0,WY,4.1,20,/nchs/pressroom/states/wyoming/wyoming.htm


In [51]:
drug_df.dtypes

YEAR      float64
STATE      object
RATE      float64
DEATHS     object
URL        object
dtype: object

In [61]:
# Lower-case the state codes so they can be matched against 'state_abb' in the UFO aggregate
drug_df = drug_df.assign(STATE=drug_df['STATE'].str.lower())

In [69]:
# Keep only the 2018 rows of the mortality table
query_df = drug_df[drug_df['YEAR'] == 2018]

In [73]:
# Right join: every row of the UFO aggregate is kept, with the 2018 mortality
# figures attached where the state code matches.
merge = query_df.merge(
    final,
    how='right',
    left_on='STATE',
    right_on='state_abb',
)

In [74]:
merge

Unnamed: 0,YEAR,STATE,RATE,DEATHS,URL,state_codes,state_names,state_census_area,state_abb,state_sightings,duration,changing,chevron,cigar,circle,cone,cross,cylinder,diamond,disk,egg,fireball,flash,formation,light,other,oval,rectangle,sphere,teardrop,triangle,unknown
0,2018.0,al,16.6,775.0,/nchs/pressroom/states/alabama/al.htm,1,Alabama,50645.326,al,691.0,1393.408828,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
1,2018.0,ak,14.6,110.0,/nchs/pressroom/states/alaska/ak.htm,2,Alaska,570640.95,ak,354.0,4231.830508,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,2018.0,az,23.8,1670.0,/nchs/pressroom/states/arizona/az.htm,4,Arizona,113594.084,az,2689.0,5949.009338,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0
3,2018.0,ar,15.7,444.0,/nchs/pressroom/states/arkansas/ar.htm,5,Arkansas,52035.477,ar,666.0,100867.138889,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,2018.0,ca,12.8,5348.0,/nchs/pressroom/states/california/ca.htm,6,California,155779.22,ca,9655.0,3928.781072,290.0,158.0,216.0,899.0,31.0,32.0,134.0,155.0,693.0,95.0,744.0,154.0,318.0,1983.0,677.0,420.0,159.0,674.0,79.0,891.0,603.0
5,2018.0,co,16.8,995.0,/nchs/pressroom/states/colorado/co.htm,8,Colorado,103641.888,co,1505.0,3024.394751,38.0,32.0,35.0,123.0,3.0,3.0,31.0,15.0,99.0,12.0,99.0,37.0,45.0,308.0,115.0,72.0,26.0,110.0,12.0,172.0,89.0
6,2018.0,ct,30.7,1069.0,/nchs/pressroom/states/connecticut/ct.htm,9,Connecticut,4842.355,ct,968.0,13089.214928,14.0,13.0,34.0,101.0,5.0,5.0,20.0,16.0,61.0,8.0,97.0,14.0,18.0,172.0,66.0,49.0,23.0,50.0,14.0,86.0,73.0
7,2018.0,de,43.8,401.0,/nchs/pressroom/states/delaware/de.htm,10,Delaware,1948.543,de,183.0,868.904372,4.0,2.0,,24.0,,,1.0,3.0,13.0,3.0,10.0,3.0,6.0,33.0,11.0,3.0,2.0,18.0,2.0,30.0,14.0
8,2018.0,fl,22.8,4698.0,/nchs/pressroom/states/florida/fl.htm,12,Florida,53624.759,fl,4200.0,13504.459262,113.0,51.0,89.0,387.0,13.0,13.0,72.0,56.0,242.0,37.0,452.0,66.0,126.0,821.0,280.0,214.0,69.0,292.0,38.0,385.0,293.0
9,2018.0,ga,13.2,1404.0,/nchs/pressroom/states/georgia/ga.htm,13,Georgia,57513.485,ga,1347.0,7968.701633,33.0,10.0,40.0,148.0,5.0,5.0,17.0,20.0,90.0,16.0,104.0,22.0,26.0,263.0,110.0,60.0,25.0,89.0,14.0,134.0,94.0


In [76]:
# Remove the link column. errors='ignore' keeps the cell safe to re-run:
# the original in-place drop raised KeyError once the column was already gone.
merge = merge.drop(columns='URL', errors='ignore')

In [77]:
# 'STATE' duplicates 'state_abb' after the merge. errors='ignore' keeps the cell
# safe to re-run: the original in-place drop raised KeyError on a second run.
merge = merge.drop(columns='STATE', errors='ignore')

In [81]:
# Give the mortality columns snake_case names that match the rest of the table
drug_column_names = {'YEAR': 'year', 'RATE': 'death_rate', 'DEATHS': 'drug_deaths'}
merged_final = merge.rename(columns=drug_column_names)

In [91]:
# NOTE(review): a plain pd.to_numeric(merged_final['drug_deaths'], errors='coerce')
# was tried here and is disabled. In the saved output below, drug_deaths is NaN
# for every state whose count is >= 1000 (e.g. Arizona, California), which
# suggests the raw values are strings that coerce cannot parse -- presumably
# thousands separators; confirm against the CSV and parse them at read time.
merged_final

Unnamed: 0,year,death_rate,drug_deaths,state_codes,state_names,state_census_area,state_abb,state_sightings,duration,changing,chevron,cigar,circle,cone,cross,cylinder,diamond,disk,egg,fireball,flash,formation,light,other,oval,rectangle,sphere,teardrop,triangle,unknown
0,2018.0,16.6,775.0,1,Alabama,50645.326,al,691.0,1393.408828,13.0,9.0,16.0,64.0,1.0,3.0,12.0,15.0,44.0,12.0,47.0,15.0,14.0,139.0,44.0,34.0,10.0,50.0,6.0,83.0,46.0
1,2018.0,14.6,110.0,2,Alaska,570640.95,ak,354.0,4231.830508,5.0,,6.0,30.0,1.0,,6.0,5.0,29.0,1.0,18.0,12.0,6.0,98.0,22.0,20.0,4.0,29.0,1.0,27.0,21.0
2,2018.0,23.8,,4,Arizona,113594.084,az,2689.0,5949.009338,77.0,32.0,54.0,239.0,14.0,7.0,42.0,31.0,135.0,25.0,182.0,43.0,125.0,613.0,220.0,116.0,39.0,177.0,25.0,254.0,176.0
3,2018.0,15.7,444.0,5,Arkansas,52035.477,ar,666.0,100867.138889,19.0,11.0,24.0,70.0,1.0,2.0,17.0,9.0,41.0,4.0,28.0,14.0,13.0,149.0,37.0,22.0,9.0,30.0,8.0,92.0,55.0
4,2018.0,12.8,,6,California,155779.22,ca,9655.0,3928.781072,290.0,158.0,216.0,899.0,31.0,32.0,134.0,155.0,693.0,95.0,744.0,154.0,318.0,1983.0,677.0,420.0,159.0,674.0,79.0,891.0,603.0
5,2018.0,16.8,995.0,8,Colorado,103641.888,co,1505.0,3024.394751,38.0,32.0,35.0,123.0,3.0,3.0,31.0,15.0,99.0,12.0,99.0,37.0,45.0,308.0,115.0,72.0,26.0,110.0,12.0,172.0,89.0
6,2018.0,30.7,,9,Connecticut,4842.355,ct,968.0,13089.214928,14.0,13.0,34.0,101.0,5.0,5.0,20.0,16.0,61.0,8.0,97.0,14.0,18.0,172.0,66.0,49.0,23.0,50.0,14.0,86.0,73.0
7,2018.0,43.8,401.0,10,Delaware,1948.543,de,183.0,868.904372,4.0,2.0,,24.0,,,1.0,3.0,13.0,3.0,10.0,3.0,6.0,33.0,11.0,3.0,2.0,18.0,2.0,30.0,14.0
8,2018.0,22.8,,12,Florida,53624.759,fl,4200.0,13504.459262,113.0,51.0,89.0,387.0,13.0,13.0,72.0,56.0,242.0,37.0,452.0,66.0,126.0,821.0,280.0,214.0,69.0,292.0,38.0,385.0,293.0
9,2018.0,13.2,,13,Georgia,57513.485,ga,1347.0,7968.701633,33.0,10.0,40.0,148.0,5.0,5.0,17.0,20.0,90.0,16.0,104.0,22.0,26.0,263.0,110.0,60.0,25.0,89.0,14.0,134.0,94.0


In [93]:
# Save the combined UFO / drug-mortality table without the positional index
merged_final.to_csv('data/merged_final.csv', index=False)