In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import plotly as pl
import plotly.express as px

In [3]:
filepath = 'data/insurance_weekly_unemployment.csv'
df = pd.read_csv(filepath, skiprows=[0,1,2], header=1)
df = df.drop(df.columns[7:10], axis=1)
df = df.drop([3816, 3817], axis=0) # drop last 2 rows
df.tail()

Unnamed: 0,State,Filed week ended,Initial Claims,Reflecting Week Ended,Continued Claims,Covered Employment,Insured Unemployment Rate
3811,Wyoming,18/4/2020,4381,11/4/2020,14302,270013,5.3
3812,Wyoming,25/4/2020,3497,18/4/2020,16566,270013,6.14
3813,Wyoming,2/5/2020,2854,25/4/2020,17521,270013,6.49
3814,Wyoming,9/5/2020,3297,2/5/2020,17928,270013,6.64
3815,Wyoming,16/5/2020,2303,9/5/2020,17335,270013,6.42


In [4]:
df['State'].unique()

array(['Alabama', 'Alaska', 'Arkansas', 'Arizona', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'North Carolina', 'North Dakota',
       'Nebraska', 'New Hampshire', 'New Jersey', 'New Mexico', 'Nevada',
       'New York', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Puerto Rico', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virgin Islands',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)

# Data Preprocessing

In [5]:
state_codes = {
    'Alabama':'al',
    'Alaska':'ak',
    'Arkansas':'ar',
    'Arizona':'aa',
    'California':'ca',
    'Colorado':'co',
    'Connecticut':'ct',
    'Delaware':'de',
    'District of Columbia':'dc',
    'Florida':'fl',
    'Georgia':'ga',
    'Hawaii':'hi',
    'Idaho':'id',
    'Illinois':'il',
    'Indiana':'in',
    'Iowa':'ia',
    'Kansas':'ks',
    'Kentucky':'ky',
    'Louisiana':'la',
    'Maine':'me',
    'Maryland':'md',
   'Massachusetts':'ma',
    'Michigan':'mi',
    'Minnesota':'mn',
    'Mississippi':'ms',
    'Missouri':'mo',
    'Montana':'mt',
    'North Carolina':'nc',
    'North Dakota':'nd',
    'Nebraska':'ne',
    'New Hampshire':'nh',
    'New Jersey':'nj',
    'New Mexico':'nm',
    'Nevada':'nv',
    'New York':'ny',
    'Ohio':'oh',
    'Oklahoma':'ok',
    'Oregon':'or',
    'Pennsylvania':'pa',
    'Puerto Rico':'',
    'Rhode Island':'ri',
    'South Carolina':'sc',
    'South Dakota':'sd',
    'Tennessee':'tn',
    'Texas':'tx',
    'Utah':'ut',
    'Vermont':'vt',
    'Virgin Islands':'',
    'Virginia':'va', 
    'Washington':'wa', 
    'West Virginia':'wv',
    'Wisconsin':'wi',
    'Wyoming':'wy'
    
}

df['State_code'] = df['State'].map(state_codes)
df.tail()

Unnamed: 0,State,Filed week ended,Initial Claims,Reflecting Week Ended,Continued Claims,Covered Employment,Insured Unemployment Rate,State_code
3811,Wyoming,18/4/2020,4381,11/4/2020,14302,270013,5.3,wy
3812,Wyoming,25/4/2020,3497,18/4/2020,16566,270013,6.14,wy
3813,Wyoming,2/5/2020,2854,25/4/2020,17521,270013,6.49,wy
3814,Wyoming,9/5/2020,3297,2/5/2020,17928,270013,6.64,wy
3815,Wyoming,16/5/2020,2303,9/5/2020,17335,270013,6.42,wy


In [7]:
df.to_csv('data/modified_insurance_claims.csv', index=False)

# Tackling cleaned data

In [52]:
filepath = 'data/modified_insurance_claims.csv'
df = pd.read_csv(filepath)
df.columns

Index(['State', 'Filed week ended', 'Initial Claims', 'Reflecting Week Ended',
       'Continued Claims', 'Covered Employment', 'Insured Unemployment Rate',
       'State_code'],
      dtype='object')

In [53]:
df['State_code'] = df['State_code'].str.upper()
df['State_code'].unique()

array(['AL', 'AK', 'AR', 'AA', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
       'NV', 'NY', 'OH', 'OK', 'OR', 'PA', nan, 'RI', 'SC', 'SD', 'TN',
       'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'], dtype=object)

In [54]:
df['Initial Claims'] = df['Initial Claims'].astype(int)

In [55]:
filtered_df = df[df['Filed week ended'] == '25/4/2020']
filtered_df.head()

Unnamed: 0,State,Filed week ended,Initial Claims,Reflecting Week Ended,Continued Claims,Covered Employment,Insured Unemployment Rate,State_code
68,Alabama,25/4/2020,74966,18/4/2020,173855.0,1929897.0,9.01,AL
140,Alaska,25/4/2020,10313,18/4/2020,52022.0,308137.0,16.88,AK
212,Arkansas,25/4/2020,17671,18/4/2020,109337.0,1196468.0,9.14,AR
284,Arizona,25/4/2020,52581,18/4/2020,196401.0,2885408.0,6.81,AA
356,California,25/4/2020,325343,18/4/2020,1887575.0,17330010.0,10.89,CA


In [59]:
def plot_map(df):
    
    plot_data = df[['Filed week ended', 'State', 'State_code', 'Initial Claims']]

    print(plot_data)
    
    fig = px.choropleth(plot_data,
                       locations='State_code',
                       color='Initial Claims',
                       hover_name='State',
                       locationmode='USA-states')
    
    fig.update_layout(title_text= 'Unemployment Claims',
                     geo_scope='usa')
    fig.show()
    return

In [60]:
plot_map(filtered_df)

     Filed week ended                 State State_code  Initial Claims
68          25/4/2020               Alabama         AL           74966
140         25/4/2020                Alaska         AK           10313
212         25/4/2020              Arkansas         AR           17671
284         25/4/2020               Arizona         AA           52581
356         25/4/2020            California         CA          325343
428         25/4/2020              Colorado         CO           38662
500         25/4/2020           Connecticut         CT           33041
572         25/4/2020              Delaware         DE            7947
644         25/4/2020  District of Columbia         DC            8708
716         25/4/2020               Florida         FL          433103
788         25/4/2020               Georgia         GA          266565
860         25/4/2020                Hawaii         HI           22496
932         25/4/2020                 Idaho         ID            8827
1004  

# Implementing Slider

In [63]:
df['Filed week ended'].unique()

array(['5/1/2019', '12/1/2019', '19/1/2019', '26/1/2019', '2/2/2019',
       '9/2/2019', '16/2/2019', '23/2/2019', '2/3/2019', '9/3/2019',
       '16/3/2019', '23/3/2019', '30/3/2019', '6/4/2019', '13/4/2019',
       '20/4/2019', '27/4/2019', '4/5/2019', '11/5/2019', '18/5/2019',
       '25/5/2019', '1/6/2019', '8/6/2019', '15/6/2019', '22/6/2019',
       '29/6/2019', '6/7/2019', '13/7/2019', '20/7/2019', '27/7/2019',
       '3/8/2019', '10/8/2019', '17/8/2019', '24/8/2019', '31/8/2019',
       '7/9/2019', '14/9/2019', '21/9/2019', '28/9/2019', '5/10/2019',
       '12/10/2019', '19/10/2019', '26/10/2019', '2/11/2019', '9/11/2019',
       '16/11/2019', '23/11/2019', '30/11/2019', '7/12/2019',
       '14/12/2019', '21/12/2019', '28/12/2019', '4/1/2020', '11/1/2020',
       '18/1/2020', '25/1/2020', '1/2/2020', '8/2/2020', '15/2/2020',
       '22/2/2020', '29/2/2020', '7/3/2020', '14/3/2020', '21/3/2020',
       '28/3/2020', '4/4/2020', '11/4/2020', '18/4/2020', '25/4/2020',
       '2/5/2

In [69]:
data_slider = []

for date in df['Filed week ended'].unique():
    df_filtered = df[df['Filed week ended'] == date]
    
    data = dict(
        plot_type='choropleth',
        locations=df_filtered['State_code'],
        z=df_filtered['Initial Claims'],
        locationmode='USA-states',
    )
    
    data_slider.append(data)

In [70]:
steps = []

for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label=i) # label to be displayed for each step (year)
    step['args'][1][i] = True
    steps.append(step)

In [71]:
sliders = [dict(active=0, pad={"t": 1}, steps=steps)]  

In [72]:
def plot_map(df):
    
    plot_data = df[['Filed week ended', 'State', 'State_code', 'Initial Claims']]

    print(plot_data)
    
    fig = px.choropleth(plot_data,
                       locations='State_code',
                       color='Initial Claims',
                       hover_name='State',
                       locationmode='USA-states')
    
    fig.update_layout(title_text= 'Unemployment Claims',
                     geo_scope='usa')
    return fig

In [73]:
fig = plot_map(df)
fig.update_layout(sliders=sliders)

fig.show()

     Filed week ended    State State_code  Initial Claims
0            5/1/2019  Alabama         AL            6660
1           12/1/2019  Alabama         AL            4386
2           19/1/2019  Alabama         AL            3217
3           26/1/2019  Alabama         AL            2414
4            2/2/2019  Alabama         AL            2505
...               ...      ...        ...             ...
3811        18/4/2020  Wyoming         WY            4381
3812        25/4/2020  Wyoming         WY            3497
3813         2/5/2020  Wyoming         WY            2854
3814         9/5/2020  Wyoming         WY            3297
3815        16/5/2020  Wyoming         WY            2303

[3816 rows x 4 columns]
