In [94]:
# Dependencies and Setup
import pandas as pd
import seaborn as sns
from datetime import datetime,date

# File to Load
file_to_load = "Resources/US_Unemployment.csv"

# Read file and store into Pandas data frame
df = pd.read_csv(file_to_load)

# Output File (CSV)
output_data_file = "Resources/cleaned_data.csv"

# Preview of the DataFrame
df.head()

Unnamed: 0,State,Filed week ended,Initial Claims,Reflecting Week Ended,Continued Claims,Covered Employment,Insured Unemployment Rate
0,Alabama,1/4/2020,4578,12/28/2019,18523,1923741,0.96
1,Alabama,1/11/2020,3629,1/4/2020,21143,1923741,1.1
2,Alabama,1/18/2020,2483,1/11/2020,17402,1923741,0.9
3,Alabama,1/25/2020,2129,1/18/2020,18390,1923741,0.96
4,Alabama,2/1/2020,2170,1/25/2020,17284,1923741,0.9


In [95]:
# Check for correct data types - need to convert
df.dtypes

State                         object
Filed week ended              object
Initial Claims                object
Reflecting Week Ended         object
Continued Claims              object
Covered Employment            object
Insured Unemployment Rate    float64
dtype: object

In [96]:
# Remove commas from columns to be able to cast as a number
df["Initial Claims"] = df["Initial Claims"].str.replace(',', '')
df["Continued Claims"] = df["Continued Claims"].str.replace(',', '')
df["Covered Employment"] = df["Covered Employment"].str.replace(',', '')

In [97]:
# Convert data types
df['Initial Claims'] = pd.to_numeric(df['Initial Claims'])
df['Continued Claims'] = pd.to_numeric(df['Continued Claims'])
df['Covered Employment'] = pd.to_numeric(df['Covered Employment'])

In [98]:
# Verify adjusted data types
df.dtypes

State                         object
Filed week ended              object
Initial Claims               float64
Reflecting Week Ended         object
Continued Claims             float64
Covered Employment           float64
Insured Unemployment Rate    float64
dtype: object

In [99]:
# Check column names for spaces - looks good
df.columns

Index(['State', 'Filed week ended', 'Initial Claims', 'Reflecting Week Ended',
       'Continued Claims', 'Covered Employment', 'Insured Unemployment Rate'],
      dtype='object')

In [100]:
# Identify incomplete rows
df.count()

State                        1061
Filed week ended             1060
Initial Claims               1060
Reflecting Week Ended        1060
Continued Claims             1060
Covered Employment           1060
Insured Unemployment Rate    1060
dtype: int64

In [101]:
# Drop all rows with missing information
df = unemployment_df.dropna(how='any')

In [102]:
# Verify dropped rows
df.count()

State                        1060
Filed week ended             1060
Initial Claims               1060
Reflecting Week Ended        1060
Continued Claims             1060
Covered Employment           1060
Insured Unemployment Rate    1060
dtype: int64

In [103]:
# Check states values
df['State'].value_counts()

West Virginia           20
Virgin Islands          20
Alabama                 20
Missouri                20
Pennsylvania            20
New York                20
Oklahoma                20
Vermont                 20
Oregon                  20
Tennessee               20
Florida                 20
Montana                 20
Wisconsin               20
Illinois                20
Georgia                 20
Kentucky                20
Iowa                    20
Idaho                   20
Maine                   20
Indiana                 20
Wyoming                 20
North Dakota            20
Utah                    20
Rhode Island            20
Virginia                20
Texas                   20
New Mexico              20
Kansas                  20
Puerto Rico             20
Arizona                 20
District of Columbia    20
Maryland                20
Washington              20
California              20
Mississippi             20
Nevada                  20
Connecticut             20
O

In [104]:
# Display a statistical overview for a high level insight into the data
df.describe()

Unnamed: 0,Insured Unemployment Rate
count,1060.0
mean,4.872972
std,5.584986
min,0.37
25%,1.12
50%,2.12
75%,7.8925
max,31.2
