florida_covid_july_16_2020_female_cases.txt

In [26]:
### Load the Colab Drive helper and mount Google Drive at /content/drive
from google.colab import drive

### This will prompt for authorization.
### If Drive is already mounted the call only reports that; pass
### force_remount=True to force a fresh mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
import json 
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt

In [28]:
### In the Colab side pane open the Folder icon, scroll to the correct file,
### right-click it and select "Copy path".
### JSON text is UTF-8 by specification, so pin the encoding instead of
### relying on whatever the runtime's locale default happens to be.
with open('/content/drive/My Drive/florida_covid_july_16_2020_female_cases.txt', 'r', encoding='utf-8') as f:
    covid_data_list = json.load(f)

In [29]:
### Flatten the JSON records; the nested DATE entry is expanded into four
### columns: DATE.MONTH, DATE.DAY, DATE.YEAR and DATE.WEEKDAY
data = pd.json_normalize(covid_data_list)

### Working dataframe for female cases. json_normalize already returns a
### DataFrame, so take an explicit copy (re-wrapping with pd.DataFrame would
### share the underlying data) and keep `data` as the untouched raw frame.
df_femcase = data.copy()

### column headers
df_femcase.columns

Index(['CASE', 'COUNTY', 'AGE', 'GENDER', 'DATE.MONTH', 'DATE.DAY',
       'DATE.YEAR', 'DATE.WEEKDAY'],
      dtype='object')

In [30]:
### (rows, columns) of the freshly loaded frame
df_femcase.shape

(160076, 8)

In [31]:
### Preview the first three rows of the raw frame
df_femcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,2,Alachua,21,Female,3,17,2020,Tuesday
1,4,Alachua,50,Female,3,18,2020,Wednesday
2,6,Alachua,39,Female,3,18,2020,Wednesday


In [32]:
### Number of records per DATE.MONTH value, most frequent first
df_femcase["DATE.MONTH"].value_counts()

07    80232
06    51309
04    14084
05    11329
03     3122
Name: DATE.MONTH, dtype: int64

In [33]:
### Number of records per DATE.WEEKDAY value, most frequent first
df_femcase["DATE.WEEKDAY"].value_counts()

Wednesday    26270
Saturday     25835
Friday       24208
Thursday     22976
Tuesday      22446
Monday       19311
Sunday       19030
Name: DATE.WEEKDAY, dtype: int64

In [34]:
### Weekday encoding: Sunday = 1 ... Saturday = 7
cleanup_days = {"DATE.WEEKDAY": {"Sunday": 1, "Monday": 2, "Tuesday": 3, "Wednesday": 4,
                                 "Thursday": 5, "Friday": 6, "Saturday": 7}}

### Replace only the target column (no whole-frame in-place mutation).
### replace() leaves unmapped values untouched and its implicit object -> int
### downcast is deprecated in recent pandas, so cast explicitly: the dtype is
### then guaranteed and any weekday missing from the table raises here instead
### of silently staying a string.
df_femcase["DATE.WEEKDAY"] = (
    df_femcase["DATE.WEEKDAY"]
    .replace(cleanup_days["DATE.WEEKDAY"])
    .astype(int)
)

df_femcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,2,Alachua,21,Female,3,17,2020,3
1,4,Alachua,50,Female,3,18,2020,4
2,6,Alachua,39,Female,3,18,2020,4


In [35]:
### DATE.YEAR is constant (all values are 2020), so it is removed
df_femcase = df_femcase.drop(columns='DATE.YEAR')

In [36]:
### cast the day and month columns to integers
for date_part in ('DATE.DAY', 'DATE.MONTH'):
    df_femcase[date_part] = df_femcase[date_part].astype(int)

In [37]:
### Preview the frame after dropping DATE.YEAR and casting the date parts
df_femcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,2,Alachua,21,Female,3,17,3
1,4,Alachua,50,Female,3,18,4
2,6,Alachua,39,Female,3,18,4


In [38]:
### cast AGE to integer
df_femcase = df_femcase.astype({'AGE': int})

In [39]:
### Inspect each column's dtype to confirm the integer casts took effect
df_femcase.dtypes

CASE            object
COUNTY          object
AGE              int64
GENDER          object
DATE.MONTH       int64
DATE.DAY         int64
DATE.WEEKDAY     int64
dtype: object

In [40]:
### Number of records per COUNTY value, most frequent first
df_femcase["COUNTY"].value_counts()

Dade            37756
Broward         18049
Palm Beach      11983
Hillsborough    11163
Orange          10491
                ...  
Jefferson          53
Union              43
Lafayette          34
Franklin           29
Liberty            28
Name: COUNTY, Length: 68, dtype: int64

In [41]:
### Count the distinct COUNTY values present in the data
df_femcase["COUNTY"].nunique()

68

In [42]:
### County encoding: each county name maps to a fixed integer code
### (1-67 for the named counties, 68 for "Unknown")
cleanup_county = {"COUNTY": {
    "Alachua": 1, "Baker": 2, "Bay": 3, "Bradford": 4, "Brevard": 5, "Broward": 6,
    "Calhoun": 7, "Charlotte": 8, "Citrus": 9, "Clay": 10, "Collier": 11, "Columbia": 12,
    "Dade": 13, "Desoto": 14, "Dixie": 15, "Duval": 16, "Escambia": 17, "Flagler": 18,
    "Franklin": 19, "Gadsden": 20, "Gilchrist": 21, "Glades": 22, "Gulf": 23,
    "Hamilton": 24, "Hardee": 25, "Hendry": 26, "Hernando": 27, "Highlands": 28,
    "Hillsborough": 29, "Holmes": 30, "Indian River": 31, "Jackson": 32, "Jefferson": 33,
    "Lafayette": 34, "Lake": 35, "Lee": 36, "Leon": 37, "Levy": 38, "Liberty": 39,
    "Madison": 40, "Manatee": 41, "Marion": 42, "Martin": 43, "Monroe": 44,
    "Nassau": 45, "Okaloosa": 46, "Okeechobee": 47, "Orange": 48, "Osceola": 49,
    "Palm Beach": 50, "Pasco": 51, "Pinellas": 52, "Polk": 53, "Putnam": 54,
    "St. Johns": 55, "St. Lucie": 56, "Santa Rosa": 57, "Sarasota": 58, "Seminole": 59,
    "Sumter": 60, "Suwannee": 61, "Taylor": 62, "Union": 63, "Volusia": 64,
    "Wakulla": 65, "Walton": 66, "Washington": 67, "Unknown": 68,
}}

### Replace only the COUNTY column (no whole-frame in-place mutation).
### replace() leaves unmapped names untouched and its implicit object -> int
### downcast is deprecated in recent pandas, so cast explicitly: a county name
### missing from the table (e.g. a spelling variant) raises here instead of
### silently surviving as a string in an otherwise numeric column.
df_femcase["COUNTY"] = (
    df_femcase["COUNTY"]
    .replace(cleanup_county["COUNTY"])
    .astype(int)
)

df_femcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,2,1,21,Female,3,17,3
1,4,1,50,Female,3,18,4
2,6,1,39,Female,3,18,4


In [43]:
### every row is a female case record: flag it as a case, not a death
df_femcase = df_femcase.assign(Deaths=0, Cases=1)

In [44]:
### add indicator column for female, placed where GENDER currently sits
### (GENDER is dropped afterwards); looking the position up avoids a
### hard-coded index that breaks if the column order ever changes
df_femcase.insert(df_femcase.columns.get_loc('GENDER'), 'Female', 1)

In [45]:
### add indicator column for male immediately before GENDER (GENDER is
### dropped afterwards); looking the position up avoids a hard-coded index
### that breaks if the column order ever changes
df_femcase.insert(df_femcase.columns.get_loc('GENDER'), 'Male', 0)

In [46]:
### GENDER is now represented by the Female / Male indicator columns
df_femcase = df_femcase.drop(columns='GENDER')

In [47]:
### remove the CASE column from the encoded frame
df_femcase = df_femcase.drop(columns='CASE')

In [48]:
### Confirm the remaining column names and their order
df_femcase.columns

Index(['COUNTY', 'AGE', 'Female', 'Male', 'DATE.MONTH', 'DATE.DAY',
       'DATE.WEEKDAY', 'Deaths', 'Cases'],
      dtype='object')

In [49]:
### Preview the first three rows of the encoded frame
df_femcase.head(3)

Unnamed: 0,COUNTY,AGE,Female,Male,DATE.MONTH,DATE.DAY,DATE.WEEKDAY,Deaths,Cases
0,1,21,1,0,3,17,3,0,1
1,1,50,1,0,3,18,4,0,1
2,1,39,1,0,3,18,4,0,1


In [50]:
### Check the dtype of every remaining column before saving
df_femcase.dtypes

COUNTY          int64
AGE             int64
Female          int64
Male            int64
DATE.MONTH      int64
DATE.DAY        int64
DATE.WEEKDAY    int64
Deaths          int64
Cases           int64
dtype: object

In [51]:
# write the encoded dataframe to CSV in the current working directory;
# the row index is written as the first (unnamed) column
df_femcase.to_csv('encoded_female_cases.csv', index=True)