florida_covid_july_16_2020_male_cases.txt

In [27]:
### Load the Drive helper and mount (Colab-only: google.colab is not available in a plain Jupyter kernel)
from google.colab import drive

### This will prompt for authorization.
### If Drive is already mounted, the call only reports that (see the cell output below).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
import json 
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt

In [29]:
### In the Google Colab side pane, open the Folder icon, scroll down to the correct file,
### right-click it and select "Copy path"; paste that path below.
### The encoding is given explicitly so decoding does not depend on the runtime's locale default.
with open('/content/drive/My Drive/florida_covid_july_16_2020_male_cases.txt', 'r', encoding='utf-8') as f:
    covid_data_list = json.load(f)

In [30]:
### Flatten the nested DATE field into separate DATE.MONTH, DATE.DAY, DATE.YEAR and DATE.WEEKDAY columns
data = pd.json_normalize(covid_data_list)

### Dataframe for male cases.
### An explicit copy keeps the normalized frame `data` independent of the in-place edits made to df_malcase below.
df_malcase = data.copy()

### column headers
df_malcase.columns

Index(['CASE', 'COUNTY', 'AGE', 'GENDER', 'DATE.MONTH', 'DATE.DAY',
       'DATE.YEAR', 'DATE.WEEKDAY'],
      dtype='object')

In [31]:
### (rows, columns) of the male-case frame
df_malcase.shape

(153012, 8)

In [32]:
### Preview the first three rows before any encoding
df_malcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,1,Alachua,24,Male,3,13,2020,Friday
1,3,Alachua,22,Male,3,17,2020,Tuesday
2,5,Alachua,51,Male,3,18,2020,Wednesday


In [33]:
### Counts for the individual months: number of rows per DATE.MONTH value
df_malcase["DATE.MONTH"].value_counts()

07    74331
06    50643
04    13531
05    10845
03     3662
Name: DATE.MONTH, dtype: int64

In [34]:
### Counts for the weekdays: number of rows per DATE.WEEKDAY value
df_malcase["DATE.WEEKDAY"].value_counts()

Wednesday    24785
Saturday     24512
Friday       23483
Thursday     22158
Tuesday      21165
Monday       18872
Sunday       18037
Name: DATE.WEEKDAY, dtype: int64

In [35]:
### Weekdays Find/Replace encoding: Sunday = 1 through Saturday = 7
cleanup_days = {"DATE.WEEKDAY": {"Sunday": 1, "Monday": 2, "Tuesday": 3, "Wednesday": 4,
                                 "Thursday": 5, "Friday": 6, "Saturday": 7}}

### Encode only the DATE.WEEKDAY column and cast explicitly instead of relying on replace()
### to downcast the object column. The cast raises if any weekday name was left unmapped,
### and re-running the cell on already-encoded values is a no-op.
df_malcase["DATE.WEEKDAY"] = df_malcase["DATE.WEEKDAY"].replace(cleanup_days["DATE.WEEKDAY"]).astype(int)

df_malcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,1,Alachua,24,Male,3,13,2020,6
1,3,Alachua,22,Male,3,17,2020,3
2,5,Alachua,51,Male,3,18,2020,4


In [36]:
### DATE.YEAR is removed because, per the original note, every value is 2020
df_malcase = df_malcase.drop(columns=['DATE.YEAR'])

In [37]:
### Cast the day and month columns to integers, one column at a time
for date_col in ('DATE.DAY', 'DATE.MONTH'):
    df_malcase[date_col] = df_malcase[date_col].astype(int)

In [38]:
### Preview the first three rows after dropping DATE.YEAR and casting the date columns
df_malcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,1,Alachua,24,Male,3,13,6
1,3,Alachua,22,Male,3,17,3
2,5,Alachua,51,Male,3,18,4


In [39]:
### Cast AGE to integer
df_malcase['AGE'] = df_malcase['AGE'].astype(dtype=int)

In [40]:
### data types check: AGE and the DATE.* columns should now be integers
df_malcase.dtypes

CASE            object
COUNTY          object
AGE              int64
GENDER          object
DATE.MONTH       int64
DATE.DAY         int64
DATE.WEEKDAY     int64
dtype: object

In [41]:
### Counts for the counties: number of rows per COUNTY value
df_malcase["COUNTY"].value_counts()

Dade            37323
Broward         17306
Palm Beach      11622
Orange          10589
Hillsborough     9895
                ...  
Dixie              65
Gulf               55
Jefferson          49
Lafayette          30
Franklin           19
Name: COUNTY, Length: 68, dtype: int64

In [42]:
### Check how many unique COUNTY values exist (the encoding dict below must cover every one of them)
df_malcase["COUNTY"].nunique()

68

In [43]:
### County Find/Replace encoding: county name -> integer code (1-67 in the order listed, 68 = "Unknown")
cleanup_county = {"COUNTY": {
    "Alachua": 1, "Baker": 2, "Bay": 3, "Bradford": 4, "Brevard": 5, "Broward": 6,
    "Calhoun": 7, "Charlotte": 8, "Citrus": 9, "Clay": 10, "Collier": 11, "Columbia": 12,
    "Dade": 13, "Desoto": 14, "Dixie": 15, "Duval": 16, "Escambia": 17, "Flagler": 18,
    "Franklin": 19, "Gadsden": 20, "Gilchrist": 21, "Glades": 22, "Gulf": 23,
    "Hamilton": 24, "Hardee": 25, "Hendry": 26, "Hernando": 27, "Highlands": 28,
    "Hillsborough": 29, "Holmes": 30, "Indian River": 31, "Jackson": 32, "Jefferson": 33,
    "Lafayette": 34, "Lake": 35, "Lee": 36, "Leon": 37, "Levy": 38, "Liberty": 39,
    "Madison": 40, "Manatee": 41, "Marion": 42, "Martin": 43, "Monroe": 44,
    "Nassau": 45, "Okaloosa": 46, "Okeechobee": 47, "Orange": 48, "Osceola": 49,
    "Palm Beach": 50, "Pasco": 51, "Pinellas": 52, "Polk": 53, "Putnam": 54,
    "St. Johns": 55, "St. Lucie": 56, "Santa Rosa": 57, "Sarasota": 58, "Seminole": 59,
    "Sumter": 60, "Suwannee": 61, "Taylor": 62, "Union": 63, "Volusia": 64,
    "Wakulla": 65, "Walton": 66, "Washington": 67, "Unknown": 68,
}}

### Encode only the COUNTY column and cast explicitly instead of relying on replace()
### to downcast the object column. The cast raises if any county name is missing from
### the dict above, and re-running the cell on already-encoded values is a no-op.
df_malcase["COUNTY"] = df_malcase["COUNTY"].replace(cleanup_county["COUNTY"]).astype(int)

df_malcase.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,1,1,24,Male,3,13,6
1,3,1,22,Male,3,17,3
2,5,1,51,Male,3,18,4


In [44]:
### Flag every row of the male-case frame as a case (Cases = 1) and not a death (Deaths = 0)
for indicator, flag in (('Deaths', 0), ('Cases', 1)):
    df_malcase[indicator] = flag

In [45]:
### add a constant Female = 0 indicator column at position 3; together with Male it replaces GENDER, which will be dropped
df_malcase.insert(loc=3, column='Female', value=0)

In [47]:
### Constant Male = 1 indicator column at position 4; it stands in for GENDER, which will be dropped
df_malcase.insert(loc=4, column='Male', value=1)

In [48]:
### Remove the text GENDER column
df_malcase = df_malcase.drop(columns=['GENDER'])

In [49]:
### Remove the CASE column from the encoded frame
df_malcase = df_malcase.drop(columns=['CASE'])

In [50]:
### Column headers after the GENDER and CASE columns were dropped
df_malcase.columns

Index(['COUNTY', 'AGE', 'Female', 'Male', 'DATE.MONTH', 'DATE.DAY',
       'DATE.WEEKDAY', 'Deaths', 'Cases'],
      dtype='object')

In [51]:
### Preview the first three rows of the fully encoded frame
df_malcase.head(3)

Unnamed: 0,COUNTY,AGE,Female,Male,DATE.MONTH,DATE.DAY,DATE.WEEKDAY,Deaths,Cases
0,1,24,0,1,3,13,6,0,1
1,1,22,0,1,3,17,3,0,1
2,1,51,0,1,3,18,4,0,1


In [52]:
### Final data types check before export
df_malcase.dtypes

COUNTY          int64
AGE             int64
Female          int64
Male            int64
DATE.MONTH      int64
DATE.DAY        int64
DATE.WEEKDAY    int64
Deaths          int64
Cases           int64
dtype: object

In [53]:
### Save the encoded frame as CSV; the relative path resolves against the current working directory.
### NOTE(review): index=False is not passed, so the row index is written as the first column - confirm downstream readers expect that.
df_malcase.to_csv(path_or_buf='encoded_male_cases.csv')