florida_covid_july_16_2020_female_deaths.txt

In [60]:
### Load the Google Colab Drive helper and mount Google Drive
from google.colab import drive

### Mounts Drive under /content/drive; this may prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [61]:
import json 
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt

In [62]:
### In the Google Colab side pane, open the Folder icon, scroll down to the
### correct file, right-click it and select "Copy path" to get the path below.
with open('/content/drive/My Drive/florida_covid_july_16_2020_female_deaths.txt') as f:
    covid_data_list = json.loads(f.read())

In [63]:
### Flatten the nested DATE object into four columns
### (DATE.MONTH, DATE.DAY, DATE.YEAR, DATE.WEEKDAY)
data = pd.json_normalize(covid_data_list)

### Dataframe for female deaths.
### json_normalize already returns a DataFrame, so take an explicit copy rather
### than re-wrapping it; the cleaning cells below then never touch the raw `data`.
df_femd = data.copy()

### column headers
df_femd.columns

Index(['CASE', 'COUNTY', 'AGE', 'GENDER', 'DATE.MONTH', 'DATE.DAY',
       'DATE.YEAR', 'DATE.WEEKDAY'],
      dtype='object')

In [64]:
### (number of rows, number of columns) of the loaded frame
df_femd.shape

(2126, 8)

In [65]:
### Preview the first three records before any encoding
df_femd.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,1,Alachua,60,Female,4,9,2020,Thursday
1,2,Alachua,78,Female,4,9,2020,Thursday
2,6,Alachua,67,Female,4,18,2020,Saturday


In [66]:
### Counts for the individual months (number of records per DATE.MONTH value)
df_femd["DATE.MONTH"].value_counts()

04    735
05    542
06    519
07    195
03    135
Name: DATE.MONTH, dtype: int64

In [67]:
### Counts for the Weekdays (number of records per DATE.WEEKDAY name)
df_femd["DATE.WEEKDAY"].value_counts()

Thursday     340
Wednesday    327
Monday       321
Friday       304
Tuesday      293
Saturday     289
Sunday       252
Name: DATE.WEEKDAY, dtype: int64

In [68]:
### Weekdays Find/Replace encoding: Sunday = 1 through Saturday = 7
cleanup_days = {
    "DATE.WEEKDAY": {
        "Sunday": 1,
        "Monday": 2,
        "Tuesday": 3,
        "Wednesday": 4,
        "Thursday": 5,
        "Friday": 6,
        "Saturday": 7,
    }
}

### Build a new frame instead of mutating in place, and cast the column
### explicitly: no later cell converts DATE.WEEKDAY, and relying on replace()
### to turn the object column into integers is deprecated in recent pandas.
### The cast also fails loudly if a weekday name was not covered by the mapping.
df_femd = df_femd.replace(cleanup_days).astype({"DATE.WEEKDAY": "int64"})

df_femd.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.YEAR,DATE.WEEKDAY
0,1,Alachua,60,Female,4,9,2020,5
1,2,Alachua,78,Female,4,9,2020,5
2,6,Alachua,67,Female,4,18,2020,7


In [69]:
### DATE.YEAR is removed since all values are 2020
df_femd = df_femd.drop(columns=['DATE.YEAR'])

In [70]:
### cast the day and month columns to integers in a single step
df_femd = df_femd.astype({'DATE.DAY': int, 'DATE.MONTH': int})

In [71]:
### Preview after dropping DATE.YEAR and casting the date columns
df_femd.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,1,Alachua,60,Female,4,9,5
1,2,Alachua,78,Female,4,9,5
2,6,Alachua,67,Female,4,18,7


In [72]:
### cast AGE to integer
df_femd = df_femd.astype({'AGE': int})

In [73]:
### data types check (CASE, COUNTY and GENDER are still object at this point)
df_femd.dtypes

CASE            object
COUNTY          object
AGE              int64
GENDER          object
DATE.MONTH       int64
DATE.DAY         int64
DATE.WEEKDAY     int64
dtype: object

In [74]:
### Counts for the Counties (number of records per COUNTY name)
df_femd["COUNTY"].value_counts()

Dade            548
Palm Beach      278
Broward         197
Pinellas        153
Hillsborough    125
Lee              99
Polk             91
Manatee          67
Charlotte        47
Volusia          41
St. Lucie        40
Sarasota         39
Duval            39
Orange           38
Collier          38
Escambia         31
Clay             21
Martin           21
Brevard          20
Pasco            19
Lake             18
Seminole         17
Osceola          16
Hendry           13
Indian River     10
Suwannee          9
Citrus            7
Okaloosa          7
Alachua           6
Sumter            6
Washington        5
Hernando          5
Leon              5
Highlands         4
St. Johns         4
Marion            4
Flagler           3
Gadsden           3
Baker             3
Walton            3
Calhoun           3
Jefferson         2
Putnam            2
Santa Rosa        2
Desoto            2
Jackson           2
Bradford          2
Holmes            1
Columbia          1
Bay               1


In [75]:
### check how many distinct county names appear in the data
df_femd["COUNTY"].nunique()

58

In [76]:
### County Find/Replace encoding: county name -> integer code (1-67),
### with 68 used for "Unknown".
cleanup_county = {"COUNTY": {
    "Alachua": 1, "Baker": 2, "Bay": 3, "Bradford": 4, "Brevard": 5, "Broward": 6,
    "Calhoun": 7, "Charlotte": 8, "Citrus": 9, "Clay": 10, "Collier": 11, "Columbia": 12,
    "Dade": 13, "Desoto": 14, "Dixie": 15, "Duval": 16, "Escambia": 17, "Flagler": 18,
    "Franklin": 19, "Gadsden": 20, "Gilchrist": 21, "Glades": 22, "Gulf": 23,
    "Hamilton": 24, "Hardee": 25, "Hendry": 26, "Hernando": 27, "Highlands": 28,
    "Hillsborough": 29, "Holmes": 30, "Indian River": 31, "Jackson": 32, "Jefferson": 33,
    "Lafayette": 34, "Lake": 35, "Lee": 36, "Leon": 37, "Levy": 38, "Liberty": 39,
    "Madison": 40, "Manatee": 41, "Marion": 42, "Martin": 43, "Monroe": 44,
    "Nassau": 45, "Okaloosa": 46, "Okeechobee": 47, "Orange": 48, "Osceola": 49,
    "Palm Beach": 50, "Pasco": 51, "Pinellas": 52, "Polk": 53, "Putnam": 54,
    "St. Johns": 55, "St. Lucie": 56, "Santa Rosa": 57, "Sarasota": 58, "Seminole": 59,
    "Sumter": 60, "Suwannee": 61, "Taylor": 62, "Union": 63, "Volusia": 64,
    "Wakulla": 65, "Walton": 66, "Washington": 67, "Unknown": 68,
}}

### Fail early with a readable message if the data contains a county spelling
### that the mapping does not cover; replace() would otherwise leave the string
### in place. Only string values are checked so the cell can be re-run safely
### after the column has already been encoded.
unmapped_counties = sorted(
    name for name in df_femd["COUNTY"].unique()
    if isinstance(name, str) and name not in cleanup_county["COUNTY"]
)
if unmapped_counties:
    raise ValueError(f"COUNTY values missing from cleanup_county: {unmapped_counties}")

### Build a new frame rather than mutating in place
df_femd = df_femd.replace(cleanup_county)

df_femd.head(3)

Unnamed: 0,CASE,COUNTY,AGE,GENDER,DATE.MONTH,DATE.DAY,DATE.WEEKDAY
0,1,1,60,Female,4,9,5
1,2,1,78,Female,4,9,5
2,6,1,67,Female,4,18,7


In [77]:
### make sure the encoded COUNTY column is stored as integers
df_femd = df_femd.astype({'COUNTY': int})

In [78]:
### add the outcome indicator columns: each record is marked as a death
### (Deaths = 1) and not as a case (Cases = 0)
df_femd = df_femd.assign(Deaths=1, Cases=0)

In [79]:
### add column for female to replace GENDER which will be dropped.
### Guarded so that re-running this cell does not raise
### "cannot insert Female, already exists".
if 'Female' not in df_femd.columns:
    df_femd.insert(3, 'Female', 1)

In [80]:
### add column for male to replace GENDER which will be dropped.
### Guarded so that re-running this cell does not raise
### "cannot insert Male, already exists".
if 'Male' not in df_femd.columns:
    df_femd.insert(4, 'Male', 0)

In [81]:
### GENDER is now represented by the Female / Male indicator columns
df_femd = df_femd.drop(columns=['GENDER'])

In [82]:
### remove the CASE column from the encoded frame
df_femd = df_femd.drop(columns=['CASE'])

In [83]:
### column headers after encoding and dropping GENDER / CASE
df_femd.columns

Index(['COUNTY', 'AGE', 'Female', 'Male', 'DATE.MONTH', 'DATE.DAY',
       'DATE.WEEKDAY', 'Deaths', 'Cases'],
      dtype='object')

In [84]:
### Preview the fully encoded frame
df_femd.head(3)

Unnamed: 0,COUNTY,AGE,Female,Male,DATE.MONTH,DATE.DAY,DATE.WEEKDAY,Deaths,Cases
0,1,60,1,0,4,9,5,1,0
1,1,78,1,0,4,9,5,1,0
2,1,67,1,0,4,18,7,1,0


In [85]:
### final data types check before saving
df_femd.dtypes

COUNTY          int64
AGE             int64
Female          int64
Male            int64
DATE.MONTH      int64
DATE.DAY        int64
DATE.WEEKDAY    int64
Deaths          int64
Cases           int64
dtype: object

In [86]:
# saving the dataframe 
# NOTE(review): with the default arguments the row index is written as an
# unnamed first column, and the relative path resolves against the runtime's
# current working directory rather than the mounted Drive - confirm both are intended.
df_femd.to_csv('encoded_female_deaths.csv') 