# Updated on 05/08/2021

This notebook assembles medical data from the following sources:
1. The COVID Tracking Project
2. JHU_US_MAP (Govex)

In [1]:
import pandas as pd
from datetime import datetime

# Importing the fips data

In [2]:
# Load the county-level FIPS lookup table (state/county codes and names).
state_county_fips = pd.read_csv(
    filepath_or_buffer='~/Documents/GitHub/COVID_DataHub/FIPS/state_county_fips.csv'
)
state_county_fips

Unnamed: 0,stfips,ctyfips,stname,ctyname,fips
0,1,1,Alabama,Autauga County,1001
1,1,3,Alabama,Baldwin County,1003
2,1,5,Alabama,Barbour County,1005
3,1,7,Alabama,Bibb County,1007
4,1,9,Alabama,Blount County,1009
...,...,...,...,...,...
3215,72,145,Puerto Rico,Vega Baja Municipio,72145
3216,72,147,Puerto Rico,Vieques Municipio,72147
3217,72,149,Puerto Rico,Villalba Municipio,72149
3218,72,151,Puerto Rico,Yabucoa Municipio,72151


In [3]:
# Strip the " County" text so the names match the bare county names used by the
# Govex table that is merged on 'ctyname' later (e.g. "Autauga County" ->
# "Autauga"). Names that do not contain " County" (e.g. "... Municipio") are
# left unchanged.
# regex=False pins a literal substring replacement: the default value of
# `regex` differs between pandas versions, so being explicit keeps the result
# independent of the installed version.
state_county_fips['ctyname'] = state_county_fips['ctyname'].str.replace(' County', '', regex=False)
state_county_fips

Unnamed: 0,stfips,ctyfips,stname,ctyname,fips
0,1,1,Alabama,Autauga,1001
1,1,3,Alabama,Baldwin,1003
2,1,5,Alabama,Barbour,1005
3,1,7,Alabama,Bibb,1007
4,1,9,Alabama,Blount,1009
...,...,...,...,...,...
3215,72,145,Puerto Rico,Vega Baja Municipio,72145
3216,72,147,Puerto Rico,Vieques Municipio,72147
3217,72,149,Puerto Rico,Villalba Municipio,72149
3218,72,151,Puerto Rico,Yabucoa Municipio,72151


In [4]:
# Load the state / US-territory FIPS lookup table (code, abbreviation, name).
state_territory_fips = pd.read_csv(
    filepath_or_buffer='~/Documents/GitHub/COVID_DataHub/FIPS/state_territory_fips.csv'
)
state_territory_fips

Unnamed: 0,stfips,stabbr,stname
0,1,AL,Alabama
1,2,AK,Alaska
2,4,AZ,Arizona
3,5,AR,Arkansas
4,6,CA,California
5,8,CO,Colorado
6,9,CT,Connecticut
7,10,DE,Delaware
8,11,DC,District of Columbia
9,12,FL,Florida


# Govex Medical data (JHU_US_MAP)

In [5]:
# Download the Govex county table, rename its key columns to the names used by
# the FIPS lookup tables, and keep only rows whose state FIPS code is positive.
df = (
    pd.read_csv("https://opendata.arcgis.com/datasets/4cb598ae041348fb92270f102a6783cb_0.csv")
    .rename(columns={
        'Countyname': 'ctyname',
        'ST_Name': 'stname',
        'ST_Abbr': 'stabbr',
        'ST_ID': 'stfips',
        'FIPS': 'fips',
    })
    .loc[lambda frame: frame['stfips'] > 0]
)
df

Unnamed: 0,OBJECTID,ctyname,stname,stabbr,stfips,fips,FatalityRa,Confirmedb,DeathsbyPo,PCTPOVALL_,...,Inpat_Occ,ICU_Occ,Inpat_Cap,Inpat_COV,Inp_nonCOV,ICU_Cap,ICU_COV,ICU_nonCOV,Shape__Area,Shape__Length
0,1,Autauga,Alabama,AL,1,1001,1.480627,18781.15,278.078729,15.2,...,52.166934,119.047619,89.000000,3.428571,43.000000,6.0,1.000000,6.142857,2.209382e+09,246839.865479
1,2,Baldwin,Alabama,AL,1,1003,1.534804,17786.50,272.987831,10.4,...,72.811060,87.053571,372.000000,7.142857,263.714286,32.0,2.000000,25.857143,5.770469e+09,728445.072448
2,3,Barbour,Alabama,AL,1,1005,2.165570,14384.29,311.501912,30.7,...,51.006711,74.285714,42.571429,1.142857,20.571429,5.0,0.000000,3.714286,3.258643e+09,307285.154510
3,4,Bibb,Alabama,AL,1,1007,2.131110,19192.64,409.016138,18.1,...,39.047619,,30.000000,0.000000,11.714286,0.0,0.000000,0.000000,2.310715e+09,227886.963840
4,5,Blount,Alabama,AL,1,1009,1.785375,18255.58,325.930549,13.6,...,44.897959,61.904762,42.000000,0.000000,18.857143,6.0,0.000000,3.714286,2.456058e+09,286306.840721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3322,3323,Sweetwater,Wyoming,WY,56,56037,1.188670,18170.54,215.987684,11.5,...,49.579832,42.857143,51.000000,7.714286,17.571429,10.0,2.857143,1.428571,4.872233e+10,948164.491340
3323,3324,Teton,Wyoming,WY,56,56039,0.264101,22770.62,60.137457,6.1,...,37.202381,26.190476,48.000000,2.285714,15.571429,6.0,1.000000,0.571429,2.106816e+10,688448.365694
3324,3325,Uinta,Wyoming,WY,56,56041,0.734177,19288.05,141.608477,11.3,...,10.227273,0.000000,44.000000,0.000000,4.500000,6.0,0.000000,0.000000,9.587595e+09,394479.315921
3325,3326,Washakie,Wyoming,WY,56,56043,1.928375,22611.19,436.028404,10.5,...,43.750000,0.000000,16.000000,4.142857,2.857143,3.0,0.000000,0.000000,1.119331e+10,529956.912317


In [6]:
# Right join: every Govex row is kept, and 'ctyfips' is attached from the FIPS
# lookup wherever all four key columns agree. Rows without a match get a
# missing 'ctyfips', so the column is cast to the nullable Int64 dtype, which
# can hold integers alongside missing values.
govex_medical = state_county_fips.merge(
    df,
    how="right",
    on=["fips", 'stname', 'stfips', 'ctyname'],
).astype({'ctyfips': 'Int64'})
govex_medical

Unnamed: 0,stfips,ctyfips,stname,ctyname,fips,OBJECTID,stabbr,FatalityRa,Confirmedb,DeathsbyPo,...,Inpat_Occ,ICU_Occ,Inpat_Cap,Inpat_COV,Inp_nonCOV,ICU_Cap,ICU_COV,ICU_nonCOV,Shape__Area,Shape__Length
0,1,1,Alabama,Autauga,1001,1,AL,1.480627,18781.15,278.078729,...,52.166934,119.047619,89.000000,3.428571,43.000000,6.0,1.000000,6.142857,2.209382e+09,246839.865479
1,1,3,Alabama,Baldwin,1003,2,AL,1.534804,17786.50,272.987831,...,72.811060,87.053571,372.000000,7.142857,263.714286,32.0,2.000000,25.857143,5.770469e+09,728445.072448
2,1,5,Alabama,Barbour,1005,3,AL,2.165570,14384.29,311.501912,...,51.006711,74.285714,42.571429,1.142857,20.571429,5.0,0.000000,3.714286,3.258643e+09,307285.154510
3,1,7,Alabama,Bibb,1007,4,AL,2.131110,19192.64,409.016138,...,39.047619,,30.000000,0.000000,11.714286,0.0,0.000000,0.000000,2.310715e+09,227886.963840
4,1,9,Alabama,Blount,1009,5,AL,1.785375,18255.58,325.930549,...,44.897959,61.904762,42.000000,0.000000,18.857143,6.0,0.000000,3.714286,2.456058e+09,286306.840721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,56,37,Wyoming,Sweetwater,56037,3323,WY,1.188670,18170.54,215.987684,...,49.579832,42.857143,51.000000,7.714286,17.571429,10.0,2.857143,1.428571,4.872233e+10,948164.491340
3139,56,39,Wyoming,Teton,56039,3324,WY,0.264101,22770.62,60.137457,...,37.202381,26.190476,48.000000,2.285714,15.571429,6.0,1.000000,0.571429,2.106816e+10,688448.365694
3140,56,41,Wyoming,Uinta,56041,3325,WY,0.734177,19288.05,141.608477,...,10.227273,0.000000,44.000000,0.000000,4.500000,6.0,0.000000,0.000000,9.587595e+09,394479.315921
3141,56,43,Wyoming,Washakie,56043,3326,WY,1.928375,22611.19,436.028404,...,43.750000,0.000000,16.000000,4.142857,2.857143,3.0,0.000000,0.000000,1.119331e+10,529956.912317


In [7]:
# Save the merged Govex table; the DataFrame index is not written to the file.
govex_medical.to_csv(path_or_buf='../Pandemic/medical_govex.csv', index=False)

# Covid tracking project
According to its official website (https://covidtracking.com/data/api), the COVID Tracking Project is ending all data collection on March 7, 2021. The existing API will continue to work until May 2021, but will only include data up to March 7, 2021.

In [8]:
# Download the COVID Tracking Project's daily per-state history.
# Note: this rebinds `df`, which held the Govex table until this point.
df = pd.read_csv(filepath_or_buffer="https://covidtracking.com/api/v1/states/daily.csv")
# Disabled: earlier positional column reordering and fips -> stfips renaming.
#col = [0, 21] + [i for i in range(2, 21)] + [22, 23, 24, 25, 26]
#df = df.iloc[:, col]
#df.rename(columns={"fips":"stfips"}, inplace=True)
df

Unnamed: 0,date,state,positive,probableCases,negative,pending,totalTestResultsSource,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,...,dataQualityGrade,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20210307,AK,56886.0,,,,totalTestsViral,1731628.0,33.0,1293.0,...,,0,0,dc4bccd4bb885349d7e94d6fed058e285d4be164,0,0,0,0,0,
1,20210307,AL,499819.0,107742.0,1931711.0,,totalTestsPeopleViral,2323788.0,494.0,45976.0,...,,-1,0,997207b430824ea40b8eb8506c19a93e07bc972e,0,0,0,0,0,
2,20210307,AR,324818.0,69092.0,2480716.0,,totalTestsViral,2736442.0,335.0,14926.0,...,,22,11,50921aeefba3e30d31623aa495b47fb2ecc72fae,0,0,0,0,0,
3,20210307,AS,0.0,,2140.0,,totalTestsViral,2140.0,,,...,,0,0,f77912d0b80d579fbb6202fa1a90554fc4dc1443,0,0,0,0,0,
4,20210307,AZ,826454.0,56519.0,3073010.0,,totalTestsViral,7908105.0,963.0,57907.0,...,,5,44,0437a7a96f4471666f775e63e86923eb5cbd8cdf,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20775,20200117,WA,0.0,,,,totalTestEncountersViral,,,,...,,0,0,7cefac6b3681020741ca30f45399a7b22f2e45b4,0,0,0,0,0,
20776,20200116,WA,0.0,,,,totalTestEncountersViral,,,,...,,0,0,650501e005a5ee86d93c5f32dda56735ea2af967,0,0,0,0,0,
20777,20200115,WA,0.0,,,,totalTestEncountersViral,,,,...,,0,0,4987e61aad88182abfe641033b597304c2153d4f,0,0,0,0,0,
20778,20200114,WA,0.0,,,,totalTestEncountersViral,,,,...,,0,0,1881c8a2f0d337b22066b4f05df06eb2259e8d57,0,0,0,0,0,


In [9]:
# Show every column label of the COVID Tracking Project frame.
df.columns

Index(['date', 'state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResultsSource', 'totalTestResults', 'hospitalizedCurrently',
       'hospitalizedCumulative', 'inIcuCurrently', 'inIcuCumulative',
       'onVentilatorCurrently', 'onVentilatorCumulative', 'recovered',
       'lastUpdateEt', 'dateModified', 'checkTimeEt', 'death', 'hospitalized',
       'hospitalizedDischarged', 'dateChecked', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 

In [10]:
# inspect data structure
# Work on a copy so the downloaded frame `df` stays untouched.
df2 = df.copy()
# Move 'fips' to position 2 (the third column), directly after 'date' and 'state'.
cols = df2.columns.tolist()
cols.remove('fips')
cols.insert(2, 'fips')
# Apply the new column order, then sort the rows by state code, abbreviation and date.
df2 = df2[cols].sort_values(by=['fips', 'state', 'date'])
# Disabled: dump of the intermediate long-format table.
#df2.to_csv('~/Documents/HPC_datahub/medical_covidtracking_temp.csv')

In [11]:
# Reshape from long to wide: one row per (state, fips) and one column per
# (field, date) pair.
df2 = df2.pivot(index=['state', 'fips'], columns='date').reset_index()
# Flatten the two-level column labels into "<field>_<date>" and rename the two
# key columns to the names used by the FIPS lookup tables.
df2.columns = ['stabbr', 'stfips'] + [
    f'{field}_{day}' for field, day in df2.columns[2:]
]
# Order the rows by state FIPS code and renumber the index from 0.
df2 = df2.sort_values(by=['stfips', 'stabbr']).reset_index(drop=True)
df2

Unnamed: 0,stabbr,stfips,positive_20200113,positive_20200114,positive_20200115,positive_20200116,positive_20200117,positive_20200118,positive_20200119,positive_20200120,...,grade_20210226,grade_20210227,grade_20210228,grade_20210301,grade_20210302,grade_20210303,grade_20210304,grade_20210305,grade_20210306,grade_20210307
0,AL,1,,,,,,,,,...,,,,,,,,,,
1,AK,2,,,,,,,,,...,,,,,,,,,,
2,AZ,4,,,,,,,,,...,,,,,,,,,,
3,AR,5,,,,,,,,,...,,,,,,,,,,
4,CA,6,,,,,,,,,...,,,,,,,,,,
5,CO,8,,,,,,,,,...,,,,,,,,,,
6,CT,9,,,,,,,,,...,,,,,,,,,,
7,DE,10,,,,,,,,,...,,,,,,,,,,
8,DC,11,,,,,,,,,...,,,,,,,,,,
9,FL,12,,,,,,,,,...,,,,,,,,,,


In [12]:
# state and US territory fips
# State and US-territory FIPS lookup (same file as `state_territory_fips` above).
fips = pd.read_csv(
    filepath_or_buffer='~/Documents/GitHub/COVID_DataHub/FIPS/state_territory_fips.csv'
)
fips

Unnamed: 0,stfips,stabbr,stname
0,1,AL,Alabama
1,2,AK,Alaska
2,4,AZ,Arizona
3,5,AR,Arkansas
4,6,CA,California
5,8,CO,Colorado
6,9,CT,Connecticut
7,10,DE,Delaware
8,11,DC,District of Columbia
9,12,FL,Florida


In [13]:
# Attach the state name to every row of the wide COVID Tracking table.
# Right join: all rows of `df2` are kept even without a match in `fips`.
final = pd.merge(fips, df2, how='right', on=['stfips', 'stabbr'])
final

Unnamed: 0,stfips,stabbr,stname,positive_20200113,positive_20200114,positive_20200115,positive_20200116,positive_20200117,positive_20200118,positive_20200119,...,grade_20210226,grade_20210227,grade_20210228,grade_20210301,grade_20210302,grade_20210303,grade_20210304,grade_20210305,grade_20210306,grade_20210307
0,1,AL,Alabama,,,,,,,,...,,,,,,,,,,
1,2,AK,Alaska,,,,,,,,...,,,,,,,,,,
2,4,AZ,Arizona,,,,,,,,...,,,,,,,,,,
3,5,AR,Arkansas,,,,,,,,...,,,,,,,,,,
4,6,CA,California,,,,,,,,...,,,,,,,,,,
5,8,CO,Colorado,,,,,,,,...,,,,,,,,,,
6,9,CT,Connecticut,,,,,,,,...,,,,,,,,,,
7,10,DE,Delaware,,,,,,,,...,,,,,,,,,,
8,11,DC,District of Columbia,,,,,,,,...,,,,,,,,,,
9,12,FL,Florida,,,,,,,,...,,,,,,,,,,


In [14]:
# Save the wide COVID Tracking table; the DataFrame index is not written to the file.
final.to_csv(path_or_buf='../Pandemic/medical_covidtracking.csv', index=False)

In [15]:
# Display the table that was written to medical_covidtracking.csv.
final

Unnamed: 0,stfips,stabbr,stname,positive_20200113,positive_20200114,positive_20200115,positive_20200116,positive_20200117,positive_20200118,positive_20200119,...,grade_20210226,grade_20210227,grade_20210228,grade_20210301,grade_20210302,grade_20210303,grade_20210304,grade_20210305,grade_20210306,grade_20210307
0,1,AL,Alabama,,,,,,,,...,,,,,,,,,,
1,2,AK,Alaska,,,,,,,,...,,,,,,,,,,
2,4,AZ,Arizona,,,,,,,,...,,,,,,,,,,
3,5,AR,Arkansas,,,,,,,,...,,,,,,,,,,
4,6,CA,California,,,,,,,,...,,,,,,,,,,
5,8,CO,Colorado,,,,,,,,...,,,,,,,,,,
6,9,CT,Connecticut,,,,,,,,...,,,,,,,,,,
7,10,DE,Delaware,,,,,,,,...,,,,,,,,,,
8,11,DC,District of Columbia,,,,,,,,...,,,,,,,,,,
9,12,FL,Florida,,,,,,,,...,,,,,,,,,,


# Previous code. Don't run.

In [9]:
# Legacy cell: left-join a `pivot` table onto the FIPS lookup by state code.
# NOTE(review): `pivot` is not defined in any cell shown above - confirm
# where it came from before reusing this.
covidtracking = fips.merge(pivot, how="left", on="stfips")

In [10]:
# Legacy cell: drop the first five columns of the COVID Tracking table, place
# the remainder side by side with the Govex table, and write the result
# (including the index) to disk.
covidtracking = covidtracking.iloc[:, slice(5, None)]
medical = pd.concat(objs=[govex_medical, covidtracking], axis='columns')
medical.to_csv(path_or_buf="../medical.csv")

In [11]:
# Legacy cell: read the combined table back from disk.
medical = pd.read_csv(filepath_or_buffer="../medical.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [12]:
# Legacy cell: look up the label of the column at positional index 71.
medical.columns[71]

'dataQualityGrade_20200122'

In [2]:
# importing fips data. and rearranging the columns
fips = pd.read_excel("~/Documents/HPC_datahub/Raw data/fips.xlsx")
fips = fips.iloc[:, [1, 3, 2, 4, 5]]

# Convert the 'integers' to integers in fips file.
for i in range(fips.shape[0]):
    if fips.iloc[i, 4] != 'unknown':
        fips.iloc[i, 4] = int(fips.iloc[i, 4])
        fips.iloc[i, 2] = int(fips.iloc[i, 2])

fips = fips.drop(fips[fips.fips == 0].index)
fips = fips.drop(fips[fips.fips == "unknown"].index)
fips = fips.reset_index()
fips = fips.iloc[0:3142, 1:]
fips

Unnamed: 0,stfips,stname,ctyfips,ctyname,fips
0,1,Alabama,1,Autauga County,1001
1,1,Alabama,3,Baldwin County,1003
2,1,Alabama,5,Barbour County,1005
3,1,Alabama,7,Bibb County,1007
4,1,Alabama,9,Blount County,1009
...,...,...,...,...,...
3137,56,Wyoming,37,Sweetwater County,56037
3138,56,Wyoming,39,Teton County,56039
3139,56,Wyoming,41,Uinta County,56041
3140,56,Wyoming,43,Washakie County,56043
