In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pathlib import Path

#import api_key
#from api_keys import api_keys

In [2]:
#Createing path to CSV files
Flu_path = Path("../Resources/ILINet.csv")
Temp_path = Path("../Resources/state_temperatures.csv")


In [3]:
#Create data frame
Flu_DF = pd.read_csv(Flu_path)
Temp_DF = pd.read_csv(Temp_path)

Flu_DF.head()
Temp_DF.head()

Unnamed: 0,State,Year,Month,Week,AverageTemperature
0,Alabama,2016,1.0,53.0,44.565887
1,Alabama,2016,1.0,1.0,45.22776
2,Alabama,2016,1.0,2.0,43.47533
3,Alabama,2016,1.0,3.0,39.243889
4,Alabama,2016,1.0,4.0,48.005981


In [4]:
Flu_DF.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,PERCENTAGE OF VISITS FOR INFLUENZA-LIKE-ILLNESS REPORTED BY SENTINEL PROVIDERS
REGION TYPE,REGION,YEAR,WEEK,% WEIGHTED ILI,%UNWEIGHTED ILI,AGE 0-4,AGE 25-49,AGE 25-64,AGE 5-24,AGE 50-64,AGE 65,ILITOTAL,NUM. OF PROVIDERS,TOTAL PATIENTS
HHS Regions,Region 4,2016,40,1.22375,1.36456,637,369,,801,115,84,2006,306,147007
HHS Regions,Region 4,2016,41,1.23174,1.45288,714,399,,769,153,96,2131,317,146674
HHS Regions,Region 4,2016,42,1.2768,1.51243,782,414,,898,185,93,2372,323,156834
HHS Regions,Region 4,2016,43,1.38032,1.66763,789,532,,994,198,109,2622,325,157229


In [5]:
# Remove NaN from Temp_DF
Temp_DF.head().dropna()

Unnamed: 0,State,Year,Month,Week,AverageTemperature
0,Alabama,2016,1.0,53.0,44.565887
1,Alabama,2016,1.0,1.0,45.22776
2,Alabama,2016,1.0,2.0,43.47533
3,Alabama,2016,1.0,3.0,39.243889
4,Alabama,2016,1.0,4.0,48.005981


In [6]:
Temp_DF.drop(columns=['State'],inplace=True)

Temp_DF

Unnamed: 0,Year,Month,Week,AverageTemperature
0,2016,1.0,53.0,44.565887
1,2016,1.0,1.0,45.227760
2,2016,1.0,2.0,43.475330
3,2016,1.0,3.0,39.243889
4,2016,1.0,4.0,48.005981
...,...,...,...,...
2115,2020,12.0,49.0,38.300606
2116,2020,12.0,50.0,44.592144
2117,2020,12.0,51.0,37.202797
2118,2020,12.0,52.0,38.019409


In [7]:
Flu_DF = Flu_DF.reset_index()
Flu_DF.head()

Unnamed: 0,level_0,level_1,level_2,level_3,level_4,level_5,level_6,level_7,level_8,level_9,level_10,level_11,level_12,level_13,PERCENTAGE OF VISITS FOR INFLUENZA-LIKE-ILLNESS REPORTED BY SENTINEL PROVIDERS
0,REGION TYPE,REGION,YEAR,WEEK,% WEIGHTED ILI,%UNWEIGHTED ILI,AGE 0-4,AGE 25-49,AGE 25-64,AGE 5-24,AGE 50-64,AGE 65,ILITOTAL,NUM. OF PROVIDERS,TOTAL PATIENTS
1,HHS Regions,Region 4,2016,40,1.22375,1.36456,637,369,,801,115,84,2006,306,147007
2,HHS Regions,Region 4,2016,41,1.23174,1.45288,714,399,,769,153,96,2131,317,146674
3,HHS Regions,Region 4,2016,42,1.2768,1.51243,782,414,,898,185,93,2372,323,156834
4,HHS Regions,Region 4,2016,43,1.38032,1.66763,789,532,,994,198,109,2622,325,157229


In [8]:
Flu_DF.columns

Index(['level_0', 'level_1', 'level_2', 'level_3', 'level_4', 'level_5',
       'level_6', 'level_7', 'level_8', 'level_9', 'level_10', 'level_11',
       'level_12', 'level_13',
       'PERCENTAGE OF VISITS FOR INFLUENZA-LIKE-ILLNESS REPORTED BY SENTINEL PROVIDERS'],
      dtype='object')

In [9]:
Flu_DF = Flu_DF.rename(columns={'level_0':'Region Type','level_1':'Region','level_2':'Year','level_3':'Week','level_4':'% Weighted ILI','level_5':'% Unweighted ILI',
                                  'level_6':'Ages 0-4','level_7':'Ages 25-49','level_8':'Ages 25-64','level_9':'Ages 5-24','level_10':'Ages 50-64',
                                  'level_11':'Ages 65+','level_12':'ILI Total','level_13':'# Providers'})
Flu_DF.head()

Unnamed: 0,Region Type,Region,Year,Week,% Weighted ILI,% Unweighted ILI,Ages 0-4,Ages 25-49,Ages 25-64,Ages 5-24,Ages 50-64,Ages 65+,ILI Total,# Providers,PERCENTAGE OF VISITS FOR INFLUENZA-LIKE-ILLNESS REPORTED BY SENTINEL PROVIDERS
0,REGION TYPE,REGION,YEAR,WEEK,% WEIGHTED ILI,%UNWEIGHTED ILI,AGE 0-4,AGE 25-49,AGE 25-64,AGE 5-24,AGE 50-64,AGE 65,ILITOTAL,NUM. OF PROVIDERS,TOTAL PATIENTS
1,HHS Regions,Region 4,2016,40,1.22375,1.36456,637,369,,801,115,84,2006,306,147007
2,HHS Regions,Region 4,2016,41,1.23174,1.45288,714,399,,769,153,96,2131,317,146674
3,HHS Regions,Region 4,2016,42,1.2768,1.51243,782,414,,898,185,93,2372,323,156834
4,HHS Regions,Region 4,2016,43,1.38032,1.66763,789,532,,994,198,109,2622,325,157229


In [10]:
Flu_DF = Flu_DF.drop(['Region Type','% Weighted ILI','% Unweighted ILI','Ages 25-64','# Providers',
                      'PERCENTAGE OF VISITS FOR INFLUENZA-LIKE-ILLNESS REPORTED BY SENTINEL PROVIDERS'],axis=1)

Flu_DF = Flu_DF.drop([0],axis=0)
Flu_DF.head()

Unnamed: 0,Region,Year,Week,Ages 0-4,Ages 25-49,Ages 5-24,Ages 50-64,Ages 65+,ILI Total
1,Region 4,2016,40,637,369,801,115,84,2006
2,Region 4,2016,41,714,399,769,153,96,2131
3,Region 4,2016,42,782,414,898,185,93,2372
4,Region 4,2016,43,789,532,994,198,109,2622
5,Region 4,2016,44,928,518,1176,195,122,2939


In [11]:
Flu_DF['DateTime']= pd.to_datetime(Flu_DF.Year.astype(str)+'-'+ Flu_DF.Week.astype(str)+ '-1', format= "%Y-%W-%w")

Flu_DF

Unnamed: 0,Region,Year,Week,Ages 0-4,Ages 25-49,Ages 5-24,Ages 50-64,Ages 65+,ILI Total,DateTime
1,Region 4,2016,40,637,369,801,115,84,2006,2016-10-03
2,Region 4,2016,41,714,399,769,153,96,2131,2016-10-10
3,Region 4,2016,42,782,414,898,185,93,2372,2016-10-17
4,Region 4,2016,43,789,532,994,198,109,2622,2016-10-24
5,Region 4,2016,44,928,518,1176,195,122,2939,2016-10-31
...,...,...,...,...,...,...,...,...,...,...
152,Region 4,2019,35,1013,715,1131,190,149,3198,2019-09-02
153,Region 4,2019,36,1244,790,1261,234,153,3682,2019-09-09
154,Region 4,2019,37,1084,854,1285,264,174,3661,2019-09-16
155,Region 4,2019,38,1165,861,1352,259,188,3825,2019-09-23


In [12]:
Temp_DF['Week'] = Temp_DF['Week'].astype('int')
Temp_DF['DateTime']= pd.to_datetime(Temp_DF.Year.astype(str)+'-'+ Temp_DF.Week.astype(str)+ '-1', format= "%Y-%W-%w")

Temp_DF


Unnamed: 0,Year,Month,Week,AverageTemperature,DateTime
0,2016,1.0,53,44.565887,2017-01-02
1,2016,1.0,1,45.227760,2016-01-04
2,2016,1.0,2,43.475330,2016-01-11
3,2016,1.0,3,39.243889,2016-01-18
4,2016,1.0,4,48.005981,2016-01-25
...,...,...,...,...,...
2115,2020,12.0,49,38.300606,2020-12-07
2116,2020,12.0,50,44.592144,2020-12-14
2117,2020,12.0,51,37.202797,2020-12-21
2118,2020,12.0,52,38.019409,2020-12-28


In [17]:
# Merge the two DataFrames
merged_df = pd.merge(Flu_DF, Temp_DF, on=['DateTime_x', 'DateTime_y'])

merged_df



KeyError: 'DateTime_x'

In [14]:
Temp_DF.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2120 entries, 0 to 2119
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Year                2120 non-null   int32         
 1   Month               2120 non-null   float64       
 2   Week                2120 non-null   int32         
 3   AverageTemperature  2120 non-null   float64       
 4   DateTime            2120 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int32(2)
memory usage: 66.4 KB


In [15]:
Flu_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 1 to 156
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Region      156 non-null    object        
 1   Year        156 non-null    int32         
 2   Week        156 non-null    int32         
 3   Ages 0-4    156 non-null    object        
 4   Ages 25-49  156 non-null    object        
 5   Ages 5-24   156 non-null    object        
 6   Ages 50-64  156 non-null    object        
 7   Ages 65+    156 non-null    object        
 8   ILI Total   156 non-null    object        
 9   DateTime    156 non-null    datetime64[ns]
dtypes: datetime64[ns](1), int32(2), object(7)
memory usage: 11.1+ KB
