In [12]:
# Uncomment the lines below if you are running this notebook for the first time
# !pip install meteostat -U
# !pip install pandas
# !pip install numpy

import time
import pandas as pd
import numpy as np
from meteostat import Stations, Hourly, Daily
import datetime

In [13]:
def createCovidDataFrame(cntry, savePath="/", saveType="variable"):
    start_time = time.time()
    # Print iterations progress
    def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
        """
        Render a single-line text progress bar on stdout; call repeatedly in a loop.

        The line is written with a leading carriage return so that successive
        calls overwrite each other on terminals that honour "\r".

        @params:
            iteration   - Required  : current iteration (Int); values outside
                                      [0, total] are clamped into that range
            total       - Required  : total iterations (Int); a value <= 0 is
                                      rendered as a completed bar instead of
                                      raising ZeroDivisionError
            prefix      - Optional  : prefix string (Str)
            suffix      - Optional  : suffix string (Str)
            decimals    - Optional  : positive number of decimals in percent complete (Int)
            length      - Optional  : character length of bar (Int)
            fill        - Optional  : bar fill character (Str)
            printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
        @returns:
            None. The bar is printed; once the (clamped) iteration reaches
            total an extra newline is printed so later output starts on a
            fresh line.
        """
        if total > 0:
            # Clamp so an out-of-range iteration cannot draw a bar wider than
            # `length` or report a percentage outside 0..100.
            done = min(max(iteration, 0), total)
            ratio = done / float(total)
            # Integer floor division keeps the filled width exact for int inputs.
            filledLength = int(length * done // total)
        else:
            # Nothing to iterate over: show a finished bar rather than dividing by zero.
            done = total
            ratio = 1.0
            filledLength = length
        percent = ("{0:." + str(decimals) + "f}").format(100 * ratio)
        bar = fill * filledLength + '-' * (length - filledLength)
        print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
        # Print New Line on Complete
        if done == total:
            print()
            
    printProgressBar(0, 100, prefix = 'Confirmed, Deaths and Recovered data', suffix = 'Complete', length = 100)       
    confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
    printProgressBar(33, 100, prefix = 'Confirmed, Deaths and Recovered data', suffix = 'Complete', length = 100)   
    deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
    printProgressBar(66, 100, prefix = 'Confirmed, Deaths and Recovered data', suffix = 'Complete', length = 100)   
    recovered_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
    printProgressBar(100, 100, prefix = 'Confirmed, Deaths and Recovered data', suffix = 'Complete', length = 100)   
    
    printProgressBar(0, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)   
    confirmed_df = confirmed_df.replace(np.nan, '', regex=True)
    deaths_df = deaths_df.replace(np.nan, '', regex=True)
    recovered_df = recovered_df.replace(np.nan, '', regex=True)
    printProgressBar(5, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    temp_cr = []
    for i in range(len(confirmed_df.index)):
        if (confirmed_df["Province/State"][i] != ''):
            temp_cr.append("{}/{}".format(confirmed_df["Country/Region"][i] , confirmed_df["Province/State"][i]))
        else:
            temp_cr.append(confirmed_df["Country/Region"][i])
    confirmed_df["Country/Region"] = temp_cr
    
    temp_cr = []
    for i in range(len(deaths_df.index)):
        if (deaths_df["Province/State"][i] != ''):
            temp_cr.append("{}/{}".format(deaths_df["Country/Region"][i] , deaths_df["Province/State"][i]))
        else:
            temp_cr.append(deaths_df["Country/Region"][i])
    deaths_df["Country/Region"] = temp_cr
    
    temp_cr = []
    for i in range(len(recovered_df.index)):
        if (recovered_df["Province/State"][i] != ''):
            temp_cr.append("{}/{}".format(recovered_df["Country/Region"][i] , recovered_df["Province/State"][i]))
        else:
            temp_cr.append(recovered_df["Country/Region"][i])
    recovered_df["Country/Region"] = temp_cr
    
    confirmed_df = confirmed_df.drop(labels = ['Province/State'], axis = 1)
    deaths_df = deaths_df.drop(labels = ['Province/State'], axis = 1)
    recovered_df = recovered_df.drop(labels = ['Province/State'], axis = 1)
    printProgressBar(10, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    coordinates_df = confirmed_df[["Country/Region", "Lat", "Long"]]
    coordinates_df = coordinates_df.T
    new_header = coordinates_df.iloc[0] #grab the first row for the header
    coordinates_df = coordinates_df[1:] #take the data less the header row
    coordinates_df.columns = new_header #set the header row as the df header
    confirmed_df = confirmed_df.drop(labels = ['Lat','Long'], axis = 1)
    deaths_df = deaths_df.drop(labels = ['Lat','Long'], axis = 1)
    recovered_df = recovered_df.drop(labels = ['Lat','Long'], axis = 1)
    confirmed_df = confirmed_df.T
    deaths_df = deaths_df.T
    recovered_df = recovered_df.T
    new_header = confirmed_df.iloc[0] #grab the first row for the header
    confirmed_df = confirmed_df[1:] #take the data less the header row
    confirmed_df.columns = new_header #set the header row as the df header
    confirmed_df.index = pd.to_datetime(confirmed_df.index)

    new_header = deaths_df.iloc[0] #grab the first row for the header
    deaths_df = deaths_df[1:] #take the data less the header row
    deaths_df.columns = new_header #set the header row as the df header
    deaths_df.index = pd.to_datetime(deaths_df.index)

    new_header = recovered_df.iloc[0] #grab the first row for the header
    recovered_df = recovered_df[1:] #take the data less the header row
    recovered_df.columns = new_header #set the header row as the df header
    recovered_df.index = pd.to_datetime(recovered_df.index)
    printProgressBar(20, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    confirmed_df["Australia"] = confirmed_df["Australia/Western Australia"] +confirmed_df["Australia/Victoria"] +confirmed_df["Australia/Tasmania"] +confirmed_df["Australia/South Australia"] +confirmed_df["Australia/Queensland"] +confirmed_df["Australia/Northern Territory"] +confirmed_df["Australia/New South Wales"] +confirmed_df["Australia/Australian Capital Territory"]
    confirmed_df = confirmed_df.drop(labels = ['Australia/Australian Capital Territory','Australia/Western Australia','Australia/Victoria','Australia/Tasmania','Australia/South Australia','Australia/Queensland','Australia/Northern Territory','Australia/New South Wales'], axis = 1)
    coordinates_df["Australia"] = coordinates_df["Australia/Western Australia"]
    coordinates_df = coordinates_df.drop(labels = ['Australia/Australian Capital Territory','Australia/Western Australia','Australia/Victoria','Australia/Tasmania','Australia/South Australia','Australia/Queensland','Australia/Northern Territory','Australia/New South Wales'], axis = 1)
    
    
    printProgressBar(30, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    confirmed_df["Canada"] = confirmed_df["Canada/Alberta"] + confirmed_df["Canada/British Columbia"] + confirmed_df["Canada/Diamond Princess"] + confirmed_df["Canada/Grand Princess"] + confirmed_df["Canada/Manitoba"] + confirmed_df["Canada/New Brunswick"] + confirmed_df["Canada/Newfoundland and Labrador"] + confirmed_df["Canada/Northwest Territories"] + confirmed_df["Canada/Nova Scotia"] + confirmed_df["Canada/Nunavut"] + confirmed_df["Canada/Ontario"] + confirmed_df["Canada/Prince Edward Island"] + confirmed_df["Canada/Quebec"] + confirmed_df["Canada/Repatriated Travellers"] + confirmed_df["Canada/Saskatchewan"] +confirmed_df["Canada/Yukon"] 

    confirmed_df = confirmed_df.drop(labels = ['Canada/Alberta','Canada/Yukon','Canada/Saskatchewan','Canada/Repatriated Travellers','Canada/Quebec','Canada/Prince Edward Island','Canada/Ontario','Canada/Nunavut','Canada/Nova Scotia','Canada/Northwest Territories','Canada/Newfoundland and Labrador','Canada/New Brunswick','Canada/Manitoba','Canada/Grand Princess','Canada/Diamond Princess','Canada/British Columbia'], axis = 1)

    
    coordinates_df["Canada"] = coordinates_df["Canada/British Columbia"]
    coordinates_df = coordinates_df.drop(labels = ['Canada/Alberta','Canada/Yukon','Canada/Saskatchewan','Canada/Repatriated Travellers','Canada/Quebec','Canada/Prince Edward Island','Canada/Ontario','Canada/Nunavut','Canada/Nova Scotia','Canada/Northwest Territories','Canada/Newfoundland and Labrador','Canada/New Brunswick','Canada/Manitoba','Canada/Grand Princess','Canada/Diamond Princess','Canada/British Columbia'], axis = 1)


    confirmed_df = confirmed_df.drop(labels = ['United Kingdom/Anguilla','United Kingdom/Saint Helena, Ascension and Tristan da Cunha','United Kingdom/Montserrat','United Kingdom/Isle of Man','United Kingdom/Gibraltar','United Kingdom/Falkland Islands (Malvinas)','United Kingdom/Channel Islands','United Kingdom/Cayman Islands','United Kingdom/British Virgin Islands','United Kingdom/Bermuda'], axis = 1)
    

    confirmed_df = confirmed_df.drop(labels = ['France/French Guiana','France/Wallis and Futuna','France/St Martin','France/Saint Pierre and Miquelon','France/Saint Barthelemy','France/Reunion','France/New Caledonia','France/Mayotte','France/Martinique','France/Guadeloupe','France/Guadeloupe','France/French Polynesia'], axis = 1)
    

    confirmed_df["Democratic Republic of Congo"] = confirmed_df["Congo (Brazzaville)"] + confirmed_df["Congo (Kinshasa)"] 

    confirmed_df = confirmed_df.drop(labels = ['Congo (Brazzaville)'], axis = 1)
    confirmed_df = confirmed_df.drop(labels = ['Congo (Kinshasa)'], axis = 1)
    
    coordinates_df["Democratic Republic of Congo"] = coordinates_df["Congo (Brazzaville)"]
    coordinates_df = coordinates_df.drop(labels = ['Congo (Brazzaville)'], axis = 1)
    coordinates_df = coordinates_df.drop(labels = ['Congo (Kinshasa)'], axis = 1)
    
    printProgressBar(40, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    confirmed_df["China"] = confirmed_df['China/Anhui'] +confirmed_df['China/Beijing'] +confirmed_df['China/Chongqing'] +confirmed_df['China/Fujian'] +confirmed_df['China/Gansu'] +confirmed_df['China/Guangdong'] +confirmed_df['China/Guangxi'] +confirmed_df['China/Guizhou'] +confirmed_df['China/Hainan'] +confirmed_df['China/Hebei'] +confirmed_df['China/Heilongjiang'] +confirmed_df['China/Henan'] +confirmed_df['China/Hong Kong'] +confirmed_df['China/Hubei'] +confirmed_df['China/Hunan'] +confirmed_df['China/Inner Mongolia'] +confirmed_df['China/Jiangsu'] +confirmed_df['China/Jiangxi'] +confirmed_df['China/Jilin'] +confirmed_df['China/Liaoning'] +confirmed_df['China/Macau'] +confirmed_df['China/Ningxia'] +confirmed_df['China/Qinghai'] +confirmed_df['China/Shaanxi'] +confirmed_df['China/Shandong'] +confirmed_df['China/Shanghai'] +confirmed_df['China/Shanxi'] +confirmed_df['China/Sichuan'] +confirmed_df['China/Tianjin'] +confirmed_df['China/Tibet'] +confirmed_df['China/Xinjiang'] +confirmed_df['China/Yunnan'] +confirmed_df['China/Zhejiang']

    confirmed_df = confirmed_df.drop(labels = ['China/Anhui','China/Zhejiang','China/Yunnan','China/Xinjiang','China/Tibet','China/Tianjin','China/Sichuan','China/Shanxi','China/Shanghai','China/Shandong','China/Shaanxi','China/Qinghai','China/Ningxia','China/Macau','China/Liaoning','China/Jilin','China/Jiangxi','China/Jiangsu','China/Inner Mongolia','China/Hunan','China/Hubei','China/Hong Kong','China/Henan','China/Heilongjiang','China/Hebei','China/Hainan','China/Guizhou','China/Guangxi','China/Guangdong','China/Gansu','China/Fujian','China/Chongqing','China/Beijing'], axis = 1)
    
    coordinates_df["China"] = coordinates_df["China/Beijing"]
    coordinates_df = coordinates_df.drop(labels = ['China/Anhui','China/Zhejiang','China/Yunnan','China/Xinjiang','China/Tibet','China/Tianjin','China/Sichuan','China/Shanxi','China/Shanghai','China/Shandong','China/Shaanxi','China/Qinghai','China/Ningxia','China/Macau','China/Liaoning','China/Jilin','China/Jiangxi','China/Jiangsu','China/Inner Mongolia','China/Hunan','China/Hubei','China/Hong Kong','China/Henan','China/Heilongjiang','China/Hebei','China/Hainan','China/Guizhou','China/Guangxi','China/Guangdong','China/Gansu','China/Fujian','China/Chongqing','China/Beijing'], axis = 1)
    
    confirmed_df = confirmed_df.drop(labels = ['Netherlands/Aruba','Netherlands/Sint Maarten','Netherlands/Curacao','Netherlands/Bonaire, Sint Eustatius and Saba'], axis = 1)
    

    confirmed_df = confirmed_df.drop(labels = ['Denmark/Faroe Islands','Denmark/Greenland'], axis = 1)
    
    printProgressBar(50, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    #deaths_df
    deaths_df["Australia"] = deaths_df["Australia/Western Australia"] +deaths_df["Australia/Victoria"] +deaths_df["Australia/Tasmania"] +deaths_df["Australia/South Australia"] +deaths_df["Australia/Queensland"] +deaths_df["Australia/Northern Territory"] +deaths_df["Australia/New South Wales"] +deaths_df["Australia/Australian Capital Territory"]

    deaths_df = deaths_df.drop(labels = ['Australia/Australian Capital Territory','Australia/Western Australia','Australia/Victoria','Australia/Tasmania','Australia/South Australia','Australia/Queensland','Australia/Northern Territory','Australia/New South Wales'], axis = 1)
    

    deaths_df["Canada"] = deaths_df["Canada/Alberta"] + deaths_df["Canada/British Columbia"] + deaths_df["Canada/Diamond Princess"] + deaths_df["Canada/Grand Princess"] + deaths_df["Canada/Manitoba"] + deaths_df["Canada/New Brunswick"] + deaths_df["Canada/Newfoundland and Labrador"] + deaths_df["Canada/Northwest Territories"] + deaths_df["Canada/Nova Scotia"] + deaths_df["Canada/Nunavut"] + deaths_df["Canada/Ontario"] + deaths_df["Canada/Prince Edward Island"] + deaths_df["Canada/Quebec"] + deaths_df["Canada/Repatriated Travellers"] + deaths_df["Canada/Saskatchewan"] +deaths_df["Canada/Yukon"] 

    deaths_df = deaths_df.drop(labels = ['Canada/Alberta','Canada/Yukon','Canada/Saskatchewan','Canada/Repatriated Travellers','Canada/Quebec','Canada/Prince Edward Island','Canada/Ontario','Canada/Nunavut','Canada/Nova Scotia','Canada/Northwest Territories','Canada/Newfoundland and Labrador','Canada/New Brunswick','Canada/Manitoba','Canada/Grand Princess','Canada/Diamond Princess','Canada/British Columbia'], axis = 1)


    deaths_df = deaths_df.drop(labels = ['United Kingdom/Anguilla','United Kingdom/Saint Helena, Ascension and Tristan da Cunha','United Kingdom/Montserrat','United Kingdom/Isle of Man','United Kingdom/Gibraltar','United Kingdom/Falkland Islands (Malvinas)','United Kingdom/Channel Islands','United Kingdom/Cayman Islands','United Kingdom/British Virgin Islands','United Kingdom/Bermuda'], axis = 1)
    

    deaths_df = deaths_df.drop(labels = ['France/French Guiana','France/Wallis and Futuna','France/St Martin','France/Saint Pierre and Miquelon','France/Saint Barthelemy','France/Reunion','France/New Caledonia','France/Mayotte','France/Martinique','France/Guadeloupe','France/Guadeloupe','France/French Polynesia'], axis = 1)
    

    deaths_df["Congo"] = deaths_df["Congo (Brazzaville)"] + deaths_df["Congo (Kinshasa)"] 

    deaths_df = deaths_df.drop(labels = ['Congo (Brazzaville)'], axis = 1)
    deaths_df = deaths_df.drop(labels = ['Congo (Kinshasa)'], axis = 1)

    printProgressBar(60, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    deaths_df["China"] = deaths_df['China/Anhui'] +deaths_df['China/Beijing'] +deaths_df['China/Chongqing'] +deaths_df['China/Fujian'] +deaths_df['China/Gansu'] +deaths_df['China/Guangdong'] +deaths_df['China/Guangxi'] +deaths_df['China/Guizhou'] +deaths_df['China/Hainan'] +deaths_df['China/Hebei'] +deaths_df['China/Heilongjiang'] +deaths_df['China/Henan'] +deaths_df['China/Hong Kong'] +deaths_df['China/Hubei'] +deaths_df['China/Hunan'] +deaths_df['China/Inner Mongolia'] +deaths_df['China/Jiangsu'] +deaths_df['China/Jiangxi'] +deaths_df['China/Jilin'] +deaths_df['China/Liaoning'] +deaths_df['China/Macau'] +deaths_df['China/Ningxia'] +deaths_df['China/Qinghai'] +deaths_df['China/Shaanxi'] +deaths_df['China/Shandong'] +deaths_df['China/Shanghai'] +deaths_df['China/Shanxi'] +deaths_df['China/Sichuan'] +deaths_df['China/Tianjin'] +deaths_df['China/Tibet'] +deaths_df['China/Xinjiang'] +deaths_df['China/Yunnan'] +deaths_df['China/Zhejiang']

    deaths_df = deaths_df.drop(labels = ['China/Anhui','China/Zhejiang','China/Yunnan','China/Xinjiang','China/Tibet','China/Tianjin','China/Sichuan','China/Shanxi','China/Shanghai','China/Shandong','China/Shaanxi','China/Qinghai','China/Ningxia','China/Macau','China/Liaoning','China/Jilin','China/Jiangxi','China/Jiangsu','China/Inner Mongolia','China/Hunan','China/Hubei','China/Hong Kong','China/Henan','China/Heilongjiang','China/Hebei','China/Hainan','China/Guizhou','China/Guangxi','China/Guangdong','China/Gansu','China/Fujian','China/Chongqing','China/Beijing'], axis = 1)
    

    deaths_df = deaths_df.drop(labels = ['Netherlands/Aruba','Netherlands/Sint Maarten','Netherlands/Curacao','Netherlands/Bonaire, Sint Eustatius and Saba'], axis = 1)
    

    deaths_df = deaths_df.drop(labels = ['Denmark/Faroe Islands'], axis = 1)
    deaths_df = deaths_df.drop(labels = ['Denmark/Greenland'], axis = 1)

    printProgressBar(70, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    #recovered_df
    recovered_df["Australia"] = recovered_df["Australia/Western Australia"] +recovered_df["Australia/Victoria"] +recovered_df["Australia/Tasmania"] +recovered_df["Australia/South Australia"] +recovered_df["Australia/Queensland"] +recovered_df["Australia/Northern Territory"] +recovered_df["Australia/New South Wales"] +recovered_df["Australia/Australian Capital Territory"]

    recovered_df = recovered_df.drop(labels = ['Australia/Australian Capital Territory','Australia/Western Australia','Australia/Victoria','Australia/Tasmania','Australia/South Australia','Australia/Queensland','Australia/Northern Territory','Australia/New South Wales'], axis = 1)
    


    recovered_df = recovered_df.drop(labels = ['United Kingdom/Anguilla','United Kingdom/Saint Helena, Ascension and Tristan da Cunha','United Kingdom/Montserrat','United Kingdom/Isle of Man','United Kingdom/Gibraltar','United Kingdom/Falkland Islands (Malvinas)','United Kingdom/Channel Islands','United Kingdom/Cayman Islands','United Kingdom/British Virgin Islands','United Kingdom/Bermuda'], axis = 1)
    
    recovered_df = recovered_df.drop(labels = ['France/French Guiana','France/Wallis and Futuna','France/St Martin','France/Saint Pierre and Miquelon','France/Saint Barthelemy','France/Reunion','France/New Caledonia','France/Mayotte','France/Martinique','France/Guadeloupe','France/Guadeloupe','France/French Polynesia'], axis = 1)
    

    recovered_df["Congo"] = recovered_df["Congo (Brazzaville)"] + recovered_df["Congo (Kinshasa)"] 

    recovered_df = recovered_df.drop(labels = ['Congo (Brazzaville)'], axis = 1)
    recovered_df = recovered_df.drop(labels = ['Congo (Kinshasa)'], axis = 1)

    printProgressBar(80, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    recovered_df["China"] = recovered_df['China/Anhui'] +recovered_df['China/Beijing'] +recovered_df['China/Chongqing'] +recovered_df['China/Fujian'] +recovered_df['China/Gansu'] +recovered_df['China/Guangdong'] +recovered_df['China/Guangxi'] +recovered_df['China/Guizhou'] +recovered_df['China/Hainan'] +recovered_df['China/Hebei'] +recovered_df['China/Heilongjiang'] +recovered_df['China/Henan'] +recovered_df['China/Hong Kong'] +recovered_df['China/Hubei'] +recovered_df['China/Hunan'] +recovered_df['China/Inner Mongolia'] +recovered_df['China/Jiangsu'] +recovered_df['China/Jiangxi'] +recovered_df['China/Jilin'] +recovered_df['China/Liaoning'] +recovered_df['China/Macau'] +recovered_df['China/Ningxia'] +recovered_df['China/Qinghai'] +recovered_df['China/Shaanxi'] +recovered_df['China/Shandong'] +recovered_df['China/Shanghai'] +recovered_df['China/Shanxi'] +recovered_df['China/Sichuan'] +recovered_df['China/Tianjin'] +recovered_df['China/Tibet'] +recovered_df['China/Xinjiang'] +recovered_df['China/Yunnan'] +recovered_df['China/Zhejiang']

    recovered_df = recovered_df.drop(labels = ['China/Anhui','China/Zhejiang','China/Yunnan','China/Xinjiang','China/Tibet','China/Tianjin','China/Sichuan','China/Shanxi','China/Shanghai','China/Shandong','China/Shaanxi','China/Qinghai','China/Ningxia','China/Macau','China/Liaoning','China/Jilin','China/Jiangxi','China/Jiangsu','China/Inner Mongolia','China/Hunan','China/Hubei','China/Hong Kong','China/Henan','China/Heilongjiang','China/Hebei','China/Hainan','China/Guizhou','China/Guangxi','China/Guangdong','China/Gansu','China/Fujian','China/Chongqing','China/Beijing'], axis = 1)
    

    recovered_df = recovered_df.drop(labels = ['Netherlands/Aruba','Netherlands/Sint Maarten','Netherlands/Curacao','Netherlands/Bonaire, Sint Eustatius and Saba'], axis = 1)
    

    recovered_df = recovered_df.drop(labels = ['Denmark/Faroe Islands'], axis = 1)
    recovered_df = recovered_df.drop(labels = ['Denmark/Greenland'], axis = 1)
    
    printProgressBar(83, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    confirmed_df = confirmed_df.drop(labels = ['Afghanistan','Algeria','Angola','Antigua and Barbuda','Azerbaijan','Bahamas','Barbados','Belize','Benin','Botswana','Brunei','Burkina Faso','Burma','Burundi','Cabo Verde','Cambodia','Cameroon','Central African Republic','Chad','Comoros','Diamond Princess','Djibouti','Egypt','Equatorial Guinea','Eritrea','Eswatini','Gabon','Georgia','Grenada','Guinea','Guinea-Bissau','Guyana','Haiti','Holy See','Honduras','Korea, South','Kosovo','Kyrgyzstan','Laos','Lebanon','Lesotho','Liberia','Liechtenstein','MS Zaandam','Mali','Marshall Islands','Mauritius','Micronesia','Moldova','Monaco','Montenegro','Nicaragua','Papua New Guinea','Saint Kitts and Nevis','Saint Lucia','Saint Vincent and the Grenadines','Samoa','San Marino','Sao Tome and Principe','Seychelles','Sierra Leone','Solomon Islands','Somalia','Suriname','Syria','Taiwan*','Tajikistan','Tanzania','Timor-Leste','United Kingdom/Turks and Caicos Islands','Uzbekistan','Vanuatu','Venezuela','West Bank and Gaza','Yemen'], axis = 1)
    
    #deaths_df
    deaths_df = deaths_df.drop(labels = ['Afghanistan','Algeria','Angola','Antigua and Barbuda','Azerbaijan','Bahamas','Barbados','Belize','Benin','Botswana','Brunei','Burkina Faso','Burma','Burundi','Cabo Verde','Cambodia','Cameroon','Central African Republic','Chad','Comoros','Diamond Princess','Djibouti','Egypt','Equatorial Guinea','Eritrea','Eswatini','Gabon','Georgia','Grenada','Guinea','Guinea-Bissau','Guyana','Haiti','Holy See','Honduras','Korea, South','Kosovo','Kyrgyzstan','Laos','Lebanon','Lesotho','Liberia','Liechtenstein','MS Zaandam','Mali','Marshall Islands','Mauritius','Micronesia','Moldova','Monaco','Montenegro','Nicaragua','Papua New Guinea','Saint Kitts and Nevis','Saint Lucia','Saint Vincent and the Grenadines','Samoa','San Marino','Sao Tome and Principe','Seychelles','Sierra Leone','Solomon Islands','Somalia','Suriname','Syria','Taiwan*','Tajikistan','Tanzania','Timor-Leste','United Kingdom/Turks and Caicos Islands','Uzbekistan','Vanuatu','Venezuela','West Bank and Gaza','Yemen'], axis = 1)

    printProgressBar(90, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    #recovered_df
    recovered_df = recovered_df.drop(labels = ['Afghanistan','Algeria','Angola','Antigua and Barbuda','Azerbaijan','Bahamas','Barbados','Belize','Benin','Botswana','Brunei','Burkina Faso','Burma','Burundi','Cabo Verde','Cambodia','Cameroon','Central African Republic','Chad','Comoros','Diamond Princess','Djibouti','Egypt','Equatorial Guinea','Eritrea','Eswatini','Gabon','Georgia','Grenada','Guinea','Guinea-Bissau','Guyana','Haiti','Holy See','Honduras','Korea, South','Kosovo','Kyrgyzstan','Laos','Lebanon','Lesotho','Liberia','Liechtenstein','MS Zaandam','Mali','Marshall Islands','Mauritius','Micronesia','Moldova','Monaco','Montenegro','Nicaragua','Papua New Guinea','Saint Kitts and Nevis','Saint Lucia','Saint Vincent and the Grenadines','Samoa','San Marino','Sao Tome and Principe','Seychelles','Sierra Leone','Solomon Islands','Somalia','Suriname','Syria','Taiwan*','Tajikistan','Tanzania','Timor-Leste','United Kingdom/Turks and Caicos Islands','Uzbekistan','Vanuatu','Venezuela','West Bank and Gaza','Yemen'], axis = 1)
    
    confirmed_df["United States"] = confirmed_df["US"]
    confirmed_df = confirmed_df.drop(labels = ['US'], axis = 1)
    
    recovered_df["United States"] = recovered_df["US"]
    recovered_df = recovered_df.drop(labels = ['US'], axis = 1)
    
    deaths_df["United States"] = deaths_df["US"]
    deaths_df = deaths_df.drop(labels = ['US'], axis = 1)
    
    coordinates_df["United States"] = coordinates_df["US"]
    coordinates_df = coordinates_df.drop(labels = ['US'], axis = 1)
    printProgressBar(100, 100, prefix = 'Preparing data                      ', suffix = 'Complete', length = 100)  
    
    
    confirmed_df = confirmed_df.drop(labels = ['Armenia', 'Czechia', 'Maldives', 'North Macedonia', 'Slovakia'], axis = 1)
    deaths_df = deaths_df.drop(labels = ['Armenia', 'Czechia', 'Maldives', 'North Macedonia', 'Slovakia'], axis = 1)
    recovered_df = recovered_df.drop(labels = ['Armenia', 'Czechia', 'Maldives', 'North Macedonia', 'Slovakia'], axis = 1)
    if cntry not in confirmed_df.columns:
        print("Country: {} not available. Please provide one of the ollowing countries:".format(cntry))
        print(confirmed_df.columns.tolist())
        return confirmed_df.columns.tolist()
    
    printProgressBar(0, 100, prefix = 'Calculating Daily data              ', suffix = 'Complete', length = 100)  
    
    confirmed_daily_df = pd.DataFrame()
    deaths_daily_df = pd.DataFrame()
    recovered_daily_df = pd.DataFrame()
    confirmed_daily = []
    deaths_daily = []
    recovered_daily = []
    
    printProgressBar(25, 100, prefix = 'Calculating Daily data              ', suffix = 'Complete', length = 100) 
    
    for j in confirmed_df.keys():
        for i in range(confirmed_df.shape[0]):
            if (i > 0):
                confirmed_daily.append(confirmed_df[j][i] - confirmed_df[j][i-1])
            else:
                confirmed_daily.append(confirmed_df[j][i])
        confirmed_daily_df[j] = confirmed_daily
        confirmed_daily = []
    confirmed_daily_df.index = confirmed_df.index
    
    printProgressBar(50, 100, prefix = 'Calculating Daily data              ', suffix = 'Complete', length = 100) 

    for j in deaths_df.keys():
        for i in range(deaths_df.shape[0]):
            if (i > 0):
                deaths_daily.append(deaths_df[j][i] - deaths_df[j][i-1])
            else:
                deaths_daily.append(deaths_df[j][i])
        deaths_daily_df[j] = deaths_daily
        deaths_daily = []
    deaths_daily_df.index = deaths_df.index
    
    printProgressBar(75, 100, prefix = 'Calculating Daily data              ', suffix = 'Complete', length = 100) 

    for j in recovered_df.keys():
        for i in range(recovered_df.shape[0]):
            if (i > 0):
                value = recovered_df[j][i] - recovered_df[j][i-1]
                if (value < 0):
                    value = 0
                recovered_daily.append(value)
            else:
                recovered_daily.append(recovered_df[j][i])
        recovered_daily_df[j] = recovered_daily
        recovered_daily = []
    recovered_daily_df.index = recovered_df.index  
    
    printProgressBar(100, 100, prefix = 'Calculating Daily data              ', suffix = 'Complete', length = 100) 
  
    
    printProgressBar(0, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    Stations._types = {
        'id': 'object',
        'name': 'object',
        'country': 'object',
        'region': 'object',
        'wmo': 'object',
        'icao': 'object',
        'latitude': 'float64',
        'longitude': 'float64',
        'elevation': 'float64',
        'timezone': 'object'
    }

    stations = Stations()
    stations = stations.nearby(coordinates_df[cntry]["Lat"], coordinates_df[cntry]["Long"])
    station = stations.fetch(6)

    printProgressBar(5, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
#     data = Hourly(station, start = confirmed_df.index[0].to_pydatetime() - datetime.timedelta(days=14), end = confirmed_df.index[confirmed_df.index.shape[0]-1].to_pydatetime() + datetime.timedelta(days=1))
    
#     data = data.fetch()

    for key in range(0,6):
        data = Hourly(station[station.index == station.index[key]], start = confirmed_df.index[0].to_pydatetime() - datetime.timedelta(days=14), end = confirmed_df.index[confirmed_df.index.shape[0]-1].to_pydatetime() + datetime.timedelta(days=1))
        data = data.fetch()
        if(data.shape[0] > 0):
            break
        else:
            print('Weather data not available. Searching another weather station...')

    data = data.drop(labels = ['dwpt','prcp','snow','wdir','wpgt','pres','tsun','coco'], axis = 1)
    size = data.index.shape[0]-1
    size = size/24
    printProgressBar(10, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    date_rng = pd.date_range(start=confirmed_df.index[0].to_pydatetime() - datetime.timedelta(days=14), end=confirmed_df.index[0].to_pydatetime() + datetime.timedelta(days=size-14), freq='D')
    avg_temp_daily_df = pd.DataFrame()
    temp = []
    size = int(size)
    printProgressBar(15, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    for i in range(size):
        temp.append(data.iloc[int(i*24):int(i*24)+24,0:3]['temp'].sum()/24)
    avg_temp_daily_df["Avg Temp"] = temp
    avg_temp_daily_df.index = date_rng[0:size]
    printProgressBar(20, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    temp = []
    for i in range(size):
        temp.append(data.iloc[int(i*24):int(i*24)+24,0:2]['rhum'].sum()/24)
    avg_temp_daily_df["Avg Humidity"] = temp
    printProgressBar(25, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    temp = []
    for i in range(size):
        temp.append(data.iloc[int(i*24):int(i*24)+24,2:3]['wspd'].sum()/24)
    avg_temp_daily_df["Avg Wind Speed"] = temp
    printProgressBar(30, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    end_rslt_df = pd.DataFrame()
    
    temp = []
    for i in range(avg_temp_daily_df.index[:-14].shape[0]):
        temp.append(avg_temp_daily_df.iloc[0+i:7+i,0:1]['Avg Temp'].sum()/7)
    end_rslt_df['14_Day_Week_Average_Temp'] = temp
    printProgressBar(60, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    temp = []
    for i in range(avg_temp_daily_df.index[:-14].shape[0]):
        temp.append(avg_temp_daily_df.iloc[0+i:7+i,0:4]['Avg Wind Speed'].sum()/7)
    end_rslt_df['14_Day_Week_Average_Wind'] = temp
    printProgressBar(80, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    temp = []
    for i in range(avg_temp_daily_df.index[:-14].shape[0]):
        temp.append(avg_temp_daily_df.iloc[0+i:7+i,0:4]['Avg Humidity'].sum()/7)
    printProgressBar(90, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 
    end_rslt_df['14_Day_Week_Average_Humidity'] = temp

    end_rslt_df.index = pd.date_range(start=data.index[13].to_pydatetime().date(), end=data.index[13].to_pydatetime().date() + datetime.timedelta(days=size-15), freq='D')
    end_rslt_df.index = pd.to_datetime(end_rslt_df.index)
    printProgressBar(100, 100, prefix = 'Gathering Weather data              ', suffix = 'Complete', length = 100) 

    temp_df = pd.DataFrame()
    
    temp_df['Cumulative Confirmed'] = confirmed_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Cumulative Confirmed'], how='outer')
    
    temp_df = pd.DataFrame()
    temp_df['Cumulative Deaths'] = deaths_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Cumulative Deaths'], how='outer')
    
    temp_df = pd.DataFrame()
    temp_df['Cumulative Recovered'] = recovered_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Cumulative Recovered'], how='outer')
    
    temp_df = pd.DataFrame()
    temp_df['Daily Confirmed'] = confirmed_daily_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Daily Confirmed'], how='outer')
    
    temp_df = pd.DataFrame()
    temp_df['Daily Deaths'] = deaths_daily_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Daily Deaths'], how='outer')
    
    temp_df = pd.DataFrame()
    temp_df['Daily Recovered'] = recovered_daily_df[cntry]
    end_rslt_df = end_rslt_df.join(temp_df['Daily Recovered'], how='outer')
    
    
    avg_temp_daily_df.index = pd.to_datetime(avg_temp_daily_df.index)
    end_rslt_df = end_rslt_df.join(avg_temp_daily_df, how='outer')

    printProgressBar(0, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    testing_full_df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/testing/covid-testing-all-observations.csv')

    testing_full_df = testing_full_df[testing_full_df.Entity != 'Canada - people tested']
    testing_full_df = testing_full_df[testing_full_df.Entity != 'Argentina - people tested']
    testing_full_df = testing_full_df[testing_full_df.Entity != 'Italy - people tested']
    testing_full_df = testing_full_df[testing_full_df.Entity != 'Poland - people tested']

    printProgressBar(15, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    testing_full_df = testing_full_df[testing_full_df["Entity"].isin(['{} - tests performed'.format(cntry), '{} - samples tested'.format(cntry),'{} - people tested'.format(cntry),'{} - units unclear'.format(cntry)])].fillna(0)
    testing_full_df.index = testing_full_df.Date
    testing_full_df = testing_full_df.drop(labels = ['Date','Source URL','Source label','Entity','ISO code','Notes'], axis = 1)
    testing_full_df.index = pd.to_datetime(testing_full_df.index)

    printProgressBar(30, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    testing_full_df['positive_percent'] = (end_rslt_df['Daily Confirmed'] / testing_full_df['Daily change in cumulative total'])*100
    testing_full_df['positive_percent'] = testing_full_df['positive_percent'].fillna(0)
    testing_full_df['positive_percent'] = testing_full_df['positive_percent'].replace(np.inf, np.nan).interpolate()

    printProgressBar(40, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    testing_full_df = testing_full_df.rename(columns={"Daily change in cumulative total": "Daily_Tests", "Cumulative total": "Cumulative_Tests_Total", "Cumulative total per thousand":"Cumulative_Total_Tests_Per_Thousand","Daily change in cumulative total per thousand":"Daily_Change_Cumulative_Total_Tests_Per_Thousand","7-day smoothed daily change":"7_Day_Smoothed_Test_Daily_Change","7-day smoothed daily change per thousand":"7_Day_Smoothed_Test_Daily_Change_Per_Thousand","Short-term positive rate":"Test_Short_Term_Positive_Rate","Short-term tests per case":"Short_Term_Tests_Per_Case"})

    resultsS = testing_full_df.join(end_rslt_df, how='outer').interpolate()
    resultsS["Daily_Test_Positivity_ewm_03"] = resultsS["positive_percent"].ewm(alpha=0.3).mean()
    resultsS["Daily_Test_Positivity_ewm_05"] = resultsS["positive_percent"].ewm(alpha=0.5).mean()
    resultsS["Daily_Test_Positivity_ewm_07"] = resultsS["positive_percent"].ewm(alpha=0.7).mean()

    printProgressBar(60, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    vaccinations_full_df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv')
    vaccinations_full_df = vaccinations_full_df[vaccinations_full_df["location"] == cntry].fillna(0)
    vaccinations_full_df.index = vaccinations_full_df.date
    vaccinations_full_df = vaccinations_full_df.drop(labels = ['date','iso_code','location','total_boosters', 'total_boosters_per_hundred'], axis = 1)
    vaccinations_full_df.index = pd.to_datetime(vaccinations_full_df.index)

    printProgressBar(90, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)

    vaccinations_full_df = vaccinations_full_df.join(resultsS, how='outer').interpolate()
    
    
    
    info_full_df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv')
    # temp = pd.unique(testing_full_df["location"])
    info_full_df = info_full_df[info_full_df["location"] == cntry].fillna(0)
    info_full_df.index = info_full_df.date
    info_full_df.index = pd.to_datetime(info_full_df.index)
    info_full_df = info_full_df.drop(labels = ['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths', 'new_deaths', 'new_deaths_smoothed', 'total_cases_per_million', 'new_cases_per_million', 'new_cases_smoothed_per_million', 'total_deaths_per_million', 'new_deaths_per_million', 'new_deaths_smoothed_per_million','new_tests', 'total_tests', 'total_tests_per_thousand', 'new_tests_per_thousand', 'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations', 'new_vaccinations_smoothed', 'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred', 'new_vaccinations_smoothed_per_million'], axis = 1)
#     info_full_df
    vaccinations_full_df = vaccinations_full_df.join(info_full_df, how='outer').interpolate()
    printProgressBar(100, 100, prefix = 'Tests and Vaccination data          ', suffix = 'Complete', length = 100)
    printProgressBar(0, 100, prefix = 'Gonvernment Strategy data           ', suffix = 'Complete', length = 100)
    
    vector = ["https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c1_school_closing.csv",
          "https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c2_workplace_closing.csv"
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c3_cancel_public_events.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c4_restrictions_on_gatherings.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c5_close_public_transport.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c6_stay_at_home_requirements.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c7_movementrestrictions.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/c8_internationaltravel.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/containment_health_index.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/e1_income_support.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/government_response_index.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h2_testing_policy.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/h6_facial_coverings.csv'
    , 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/timeseries/stringency_index.csv']

    vector_names = ["school_closing",
              "workplace_closing"
    , 'cancel_public_events'
    , 'restrictions_on_gatherings'
    , 'close_public_transport'
    , 'stay_at_home_requirements'
    , 'movementrestrictions'
    , 'internationaltravel'
    , 'containment_health_index'
    , 'income_support'
    , 'government_response_index'
    , 'testing_policy'
    , 'facial_coverings'
    , 'stringency_index_2']

    
    gov_index = pd.DataFrame()
    for i in range(len(vector)):
        new = pd.read_csv(vector[i])
        new = new[new["country_name"] == cntry]
        new  = new.drop(labels = ['country_name','country_code','Unnamed: 0'], axis = 1)
        new = new.T
        new.index = pd.to_datetime(new.index)
        new.columns = [vector_names[i]]
        gov_index = gov_index.join(new, how='outer').interpolate()
        printProgressBar(i*5, 100, prefix = 'Gonvernment Strategy data           ', suffix = 'Complete', length = 100)
    vaccinations_full_df = vaccinations_full_df.join(gov_index, how='outer').interpolate()
    
    printProgressBar(100, 100, prefix = 'Gonvernment Strategy data           ', suffix = 'Complete', length = 100)
    
    def MonthDayFormat(m):
        """
        Format a month or day number as a two-character, zero-padded string
        @params:
            m   - Required  : month or day number (Int)
        @returns:
            Str such as '03' or '12', suitable for a YYYY-MM-DD style date
        """
        # Always hand back a str. The previous version returned a padded str
        # for single-digit values but the raw int for everything above 9, so
        # the return type depended on the argument.
        return str(m).zfill(2)

    x = datetime.datetime.now()

    date_rng = pd.date_range(start='2020-03-22', end='{}-{}-{}'.format(x.year,MonthDayFormat(x.month),MonthDayFormat(x.day)), freq='D')
    links = []
    for i in range(date_rng.shape[0]):
        links.append('https://raw.githubusercontent.com/thepanacealab/covid19_twitter/master/dailies/{}-{}-{}/{}-{}-{}_top1000terms.csv'.format(date_rng[i].year,MonthDayFormat(date_rng[i].month),MonthDayFormat(date_rng[i].day),date_rng[i].year,MonthDayFormat(date_rng[i].month),MonthDayFormat(date_rng[i].day)))



    l = date_rng.shape[0]
    printProgressBar(0, l, prefix = 'Twitter data                        ', suffix = 'Complete', length = 100)    
    terms_usage_df = pd.DataFrame()
    temp1 = []
    temp2 = []
    temp3 = []
    temp4 = []
    temp5 = []
    temp6 = []
    temp7 = []
    temp8 = []
    temp9 = []
    temp10 = []
    for i in range(date_rng.shape[0]):
        try:
            if links[i] == "https://raw.githubusercontent.com/thepanacealab/covid19_twitter/master/dailies/2021-12-20/2021-12-20_top1000terms.csv":
                temp1.append(np.nan)
                temp2.append(np.nan)
                temp3.append(np.nan)
                temp4.append(np.nan)
                temp5.append(np.nan)
                temp6.append(np.nan)
                temp7.append(np.nan)
                temp8.append(np.nan)
                temp9.append(np.nan)
                temp10.append(np.nan)
                continue
            temp_df = pd.read_csv(links[i],header=None)
            printProgressBar(i + 1, l, prefix = 'Twitter data                        ', suffix = 'Complete', length = 100)
        except:
            printProgressBar(date_rng.shape[0], l, prefix = 'Twitter data                        ', suffix = 'Complete', length = 100)
            date_rng = date_rng[:i]
            break
        temp_df.index = temp_df[0]
        temp_df.drop([0], axis = 1, inplace = True)
        temp_df = temp_df[~temp_df.index.duplicated(keep='first')]#drop duplicat-indexed rows
        try:
            temp1.append(temp_df[1]['coronavirus'])
        except:
            temp1.append(0)
        try:
            temp2.append(temp_df[1]['covid'])
        except:
            temp2.append(0)
        try:
            temp3.append(temp_df[1]['covid19'])
        except:
            temp3.append(0)
        try:
            temp4.append(temp_df[1]['lockdown'])
        except:
            temp4.append(0)
        try:
            temp5.append(temp_df[1]['coronavirus'])
        except:
            temp5.append(0)
        try:
            temp6.append(temp_df[1]['cases'])
        except:
            temp6.append(0)
        try:
            temp7.append(temp_df[1]['mask'])
        except:
            temp7.append(0)
        try:
            temp8.append(temp_df[1]['deaths'])
        except:
            temp8.append(0)
        try:
            temp9.append(temp_df[1]['quarantine'])
        except:
            temp9.append(0)
        try:
            temp10.append(temp_df[1]['virus'])
        except:
            temp10.append(0)
    terms_usage_df['coronavirus'] = temp1
    terms_usage_df['covid'] = temp2
    terms_usage_df['covid19'] = temp3
    terms_usage_df['lockdown'] = temp4
    terms_usage_df['cases'] = temp5
    terms_usage_df['pandemic'] = temp6
    terms_usage_df['mask'] = temp7
    terms_usage_df['deaths'] = temp8
    terms_usage_df['quarantine'] = temp9
    terms_usage_df['virus'] = temp10
    terms_usage_df.index = date_rng
    terms_usage_df.index = pd.to_datetime(terms_usage_df.index)

    printProgressBar(0, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    printProgressBar(40, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['coronavirus'].sum()/7)
    terms_usage_df['14_Day_Week_Average_coronavirus'] = temp
    printProgressBar(50, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['lockdown'].sum()/7)
    terms_usage_df['14_Day_Week_Average_lockdown'] = temp
    printProgressBar(60, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['cases'].sum()/7)
    terms_usage_df['14_Day_Week_Average_cases'] = temp
    printProgressBar(70, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['mask'].sum()/7)
    terms_usage_df['14_Day_Week_Average_mask'] = temp
    printProgressBar(80, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['pandemic'].sum()/7)
    terms_usage_df['14_Day_Week_Average_pandemic'] = temp
    printProgressBar(90, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)

    temp = []
    for i in range(14):
        temp.append(np.nan)
    for i in range(terms_usage_df.index.shape[0]-14):
        temp.append(terms_usage_df.iloc[i:7+i,0:9]['deaths'].sum()/7)
    terms_usage_df['14_Day_Week_Average_deaths'] = temp
    printProgressBar(100, 100, prefix = 'Twitter data manipulation           ', suffix = 'Complete', length = 100)
    # terms_usage_df

    printProgressBar(0, 100, prefix = 'Merging Datasets                    ', suffix = 'Complete', length = 100)

    full_dataset = vaccinations_full_df.join(terms_usage_df, how='outer').interpolate()
    printProgressBar(100, 100, prefix = 'Merging Datasets                    ', suffix = 'Complete', length = 100)
    
    if(saveType != 'variable'):
        if(saveType == 'csv'):
            if(savePath == "/"):
                print('No path provided! Saving to working directory with name {}'.format(f'{cntry}.csv'))
                full_dataset.to_csv(f'{cntry}.csv', index = True, header=True)
            else:
                full_dataset.to_csv(savePath, index = True, header=True)
        elif(saveType == 'excel'):
            if(savePath == "/"):
                print('No path provided! Saving to working directory with name {}'.format(f'{cntry}.csv'))
                full_dataset.to_excel(f'{cntry}.csv')
            else:
                full_dataset.to_excel(savePath)
    print("---Execution finished in %s seconds ---" % (time.time() - start_time))
    return full_dataset

In [14]:
# Build the merged dataset for the United Kingdom. savePath and saveType keep
# their defaults, so the frame is returned without being written to disk.
UK_df = createCovidDataFrame("United Kingdom")

Confirmed, Deaths and Recovered data |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Preparing data                       |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Calculating Daily data               |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Gathering Weather data               |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Tests and Vaccination data           |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Gonvernment Strategy data            |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Twitter data                         |██████████████████████████