In [13]:
import pandas as pd
import re
from datetime import date, timedelta
import copy

In [14]:
# Input tables: the municipality table (provides X, Y and nam) and the
# id -> municipality-name map (only the 'id' and 'komuna' columns are read).
df_mun = pd.read_csv('../data/kosovo-municipalities.csv')
df_id = pd.read_csv(
    '../data/id_map.csv',
    usecols=['id', 'komuna'],
    encoding="ISO-8859-1",
)

# Empty frame that fixes the column layout of the per-municipality template.
cols = ['Komuna', 'Lat', 'Long']
df = pd.DataFrame(columns=cols)

In [15]:
# Generate template dataframe: one row per municipality name in the id map.
df['Komuna'] = df_id['komuna'].values

# Join key with the diacritics folded (ë -> e, ç -> c); presumably `nam` in the
# coordinates table is spelled without them -- confirm against the CSV.
# The result is assigned back instead of calling `.replace(..., inplace=True)`
# on the selected column: under pandas Copy-on-Write that chained inplace call
# only changes a temporary and the substitution is silently lost.
df['Komuna_raw'] = df['Komuna'].replace({"ë": "e", "ç": "c"}, regex=True)

# Merge with coordinates after you have everything. A left join keeps every
# municipality even when no matching `nam` row exists (Lat/Long stay NaN).
df = pd.merge(df, df_mun[["X", "Y", "nam"]], how="left", left_on="Komuna_raw", right_on="nam")
df['Lat'] = df['Y'].values
df['Long'] = df['X'].values

# Drop the helper columns so only Komuna / Lat / Long remain.
df = df.drop(columns=['X', 'Y', 'nam', 'Komuna_raw'])

# Fixes for some text placements in the map: nudge the latitude of three rows.
# NOTE(review): rows are addressed by position-like labels (2, 23, 14), so these
# depend on the row order of id_map.csv -- confirm if that file changes.
df.at[2, 'Lat'] = 42.403435 + 0.05
df.at[23, 'Lat'] = 42.338611 - 0.04
df.at[14, 'Lat'] = 42.534701 - 0.03


In [20]:
# Names of the per-municipality series; one CSV is written for each.
timeseries = ['Confirmed', 'Deaths', 'Recovered']

sdate = date(2020, 3, 13)   # start date 2020-03-13 (inclusive)
edate = date(2020, 8, 12)   # end date 2020-08-12 (inclusive)
delta = edate - sdate       # as timedelta

# One zero-filled column per calendar day, created in a single constructor call.
# Inserting the ~150 columns one at a time fragments the frame and makes pandas
# emit a PerformanceWarning.
day_labels = [f'{sdate + timedelta(days=offset)}' for offset in range(delta.days + 1)]
zero_counts = pd.DataFrame(0, index=df.index, columns=day_labels)

# The loop variable gets its own name so the `timeseries` list is not rebound
# to a string while it is being iterated.
for series_name in timeseries:
    # Template columns (Komuna, Lat, Long) followed by the zeroed day columns.
    df_time_series = pd.concat([df, zero_counts], axis=1)
    df_time_series.to_csv(f'../data/time_series/time_series_covid19_{series_name}.csv', index=False)

## Summary time_series

In [56]:
# total
# Country-wide cumulative summary: one row per metric, one column per day.
index = ['Confirmed', 'Recovered', 'Deaths', 'Tests']

# Zero-filled frame covering sdate..edate (inclusive), built in one call rather
# than by inserting each day column separately (avoids frame fragmentation).
day_labels = [f'{sdate + timedelta(days=offset)}' for offset in range(delta.days + 1)]
df_time_series = pd.DataFrame(0, index=index, columns=day_labels)

# Fill in data. Taken from wiki
# The last entry of each list is the value written into the final date column.
Confirmed = [2, 5, 9, 15, 19, 20, 21, 24, 30, 33, 61, 63, 71, 86, 88, 91, 94, 106, 112, 125, 126, 132, 140, 145, 165, 184, 224, 227, 250, 283, 362, 377, 387, 423, 449, 480, 510, 561, 598, 604, 630, 669, 703, 731, 763, 780, 790, 799, 806, 813, 823, 851, 855, 856, 860, 861, 862, 870, 884, 895, 919, 927, 945, 955, 978, 985, 988, 989, 1003, 1004, 1025, 1032, 1032, 1038, 1047, 1048, 1052, 1064, 1070, 1083, 1110, 1123, 1142, 1147, 1158, 1194, 1234, 1263, 1269, 1298, 1326, 1384, 1437, 1486, 1615, 1756, 1833, 1916, 1998, 2073, 2169, 2216, 2268, 2363, 2432, 2494, 2590, 2677, 2799, 2878, 2991, 3064, 3178, 3356, 3508, 3703, 3886, 4100, 4307, 4512, 4715, 4931, 5118, 5118, 5237, 5369, 5472, 5617, 5735, 5877, 6045, 6286, 6467, 6680, 6917, 7137, 7413, 7653, 7846, 8104, 8330, 8554, 8799, 9049, 9274, 9492, 9688, 9869, 10059, 10247, 10419, 10590, 0]
Recovered = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 6, 10, 10, 16, 23, 23, 24, 30, 37, 38, 52, 58, 59, 63, 66, 71, 79, 84, 93, 102, 123, 128, 138 ,159,162,165,166, 201, 232, 249, 271, 298, 336, 381, 403, 490, 533, 561, 622, 653, 655, 657, 671, 671, 690, 691, 713, 736,754,769,772,772,782,785,789,791,794,801,820,829,841,843,843,847,871,874,876,884,890,912,912,913,921,921,928,953,963,968,968,973,980,1018,1047,1069, 1108,1171,1246,1307,1394,1425,1506,1577,1644, 1707, 1824, 1874, 1902, 1946, 2003, 2063, 2095, 2156, 2227, 2267, 2370,2370,2462,2545,2640,2811,2968,3069,3226,3369,3505,3614,3753,3874,4027,4129,4267,4463,4578,4730,4863,4989,5190,5346,5480,5605,5802,5944,6058,6248, 0]
Deaths = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 6, 7, 7, 7, 7, 8, 8, 9, 11, 12, 12, 12, 15, 16, 18, 18, 19, 19, 20, 21, 22, 22, 22, 22, 22, 25, 26, 26, 26, 27, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 34, 34, 34, 35, 36, 38, 39, 40, 42, 44, 48, 49, 50, 51, 54, 55, 58, 66, 75, 79, 82, 86, 94, 97, 101, 102, 108, 108, 112, 118, 124, 130, 135, 139, 144, 150, 158, 164, 169, 177, 185, 192, 196, 212, 227, 236, 249, 256, 269, 284, 300, 303, 315, 327, 341, 354, 0]

# TODO: Fix tests array
# Cumulative tests are derived here from the daily test counts so this cell no
# longer needs a `cum_tests` variable left behind by another cell (which raised
# NameError on a fresh kernel).
daily_tests = [26,99,27,24,21,35,21,77,30,130,113,42,34,114,108,46,45,82,116,113,120,105,266,93,124,259,218,117,189,130,250,154,137,174,195,209,242, 295,353,191,172,255,263,292,297, 226, 250, 225, 243, 221,231,171, 254,220,324,194,243,177,200,196,257,255,300,236,270,202,200,211,208,145,267,306,138,243,220,267,332,311,220,113,172,193,253,294,297,201,296,231,294,280,228,227,313,265,385,397,437,457,456,416,439,314,302,311,281,205,207,202,231,210,266,225,304,346,324,429,365,413,401,388,419,493,424,0,420,486,459,562,489,485,415,419,420,494,555,472,404,401,449,480,459,434,468,454,477,461,475,463,443,462,506,463,0]
cum_tests = pd.Series(daily_tests).cumsum().tolist()

# Each list is written right-aligned: the negative slice targets the last
# len(list) day columns, so any earlier columns keep their initial 0.
df_time_series.iloc[0, -len(Confirmed):] = Confirmed
df_time_series.iloc[1, -len(Recovered):] = Recovered
df_time_series.iloc[2, -len(Deaths):] = Deaths
df_time_series.iloc[3, -len(cum_tests):] = cum_tests

df_time_series.to_csv('../data/time_series/time_series_covid19_cumulative.csv')

In [57]:
# daily
# Country-wide daily summary: one row per metric, one column per day.
index = ['Confirmed', 'Recovered', 'Deaths', 'Tests']

# Zero-filled frame covering sdate..edate (inclusive), built in one call rather
# than by inserting each day column separately (avoids frame fragmentation).
day_labels = [f'{sdate + timedelta(days=offset)}' for offset in range(delta.days + 1)]
df_time_series = pd.DataFrame(0, index=index, columns=day_labels)

# Fill in data. Taken from Wiki
# 13 march
Confirmed = [2, 3, 4, 6, 4, 1, 1, 3, 6, 3, 28, 2, 8, 15, 2, 3, 3, 12, 6, 13, 1, 6, 8, 5, 20, 19, 40, 3, 23, 33, 79, 15, 10, 36, 26, 31, 30, 51, 37, 6, 26, 39, 34, 28, 32, 17, 10, 9, 7, 7, 10, 11, 4, 1, 4, 1, 1, 8, 14, 11, 24, 8, 18, 10, 23, 7, 3, 1, 14, 1, 21, 7, 0, 6, 9, 1, 4, 12, 6, 13, 27, 13, 19, 5, 11, 36, 40, 29, 6, 29, 28, 58, 53, 49, 129, 141, 77, 83, 82, 75, 96, 47, 52, 95, 69, 62, 96, 87, 122, 79, 113, 73, 114, 178, 152, 195, 183, 214, 207, 205, 203, 216, 187, 0, 119, 132, 103, 145, 118, 142, 168, 241, 181, 213, 237, 220, 276, 239, 194, 258, 226, 224, 245, 250, 225, 218, 196, 181, 190, 188, 172, 171,0]
Recovered = [1, 0, 0, 0, 0, 0, 5, 4, 0, 6, 7, 0, 1, 6, 7, 1, 14, 6, 1, 4, 3, 5, 8, 5, 9, 9, 21, 5, 10, 21, 3, 3, 1, 35, 31, 17, 22, 27, 38, 18, 22, 87, 43, 28, 61, 31, 2, 2, 14, 0, 19, 1, 22, 23, 18, 15, 3, 0, 10, 3, 4, 2, 3, 7, 19, 9, 12, 2, 0, 4, 24, 3, 2, 8, 6, 22, 0, 1, 8, 0, 7, 25, 10, 5, 0, 5, 7, 38, 29, 22, 39, 63, 75, 61, 87, 31, 81, 71, 67, 63, 117, 50, 28, 44, 57, 60, 32, 61, 71, 40, 103, 0, 92, 83, 95, 171, 157, 101, 157, 143, 136, 109, 139, 121, 153, 102, 138, 196, 115, 152, 133, 126, 201, 156, 134, 125, 197, 142, 114, 190,0]
Deaths = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 0, 1, 0, 1, 2, 1, 0, 0, 3, 3, 0, 1, 0, 1, 2, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 2, 1, 1, 2, 2, 4, 1, 1, 1, 3, 1, 3, 8, 9, 4, 3, 4, 8, 3, 4, 1, 6, 0, 4, 6, 6, 6, 5, 4, 5, 6, 8, 6, 5, 8, 8, 7, 4, 16, 15, 9, 13, 7, 13, 15, 16, 3, 12, 12, 14, 13,0]
Tests = [26,99,27,24,21,35,21,77,30,130,113,42,34,114,108,46,45,82,116,113,120,105,266,93,124,259,218,117,189,130,250,154,137,174,195,209,242, 295,353,191,172,255,263,292,297, 226, 250, 225, 243, 221,231,171, 254,220,324,194,243,177,200,196,257,255,300,236,270,202,200,211,208,145,267,306,138,243,220,267,332,311,220,113,172,193,253,294,297,201,296,231,294,280,228,227,313,265,385,397,437,457,456,416,439,314,302,311,281,205,207,202,231,210,266,225,304,346,324,429,365,413,401,388,419,493,424,0,420,486,459,562,489,485,415,419,420,494,555,472,404,401,449,480,459,434,468,454,477,461,475,463,443,462,506,463,0]

# Each list is written right-aligned: the negative slice targets the last
# len(list) day columns, so a list shorter than the date range leaves the
# earliest columns at their initial 0.
df_time_series.iloc[0, -len(Confirmed):] = Confirmed
df_time_series.iloc[1, -len(Recovered):] = Recovered
df_time_series.iloc[2, -len(Deaths):] = Deaths
df_time_series.iloc[3, -len(Tests):] = Tests

df_time_series.to_csv('../data/time_series/time_series_covid19_daily.csv')

In [58]:
# Display the summary frame currently bound to df_time_series.
df_time_series

Unnamed: 0,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,...,2020-08-03,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08,2020-08-09,2020-08-10,2020-08-11,2020-08-12
Confirmed,2,3,4,6,4,1,1,3,6,3,...,250,225,218,196,181,190,188,172,171,0
Recovered,0,0,0,0,0,0,0,0,0,0,...,126,201,156,134,125,197,142,114,190,0
Deaths,0,0,0,0,0,0,0,0,0,1,...,7,13,15,16,3,12,12,14,13,0
Tests,26,99,27,24,21,35,21,77,30,130,...,454,477,461,475,463,443,462,506,463,0


In [55]:
from numpy import cumsum

# Running total of the daily test counts held in `Tests`; that list has to be
# defined already when this cell is executed.
cum_tests = [*cumsum(Tests)]