# Analyzing COVID-19 Growth Across Countries

This study uses a modified [Ten Hundred Plot](https://www.youtube.com/watch?v=NP3ZdQwrL_Q) to visualize the growth of COVID-19 deaths across countries.

* Original plot developed by [Dr. Jerry Zhu](http://pages.cs.wisc.edu/~jerryzhu/) at the University of Wisconsin - Madison

* Data were collected from the [COVID-19 data repository](https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series) maintained by the Center for Systems Science and Engineering at Johns Hopkins University
    * This is a time series data set that stores the number of COVID deaths each day for each country from January 22, 2020 to present
    * The extract used for this study contains data through July 24, 2020

In [27]:
from math import sqrt
import numpy as np
import copy
from clustering import *

In [28]:
# Load the global deaths file and hand every data row to add_row, keyed by
# the value in column 1 (the country).
import csv

d_dict = {}
# newline='' lets the csv module deal with line endings itself.
with open('time_series_covid19_deaths_global.csv', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row

    # csv.reader keeps a quoted field that contains commas in one piece;
    # a plain split(',') would cut it apart and shift every later column,
    # which corrupts both the country key and the values passed on.
    for row in reader:
        if not row:
            continue  # ignore completely blank lines

        c = row[1] # country

        # add_row comes from the clustering star-import.
        # NOTE(review): 4 is presumably the index of the first data column;
        # confirm against add_row's definition.
        add_row(c, d_dict, row, 4)

In [29]:
# Fold the US deaths file into the same d_dict, keyed by column 7.
# The `with` block already closes the file, so no explicit close is needed.
# NOTE(review): rows are split on every comma, so a quoted field that itself
# contains commas would shift the indices used below (7 and 11). Confirm the
# file layout, or parse with the csv module once add_row's expectations are
# known.
# NOTE(review): if d_dict already holds an entry under the same key from the
# global file, these rows are presumably added on top of it; confirm that
# this is intended.
with open('time_series_covid19_deaths_US.csv') as f:
    next(f, None)  # skip the header row
    for line in f:
        l = line.strip('\n').split(',')
        c = l[7]
        add_row(c, d_dict, l, 11)

In [30]:
# Replace every stored series with its values rounded to the nearest int.
for c in d_dict:
    d_dict[c] = list(map(int, map(round, d_dict[c])))

In [32]:
# Raw data to plot: the full series stored in d_dict under the 'US' key
print(d_dict['US'])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 12, 14, 22, 24, 28, 34, 42, 44, 56, 66, 86, 104, 116, 140, 194, 262, 376, 530, 724, 912, 1202, 1568, 2042, 2666, 3492, 4600, 5868, 7122, 8762, 11210, 13692, 16864, 19494, 22062, 24940, 28276, 32894, 37126, 41276, 45462, 49554, 53096, 56752, 61560, 66658, 70884, 76112, 80884, 83342, 86932, 91846, 96624, 101498, 105734, 109034, 111620, 114470, 118908, 123920, 128512, 132250, 135362, 137554, 140172, 144786, 149702, 153540, 156994, 159952, 161710, 163736, 166966, 170468, 174016, 177338, 179784, 181288, 182828, 185930, 189066, 191512, 194050, 196292, 197508, 198534, 199904, 202922, 205286, 207618, 209556, 210728, 212272, 214338, 216318, 218336, 220276, 221636, 222538, 223548, 225428, 227262, 229024, 230668, 232168, 232764, 233546, 235224, 236726, 238134, 239478, 240698, 241208, 242020, 243694, 245208, 250052, 251262, 252240, 252722, 253422, 254864, 256210, 257606, 258884,

In [31]:
# Raw data to plot: the full series stored in d_dict under the 'Canada' key
print(d_dict['Canada'])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 4, 5, 8, 9, 12, 19, 21, 25, 26, 30, 38, 54, 61, 64, 80, 101, 109, 139, 179, 218, 259, 339, 375, 407, 503, 557, 654, 714, 781, 901, 1008, 1259, 1356, 1401, 1565, 1727, 1910, 2078, 2241, 2402, 2571, 2687, 2841, 2983, 3155, 3310, 3537, 3684, 3795, 4003, 4190, 4366, 4541, 4697, 4823, 4991, 5115, 5300, 5425, 5592, 5679, 5800, 5903, 5960, 6028, 6150, 6267, 6360, 6466, 6534, 6655, 6753, 6876, 6982, 7063, 7159, 7374, 7404, 7478, 7579, 7717, 7778, 7850, 7877, 7910, 7970, 8038, 8071, 8125, 8183, 8218, 8228, 8271, 8312, 8361, 8408, 8466, 8482, 8494, 8512, 8544, 8567, 8571, 8576, 8582, 8628, 8650, 8678, 8700, 8722, 8732, 8739, 8748, 8765, 8786, 8797, 8811, 8818, 8829, 8836, 8845, 8857, 8875, 8884, 8892, 8896, 8902, 8908, 8913, 8919, 8923]


In [33]:
# Mortality increase: difference between each entry and the one before it,
# so each result list is one element shorter than its source series.
us_diff = [x - y for x, y in zip(d_dict['US'][1:], d_dict['US'])]
print(us_diff)
canada_diff = [x - y for x, y in zip(d_dict['Canada'][1:], d_dict['Canada'])]
# Show the Canada differences here (the US list was already printed above).
print(canada_diff)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 2, 8, 2, 4, 6, 8, 2, 12, 10, 20, 18, 12, 24, 54, 68, 114, 154, 194, 188, 290, 366, 474, 624, 826, 1108, 1268, 1254, 1640, 2448, 2482, 3172, 2630, 2568, 2878, 3336, 4618, 4232, 4150, 4186, 4092, 3542, 3656, 4808, 5098, 4226, 5228, 4772, 2458, 3590, 4914, 4778, 4874, 4236, 3300, 2586, 2850, 4438, 5012, 4592, 3738, 3112, 2192, 2618, 4614, 4916, 3838, 3454, 2958, 1758, 2026, 3230, 3502, 3548, 3322, 2446, 1504, 1540, 3102, 3136, 2446, 2538, 2242, 1216, 1026, 1370, 3018, 2364, 2332, 1938, 1172, 1544, 2066, 1980, 2018, 1940, 1360, 902, 1010, 1880, 1834, 1762, 1644, 1500, 596, 782, 1678, 1502, 1408, 1344, 1220, 510, 812, 1674, 1514, 4844, 1210, 978, 482, 700, 1442, 1346, 1396, 1278, 494, 542, 650, 2390, 1640, 1980, 1622, 1352, 856, 722, 1800, 1898, 1886, 1816, 1706, 830, 982, 2164, 2390, 2228, 2260]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [34]:
def _first_at_or_below(values, threshold, start):
    """Return the first index >= start where values[index] <= threshold.

    Falls back to the last index of ``values`` when no entry qualifies.
    """
    for idx in range(start, len(values)):
        if values[idx] <= threshold:
            return idx
    return len(values) - 1


def days_to(country_data, divisor1, divisor2, divisor3):
    """Count the steps back from the latest value to three lower thresholds.

    The thresholds are the most recent value floor-divided by ``divisor1``,
    ``divisor2`` and ``divisor3``. The series is walked from its last entry
    towards its first, and the function reports how many entries back the
    first threshold is reached, how many further entries until the second,
    and how many further entries until the third.

    Args:
        country_data: Sequence of numbers, oldest entry first.
        divisor1: Divisor for the first threshold.
        divisor2: Divisor for the second threshold.
        divisor3: Divisor for the third threshold.

    Returns:
        A tuple ``(days1, days2, days3)``.

        * ``(0, 0, 0)`` when the series is empty or its latest value is 0.
        * When the first threshold is 0, the entries left after ``days1``
          are split evenly between ``days2`` and ``days3``.
        * When the second threshold is 0, ``days3`` is the number of entries
          left after ``days1 + days2``.
        * A threshold that is never reached is treated as reached at the
          oldest entry, instead of failing on an unassigned result.
    """
    total = len(country_data)
    if total == 0:
        return 0, 0, 0

    newest_first = country_data[::-1]
    most_recent = newest_first[0]

    if most_recent == 0:
        return 0, 0, 0

    value1 = most_recent // divisor1
    value2 = most_recent // divisor2
    value3 = most_recent // divisor3

    # Each search resumes where the previous one stopped, so the three
    # counts are never negative.
    idx1 = _first_at_or_below(newest_first, value1, 0)
    days1 = idx1
    if value1 == 0:
        remaining_values = (total - (days1 + 1)) // 2
        return days1, remaining_values, remaining_values

    idx2 = _first_at_or_below(newest_first, value2, idx1)
    days2 = idx2 - days1
    if value2 == 0:
        return days1, days2, total - (days1 + days2 + 1)

    idx3 = _first_at_or_below(newest_first, value3, idx2)
    days3 = idx3 - (days1 + days2)

    return days1, days2, days3

In [35]:
# Feature computation: for every key in d_dict, store the three counts
# returned by days_to (divisors 2, 4 and 8) as a list under 'days'.
f_dict = {}

for c, country_data in d_dict.items():
    f_dict[c] = {'days': [*days_to(country_data, 2, 4, 8)]}

print(f_dict['US'])

{'days': [80, 19, 9]}
