# Covid 19 Curve Slope 
- **Created by Andrés Segura Tinoco**
- **Created on May 19, 2020**

## 1. Read C19 data by country

In [1]:
# # Import libraries
import pandas as pd
import numpy as np
import datetime

In [2]:
# Reading data from CSV file
dataURL = "../data/historical_data.csv"
column_list = ["country", "region", "subregion", "date", "total_cases", "total_deaths", "active_cases", "total_tests"]
raw_data = pd.read_csv(dataURL, usecols = lambda column : column in column_list)
raw_data

Unnamed: 0,country,region,subregion,date,total_cases,total_deaths,active_cases,total_tests
0,China,Asia,Eastern Asia,01/22/2020,571,17,554,0
1,Japan,Asia,Eastern Asia,01/22/2020,2,0,2,0
2,China,Asia,Eastern Asia,01/23/2020,830,25,771,0
3,Japan,Asia,Eastern Asia,01/23/2020,2,0,2,0
4,China,Asia,Eastern Asia,01/24/2020,1287,41,1208,0
...,...,...,...,...,...,...,...,...
20210,Vietnam,Asia,South-Eastern Asia,05/19/2020,324,0,61,275000
20211,Western Sahara,Africa,Northern Africa,05/19/2020,6,0,0,0
20212,Yemen,Asia,Western Asia,05/19/2020,130,20,109,120
20213,Zambia,Africa,Eastern Africa,05/19/2020,772,7,573,18519


In [3]:
raw_data["date"] = pd.to_datetime(raw_data["date"])
raw_data.dtypes

country                 object
region                  object
subregion               object
date            datetime64[ns]
total_cases              int64
total_deaths             int64
active_cases             int64
total_tests              int64
dtype: object

## 2. Calculate Curve Slope by Country

In [4]:
# Filtering data
country_list = raw_data["country"].unique()
var_name = "total_deaths"
last_days = 15
top_date = datetime.datetime.today() - datetime.timedelta(days=last_days)
top_date

datetime.datetime(2020, 5, 4, 14, 58, 31, 636887)

In [5]:
# Calculate the curve slope of each country
curve_slope = {}

for country in country_list:
    # Filter data by country
    country_data = raw_data[(raw_data["country"] == country) & (raw_data["date"] >= top_date)]
    
    # Get x and y values
    y_values = np.array(country_data[var_name])
    x_values = np.arange(0, len(y_values))
    
    # Calculate curve slope
    X = x_values - x_values.mean()
    Y = y_values - y_values.mean()
    slope = (X.dot(Y)) / (X.dot(X))
    
    curve_slope[country] = slope

## 3. Select Top N

In [6]:
# Filter countries
top_country = 10
output = sorted(curve_slope, key=curve_slope.get, reverse=True)[:top_country]

In [7]:
# Show Top 10 countries with the highest Curve Slope
print('country, curve_slope')
for country in output:
    print('%s, %s' % (country, curve_slope[country]))

country, curve_slope
USA, 1442.9392857142857
Brazil, 692.5857142857143
UK, 409.26785714285717
Mexico, 218.12857142857143
France, 199.6392857142857
Italy, 198.36428571428573
Spain, 156.87857142857143
Canada, 136.25714285714287
India, 113.625
Ecuador, 102.01428571428572


<hr>
<p><a href="https://github.com/ansegura7/WebScraping_Covid19">« Home</a></p>