# Hello! This project is all about getting the newest reports about confirmed COVID-19 cases around the world.

## 1. Connecting to my local MySQL server

In [1]:
import pandas as pd
import sqlalchemy as sa

In [2]:
# Creating engine required later by Pandas' methods
engine = sa.create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}".format(user="root", pw="password123", db="ProjectDb"))

In [3]:
# Urls to daily refreshed datasets provided by ourworldindata.org
metricsUrl = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
populationUrl = "https://covid.ourworldindata.org/data/ecdc/locations.csv"

In [4]:
# Creating dataframe objects
df1 = pd.read_csv(metricsUrl)
df2 = pd.read_csv(populationUrl)

In [5]:
df1

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths
0,2019-12-31,Afghanistan,0,0,0,0
1,2020-01-01,Afghanistan,0,0,0,0
2,2020-01-02,Afghanistan,0,0,0,0
3,2020-01-03,Afghanistan,0,0,0,0
4,2020-01-04,Afghanistan,0,0,0,0
...,...,...,...,...,...,...
19600,2020-05-22,Zimbabwe,3,0,51,4
19601,2020-05-23,Zimbabwe,5,0,56,4
19602,2020-05-24,Zimbabwe,0,0,56,4
19603,2020-05-25,Zimbabwe,0,0,56,4


In [6]:
df1.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19605 entries, 0 to 19604
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   date          19605 non-null  object
 1   location      19605 non-null  object
 2   new_cases     19605 non-null  int64 
 3   new_deaths    19605 non-null  int64 
 4   total_cases   19605 non-null  int64 
 5   total_deaths  19605 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 919.1+ KB


In [7]:
df2

Unnamed: 0,countriesAndTerritories,location,continent,population_year,population
0,Afghanistan,Afghanistan,Asia,2020.0,38928341.0
1,Albania,Albania,Europe,2020.0,2877800.0
2,Algeria,Algeria,Africa,2020.0,43851043.0
3,Andorra,Andorra,Europe,2020.0,77265.0
4,Angola,Angola,Africa,2020.0,32866268.0
...,...,...,...,...,...
205,Vietnam,Vietnam,Asia,2020.0,97338583.0
206,Western_Sahara,Western Sahara,Africa,2020.0,597330.0
207,Yemen,Yemen,Asia,2020.0,29825968.0
208,Zambia,Zambia,Africa,2020.0,18383956.0


In [8]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210 entries, 0 to 209
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   countriesAndTerritories  210 non-null    object 
 1   location                 210 non-null    object 
 2   continent                209 non-null    object 
 3   population_year          209 non-null    float64
 4   population               209 non-null    float64
dtypes: float64(2), object(3)
memory usage: 8.3+ KB


In [None]:
# Creating new tables
df1.to_sql("metrics", con = engine)
df2.to_sql("population", con = engine)

In [10]:
engine.execute("SHOW TABLES")

<sqlalchemy.engine.result.ResultProxy at 0x118564b90>

In [11]:
# Confirming both tables are just fin
engine.table_names()

['metrics', 'population']

## 2. Reports

In [35]:
df = pd.read_sql("SELECT continent, sum(total_deaths) FROM metrics m, population p WHERE m.location = p.location GROUP BY p.continent ORDER BY 2 DESC ", con = engine)
df

Unnamed: 0,continent,sum(total_deaths)
0,Europe,6759686.0
1,North America,3409795.0
2,Asia,1159372.0
3,South America,599922.0
4,Africa,91936.0
5,Oceania,5691.0
6,,51.0
