In [1]:
import pandas as pd
import json

# Config

In [2]:
# Destination file for the per-month summary produced at the end of the notebook.
save_to = 'corona_average_per_month.json'
# Path of the input CSV. NOTE: the loading cell later rebinds this same name
# to the DataFrame it reads.
corona_data = 'owid-covid-data.csv'
# Let pandas render up to 255 rows before it truncates a displayed frame.
pd.options.display.max_rows = 255

# load data

In [3]:
# The config cell stores the CSV path under `corona_data`, and this cell then
# rebinds that name to the loaded DataFrame. Remember the path under its own
# name the first time through, so re-running this cell does not try to call
# read_csv on a DataFrame.
if isinstance(corona_data, str):
    corona_csv_path = corona_data

# header=0: the first line of the file supplies the column names.
corona_data = pd.read_csv(corona_csv_path, header=0)

# Preview the first rows as the cell output.
corona_data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,,0.0,0.0,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,,0.0,0.0,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,,0.0,0.0,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,,0.0,0.0,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,,0.0,0.0,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498


# columns

In [4]:
# Show every column name of the loaded frame as a plain Python list.
corona_data.columns.tolist()

['iso_code',
 'continent',
 'location',
 'date',
 'total_cases',
 'new_cases',
 'new_cases_smoothed',
 'total_deaths',
 'new_deaths',
 'new_deaths_smoothed',
 'total_cases_per_million',
 'new_cases_per_million',
 'new_cases_smoothed_per_million',
 'total_deaths_per_million',
 'new_deaths_per_million',
 'new_deaths_smoothed_per_million',
 'new_tests',
 'total_tests',
 'total_tests_per_thousand',
 'new_tests_per_thousand',
 'new_tests_smoothed',
 'new_tests_smoothed_per_thousand',
 'tests_per_case',
 'positive_rate',
 'tests_units',
 'stringency_index',
 'population',
 'population_density',
 'median_age',
 'aged_65_older',
 'aged_70_older',
 'gdp_per_capita',
 'extreme_poverty',
 'cardiovasc_death_rate',
 'diabetes_prevalence',
 'female_smokers',
 'male_smokers',
 'handwashing_facilities',
 'hospital_beds_per_thousand',
 'life_expectancy',
 'human_development_index']

# get Lebanon data

In [5]:
# Keep only the rows whose ISO code is 'LBN' (Lebanon), as an independent copy
# with a fresh 0-based index.
corona_lb = (
    corona_data
    .loc[corona_data['iso_code'] == 'LBN']
    .copy()
    .reset_index(drop=True)
)

# split date

In [6]:
# Break the dash-separated `date` string into three new string columns.
# The parts stay strings (e.g. '02'), they are not converted to integers.
corona_lb[['year', 'month', 'day']] = (
    corona_lb['date'].str.split(pat='-', expand=True)
)

# remove 2019 data and January 2020 data (data before corona)

In [7]:
# Keep rows from 2020 whose month sorts after '01'. `year` and `month` are
# strings here, so both comparisons are string comparisons; this orders months
# correctly only because they are zero-padded ('02' ... '12').
corona_lb = (
    corona_lb
    .loc[corona_lb['year'].eq('2020') & corona_lb['month'].gt('01')]
    .copy()
    .reset_index(drop=True)
)

# clean the memory

In [8]:
# Drop the notebook's reference to the full dataset now that the Lebanon subset
# (`corona_lb`) has been copied out of it. The previous code assigned an empty
# list to a new, unrelated name (`corona`), which released nothing.
# Any cell that reads `corona_data` must run before this one.
corona_data = None

# display data

In [9]:
# Display the filtered frame as the cell output (nothing is assigned here).
corona_lb

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,year,month,day
0,LBN,Asia,Lebanon,2020-02-01,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,1
1,LBN,Asia,Lebanon,2020-02-02,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,2
2,LBN,Asia,Lebanon,2020-02-03,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,3
3,LBN,Asia,Lebanon,2020-02-04,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,4
4,LBN,Asia,Lebanon,2020-02-05,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,5
5,LBN,Asia,Lebanon,2020-02-06,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,6
6,LBN,Asia,Lebanon,2020-02-07,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,7
7,LBN,Asia,Lebanon,2020-02-08,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,8
8,LBN,Asia,Lebanon,2020-02-09,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,9
9,LBN,Asia,Lebanon,2020-02-10,0.0,0.0,0.0,0.0,0.0,0.0,...,12.71,26.9,40.7,,2.9,78.93,0.757,2020,2,10


# confirm years

In [10]:
# Sanity check: list the distinct year strings left after filtering.
corona_lb['year'].unique()

array(['2020'], dtype=object)

# month data

In [11]:
# List the distinct month strings that remain in the data.
corona_lb['month'].unique()

array(['02', '03', '04', '05', '06', '07', '08', '09'], dtype=object)

# group by month and get average

In [13]:
# Banner printed ahead of the summary dict that this cell displays.
print('Cases Data over months & days')
print('******************************')

# Running total of new cases over the months processed so far.
running_total = 0
average_month_data = {}

# Iterate over the `new_cases` values of each month group; groupby sorts the
# group keys by default, so months are visited in ascending key order.
for month_key, new_cases in corona_lb.groupby('month')['new_cases']:
    # int() truncates toward zero, so the mean loses its fractional part.
    mean_cases = int(new_cases.mean())
    total_cases = int(new_cases.sum())
    running_total += total_cases

    # Key spellings are part of the saved JSON and are kept exactly as-is.
    average_month_data[month_key] = {
        'average_cases': mean_cases,
        'month_cases': total_cases,
        'Comulative Cases': running_total,
    }

# Show the finished mapping as the cell output.
average_month_data

Cases Data over months & days
******************************


{'02': {'average_cases': 0, 'month_cases': 3, 'Comulative Cases': 3},
 '03': {'average_cases': 16, 'month_cases': 443, 'Comulative Cases': 446},
 '04': {'average_cases': 9, 'month_cases': 275, 'Comulative Cases': 721},
 '05': {'average_cases': 15, 'month_cases': 470, 'Comulative Cases': 1191},
 '06': {'average_cases': 18, 'month_cases': 554, 'Comulative Cases': 1745},
 '07': {'average_cases': 83, 'month_cases': 2589, 'Comulative Cases': 4334},
 '08': {'average_cases': 404, 'month_cases': 12536, 'Comulative Cases': 16870},
 '09': {'average_cases': 500, 'month_cases': 5007, 'Comulative Cases': 21877}}

# save data

In [15]:
# Serialise the per-month summary as 2-space-indented JSON into `save_to`,
# overwriting any existing file at that path.
with open(save_to, 'w') as out_file:
    json.dump(average_month_data, out_file, indent=2)