In [1]:
import pandas as pd
from datetime import datetime as dt
from functools import reduce
import calendar
import datetime
import plotly.express as px
import dash_daq as daq

In [51]:
cls=['#006400','#98FB98','#7FFF00','#00FF00','#32CD32','#00FF7F','#3CB371','#2E8B57','#228B22','#008000']

In [38]:
covid_data = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
"""Offline Version."""
# covid_data = 'assets/covid_data.csv'  # offline csv version
# covid_data = 'temp_assets/covid.csv'  # offline csv version
df_covid_data_v1 = pd.read_csv(covid_data)
df_covid_data_v1['date'] = pd.to_datetime(df_covid_data_v1['date'])  # converting date column to type date
df_covid_data = df_covid_data_v1.sort_values('date').groupby(['continent','location']).last().reset_index()

df_covid_data.to_csv('temp_assets/covid.csv',index=False)

In [44]:
covid_data = 'temp_assets/covid.csv'
df_covid_data_v1 = pd.read_csv(covid_data)
df_covid_data_v1.columns

Index(['continent', 'location', 'iso_code', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [71]:
def func_continent(a='Africa'):
    df_covid_data = df_covid_data_v1.sort_values('date').groupby(['continent','location']).last().reset_index()
    df_covid_cont = df_covid_data['continent'] == a
    df_cont_new = df_covid_data[df_covid_cont]
    if a == 'Africa':
        df_cont_new.at[13,'location'] = 'DRC'
    elif a == 'Asia':
        df_cont_new.at[101,'location'] = 'UAE'
    else: df_cont_new
    return  df_cont_new[['date','location','total_cases','total_deaths','icu_patients','total_vaccinations','people_vaccinated','hosp_patients',
                          'new_tests_per_thousand','positive_rate','people_vaccinated_per_hundred']]
func_continent('Asia')

Unnamed: 0,date,location,total_cases,total_deaths,icu_patients,total_vaccinations,people_vaccinated,hosp_patients,new_tests_per_thousand,positive_rate,people_vaccinated_per_hundred
56,2023-02-04,Afghanistan,208621.0,7894.0,,12684950.0,11684680.0,,,0.1714,28.41
57,2023-02-04,Armenia,446008.0,8717.0,,2150112.0,1129669.0,,0.526,0.003,40.63
58,2023-02-04,Azerbaijan,828064.0,10095.0,,13857110.0,5373253.0,,0.666,0.017,51.88
59,2023-02-04,Bahrain,700835.0,1544.0,,3476633.0,1241174.0,,4.931,0.2771,84.31
60,2023-02-04,Bangladesh,2037588.0,29443.0,,351956000.0,150956600.0,,0.05,0.2761,88.18
61,2023-02-04,Bhutan,62605.0,21.0,,2011426.0,699116.0,,16.733,0.036,89.35
62,2023-02-04,Brunei,276067.0,225.0,,1287323.0,450788.0,,0.667,,100.4
63,2023-02-04,Cambodia,138697.0,3056.0,,45612650.0,15254470.0,,0.201,0.0,90.97
64,2023-02-04,China,2023904.0,87468.0,,3490419000.0,1310267000.0,,,0.0016,91.89
65,2023-02-04,Georgia,1815871.0,16934.0,,2930677.0,1654504.0,,5.407,0.0706,44.19


In [70]:
def fig_funnel(a='Africa'):
    return px.funnel(func_continent(a).nlargest(10, 'positive_rate'), x='positive_rate', y='location',
                       labels={"positive_rate": "New Positivity Rate", "location": "Country"},
                    color_discrete_sequence=cls)
fig_funnel('South America')

In [69]:
def fig_bar(a='Africa'):
    return px.bar(func_continent(a).nlargest(10, 'total_cases'), x="location", template="simple_white",
           labels={"location": "Country", "total_cases": "Total Cases"}, y="total_cases").update_traces(marker_color=cls)
fig_bar()

In [57]:
def fig_pie(a='Africa'):
    return px.pie(func_continent(a).nlargest(10, 'people_vaccinated'), names='location', values='people_vaccinated',
                 labels={"people_vaccinated": "Total Vaccinations", "location": "Country"},
                 color_discrete_sequence=cls)
fig_pie()

In [68]:
def fig_funnel_vaccine(a='Africa'):
    return px.funnel(func_continent(a).nlargest(10, 'people_vaccinated_per_hundred'),
                               x='people_vaccinated_per_hundred',
                               y='location', color_discrete_sequence=cls,
                               labels={"people_vaccinated_per_hundred": "Vaccination per 100", "location": "Country"}).update_yaxes(showticklabels=False)
fig_funnel_vaccine('North America')

In [8]:
df_covid_data_v1['continent'].unique()

array(['Asia', nan, 'Europe', 'Africa', 'North America', 'South America',
       'Oceania'], dtype=object)

In [17]:
"""Group By can be used to group the data into continents."""
df_cont_grouped = df_covid_data_v1.groupby('continent')
df_cont_grouped.size()

continent
Africa           26547
Asia             24418
Europe           24458
North America    12873
Oceania           4047
South America     6183
dtype: int64

In [18]:
df_cont_grouped.get_group('North America').nlargest(10, 'total_cases')

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
98481,USA,North America,United States,2021-07-18,34079960.0,12048.0,32287.429,609019.0,135.0,258.0,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98480,USA,North America,United States,2021-07-17,34067912.0,12960.0,31446.857,608884.0,69.0,242.286,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98479,USA,North America,United States,2021-07-16,34054952.0,79310.0,30886.571,608815.0,391.0,252.286,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98478,USA,North America,United States,2021-07-15,33975642.0,28412.0,26448.143,608424.0,283.0,268.429,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98477,USA,North America,United States,2021-07-14,33947230.0,31845.0,25255.143,608141.0,331.0,264.714,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98476,USA,North America,United States,2021-07-13,33915385.0,26424.0,23981.714,607810.0,354.0,259.143,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98475,USA,North America,United States,2021-07-12,33888961.0,35013.0,23667.429,607456.0,243.0,256.429,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98474,USA,North America,United States,2021-07-11,33853948.0,6164.0,19455.286,607213.0,25.0,228.571,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98473,USA,North America,United States,2021-07-10,33847784.0,9038.0,19102.857,607188.0,139.0,229.857,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
98472,USA,North America,United States,2021-07-09,33838746.0,48241.0,18488.714,607049.0,504.0,221.429,...,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,


### VACCINATIONS BY MANUCATURER

In [8]:
# vax_url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations-by-manufacturer.csv'
"""Offline version"""
vax_url = 'temp_assets/vaccines.csv'
def covid_vaccine():
    vax_df = pd.read_csv(vax_url, index_col=0, parse_dates=['date'])
#     vax_df.to_csv('temp_assets/vaccines.csv')
    """Below returns months"""
    vax_df['Month'] =  pd.DatetimeIndex(vax_df['date']).month_name()
    """Below returns names of months. This is depricated"""
#     vax_df['Month']= vax_df['Month'].apply(lambda x: calendar.month_abbr[x])
    return vax_df
covid_vaccine().info()

<class 'pandas.core.frame.DataFrame'>
Index: 53583 entries, Argentina to European Union
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                53583 non-null  datetime64[ns]
 1   vaccine             53583 non-null  object        
 2   total_vaccinations  53583 non-null  int64         
 3   Month               53583 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 2.0+ MB


In [19]:
vax_pivot = covid_vaccine().pivot_table(index=('Month','location','vaccine'),aggfunc = 'sum')
vax_pivot.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_vaccinations
Month,location,vaccine,Unnamed: 3_level_1
April,Argentina,CanSino,19939851
April,Argentina,Moderna,201936810
April,Argentina,Oxford/AstraZeneca,815866927
April,Argentina,Pfizer/BioNTech,464761215
April,Argentina,Sinopharm/Beijing,901256615


In [22]:
def pct_vaccination():
    vax_by_pcnt = covid_vaccine()[['vaccine','total_vaccinations']].groupby('vaccine').sum().reset_index()
    vax_by_pcnt['pcnt_vaccination'] = round(vax_by_pcnt.total_vaccinations * 100 /vax_by_pcnt.total_vaccinations.sum(),2)
    return vax_by_pcnt.sort_values(['pcnt_vaccination'], ascending=[0])
pct_vaccination()

Unnamed: 0,vaccine,total_vaccinations,pcnt_vaccination
6,Pfizer/BioNTech,574251932389,66.34
3,Moderna,169060426106,19.53
5,Oxford/AstraZeneca,60134938895,6.95
7,Sinopharm/Beijing,18306777957,2.12
2,Johnson&Johnson,18069239755,2.09
8,Sinovac,17092199088,1.97
9,Sputnik V,8334589275,0.96
0,CanSino,208647528,0.02
4,Novavax,108122963,0.01
1,Covaxin,25968,0.0


In [23]:
def df_this_month():
    vax_by_pcnt = covid_vaccine()[['Month','vaccine','total_vaccinations']].groupby('Month').sum().reset_index()
    vax_by_pcnt['pcnt_vaccination'] = round(vax_by_pcnt.total_vaccinations * 100 /vax_by_pcnt.total_vaccinations.sum(),2)
    return vax_by_pcnt.sort_values(['Month'], ascending=[0])
df_this_month()

Unnamed: 0,Month,total_vaccinations,pcnt_vaccination
11,September,48108114903,5.56
10,October,54390663671,6.28
9,November,54884634322,6.34
8,May,99665223326,11.51
7,March,85849924696,9.92
6,June,100768328115,11.64
5,July,82480876302,9.53
4,January,70413350357,8.13
3,February,70447977063,8.14
2,December,64409593743,7.44


In [24]:
"""Last 30 days vaccination stats"""
def last_two_months():
    df_last_60_days_vax = covid_vaccine()[covid_vaccine().date > datetime.datetime.now() - pd.to_timedelta("50day")]
    df_last_60_days_vax = df_last_60_days_vax[['Month','vaccine','total_vaccinations']].groupby(['vaccine','Month']).sum().reset_index()
    df_last_60_days_vax['pcnt_vaccination'] = round(df_last_60_days_vax.total_vaccinations * 100 /df_last_60_days_vax.total_vaccinations.sum(),2)
    return df_last_60_days_vax.sort_values(['vaccine'], ascending=[0])

last_two_months()

Unnamed: 0,vaccine,Month,total_vaccinations,pcnt_vaccination
19,Sputnik V,June,605410584,0.55
18,Sputnik V,July,473285577,0.43
17,Sinovac,June,901766239,0.82
16,Sinovac,July,698457964,0.64
15,Sinopharm/Beijing,June,1846505069,1.68
14,Sinopharm/Beijing,July,1084494974,0.99
13,Pfizer/BioNTech,June,43755470851,39.8
12,Pfizer/BioNTech,July,31486586644,28.64
11,Oxford/AstraZeneca,June,3141332344,2.86
10,Oxford/AstraZeneca,July,2113285727,1.92


In [16]:
def last_two_months_diff(a='Moderna'):
    last_two = last_two_months()['vaccine']==a
    month_a = last_two_months()[last_two]['pcnt_vaccination'].iloc[0]
    month_b = last_two_months()[last_two]['pcnt_vaccination'].iloc[1]
    return round(month_a-month_b,2)
last_two_months_diff()

IndexError: single positional indexer is out-of-bounds

In [None]:
def covid_vaccine_treemap():
    vax_df0 = covid_vaccine().sort_values('date').groupby(['Month','location','vaccine']).last().reset_index()
    vax_df1 = vax_df0[{'Month','location','vaccine','total_vaccinations'}]
    return px.treemap(vax_df1, path=["Month","location","vaccine"],values="total_vaccinations",
                     title='Covid19 Vaccinations',
                      labels={"total_vaccinations": "Vaccinations"})

In [None]:
def fig_bar_vax():
    return px.bar(pct_vaccination(), x='vaccine',y='pcnt_vaccination',color='vaccine',template="simple_white",
                  labels={'pcnt_vaccination': 'Percentage Vaccinations','vaccine':'Vaccine'})

In [None]:
func_continent()['new_cases'].sum()

In [None]:
#CO2 Footprint Breakdown Per Capita - Goodall (2011)
#CO2 emissions by sector (CAIT, 2021).csv
co2_url = 'https://raw.githubusercontent.com/gyleodhis/owid-datasets/master/datasets/CO2%20Footprint%20Breakdown%20Per%20Capita%20-%20Goodall%20(2011)/CO2%20Footprint%20Breakdown%20Per%20Capita%20-%20Goodall%20(2011).csv'
# co2_url=('assets/co2Footprint.csv')
co2_df = pd.read_csv(co2_url, index_col=0)
co2_df.to_csv('assets/co2Footprint.csv',index=False)
co2_df.sort_values(by='CO2 Footprint Breakdown Per Capita (Goodall (2011))')

In [None]:
def load_data(a):
    return pd.read_csv(a,index_col=False)

In [None]:
#Carbon dioxide (CO₂) emissions broken down by sector, measured in tonnes per year. 
# cos_sector = 'https://raw.githubusercontent.com/gyleodhis/owid-datasets/master/datasets/CO2%20emissions%20by%20sector%20(CAIT%2C%202021)/CO2%20emissions%20by%20sector%20(CAIT%2C%202021).csv'
cos_sector = ('assets/CO₂_Sector.csv')
def carbon_dioxide():
    cos_sector_df = load_data(cos_sector)
#     cos_sector_df.to_csv('assets/CO₂_Sector.csv',index=False)
    cos_sector_df.drop(cos_sector_df.columns[[0]], axis=1, inplace=True)
    return cos_sector_df
carbon_dioxide()

In [None]:
def emissions_by_year():
    co2_sector_df=carbon_dioxide().iloc[:,[0,1,3,5,7,9,11,13,17,24]]
    return co2_sector_df.groupby(['Year']).mean().reset_index()
emissions_by_year()

In [None]:
def emissions_by_sctor():
    new_co2 = emissions_by_year().iloc[-1].to_frame().dropna()
#     new_co2.rename(columns={2018:'Amount'},inplace = True)
#     i = [0,7,6]
#     for a in i:new_co2=new_co2.drop(new_co2.index[a])
    new_co2['Percentage'] = round(new_co2.Amount*100/new_co2.Amount.sum(),2)
    new_co2.index = new_co2.index.str.replace(' \(per capita\)','')
#     new_co2.index[3] = 'Manufacturing '
    return new_co2.sort_values(by='Percentage')
emissions_by_sctor()   #['Percentage'][-1]

In [None]:
def last_two_decades_emissions():
    last_two = round((emissions_by_year().iloc[-1]-emissions_by_year().
                      iloc[0])*100/emissions_by_year().iloc[0]).to_frame()
    last_two.index = last_two.index.str.replace(' \(per capita\)','')
    last_two.rename(columns={0:'% Change'},inplace = True)
    return last_two.sort_values(by='% Change')
#     new_co2.index = new_co2.index.str.replace(' \(per capita\)','')
#     return new_co2.sort_values(by='Percentage')
last_two_decades_emissions()#['% Change'][3]

In [None]:
def top_emitters_by_year():
    top_co2_sector_df=carbon_dioxide().iloc[:,[0,1,3,5,7]]
    return top_co2_sector_df.groupby(['Year']).mean().reset_index()
top_emitters_by_year()

In [None]:
def fig_corbon_line():
    return px.line(top_emitters_by_year(), x='Year', y=top_emitters_by_year().columns,
                  markers=True,template="simple_white",
                  labels={'value':'Amt in Tonnes','variable':'Sector'})
fig_corbon_line()

In [None]:
#  df_covid_data_v1[['continent','location']]
def emission_with_continent():
    cos_with_continent = load_data(cos_sector).iloc[:,[0,1,2,4,6,8,10,12,14,16]]
    cos_with_continent.rename(columns={'Entity':'location'},inplace = True)
    df_continent = df_covid_data_v1[['continent','location']]
    cos_with_continent = cos_with_continent[cos_with_continent['Year']%5==0]
    return pd.merge(df_continent,cos_with_continent)
emission_with_continent()

In [None]:
top_emitters_year=emission_with_continent().iloc[:,[0,2,6]]
top_emitters_year=top_emitters_year.groupby(['continent','Year']).mean()
top_emitters_year


In [None]:
def top_emitter_by_year(a='Africa'):
    """Possible valies for a are Africa,Asia,Europe,North America,Oceania,South America"""
    top_emitters_year=emission_with_continent().iloc[:,[0,2,6]]
    df_top_emitters_year = top_emitters_year['continent'] == a
    top_emitters_year_new = top_emitters_year[df_top_emitters_year]
    top_emitters_year_new=top_emitters_year_new.groupby(['continent','Year']).mean()
#     if a == 'Africa':
#         df_cont_new.at[13,'location'] = 'DRC'
#     elif a == 'Asia':
#         df_cont_new.at[101,'location'] = 'UAE'
#     else: df_cont_new
    return top_emitters_year_new.reset_index()
top_emitter_by_year()

In [None]:
def fig_top_emitter_by_year(a='Africa'):
    fig = px.area(top_emitter_by_year(a), x='Year', y="Energy", color="continent",
                  markers=True,template="simple_white",labels={'Energy':'Amt in Tonnes'})
    fig.layout.autosize
    return fig
fig_top_emitter_by_year('North America')

In [None]:
def emission_by_continent(a='Africa'):
    return round(top_emitter_by_year(a).Energy.sum(),2)
emission_by_continent()

In [None]:
def fig_emission_by_continent(a='Africa'):
    return daq.Thermometer(min=100,max=2000,value=emission_by_continent(a),showCurrentValue=True,units="Tones")
fig_emission_by_continent()