#### Notebook Imports

In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
from matplotlib.dates import DateFormatter
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot,plot
init_notebook_mode(connected=True)
import cufflinks as cf
cf.go_offline()

%matplotlib inline

ModuleNotFoundError: No module named 'cufflinks'

### Data Scrubbing and Organizing

In [None]:
# Import the OWID COVID-19 CSV from the project GitHub repository.
covid19Raw = pd.read_csv("https://raw.githubusercontent.com/WhipSnake23/Python-Class-Project/main/Data/owid-covid-data.csv")

# Drop the rolled-up regional rows (iso_code containing "OWID"). NOTE: ~ == "not".
# na=False makes a missing iso_code count as "not a roll-up", so the negation
# below cannot fail on NaN entries.
isRollup = covid19Raw['iso_code'].str.contains("OWID", na=False)
covid19Countries = covid19Raw[~isRollup]

# Parse the date column once and reuse it for every derived field.
reportDate = pd.to_datetime(covid19Countries['date'])

# assign() returns a new frame: 'date' and 'positive_rate' are replaced in place,
# and Day / Month / Year / Year_Month are appended after the existing columns.
# Appending avoids the previous hard-coded insert positions (59-62), which raise
# as soon as the CSV has fewer columns than expected.
covid19Dataframe = covid19Countries.assign(
    # Date column as real datetimes
    date=reportDate,
    # Positivity rate as an actual percentage
    positive_rate=covid19Countries['positive_rate'] * 100,
    # Day, Month, Year parts to help with data aggregation
    Day=reportDate.dt.day,
    Month=reportDate.dt.month,
    Year=reportDate.dt.year,
    # First day of the row's month, used as the key in the group-by statements
    Year_Month=reportDate.dt.to_period('M').dt.to_timestamp(),
)

## Notebook Functions

In [None]:
def millions(x, pos):
    """Tick formatter: render the value ``x`` in millions with one decimal.

    The two args are the value and the tick position; ``pos`` is accepted
    but not used.
    """
    scaled = x * 1e-6
    return '{:1.1f}M'.format(scaled)

### Total Deaths in Mexico

In [None]:
#Create a data frame that only has Mexico data
covid19DataframeMexico = covid19Dataframe[covid19Dataframe["iso_code"]=="MEX"]

#Group the data by month (Year_Month is the first day of each row's month)
# NOTE(review): total_deaths looks like a running total in this dataset, so
# summing it within a month would count the same deaths repeatedly - confirm
# whether .max() (or a sum of new_deaths) was intended here.
covid19DataframeMexicoGrouped = covid19DataframeMexico.groupby(['Year_Month'], as_index=False)['total_deaths'].sum()

# as_index=False already leaves a fresh 0..n-1 index, so no reset_index() is
# needed; show the grouped frame as the cell output.
covid19DataframeMexicoGrouped

In [None]:
#Source: https://www.earthdatascience.org/courses/use-data-open-source-python/use-time-series-data-in-python/date-time-types-in-pandas-python/customize-dates-matplotlib-plots-python/

# Create figure and plot space
fig, ax = plt.subplots(figsize=(12, 12))

# One bar per month. On a date axis the bar width is measured in days, so the
# default width of 0.8 would draw hairline bars for monthly data; 20 days fills
# most of each month while leaving a visible gap between neighbours.
ax.bar(covid19DataframeMexicoGrouped['Year_Month'],
       covid19DataframeMexicoGrouped['total_deaths'],
       width=20,
       color='red')

# Set title and labels for axis
ax.set(xlabel="Date",
       ylabel="Total Deaths",
       title="Mexico Total Deaths by Month")

# Define the date format: one tick per month, labelled MM-YY
date_form = DateFormatter("%m-%y")
ax.xaxis.set_major_formatter(date_form)
ax.xaxis.set_major_locator(mdates.MonthLocator())

#Format the y axis ticks in millions so that the chart scale is easier to read
formatter = FuncFormatter(millions)
ax.yaxis.set_major_formatter(formatter)

ax.grid(False)

#Render the chart
plt.show()

### Total Deaths vs the Top 20 countries with the highest Median Age

In [None]:
##This code block filters the data down to the 20 countries with the highest median age

# One (iso_code, median_age) pair per country, then the 20 largest median ages.
countries = covid19Dataframe[['iso_code','median_age']].drop_duplicates()
top20Countries_list = countries.nlargest(20,'median_age')['iso_code'].values.tolist()

# Keep only the rows of those countries and the columns the chart needs.
booleanSeries = covid19Dataframe.iso_code.isin(top20Countries_list)
top20Countries_MedianAge = covid19Dataframe.loc[booleanSeries, ['location','date','total_deaths','median_age']]

# Null handling: discard rows without a death count *before* choosing the most
# recent row per country. Otherwise a country whose latest row has no
# total_deaths value would be represented by a null.
top20Countries_MedianAge = top20Countries_MedianAge.dropna(subset=['total_deaths'])

#This gets one row per location: the most recent row which has Total Deaths
latestDatePerLocation = top20Countries_MedianAge.groupby('location')['date'].transform('max')
top20Countries_MedianAge = top20Countries_MedianAge[top20Countries_MedianAge['date'] == latestDatePerLocation]

In [None]:
sns.set_theme()

# Scatter total deaths against median age, one colour/marker per country.
medianAgeGrid = sns.relplot(
    data=top20Countries_MedianAge,
    x="total_deaths",
    y="median_age",
    hue="location",
    style="location",
)

# relplot draws a single facet here; label and title that axes directly.
medianAgeAxes = medianAgeGrid.ax
medianAgeAxes.set_xlabel("Total Deaths")
medianAgeAxes.set_ylabel("Median Age")
medianAgeAxes.set_title("Total Deaths vs Top 20 Countries with the Highest Median Age", size=18)

### Total Deaths vs the Top 20 countries with the highest population density

In [None]:
# One (iso_code, population_density) pair per country, then the 20 densest.
countries = covid19Dataframe[['iso_code','population_density']].drop_duplicates()
top20Countries_list = countries.nlargest(20,'population_density')['iso_code'].values.tolist()

# Keep only the rows of those countries and the columns the chart needs.
booleanSeries = covid19Dataframe.iso_code.isin(top20Countries_list)
top20Countries_PopD = covid19Dataframe.loc[booleanSeries, ['location','date','total_deaths','population_density']]

# Discard rows without a death count before choosing the most recent row per
# country, so a country whose latest row has no total_deaths value is
# represented by its last reported figure instead of a null.
top20Countries_PopD = top20Countries_PopD.dropna(subset=['total_deaths'])

# One row per location: the most recent row that has a death count.
latestDatePerLocation = top20Countries_PopD.groupby('location')['date'].transform('max')
top20Countries_PopD = top20Countries_PopD[top20Countries_PopD['date'] == latestDatePerLocation]


In [None]:
# Display names for the axes and legend, keyed by dataframe column.
popDensityLabels = {
    "total_deaths": "Total Deaths",
    "population_density": "Population Density",
    "location": "Country",
}

# Interactive scatter: one point per country, coloured by country.
fig = px.scatter(
    top20Countries_PopD,
    x="total_deaths",
    y="population_density",
    color="location",
    title="Total Deaths vs Top 20 Countries with the Highest Population Density",
    labels=popDensityLabels,
)

fig.show()

### Italy's Positivity Rate

In [None]:
# Rows reported for Italy only.
covid19DataframeItaly = covid19Dataframe[covid19Dataframe["iso_code"]=="ITA"]

# Mean positivity rate per month, in chronological order. The sorted frame is
# kept (sort_values returns a new frame rather than sorting in place).
covid19DataframeItalyGrouped = (
    covid19DataframeItaly
    .groupby(['Year_Month'], as_index=False)['positive_rate'].mean()
    .sort_values(by=['Year_Month'])
)

# The y axis shows the positivity rate, so label it as such (it was previously
# labelled "Total Deaths").
fig = px.line(
    covid19DataframeItalyGrouped,
    x="Year_Month",
    y="positive_rate",
    title='Italy Positivity Rate',
    labels={
        "positive_rate" : "Positivity Rate (%)",
        "Year_Month" : "Month"
    },
)
fig.show()

### Asia Region Focus

In [None]:
# Keep only the rows for Asian countries dated February 21, 2021, using one
# combined boolean mask over the continent and the Year / Month / Day columns.
isAsia = covid19Dataframe["continent"] == 'Asia'
isFebruary21 = (
    (covid19Dataframe["Year"] == 2021)
    & (covid19Dataframe["Month"] == 2)
    & (covid19Dataframe["Day"] == 21)
)
februaryAsia = covid19Dataframe[isAsia & isFebruary21]

In [None]:
# Rank the Asian rows for February 21, 2021 by new_cases and keep the five largest.
top5Countries_newcases = februaryAsia.nlargest(n=5, columns='new_cases')
top5Countries_newcases

In [None]:
# New-case counts of the five top-ranked rows, as a Series.
asiaNewCases = top5Countries_newcases.loc[:, "new_cases"]
asiaNewCases

In [None]:
# Country names (location column) of the five top-ranked rows, as a Series.
asiaTop5 = top5Countries_newcases.loc[:, "location"]
asiaTop5

### A Look at New Cases in Asian Countries on February 21st in Three Different Charts

In [None]:
# Using Matplotlib to generate a scatter plot for top 5 largest new cases in Asia
fig, ax = plt.subplots()
ax.scatter(asiaNewCases, asiaTop5)

# Label the figure so it can be read on its own.
ax.set(xlabel="New Cases",
       ylabel="Country",
       title="Top 5 New Case Counts in Asia - February 21, 2021")
plt.show()

In [None]:
# Using Matplotlib to generate a line plot for top 5 largest new cases in Asia
fig, ax = plt.subplots()
ax.plot(asiaNewCases, asiaTop5)

# Label the figure so it can be read on its own.
ax.set(xlabel="New Cases",
       ylabel="Country",
       title="Top 5 New Case Counts in Asia - February 21, 2021")
plt.show()

In [None]:
# Using Matplotlib to generate a pie chart with labels
fig, ax = plt.subplots()
# Each wedge is one country's share of the five new-case counts.
pie = ax.pie(asiaNewCases, labels=asiaTop5)
ax.set_title("Share of New Cases among the Top 5 Asian Countries - February 21, 2021")
plt.show()


In [None]:
# Seaborn scatter of the top-5 frame, one colour/marker per country.
sns.set_theme()

# NOTE(review): the x axis uses new_deaths although the rest of this section
# charts new_cases - confirm which column was intended.
top5RelplotOptions = dict(
    data=top5Countries_newcases,
    x="new_deaths",
    y="location",
    hue="location",
    style="location",
)
sns.relplot(**top5RelplotOptions)

In [None]:
# Plotly bar chart of new cases for every Asian country on February 21, 2021,
# one bar (and colour) per country.
fig = px.bar(
    februaryAsia,
    x='location',
    y='new_cases',
    color='location',
    title='February 21, 2021 New Cases in Asia',
)
fig.show()

In [None]:
# Independent copy of the rows whose continent is North America.
isNorthAmerica = covid19Dataframe["continent"]=="North America"
covid19DataframeNorthAmerica = covid19Dataframe.loc[isNorthAmerica].copy()
covid19DataframeNorthAmerica.head()

In [None]:
# Correlate the numeric columns only. The frame also holds text columns
# (iso_code, continent, location, ...), and DataFrame.corr() raises on those in
# pandas 2.x instead of silently skipping them.
northAmericaNumeric = covid19DataframeNorthAmerica.select_dtypes(include='number')
sns.heatmap(northAmericaNumeric.corr(),cmap='coolwarm')

In [None]:
#Choropleth World Map - Death Count

# A choropleth needs one value per country. Reduce the daily rows to the most
# recent row per location that actually has a deaths-per-million figure, instead
# of passing every daily row (many values, some null, for the same country).
latestDeathsPerMillion = (
    covid19Dataframe
    .dropna(subset=['total_deaths_per_million'])
    .sort_values('date')
    .drop_duplicates(subset='location', keep='last')
)

data = dict(
    type = 'choropleth',
    colorscale = 'Viridis',
    reversescale = True,
    # Countries are matched to the map by their name
    locations = latestDeathsPerMillion['location'],
    locationmode = "country names",
    z = latestDeathsPerMillion['total_deaths_per_million'],
    text = latestDeathsPerMillion['location'],
    colorbar = {'title' : 'total_deaths_per_million'},
)
layout = dict(
    title = 'Total Deaths Per Million',
    geo = dict(showframe = False, projection = {'type':'mercator'}),
)
choromap = go.Figure(data = [data],layout = layout)
# Render with the plotly offline plot() helper imported at the top of the notebook
plot(choromap,validate=False)



In [None]:
# ISO codes of the five countries compared in the cells that follow.
Country1, Country2, Country3, Country4, Country5 = "USA", "BRA", "MEX", "IND", "GBR"


In [None]:
# Build one table with a row per month (YearMonth) and one column per selected
# country (total_deaths1 .. total_deaths5, in Country1 .. Country5 order).
countryCodes = [Country1, Country2, Country3, Country4, Country5]

monthlyDeathsByCountry = []
for position, isoCode in enumerate(countryCodes, start=1):
    countryRows = covid19Dataframe[covid19Dataframe["iso_code"] == isoCode]
    # Month of each row as a pandas Period (e.g. 2020-03); used as the group key.
    yearMonth = pd.to_datetime(countryRows['date']).dt.to_period('M').rename('YearMonth')
    # NOTE(review): total_deaths looks like a running total in this dataset, so
    # summing it within a month would count the same deaths repeatedly - confirm
    # whether .max() (or a sum of new_deaths) was intended here.
    monthlyDeaths = countryRows.groupby(yearMonth)['total_deaths'].sum()
    monthlyDeathsByCountry.append(monthlyDeaths.rename(f'total_deaths{position}'))

# Each series is indexed by YearMonth, so concat lines the countries up by month.
# Joining the per-country results on their positional index instead would attach
# a country's figures to the wrong month whenever its first reported month
# differs from Country1's.
deathsByPeriod = (
    pd.concat(monthlyDeathsByCountry, axis=1, join='outer')
    .sort_index()
    .rename_axis('YearMonth')
    .reset_index()
)

print(deathsByPeriod)

In [None]:
def millions(x, pos):
    """Return ``x`` expressed in millions with one decimal place, e.g. ``'2.3M'``.

    The two args are the value and the tick position; ``pos`` is not used.
    This repeats the ``millions`` helper defined in the "Notebook Functions" cell.
    """
    return f'{x * 1e-6:1.1f}M'

In [None]:
# Express each country's monthly figure in thousands, rounded to a whole number.
deathColumns = [f'total_deaths{number}' for number in range(1, 6)]
for column in deathColumns:
    deathsByPeriod[column] = deathsByPeriod[column].div(1000).round(0)

In [None]:
# First 12 rows of each country's column, in total_deaths1 .. total_deaths5 order.
bars1, bars2, bars3, bars4, bars5 = (
    deathsByPeriod[f'total_deaths{number}'].head(12) for number in range(1, 6)
)

In [None]:
# X-axis labels: the first 12 YearMonth values.
names = deathsByPeriod['YearMonth'].head(12)

In [None]:
barWidth = 1

# Series to stack, bottom to top, with the legend label and colour of each.
stackedSeries = [
    (Country1, bars1, '#0000ff'),
    (Country2, bars2, '#557f2d'),
    (Country3, bars3, '#ffff00'),
    (Country4, bars4, '#ff0000'),
    (Country5, bars5, '#7f6d5f'),
]

# Bar positions on the x-axis: one per available period rather than a fixed
# list of 12, so the chart still draws when fewer than 12 periods exist.
p = np.arange(len(names))

fig, ax = plt.subplots(figsize=(12, 12))

# Running top of the stack; each series is drawn on top of the previous ones.
stackTop = np.zeros(len(p))
for label, bars, color in stackedSeries:
    # A missing value counts as zero so it cannot blank out the bars above it.
    heights = np.nan_to_num(np.asarray(bars, dtype=float))
    ax.bar(p, heights, bottom=stackTop, color=color, edgecolor='white',
           width=barWidth, label=label)
    stackTop = stackTop + heights

# Custom X axis
ax.set_xticks(p)
ax.set_xticklabels(names.astype(str), fontweight='bold')
ax.set_xlabel("Time Periods (YYYY-MM)")

# The plotted values were divided by 1000 in an earlier cell.
ax.set_ylabel("Total Deaths (thousands)")
ax.set_title("Total Deaths by Month for Five Countries")
ax.legend(title="Country (ISO code)")

plt.show()