### Import Libraries

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

### Prepare data

#### Load the data and delete unnecessary columns

In [None]:
# Read the confirmed-cases table from the working directory and report its
# (rows, columns) shape. Presumably one row per province/country and one
# column per date — confirm against the CSV.
confirmed_path = 'time_series_covid19_confirmed_global.csv'
df = pd.read_csv(confirmed_path)
df.shape

(289, 1147)

In [None]:
# Collapse province-level rows into a single row per country.
#
# Case counts (every column that is not metadata) are additive across
# provinces, so they are summed. Coordinates are not additive: summing
# Lat/Long gives meaningless positions for countries with several rows,
# so the unweighted mean of the province coordinates is used instead.
#
# 'Province/State' is simply left out of the aggregation. Summing it and
# dropping it afterwards depends on how the installed pandas version treats
# text columns, and fails when the cell is run a second time.
meta_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
date_columns = [col for col in df.columns if col not in meta_columns]
aggregations = {'Lat': 'mean', 'Long': 'mean', **{col: 'sum' for col in date_columns}}

# as_index=False keeps 'Country/Region' as a regular first column, followed by
# Lat, Long and the date columns in their original order.
df = df.groupby('Country/Region', as_index=False).agg(aggregations)

In [None]:
# Country-by-date table for the line charts: same rows as df, without the
# coordinate columns.
df_line = df.drop(['Lat', 'Long'], axis=1)

In [None]:
# Select the TOP_N countries with the highest and the lowest cumulative totals.
# Countries are ranked on the last column of df_line, which is assumed to be
# the most recent date in the table.
TOP_N = 5
latest_totals = df_line.iloc[:, -1]

# nlargest/nsmallest return exactly TOP_N rows even when totals are tied.
# Filtering with isin() on the selected values would keep every tied row and
# could return more than TOP_N countries.
top_values = latest_totals.nlargest(TOP_N).tolist()
# Sorting the selected index restores the original row order of df_line.
top_rows = df_line.loc[latest_totals.nlargest(TOP_N).index.sort_values()]

df_top_5 = top_rows


bottom_values = latest_totals.nsmallest(TOP_N).tolist()
bottom_rows = df_line.loc[latest_totals.nsmallest(TOP_N).index.sort_values()]

df_bottom_5 = bottom_rows

### Map figure

In [None]:
!pip install pycountry
!pip install pycountry_convert



In [None]:
import pycountry

def do_fuzzy_search(country_name):
    """Resolve a country name to an alpha-3 country code using pycountry.

    An exact lookup is attempted first; the fuzzy search is only used as a
    fallback, so names (or codes) that pycountry knows verbatim are resolved
    deterministically instead of by best-guess ranking.

    Parameters
    ----------
    country_name : str
        Name to resolve. Non-string values (None, NaN) and blank strings
        are treated as "not found".

    Returns
    -------
    str or None
        The ``alpha_3`` attribute of the matched country, or ``None`` when
        nothing matches.
    """
    # Missing values coming from a DataFrame column are not searchable.
    if not isinstance(country_name, str) or not country_name.strip():
        return None

    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        pass  # no exact match: fall through to the fuzzy search

    try:
        # search_fuzzy returns candidates ordered by relevance; take the best.
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_3
    except LookupError:
        return None

# Look up a country code for every country; unmatched names yield None.
country_names = df["Country/Region"]
df["Alpha3"] = country_names.map(do_fuzzy_search)

In [None]:
# Hand-written codes for dataset names that need an explicit mapping.
manual_mapping = {
    "Burma": "MMR",
    "Congo (Brazzaville)": "COG",
    "Congo (Kinshasa)": "COD",
    "Korea, South": "KOR",
    "Korea, North": "PRK",
    "Taiwan*": "TWN",
    "Turkey": "TUR"
    # Add more mappings as needed
}
df["Alpha3_new"] = df["Country/Region"].map(manual_mapping)

# Merge the two sources into one code per country. The manual mapping takes
# precedence and the looked-up code is the fallback. Concatenating the two
# strings would produce an invalid six-letter code whenever both sources
# return a value for the same country.
# Countries with no code from either source get an empty string.
df['combined'] = df['Alpha3_new'].fillna(df['Alpha3']).fillna('').astype(str)

In [None]:
# Discard the two intermediate code columns and let the merged column take
# over the 'Alpha3' name.
df = (
    df.drop(columns=['Alpha3', 'Alpha3_new'])
      .rename(columns={'combined': 'Alpha3'})
)

In [None]:
# Keep only the rows that received a country code, then renumber them from 0.
has_code = df['Alpha3'].ne('')
df = df.loc[has_code].reset_index(drop=True)

In [None]:
import pycountry_convert as pc

def alpha3_to_continent(alpha3_code):
    """Return the continent name for an alpha-3 country code.

    The code is converted in three steps with pycountry_convert:
    alpha-3 -> alpha-2 -> continent code -> continent name.
    If any step raises ``KeyError`` the function returns an empty string.
    """
    try:
        alpha2_code = pc.country_alpha3_to_country_alpha2(alpha3_code)
        continent_code = pc.country_alpha2_to_continent_code(alpha2_code)
        continent_name = pc.convert_continent_code_to_continent_name(continent_code)
    except KeyError:
        return ""
    return continent_name


# Attach a continent name to every country ("" when none can be derived).
alpha3_codes = df["Alpha3"]
df["Continent"] = alpha3_codes.map(alpha3_to_continent)

In [None]:
# Reshape from wide to long: every remaining (non-identifier) column becomes
# one row per country, stored in the default 'variable' / 'value' columns.
identifier_columns = ['Country/Region', 'Lat', 'Long', 'Alpha3', 'Continent']
df = pd.melt(df, id_vars=identifier_columns)

In [None]:
# Give the melted columns meaningful names.
df = df.rename(columns={'variable': 'Date', 'value': 'infections'})

In [None]:
# The date headers in the source table are written month/day/two-digit-year
# (e.g. 1/22/20). Passing the format explicitly avoids per-value inference,
# which is slower and could silently swap day and month on ambiguous values.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')

In [None]:
# Derive calendar fields from the parsed date; 'month/year' is the label used
# as the animation frame of the map figures.
dates = df['Date'].dt
df = df.assign(**{
    'year': dates.year,
    'month': dates.month,
    'month/year': dates.strftime('%m/%Y'),
})

In [None]:
# Show the long-format frame; the notebook renders only the first and last
# rows of a frame this large.
df

Unnamed: 0,Country/Region,Lat,Long,Alpha3,Continent,Date,infections,year,month,month/year
0,Afghanistan,33.939110,67.709953,AFG,Asia,2020-01-22,0,2020,1,01/2020
1,Albania,41.153300,20.168300,ALB,Europe,2020-01-22,0,2020,1,01/2020
2,Algeria,28.033900,1.659600,DZA,Africa,2020-01-22,0,2020,1,01/2020
3,Andorra,42.506300,1.521800,AND,Europe,2020-01-22,0,2020,1,01/2020
4,Angola,-11.202700,17.873900,AGO,Africa,2020-01-22,0,2020,1,01/2020
...,...,...,...,...,...,...,...,...,...,...
224023,Venezuela,6.423800,-66.589700,VEN,South America,2023-03-09,552162,2023,3,03/2023
224024,Vietnam,14.058324,108.277199,VNM,Asia,2023-03-09,11526994,2023,3,03/2023
224025,Yemen,15.552727,48.516388,YEM,Asia,2023-03-09,11945,2023,3,03/2023
224026,Zambia,-13.133897,27.849332,ZMB,Africa,2023-03-09,343135,2023,3,03/2023


In [None]:
import plotly.express as px

# The animation advances one month per frame, but df holds one row per country
# per day. Passing it directly draws every day of the month as a separate
# marker on top of each other and makes the figure very large.
# Keep a single row per country and month instead: the last available day,
# i.e. the cumulative total reached by the end of that month.
# The stable sort keeps the countries in their original relative order, and
# sorting by date keeps the frames in chronological order.
df_month_end = (
    df.sort_values('Date', kind='stable')
      .drop_duplicates(subset=['Country/Region', 'month/year'], keep='last')
)

# Bubble map: marker size = cumulative infections, marker color = continent.
fig = px.scatter_geo(df_month_end, locations="Alpha3", color="Continent",
                     hover_name="Country/Region", size="infections",
                     size_max = 50,
                     animation_frame='month/year',
                     projection="natural earth", width = 1600, height = 800, basemap_visible = True, title = 'Cumulative number of cases')

fig.show()

Output hidden; open in https://colab.research.google.com to view.

- As can be seen in this graph, the size of each circle shows the cumulative number of COVID cases in each country, and each color represents a different continent. In addition, the highest cumulative number of infections was recorded in the USA, which could be explained by the wave of anti-vaccine legislation that spread throughout the country.
- As observed, COVID infections spread between countries much more markedly in Europe than in Africa. A possible explanation is the open-border policy between European countries.

In [None]:
# One row per country and month (the last available day of the month), so
# that each animation frame colors every country from exactly one value:
# the cumulative total reached by the end of that month.
df_month_end = (
    df.sort_values('Date', kind='stable')
      .drop_duplicates(subset=['Country/Region', 'month/year'], keep='last')
)

# Choropleth: country fill color = cumulative infections. The color range is
# fixed to the overall maximum so that the frames are comparable.
fig = px.choropleth(df_month_end, locations="Alpha3",
                    color="infections",
                    hover_name="Country/Region",
                    animation_frame='month/year',
                    color_continuous_scale=px.colors.sequential.Turbo,
                    # Series.max() is vectorized and skips missing values,
                    # unlike the builtin max() over the Series.
                    range_color = (0, df['infections'].max()),
                    width = 1600, height=800, basemap_visible = True,
                    title = 'Cumulative COVID-19 Infections'
                    )
fig.show()

Output hidden; open in https://colab.research.google.com to view.

- A different graph illustrating the cumulative COVID cases around the world. Country borders are drawn, and the color represents the number of infections.

### Line chart

In [None]:
# Reshape the top-5 table to long format: one row per country and date column.
df_top_5 = df_top_5.melt(
    id_vars=['Country/Region'],
    var_name='date',
    value_name='infections',
)

In [None]:
# Reshape the bottom-5 table to long format: one row per country and date column.
df_bottom_5 = df_bottom_5.melt(
    id_vars=['Country/Region'],
    var_name='date',
    value_name='infections',
)

In [None]:
# Row-to-row change of the cumulative count, computed separately per country.
# The first row of each country has no predecessor, so its NaN is set to 0.
# Assumes the rows of each country are already in date order — TODO confirm.
for frame in (df_top_5, df_bottom_5):
    cumulative_by_country = frame.groupby(['Country/Region'])['infections']
    frame['daily_diff'] = cumulative_by_country.diff().fillna(0)

In [None]:
# Show the long-format top-5 table with the new daily_diff column.
df_top_5

Unnamed: 0,Country/Region,date,infections,daily_diff
0,Brazil,1/22/20,0,0.0
1,France,1/22/20,0,0.0
2,Germany,1/22/20,0,0.0
3,India,1/22/20,0,0.0
4,US,1/22/20,1,0.0
...,...,...,...,...
5710,Brazil,3/9/23,37076053,0.0
5711,France,3/9/23,39866718,6308.0
5712,Germany,3/9/23,38249060,7829.0
5713,India,3/9/23,44690738,440.0


In [None]:
# The date labels are written month/day/two-digit-year (e.g. 1/22/20, 3/9/23).
# An explicit format avoids per-value inference and any day/month ambiguity.
DATE_FORMAT = '%m/%d/%y'
df_top_5['date'] = pd.to_datetime(df_top_5['date'], format=DATE_FORMAT)
df_bottom_5['date'] = pd.to_datetime(df_bottom_5['date'], format=DATE_FORMAT)

In [None]:
import plotly.express as px

# Cumulative case curves, one line per country in df_top_5.
# The x axis gets one tick per month, labelled "<month>\n<year>".
monthly_axis = dict(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig = px.line(
    df_top_5,
    x="date",
    y='infections',
    color='Country/Region',
    hover_data={"date": "|%B %d, %Y"},
    title='Cumulative number of cases',
)
fig.update_xaxes(**monthly_axis)
fig.show()

- The line graph shows the five countries with the most COVID cases in the world. In addition, the final total of cases in the US is almost double that of the second-ranked country, India.
- Moreover, the data show a rapid increase around January 2022 in all five countries.

In [None]:
import plotly.express as px

# Both figures share the same monthly x axis: one tick per month, labelled
# "<month>\n<year>".
monthly_axis = dict(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

# Figure 1: daily new cases, one bar per country and day.
fig1 = px.bar(df_top_5, x="date", y='daily_diff', color = 'Country/Region',
              hover_data={"date": "|%B %d, %Y"},
              title='Daily confirmed cases')
fig1.update_xaxes(**monthly_axis)


# Figure 2: the highest daily value per bin, overlaid per country.
fig2 = px.histogram(df_top_5, x="date", y='daily_diff', color = 'Country/Region',
              hover_data={"date": "|%B %d, %Y"},
              title='Histogram of Daily confirmed cases',
              barmode = 'overlay',
              histfunc = 'max'
              )
# Force one bin per calendar month. Without an explicit bin size the bins are
# chosen automatically and need not line up with the monthly ticks.
fig2.update_traces(xbins_size="M1")
fig2.update_xaxes(**monthly_axis)

fig1.show()
fig2.show()

- The first bar graph shows the daily cases of the top five countries, while the second is a condensed version that shows only one number per month: the highest daily value of that month.
- Both graphs confirm that, around January 2022, there was a large spike in the number of COVID cases in these five nations, and possibly worldwide.