In [2]:
#Imports
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns



In [3]:
#Converting to Panda DF

df_confirmed = pd.read_csv('time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('time_series_covid19_recovered_global.csv')

In [4]:
#Viewing DF
df_confirmed.info
df_confirmed.shape
df_confirmed.columns
df_confirmed.describe
df_confirmed.sample(15)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
253,,Timor-Leste,-8.874217,125.727539,0,0,0,0,0,0,...,23418,23418,23418,23418,23418,23418,23418,23418,23419,23419
110,,Ecuador,-1.8312,-78.1834,0,0,0,0,0,0,...,1056578,1057121,1057121,1057121,1057121,1057121,1057121,1057121,1057121,1057121
271,Guernsey,United Kingdom,49.448196,-2.58949,0,0,0,0,0,0,...,34867,34929,34929,34929,34929,34929,34929,34929,34991,34991
238,,Somalia,5.152149,46.199616,0,0,0,0,0,0,...,27324,27324,27324,27324,27324,27324,27324,27324,27324,27324
275,Pitcairn Islands,United Kingdom,-24.3768,-128.3242,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4
258,,Turkey,38.9637,35.2433,0,0,0,0,0,0,...,17042722,17042722,17042722,17042722,17042722,17042722,17042722,17042722,17042722,17042722
147,,Iceland,64.9631,-19.0208,0,0,0,0,0,0,...,209093,209093,209093,209093,209093,209093,209093,209137,209137,209137
20,,Bahrain,26.0275,50.55,0,0,0,0,0,0,...,707480,707828,708061,708532,708768,709230,709230,709858,710306,710693
143,,Haiti,18.9712,-72.2852,0,0,0,0,0,0,...,34143,34143,34143,34143,34202,34202,34202,34202,34202,34202
109,,Dominican Republic,18.7357,-70.1627,0,0,0,0,0,0,...,660533,660705,660705,660705,660705,660705,660705,660705,660790,660790


In [5]:
#Dropping Unused Columns
df_deaths.drop(['Lat', 'Long'], axis=1, inplace=True)
df_confirmed.drop(['Lat', 'Long'], axis=1, inplace=True)
df_recovered.drop(['Lat', 'Long'], axis=1, inplace=True)


In [6]:
# All Province/State values are nan
df_deaths.drop(['Province/State'], axis=1, inplace=True)
df_confirmed.drop(['Province/State'], axis=1, inplace=True)
df_recovered.drop(['Province/State'], axis=1, inplace=True)


In [7]:
#Checking for Nan Values
print(df_confirmed.isna().values.any())
print(df_deaths.isna().values.any())
print(df_recovered.isna().values.any())

False
False
False


In [8]:
# Melt the DataFrame so that 'Country/Region' stays as a column, and the dates become a 'Date' column with corresponding 'Cases' values
df_confirmed_melted = pd.melt(df_confirmed, id_vars='Country/Region', var_name='Date', value_name='Cases')

In [9]:
#Converting date column to date time module
df_confirmed_melted['Date'] = pd.to_datetime(df_confirmed_melted['Date'])


  df_confirmed_melted['Date'] = pd.to_datetime(df_confirmed_melted['Date'])


In [10]:
duplicates = df_confirmed_melted[df_confirmed_melted.duplicated(subset=['Date', 'Country/Region'], keep=False)]
print(duplicates)


        Country/Region       Date     Cases
9            Australia 2020-01-22         0
10           Australia 2020-01-22         0
11           Australia 2020-01-22         0
12           Australia 2020-01-22         0
13           Australia 2020-01-22         0
...                ...        ...       ...
330312  United Kingdom 2023-03-09      1403
330313  United Kingdom 2023-03-09         4
330314  United Kingdom 2023-03-09      2166
330315  United Kingdom 2023-03-09      6561
330316  United Kingdom 2023-03-09  24425309

[109728 rows x 3 columns]


In [11]:
# Step 1: Aggregate the Data
df_confirmed_aggregated = df_confirmed_melted.groupby(['Date', 'Country/Region']).sum().reset_index()

# Step 2: Pivot the DataFrame after aggregation
df_confirmed_pivot = df_confirmed_aggregated.pivot(index='Date', columns='Country/Region', values='Cases')



In [12]:
# Aggregate the data by month
df_monthly = df_confirmed_melted.groupby([pd.Grouper(key='Date', freq='M'), 'Country/Region']).sum().reset_index()

  df_monthly = df_confirmed_melted.groupby([pd.Grouper(key='Date', freq='M'), 'Country/Region']).sum().reset_index()


In [14]:
# Filter for the top 50 countries with the highest total cases
top_countries = df_monthly.groupby('Country/Region')['Cases'].max().nlargest(50).index
df_filtered = df_monthly[df_monthly['Country/Region'].isin(top_countries)]

In [15]:
# Recalculate the number of frames after filtering
num_frames = df_filtered['Date'].nunique()
frame_duration = 30000 // num_frames  # Adjust frame duration for a 30-second animation

In [17]:
# Create the animated scatter map with smaller bubbles
fig = px.scatter_geo(df_filtered, 
                     locations="Country/Region", 
                     locationmode="country names", 
                     color="Cases", 
                     size="Cases", 
                     size_max=50,  # Adjust as needed
                     hover_name="Country/Region", 
                     animation_frame="Date", 
                     projection="natural earth")

# Update layout for title, appearance, and optimization settings
fig.update_layout(
    title='COVID-19 Global Spread: Top 50 Countries Visualized',
    coloraxis_colorscale="Viridis",  # Use pink color scale
    updatemenus=[{
        'buttons': [
            {
                'args': [None, {'frame': {'duration': frame_duration, 'redraw': True},
                                'fromcurrent': True, 'transition': {'duration': 0}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': True},
                                  'mode': 'immediate',
                                  'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': True,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }]
)

# Use scattergl for faster rendering
fig.update_traces(marker=dict(line=dict(width=0)), selector=dict(type='scattergeo'))

# Save the animation as an HTML file for pre-rendering
fig.write_html("covid_animation.html")

# Display the plot (this will be smoother when viewed from the saved HTML file)
fig.show()