In [None]:
#  IMPORTS AND SETUP
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# CONFIGURATIONS
# Folder that receives every chart saved by the cells below.
CHARTS_DIR = "charts"
# Location of the input CSV, relative to the notebook's working directory.
DATA_PATH = "owid-covid-data.csv"  # Update this if needed

# Create the output folder up front; an already existing folder is accepted.
os.makedirs(name=CHARTS_DIR, exist_ok=True)


In [None]:
# Load Data
# Read the CSV at DATA_PATH into `df`, the frame every later cell works on.
# A missing file raises FileNotFoundError with a readable hint instead of
# calling exit(), which would end the whole interpreter/kernel session rather
# than just stopping the run at this cell.
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f" File {DATA_PATH} not found. Make sure it's in the correct location."
    ) from err
else:
    print("Data loaded successfully.")


In [None]:
#  Clean & Preprocess Data

# Count the missing values BEFORE they are filled; counting afterwards would
# always report zero for every column.
missing_counts = df.isnull().sum()

# Fill missing values
# NOTE(review): this zero-fills every column, including text and cumulative
# ones — confirm that 0 is the intended placeholder everywhere.
df = df.fillna(0)

# Convert 'date' to datetime
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'])

#Exploration of the data
print("\n Data Overview:")
print(df.head())
print("\n Columns:", df.columns.tolist())
print("\n Missing Values:\n", missing_counts)
print("\nℹ Data Info:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would add a stray "None" line.
df.info()
print("\n Data Summary:")
print(df.describe())


In [None]:
# Analysis 1 Daily COVID-19 Cases Over Time
# Sums `new_cases` per date across countries and saves the resulting line chart.

if 'date' in df.columns and 'new_cases' in df.columns:
    # The OWID file mixes country rows with aggregate rows (World, continents,
    # income groups) whose iso_code carries the "OWID_" prefix. Summing both
    # would count the same cases several times, so aggregates are left out.
    # NOTE(review): any territory that OWID codes with the same prefix is
    # dropped as well — confirm that is acceptable.
    if 'iso_code' in df.columns:
        is_aggregate = df['iso_code'].astype(str).str.startswith('OWID_')
        country_rows = df[~is_aggregate]
    else:
        country_rows = df

    cases_by_date = country_rows.groupby('date')['new_cases'].sum()

    fig, ax = plt.subplots(figsize=(10, 6))
    cases_by_date.plot(ax=ax, title='Daily COVID-19 Cases')
    ax.set_xlabel('Date')
    ax.set_ylabel('New Cases')
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(f"{CHARTS_DIR}/daily_cases.png")
    plt.show()


In [None]:
# Analysis 2 Top 10 Locations by Total Deaths
# Ranks locations by their highest cumulative death count and saves a bar chart.
if 'location' in df.columns and 'total_deaths' in df.columns:
    # Aggregate rows (World, continents, income groups; iso_code prefixed with
    # "OWID_") would otherwise fill the top of the ranking and push the actual
    # countries out of the chart.
    # NOTE(review): territories OWID codes with the same prefix are dropped
    # too — confirm that is acceptable.
    if 'iso_code' in df.columns:
        is_aggregate = df['iso_code'].astype(str).str.startswith('OWID_')
        country_rows = df[~is_aggregate]
    else:
        country_rows = df

    deaths_by_location = (
        country_rows.groupby('location')['total_deaths']
        .max()
        .sort_values(ascending=False)
        .head(10)
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    deaths_by_location.plot(kind='bar', color='red', ax=ax)
    ax.set_title('Top 10 Locations by Total Deaths')
    ax.set_ylabel('Total Deaths')
    plt.xticks(rotation=45)
    fig.tight_layout()
    fig.savefig(f"{CHARTS_DIR}/top10_deaths.png")
    plt.show()


In [None]:
# TOP COUNTRIES BY TOTAL CASES
# Ranks countries by their highest cumulative case count and saves a bar chart.
# The column guard mirrors the other analysis cells, so a file without these
# columns skips the chart instead of raising KeyError.
if 'location' in df.columns and 'total_cases' in df.columns:
    # The chart is about countries: leave out aggregate rows (World,
    # continents, income groups), which carry an "OWID_"-prefixed iso_code and
    # would otherwise occupy the top of the ranking.
    # NOTE(review): territories OWID codes with the same prefix are dropped
    # too — confirm that is acceptable.
    if 'iso_code' in df.columns:
        is_aggregate = df['iso_code'].astype(str).str.startswith('OWID_')
        country_rows = df[~is_aggregate]
    else:
        country_rows = df

    top_countries = (
        country_rows.groupby('location')['total_cases']
        .max()
        .sort_values(ascending=False)
        .head(10)
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    top_countries.plot(kind='bar', color='orange', ax=ax)
    ax.set_title('Top 10 Countries by Total COVID-19 Cases')
    ax.set_ylabel('Total Cases')
    plt.xticks(rotation=45)
    fig.tight_layout()
    fig.savefig(f"{CHARTS_DIR}/top10_cases.png")
    plt.show()

In [None]:
#  TIME TREND ANALYSIS FOR SELECTED COUNTRIES
# One figure with two curves per country: cumulative cases and cumulative deaths.
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
for country in ['United States', 'India']:
    subset = df.loc[df['location'] == country]
    dates = subset['date']
    trend_ax.plot(dates, subset['total_cases'], label=f"{country} Cases")
    trend_ax.plot(dates, subset['total_deaths'], label=f"{country} Deaths")

# Titles, labels and legend for the shared axes.
trend_ax.set_title('COVID-19 Total Cases and Deaths Over Time')
trend_ax.set_xlabel('Date')
trend_ax.set_ylabel('Count')
trend_ax.legend()
plt.xticks(rotation=45)

# Save the chart to the charts folder, then display it.
trend_fig.tight_layout()
trend_fig.savefig(f"{CHARTS_DIR}/trend_US_India.png")
plt.show()

In [None]:
#  VACCINATION TRENDS
# Plots the cumulative vaccination count over time for a few selected
# countries and saves the chart.
selected_countries = ['United States', 'India', 'Brazil']
plt.figure(figsize=(12,6))
for country in selected_countries:
    # Sort by date so the forward fill below follows chronological order.
    country_data = df[df['location'] == country].sort_values('date')
    doses = country_data['total_vaccinations']
    # The cleaning step replaces missing values with 0. A cumulative total
    # cannot fall back to 0, so zeros are treated as "no report that day" and
    # the last reported value is carried forward; otherwise the line would
    # plunge to 0 on every unreported day. Days before the first report stay
    # empty and are simply not drawn.
    doses = doses.mask(doses.eq(0)).ffill()
    plt.plot(country_data['date'], doses, label=country)
plt.title('Cumulative COVID-19 Vaccinations Over Time')
plt.xlabel('Date')
plt.ylabel('Total Vaccinations')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(f"{CHARTS_DIR}/vaccination_trends.png")
plt.show()

In [None]:
# Analysis 3 Correlation Heatmap of Key COVID Metrics
# Correlates whichever of the key metric columns exist in `df` and saves an
# annotated heatmap of the result.
key_metrics = [
    'total_cases',
    'total_deaths',
    'new_cases',
    'new_deaths',
    'total_vaccinations',
]
# Keep only the metrics that are actually present in the loaded file.
available_metrics = [metric for metric in key_metrics if metric in df.columns]
numeric_df = df.loc[:, available_metrics]

# Nothing is drawn when no metric column (or no row) is available.
if not numeric_df.empty:
    corr = numeric_df.corr()
    heat_fig, heat_ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr, annot=True, cmap="coolwarm", ax=heat_ax)
    heat_ax.set_title("Correlation Between Key COVID Metrics")
    heat_fig.tight_layout()
    heat_fig.savefig(f"{CHARTS_DIR}/correlation_heatmap.png")
    plt.show()


In [None]:
#  PIE CHART: VACCINATED VS UNVACCINATED (Approximation)
# Compares a global "vaccinated" figure against an assumed world population.
total_population = 8_000_000_000  # Assume 8B globally

# `total_vaccinations` counts doses, not people, so prefer the head-count
# column when the file provides it.
vaccination_col = (
    'people_vaccinated' if 'people_vaccinated' in df.columns
    else 'total_vaccinations'
)

# Use the dataset's "World" rows when they exist; otherwise fall back to the
# largest value found anywhere in the file, as before.
if 'location' in df.columns and (df['location'] == 'World').any():
    source_rows = df[df['location'] == 'World']
else:
    source_rows = df

vaccinated = source_rows[vaccination_col].max()
if pd.isna(vaccinated):
    vaccinated = 0
# Keep the figure within [0, total_population]: a dose count can exceed the
# population, and the resulting negative "unvaccinated" wedge would make
# plt.pie raise ValueError.
vaccinated = min(max(vaccinated, 0), total_population)
unvaccinated = total_population - vaccinated

plt.figure(figsize=(8,8))
plt.pie([vaccinated, unvaccinated],
        labels=['Vaccinated', 'Unvaccinated'],
        autopct='%1.1f%%',
        colors=['green', 'red'],
        startangle=90)
plt.title('Global Vaccinated vs Unvaccinated Population')
plt.tight_layout()
plt.savefig(f"{CHARTS_DIR}/vaccinated_pie.png")
plt.show()

In [None]:
#  SUMMARY OF INSIGHTS
# The closing notes are fixed text; they are printed one per line, in order.
insight_lines = (
    "\n--- INSIGHTS ---",
    "• United States and India had the highest total cases.",
    "• Strong positive correlation between total cases and total deaths.",
    "• Vaccination numbers grew rapidly in major countries.",
    "• Pie chart shows vaccination gap at global scale.",
    "\n All analyses completed successfully.",
)
for insight_line in insight_lines:
    print(insight_line)
