# 📊 COVID-19 Data Analysis

This notebook covers:

- Data Loading & Exploration
- Data Cleaning
- Exploratory Data Analysis (EDA)
- Vaccination Analysis
- (Optional) Choropleth Map
- Key Insights

## 2️⃣ Data Loading & Exploration

In [None]:
# Load CSV after manually downloading from Kaggle
# Place the file in the same directory as this notebook

import pandas as pd

file_path = "owid-covid-data.csv"  # Change this if your downloaded file has a different name
df = pd.read_csv(file_path)

# First look at the raw table, printed in this order:
#   1. the column names,
#   2. the first rows,
#   3. the number of missing values in each column.
for overview in (df.columns, df.head(), df.isnull().sum()):
    print(overview)

## 3️⃣ Data Cleaning

In [None]:
# Filter countries of interest
countries = ["Kenya", "United States", "India"]
df_countries = df[df["location"].isin(countries)]

# Drop rows with missing critical values.
# The explicit .copy() makes the cleaned frame independent of `df`, so the
# column assignments below cannot raise pandas' SettingWithCopyWarning.
df_countries_cleaned = df_countries.dropna(
    subset=["date", "total_cases", "total_deaths", "new_cases", "new_deaths"]
).copy()

# Convert date column to datetime
df_countries_cleaned["date"] = pd.to_datetime(df_countries_cleaned["date"])

# Order rows chronologically within each country so that interpolation fills
# a gap from that country's neighbouring dates.
df_countries_cleaned = df_countries_cleaned.sort_values(["location", "date"])

# Interpolate missing numeric values one country at a time. Interpolating the
# stacked frame directly would fill a gap at the start of one country's rows
# using the last values of the previous country.
numeric_cols = df_countries_cleaned.select_dtypes(include="number").columns
df_countries_cleaned[numeric_cols] = (
    df_countries_cleaned.groupby("location")[list(numeric_cols)]
    .transform(lambda col: col.interpolate())
)

# Calculate death rate (deaths per confirmed case).
# Rows with zero total cases get NaN instead of inf from a division by zero.
case_counts = df_countries_cleaned["total_cases"]
df_countries_cleaned["death_rate"] = (
    df_countries_cleaned["total_deaths"] / case_counts.where(case_counts > 0)
)

## 4️⃣ Exploratory Data Analysis (EDA)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to the figures drawn from here on.
sns.set(style="whitegrid")

# Total cases over time: one line per selected country on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    in_country = df_countries_cleaned["location"] == country
    subset = df_countries_cleaned.loc[in_country]
    ax.plot(subset["date"], subset["total_cases"], label=country)

ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
fig.tight_layout()
plt.show()