# COVID-19 Case Analysis
**By:** Brian Ndegwa Mwalwala  
**Date:** 11 May 2025 #happymothersday

## 1. Data Loading

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset: read the CSV directly from its remote URL into a DataFrame
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(url)

# Report the largest value in the raw 'date' column (not yet parsed to
# datetime at this point) as a quick freshness check.
last_update = df['date'].max()
print(f"Data loaded successfully! Last update: {last_update}")

# Preview the first two rows (displayed as the cell's output)
df.head(2)

## 2. Data Preparation

In [None]:
# Filter to target countries, keeping only the columns the analysis needs.
countries = ['Kenya', 'United States', 'India', 'Germany']
# Select rows and columns in a single .loc call and take an explicit copy:
# the columns added below must be written to an independent frame, not to a
# chained-indexing slice of `df` (which triggers SettingWithCopyWarning and
# can leave the assignment without effect).
analysis_df = df.loc[
    df['location'].isin(countries),
    ['date', 'location', 'total_cases'],
].copy()

# Convert dates and scale cases to millions
analysis_df['date'] = pd.to_datetime(analysis_df['date'])
analysis_df['total_cases_millions'] = analysis_df['total_cases'] / 1e6

# Preview the prepared data (displayed as the cell's output)
analysis_df.head()

## 3. Visualization

In [None]:
# Create the plot: one line per country on a single shared set of axes
fig, ax = plt.subplots(figsize=(10, 5))

# Iterate over `countries` (not a groupby) so the draw order, line colours
# and legend order follow the order of the list.
for location_name in countries:
    subset = analysis_df.loc[analysis_df['location'] == location_name]
    ax.plot(
        subset['date'],
        subset['total_cases_millions'],
        label=location_name,
        linewidth=2,
    )

# Formatting
ax.set_title('COVID-19 Cases by Country (Millions)', fontsize=14)
ax.set_ylabel('Total Cases (in millions)')
ax.legend(title='Country')
ax.grid(True, alpha=0.3)

# Save and display: tighten the layout first so the saved PNG matches
# what is shown on screen.
fig.tight_layout()
fig.savefig('covid_results.png', dpi=120)
plt.show()

## 4. Key Insights

In [None]:
# Calculate latest stats: order rows chronologically, then take the last
# entry of each column per location (result is indexed by location name).
chronological = analysis_df.sort_values('date')
latest = chronological.groupby('location').last()

# Pull the column once instead of re-indexing it inside every f-string
latest_millions = latest['total_cases_millions']
top_country = latest_millions.idxmax()
top_value = latest_millions.max()
bottom_country = latest_millions.idxmin()
bottom_value = latest_millions.min()

print("🔍 Key Findings:")
print(f"- Highest cases: {top_country} ({top_value:.1f} million)")
print(f"- Lowest cases: {bottom_country} ({bottom_value:.1f} million)")