# COVID-19 Global Data Tracker

This project analyzes global COVID-19 trends using data from [Our World in Data](https://ourworldindata.org/coronavirus).  
We will explore cases, deaths, and vaccinations across countries and over time.

---

## Step 1: Import Libraries


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


: 

## Step 2: Load Data


In [None]:

# Read the raw COVID-19 CSV into a DataFrame and preview its first rows.
raw_csv_path = 'data/owid-covid-data.csv'
df = pd.read_csv(raw_csv_path)
df.head()

## Step 3: Explore Data


In [None]:
# A notebook cell only renders its final expression, so the column names and
# the shape are printed explicitly; as bare expressions they were discarded.
print(df.columns.tolist())
print(df.shape)

# Missing-value count per column (rendered as the cell's output).
df.isnull().sum()

## Step 4: Data Cleaning

### Filter for selected countries

In [None]:
# Countries kept for the rest of the analysis.
countries = ['Kenya', 'India', 'United States']

# Boolean-mask filtering returns a selection of the original frame. The
# explicit .copy() makes `df` an independent DataFrame, so later column
# assignments (e.g. df['date'] = ...) do not raise SettingWithCopyWarning.
df = df[df['location'].isin(countries)].copy()

### Convert date column


In [None]:
# Convert the 'date' column with pd.to_datetime. assign() builds a new
# DataFrame instead of writing into a filtered selection of the source data,
# which avoids pandas' SettingWithCopyWarning.
df = df.assign(date=pd.to_datetime(df['date']))

### Fill missing values


In [None]:
# Zero-fill only the cumulative counters that are meant to default to 0.
# A blanket df.fillna(0) would also write the integer 0 into text columns,
# erase the NaN markers that the later dropna(subset=...) calls rely on
# (total_cases, people_vaccinated_per_hundred), and create zero denominators
# for ratio columns such as the death rate.
# Rebinding to the returned frame (rather than inplace=True) also avoids
# mutating a filtered selection of the original data.
df = df.fillna({'total_deaths': 0, 'total_vaccinations': 0})

In [None]:
# Step 5: Plot Total Cases Over Time
# One line per entry in `countries`, all drawn on a single axes.
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    rows = df.loc[df['location'] == name]
    ax.plot(rows['date'], rows['total_cases'], label=name)

ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


## Data Cleaning: List of countries to be analyzed


In [None]:
# Countries compared in the cleaning and plotting cells below.
countries = [
    'Kenya',
    'India',
    'United States',
]

In [None]:
# Filter data to include only selected countries.
# .copy() detaches the result from `df`, so converting or adding columns on
# df_countries afterwards does not trigger pandas' SettingWithCopyWarning.
df_countries = df[df['location'].isin(countries)].copy()

In [None]:
# Sanity check: show the distinct locations present after filtering.
kept_locations = df_countries['location']
kept_locations.unique()

In [None]:
# Convert the 'date' column to datetime.
# assign() returns a new DataFrame, so the conversion never writes into a
# filtered selection of another frame (which raises SettingWithCopyWarning).
df_countries = df_countries.assign(date=pd.to_datetime(df_countries['date']))

# Confirm the change
df_countries.dtypes['date']


In [None]:
# Drop rows where total_cases is missing.
# dropna() hands back a filtered selection of df_countries; .copy() makes
# df_cleaned a standalone frame so columns can be assigned to it later
# without SettingWithCopyWarning.
df_cleaned = df_countries.dropna(subset=['total_cases']).copy()

# Preview cleaned data
df_cleaned.head()


In [None]:
# Fill gaps in the cumulative counters.
# These columns are running totals, so a missing report is first replaced by
# the previous known value for the same country (forward fill); only the gap
# before a country's first report becomes 0. Filling every gap with 0 would
# make the totals appear to drop to zero on unreported days.
# NOTE(review): forward fill relies on rows being in chronological order
# within each location — confirm for the input file.
cumulative_cols = ['total_deaths', 'total_vaccinations']
filled = df_cleaned.groupby('location')[cumulative_cols].ffill().fillna(0)

# assign() rebinds df_cleaned to a new frame instead of writing into a
# filtered selection, which avoids SettingWithCopyWarning.
df_cleaned = df_cleaned.assign(
    total_deaths=filled['total_deaths'],
    total_vaccinations=filled['total_vaccinations'],
)


In [None]:
# Write the cleaned frame to CSV, omitting the index column.
cleaned_csv_path = 'data/cleaned_covid_data.csv'
df_cleaned.to_csv(cleaned_csv_path, index=False)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set plot style
sns.set(style="darkgrid")

# Plot total cases over time: one line per location found in df_cleaned
fig, ax = plt.subplots(figsize=(12, 6))
for name in df_cleaned['location'].unique():
    subset = df_cleaned.loc[df_cleaned['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)

ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Cumulative deaths over time, one line per location in df_cleaned.
fig, ax = plt.subplots(figsize=(12, 6))
is_location = df_cleaned['location'].eq
for name in df_cleaned['location'].unique():
    subset = df_cleaned[is_location(name)]
    ax.plot(subset['date'], subset['total_deaths'], label=name)

ax.set_title('Total COVID-19 Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Deaths')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Daily new cases over time, one line per location in df_cleaned.
fig, ax = plt.subplots(figsize=(12, 6))
location_names = df_cleaned['location'].unique()
for name in location_names:
    matches = df_cleaned['location'] == name
    subset = df_cleaned.loc[matches]
    ax.plot(subset['date'], subset['new_cases'], label=name)

ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Adding a new column for death rate (total_deaths divided by total_cases).
# Rows whose total_cases is not positive are masked to NaN first, so the
# division yields NaN (a gap in the plot) instead of inf or 0/0 artefacts.
valid_cases = df_cleaned['total_cases'].where(df_cleaned['total_cases'] > 0)

# assign() creates the column on a new frame, which avoids
# SettingWithCopyWarning when df_cleaned came from a filtered selection.
df_cleaned = df_cleaned.assign(death_rate=df_cleaned['total_deaths'] / valid_cases)

# Plot
plt.figure(figsize=(12, 6))
for country in df_cleaned['location'].unique():
    country_data = df_cleaned[df_cleaned['location'] == country]
    plt.plot(country_data['date'], country_data['death_rate'], label=country)

plt.title('COVID-19 Death Rate Over Time')
plt.xlabel('Date')
plt.ylabel('Death Rate')
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Cumulative vaccinations over time, one line per location in df_cleaned.
fig, ax = plt.subplots(figsize=(12, 6))
for name in df_cleaned['location'].unique():
    subset = df_cleaned.loc[df_cleaned['location'] == name]
    ax.plot(subset['date'], subset['total_vaccinations'], label=name)

ax.set_title('Total COVID-19 Vaccinations Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Vaccinations')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Share of the population vaccinated over time, one line per location.
plt.figure(figsize=(12, 6))
for country in df_cleaned['location'].unique():
    country_data = df_cleaned[df_cleaned['location'] == country]
    # Matplotlib breaks a line at every NaN, so rows without a reported value
    # are skipped; consecutive reports are then joined by a continuous line.
    reported = country_data.dropna(subset=['people_vaccinated_per_hundred'])
    plt.plot(reported['date'], reported['people_vaccinated_per_hundred'], label=country)

plt.title('Percentage of People Vaccinated Over Time')
plt.xlabel('Date')
plt.ylabel('% Vaccinated')
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Latest data for Kenya.
# Only rows with a positive value count as real reports: NaN means "no
# report", and any upstream zero-fill of missing values makes 0
# indistinguishable from "no report" too. Sorting by date guarantees that
# iloc[-1] is the most recent report regardless of the frame's row order.
kenya_rows = df_cleaned[df_cleaned['location'] == 'Kenya']
kenya_reports = kenya_rows[kenya_rows['people_vaccinated_per_hundred'] > 0].sort_values('date')
if kenya_reports.empty:
    # Fail with an explicit message instead of an opaque IndexError.
    raise ValueError('No vaccination data available for Kenya')
kenya_latest = kenya_reports.iloc[-1]

# Cap at 100 so the "Unvaccinated" wedge can never become negative
# (plt.pie rejects negative wedge sizes).
vaccinated = min(float(kenya_latest['people_vaccinated_per_hundred']), 100.0)
unvaccinated = 100 - vaccinated

# Pie chart
plt.figure(figsize=(6, 6))
plt.pie([vaccinated, unvaccinated], labels=['Vaccinated', 'Unvaccinated'], autopct='%1.1f%%', startangle=140)
plt.title(f'Vaccination Status in Kenya as of {kenya_latest["date"].date()}')
plt.show()


In [None]:
# Take the last row of each location after ordering the frame by date.
rows_by_date = df_cleaned.sort_values('date')
latest_data = rows_by_date.groupby('location').tail(1)

# Columns passed on to the map cells.
map_columns = ['location', 'iso_code', 'total_cases', 'people_vaccinated_per_hundred']
map_df = latest_data[map_columns]


In [None]:
import plotly.express as px

# Choropleth coloured by total_cases; regions are matched via the iso_code column.
cases_map_options = {
    "locations": "iso_code",
    "color": "total_cases",
    "hover_name": "location",
    "color_continuous_scale": "Reds",
    "title": "Total COVID-19 Cases by Country (Latest)",
}
fig = px.choropleth(map_df, **cases_map_options)
fig.show()


In [None]:
# Choropleth coloured by people_vaccinated_per_hundred; regions are matched
# via the iso_code column.
vaccination_map_options = {
    "locations": "iso_code",
    "color": "people_vaccinated_per_hundred",
    "hover_name": "location",
    "color_continuous_scale": "Greens",
    "title": "Percentage of Population Vaccinated by Country (Latest)",
}
fig = px.choropleth(map_df, **vaccination_map_options)
fig.show()


## Key Insights from the COVID-19 Global Data Tracker

- The USA and India consistently have the highest total COVID-19 cases and deaths.
- Vaccination rates vary significantly across countries, with some achieving over 70% coverage while others lag behind.
- There is a visible correlation between high vaccination coverage and lower new cases in some countries.
- Some countries have anomalous spikes in cases, indicating possible data reporting issues or outbreaks.
- Death rates have generally declined over time, likely due to better treatment and vaccine rollout.
