# COVID-19 Global Data Tracker
## Data Loading & Exploration

In [None]:
import pandas as pd

# Read the OWID COVID-19 CSV from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

# List the column labels that are available for analysis.
print(df.columns)

# Show the first five records to get a feel for the data.
print(df.head(n=5))

# Count the missing entries in every column.
print(df.isna().sum())

## Data Cleaning

In [None]:
# Countries of interest. Names must match the dataset's `location` column
# exactly: the United States appears there as 'United States' ('USA' is its
# `iso_code`), so filtering on 'USA' would silently drop the country.
countries = ['Kenya', 'United States', 'India']
df_filtered = df[df['location'].isin(countries)]

# Drop rows with missing dates or critical values. The explicit copy makes
# `df_filtered` an independent frame, so the column assignments below do not
# write into a slice of `df` (which raises SettingWithCopyWarning).
df_filtered = df_filtered.dropna(subset=['date', 'total_cases', 'total_deaths']).copy()

# Convert date column to datetime
df_filtered['date'] = pd.to_datetime(df_filtered['date'])

# Forward-fill the cumulative counters one country at a time, so a value is
# never carried over from a different country. `fillna(method='ffill')` is
# deprecated in recent pandas releases; `ffill()` is the supported spelling.
# NOTE: the dropna above already removes rows where these columns are missing,
# so this step is only a safeguard in case that subset is relaxed later.
df_filtered['total_cases'] = df_filtered.groupby('location')['total_cases'].ffill()
df_filtered['total_deaths'] = df_filtered.groupby('location')['total_deaths'].ffill()

# Preview cleaned data
print(df_filtered.head())

## Exploratory Data Analysis (EDA)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One line chart per metric, with one line per country. Each entry is
# (column to plot, chart title, y-axis label):
#   - total cases over time
#   - total deaths over time
#   - daily new cases, to compare countries
metric_plots = [
    ('total_cases', 'Total COVID-19 Cases Over Time', 'Total Cases'),
    ('total_deaths', 'Total COVID-19 Deaths Over Time', 'Total Deaths'),
    ('new_cases', 'Daily New COVID-19 Cases', 'New Cases'),
]

for column, title, ylabel in metric_plots:
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=df_filtered, x='date', y=column, hue='location')
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel(ylabel)
    plt.legend(title='Country')
    plt.show()

# Calculate death rate (cumulative deaths / cumulative cases).
# Rows with zero cases are masked to NaN first, so the ratio is NaN instead of
# inf when the denominator is 0.
cases = df_filtered['total_cases']
# `assign` returns a new frame, so no column is written into a frame that may
# still be a slice of the original data (avoids SettingWithCopyWarning).
df_filtered = df_filtered.assign(
    death_rate=df_filtered['total_deaths'] / cases.where(cases > 0)
)
print(df_filtered[['location', 'date', 'death_rate']].head())