# Statistical Analysis of COVID-19 Data

This notebook performs statistical analysis on the COVID-19 dataset, including correlation analysis and calculations of growth rates and doubling times.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset (the path is resolved relative to the notebook's working directory)
data = pd.read_csv('../data/covid19_data.csv')

# A notebook cell only renders the value of its *last* expression, so bare
# `data.head()`, `data.tail()` and `data.shape` statements in the middle of a
# cell are evaluated and then silently discarded. `display` (injected into the
# namespace by the IPython kernel) renders each summary explicitly.
display(data.head())   # first few rows
display(data.tail())   # last few rows
display(data.shape)    # (number of rows, number of columns)

# Column names; as the final expression of the cell this is rendered automatically
data.columns

In [None]:
# Correlation Analysis
# Correlate numeric columns only. The frame also carries a `date` column that
# `read_csv` loads as text, and `DataFrame.corr()` raises on non-numeric
# columns in pandas 2.x, so those are filtered out explicitly first.
correlation_matrix = data.select_dtypes(include='number').corr()

# Display the correlation matrix as an annotated heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix of COVID-19 Data')
plt.show()

In [None]:
# Growth Rate Calculation
# Row-over-row percentage change in `new_cases`. When the previous row is 0 the
# ratio is +/-inf; those entries are mapped to NaN so they show up as gaps in
# the plot and do not poison later aggregates (means, correlations) computed
# on this column.
data['growth_rate'] = (
    data['new_cases'].pct_change().replace([np.inf, -np.inf], np.nan) * 100
)

# Display the growth rate
plt.figure(figsize=(12, 6))
plt.plot(data['date'], data['growth_rate'], marker='o')
plt.title('COVID-19 Growth Rate Over Time')
plt.xlabel('Date')
plt.ylabel('Growth Rate (%)')
plt.xticks(rotation=45)
plt.grid()
plt.show()

In [None]:
# Doubling Time Calculation
def doubling_time(cases):
    """Return ``ln(2) / ln(1 + cases / (cases - 1))`` for a single case count.

    Parameters
    ----------
    cases : int or float
        Case count for one observation.

    Returns
    -------
    float
        The value of the expression above, or ``np.nan`` where it is not
        well defined: missing (NaN) input, non-positive counts, ``cases == 1``
        (the inner division is by zero) and fractional counts below 1 (the
        result would be negative or the logarithm of a negative number).

    Notes
    -----
    NOTE(review): ``cases / (cases - 1)`` is not a period-over-period growth
    rate; the usual doubling-time formula is ``ln(2) / ln(1 + r)`` with ``r``
    the fractional growth per period. The formula is kept as-is so existing
    results for ``cases > 1`` do not change -- confirm the intended definition.
    """
    # `not cases > 1` (rather than `cases <= 1`) also routes NaN to the guard,
    # and it covers cases == 1, where `cases - 1` would be a zero divisor.
    if not cases > 1:
        return np.nan
    return np.log(2) / np.log(1 + (cases / (cases - 1)))

# Evaluate the doubling-time helper once per row of the new-case counts
new_case_counts = data['new_cases']
data['doubling_time'] = new_case_counts.apply(doubling_time)

# Line chart of the doubling-time column against the date column
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['date'], data['doubling_time'], marker='o', color='orange')
ax.set_title('COVID-19 Doubling Time Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Doubling Time (Days)')
plt.xticks(rotation=45)  # tilt the date labels so neighbouring ticks do not overlap
ax.grid()
plt.show()

## Conclusions

In this analysis, we explored the correlation between various COVID-19 metrics, calculated the growth rate of new cases, and determined the doubling time for the spread of the virus. These insights can help in understanding the dynamics of the pandemic and inform public health responses.