This code will:

Load the COVID-19 data from the CSV file.
Parse the 'DateTested' column into a new datetime 'date' column.
Display the first few rows of the dataframe and basic statistics.
Calculate and print the total cases, deaths, and recoveries.
Plot daily new cases, deaths, and recoveries using line plots.
Display a correlation matrix and its heatmap.
Calculate and plot weekly averages for new cases, deaths, and recoveries.

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the sample COVID-19 patient records (one row per tested patient)
data = pd.read_csv('covid19_sample_data.csv')

# Add a datetime 'date' column parsed from the raw 'DateTested' values;
# the original 'DateTested' column is left in place.
data = data.assign(date=pd.to_datetime(data['DateTested']))

# Preview the first five rows
print(data.head(5))



                              PatientID  Age  Gender  \
0  4ed8c3e8-7f34-4035-bfd8-722ada31a4f1   37   Other   
1  7c955579-cffe-40b1-bbd7-2e09e64a6738   59   Other   
2  bf147208-a9b8-44d1-a543-02d27e51bef1   74    Male   
3  85e8b0f8-b7f9-4fe4-a872-d63b976293ab   96    Male   
4  4de243af-3a38-4bb7-a5e6-f08068716e87   14  Female   

                                     Symptoms TestResult  Hospitalized  \
0                        Loss of smell, Cough   Positive          True   
1          Shortness of breath, Loss of smell   Positive         False   
2  Fever, Shortness of breath, Fatigue, Cough   Negative          True   
3                                     Fatigue   Positive         False   
4                                       Fever   Positive         False   

   Recovered  DateTested       date  
0       True  2020-03-11 2020-03-11  
1      False  2022-12-11 2022-12-11  
2      False  2023-06-18 2023-06-18  
3       True  2021-05-22 2021-05-22  
4      False  2020-08-27 202

In [7]:
# Summary statistics (count, mean, std, quartiles, ...) as computed by describe()
stats_summary = data.describe()

print("\nBasic Statistics:\n")
print(stats_summary)



Basic Statistics:

               Age
count  1000.000000
mean     48.530000
std      29.162468
min       0.000000
25%      22.000000
50%      47.000000
75%      73.000000
max     100.000000


In [None]:
# Total cases, hospitalizations, and recoveries
#
# The loaded CSV is patient-level (one row per tested patient, see the preview
# above): it has no pre-aggregated 'new_cases' / 'new_deaths' /
# 'new_recoveries' columns, so indexing them raises KeyError. The totals are
# therefore derived from the per-patient fields instead.
#
# The dataset records no death outcome, so hospitalizations are reported as
# the third metric in place of deaths.
#
# NOTE(review): assumes 'Hospitalized' and 'Recovered' are boolean columns, as
# they appear in the preview -- confirm there are no missing values.

# A "case" is a patient whose test came back positive.
is_positive = data['TestResult'].eq('Positive')

# Outcomes are counted only among confirmed (positive) patients so that each
# figure is a subset of the case count.
total_cases = int(is_positive.sum())
total_hospitalized = int((is_positive & data['Hospitalized']).sum())
total_recoveries = int((is_positive & data['Recovered']).sum())

print("\nTotal Cases:", total_cases)
print("Total Hospitalizations:", total_hospitalized)
print("Total Recoveries:", total_recoveries)



In [None]:
# Daily time series: new cases, hospitalizations, and recoveries
#
# Two fixes compared with the earlier version of this section:
#   * The loaded CSV is patient-level and has no 'new_cases' / 'new_deaths' /
#     'new_recoveries' columns, so the daily counts are derived here from the
#     per-patient fields. The dataset has no death outcome; hospitalizations
#     are plotted as the second panel instead.
#   * The figure is built in a single cell. With the inline backend a figure
#     is rendered and closed when its cell finishes, so creating the figure in
#     one cell and adding subplots in later cells does not produce one
#     three-panel figure.
#
# NOTE(review): 'date' is the test date (the only date in the data), so
# hospitalizations/recoveries are attributed to the day the patient was
# tested -- confirm this is acceptable for the analysis.

is_positive = data['TestResult'].eq('Positive')

# One row per calendar day; days without any test are kept with a count of 0
# so the lines do not interpolate across gaps.
daily_counts = (
    pd.DataFrame({
        'date': data['date'],
        'new_cases': is_positive.astype(int),
        'new_hospitalizations': (is_positive & data['Hospitalized']).astype(int),
        'new_recoveries': (is_positive & data['Recovered']).astype(int),
    })
    .set_index('date')
    .resample('D')
    .sum()
    .reset_index()
)

# (column, title, y-axis label, line color) for each panel, top to bottom
daily_panels = [
    ('new_cases', 'Daily New COVID-19 Cases', 'New Cases', 'blue'),
    ('new_hospitalizations', 'Daily New COVID-19 Hospitalizations',
     'New Hospitalizations', 'red'),
    ('new_recoveries', 'Daily New COVID-19 Recoveries', 'New Recoveries', 'green'),
]

fig, axes = plt.subplots(len(daily_panels), 1, figsize=(14, 7))
for ax, (column, title, ylabel, color) in zip(axes, daily_panels):
    sns.lineplot(x='date', y=column, data=daily_counts, color=color, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel)

# Only the bottom panel carries the x-axis label
axes[-1].set_xlabel('Date')

fig.tight_layout()
plt.show()



In [None]:
# Correlation Analysis
#
# The loaded CSV is patient-level and has no 'new_cases' / 'new_deaths' /
# 'new_recoveries' columns (indexing them raises KeyError), so the daily
# counts are derived here from the per-patient fields. The dataset has no
# death outcome; hospitalizations are used as the second metric instead.
#
# NOTE(review): outcomes are counted among positive patients only, so both
# outcome series are bounded by the daily case count and will correlate with
# it by construction -- interpret the matrix with that in mind.

is_positive = data['TestResult'].eq('Positive')

# Daily counts indexed by test date; days with no tests count as 0.
daily_counts = (
    pd.DataFrame({
        'date': data['date'],
        'new_cases': is_positive.astype(int),
        'new_hospitalizations': (is_positive & data['Hospitalized']).astype(int),
        'new_recoveries': (is_positive & data['Recovered']).astype(int),
    })
    .set_index('date')
    .resample('D')
    .sum()
)

# Pairwise Pearson correlation between the three daily series
correlation_matrix = daily_counts.corr()
print("\nCorrelation Matrix:\n", correlation_matrix)

# Heatmap of the correlation matrix (color scale fixed to the full [-1, 1] range)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title('Correlation Matrix')
plt.show()



In [None]:
# Weekly averages of the daily counts
#
# Fixes compared with the earlier version of this section:
#   * The loaded CSV is patient-level and has no 'new_cases' / 'new_deaths' /
#     'new_recoveries' columns, so the daily counts are derived here from the
#     per-patient fields. The dataset has no death outcome; hospitalizations
#     are used as the second metric instead.
#   * Weeks are calendar weeks (resample('W')), not ISO week numbers. The data
#     spans several years, and grouping on the week number alone would average
#     e.g. week 10 of 2020 together with week 10 of 2023.
#   * `data` is no longer mutated (no 'week' column is added to it).
#   * The figure is built in a single cell; with the inline backend a figure
#     created in one cell is closed before the next cell runs.

is_positive = data['TestResult'].eq('Positive')

# Daily counts indexed by test date; days with no tests count as 0.
daily_counts = (
    pd.DataFrame({
        'date': data['date'],
        'new_cases': is_positive.astype(int),
        'new_hospitalizations': (is_positive & data['Hospitalized']).astype(int),
        'new_recoveries': (is_positive & data['Recovered']).astype(int),
    })
    .set_index('date')
    .resample('D')
    .sum()
)

# Mean daily count within each calendar week; 'week' holds the resample bin
# label for that week.
weekly_data = (
    daily_counts
    .resample('W')
    .mean()
    .rename_axis('week')
    .reset_index()
)

# (column, title, y-axis label, line color) for each panel, top to bottom
weekly_panels = [
    ('new_cases', 'Weekly Average New COVID-19 Cases', 'Average New Cases', 'blue'),
    ('new_hospitalizations', 'Weekly Average New COVID-19 Hospitalizations',
     'Average New Hospitalizations', 'red'),
    ('new_recoveries', 'Weekly Average New COVID-19 Recoveries',
     'Average New Recoveries', 'green'),
]

# Plotting weekly averages
fig, axes = plt.subplots(len(weekly_panels), 1, figsize=(14, 7))
for ax, (column, title, ylabel, color) in zip(axes, weekly_panels):
    sns.lineplot(x='week', y=column, data=weekly_data, color=color, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Week')
    ax.set_ylabel(ylabel)

fig.tight_layout()
plt.show()