In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [0]:
# Read the raw CSV, parse the 'Date' column into datetimes and replace every
# missing value in the frame with 0.
df = pd.read_csv("data.csv")
df = df.assign(Date=pd.to_datetime(df['Date'])).fillna(0)

In [26]:
# Group the data by 'Entity' (country)
grouped_df = df.groupby('Entity')

print(grouped_df.head())

# A column counts as "immutable" only when it holds exactly one distinct value
# inside EVERY country. The previous loop added a column as soon as a single
# country happened to be constant in it (e.g. a country whose counts are all 0),
# which could wrongly classify time-varying columns as immutable.
distinct_per_country = grouped_df.nunique()
is_constant = distinct_per_country.eq(1).all(axis=0)
immutable_columns = set(is_constant[is_constant].index)

# The grouping key is constant within its own group by construction. It may be
# absent from the nunique() result, so add it explicitly to keep it in the set.
immutable_columns.add('Entity')


         Entity Continent  Latitude  Longitude  Average temperature per year  \
0       Albania    Europe     41.15      20.17                            14   
1       Albania    Europe     41.15      20.17                            14   
2       Albania    Europe     41.15      20.17                            14   
3       Albania    Europe     41.15      20.17                            14   
4       Albania    Europe     41.15      20.17                            14   
...         ...       ...       ...        ...                           ...   
38126  Zimbabwe    Africa    -19.02      29.15                            20   
38127  Zimbabwe    Africa    -19.02      29.15                            20   
38128  Zimbabwe    Africa    -19.02      29.15                            20   
38129  Zimbabwe    Africa    -19.02      29.15                            20   
38130  Zimbabwe    Africa    -19.02      29.15                            20   

       Hospital beds per 1000 people  M

In [23]:
# Show, in order: every column of the DataFrame, the columns flagged as
# immutable, and the remaining (varying) columns.
column_set = set(df.columns)
for shown in (column_set, immutable_columns, column_set.difference(immutable_columns)):
    print(shown)

{'Median age', 'Date', 'Deaths', 'Continent', 'Hospital beds per 1000 people', 'GDP/Capita', 'Population', 'Cases', 'Daily tests', 'Population aged 65 and over (%)', 'Medical doctors per 1000 people', 'Average temperature per year', 'Entity', 'Longitude', 'Latitude'}
{'Median age', 'Continent', 'Hospital beds per 1000 people', 'GDP/Capita', 'Population', 'Population aged 65 and over (%)', 'Medical doctors per 1000 people', 'Average temperature per year', 'Entity', 'Longitude', 'Latitude'}
{'Date', 'Daily tests', 'Deaths', 'Cases'}


In [14]:
# Summary statistics of the DataFrame, rendered as the cell's output.
# NOTE(review): missing values were replaced with 0 when the data was loaded,
# so any such zeros are part of the statistics shown here.
df.describe()


In [13]:
# Based on the 'Deaths' column, which holds each country's deaths up to that day,
# derive daily deaths as the difference between a row and the previous row of the
# same country. The first row of every country has no predecessor and is set to 0.
# NOTE(review): 'Deaths' is overwritten in place, so running this cell a second
# time differences the already-differenced values.
deaths_by_country = df['Deaths'].groupby(df['Entity'])
df['Deaths'] = deaths_by_country.diff().fillna(0)

In [8]:
# Highest 'Cases' value recorded for each country, largest first.
# (This is a per-country maximum, not an average.)
df['Cases'].groupby(df['Entity']).max().sort_values(ascending=False)

Entity
United States     28605669.0
India             11112241.0
Russia             4198400.0
United Kingdom     4188827.0
France             3747263.0
                     ...    
Mongolia              2952.0
Vietnam               2448.0
New Zealand           2378.0
Bhutan                 867.0
Fiji                    59.0
Name: Cases, Length: 104, dtype: float64

In [None]:
# Bar chart of the mean of every numeric column.
mean_values = df.mean(numeric_only=True)
ax = mean_values.plot.bar()
ax.set_title('Mean Values')
ax.set_xlabel('Columns')
ax.set_ylabel('Mean')
plt.show()


In [None]:
# Box plot of the DataFrame's columns, drawn on an explicitly created Axes.
fig, ax = plt.subplots()
df.boxplot(ax=ax)
ax.set_title('Box Plot')
ax.set_ylabel('Values')
plt.show()


In [None]:
# One histogram per selected column, with a shared figure title.
hist_columns = ['Latitude', 'Longitude', 'GDP/Capita']
df.hist(column=hist_columns)
plt.gcf().suptitle('Histograms')
plt.show()


In [None]:
# Pairwise correlation of the numeric columns, drawn as an annotated heatmap.
corr_matrix = df.corr(numeric_only=True)
ax = sns.heatmap(corr_matrix, annot=True)
ax.set_title('Correlation Heatmap')
plt.show()


In [None]:
# Scatter of GDP per capita (x) against population (y), one point per row.
fig, ax = plt.subplots()
ax.scatter(df['GDP/Capita'], df['Population'])
ax.set_title('Scatter Plot')
ax.set_xlabel('GDP/Capita')
ax.set_ylabel('Population')
plt.show()

In [None]:
# Convert the 'Date' column to datetime type.
# NOTE(review): the data-loading cell already applies pd.to_datetime to 'Date',
# so on a top-to-bottom run this repeats that conversion — confirm before removing.
df['Date'] = pd.to_datetime(df['Date'])


In [None]:
# Line plot of daily new cases, summed over all countries.
#
# Two problems with plotting df['Cases'] against df['Date'] directly:
#   * the frame stacks the rows of every country, so a single line would jump
#     back in time at each country boundary;
#   * the per-country maxima shown earlier in this notebook run into the
#     millions, i.e. 'Cases' is a running total rather than a daily count.
# So take the day-over-day change within each country (first row of a country
# has no predecessor and is set to 0, as in the daily-deaths step) and add the
# countries up per date.
# NOTE(review): like the daily-deaths step, this assumes each country's rows
# are in chronological order — confirm.
new_cases = df.groupby('Entity')['Cases'].diff().fillna(0)
daily_cases = new_cases.groupby(df['Date']).sum()

plt.plot(daily_cases.index, daily_cases.values)
plt.title('Daily COVID-19 Cases')
plt.xlabel('Date')
plt.ylabel('Cases')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Line plot of daily deaths, summed over all countries.
#
# The frame stacks the rows of every country, so plotting the raw column against
# 'Date' would draw one line that jumps back in time at each country boundary.
# Aggregate to a single value per date first (groupby also sorts the dates).
# NOTE(review): relies on 'Deaths' already holding daily counts, i.e. on the
# earlier differencing cell having been run exactly once.
daily_deaths = df.groupby('Date')['Deaths'].sum()

plt.plot(daily_deaths.index, daily_deaths.values)
plt.title('Daily COVID-19 Deaths')
plt.xlabel('Date')
plt.ylabel('Deaths')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Calculate and visualize cumulative cases over time (all countries combined).
#
# 'Cases' is already a running total per country (the per-country maxima shown
# earlier in this notebook run into the millions), and the frame stacks every
# country's rows. A plain cumsum() over the column therefore accumulates running
# totals on top of each other across country boundaries. Instead, lay the data
# out as date x country, carry each country's last known total forward over
# dates it does not report, and add the countries up per date.
cases_by_country = df.pivot_table(
    index='Date', columns='Entity', values='Cases', aggfunc='max'
)
global_cases = cases_by_country.ffill().sum(axis=1)

# Keep the 'Cumulative Cases' column available on df: each row now carries the
# combined total of all countries on that row's date.
df['Cumulative Cases'] = df['Date'].map(global_cases)

plt.plot(global_cases.index, global_cases.values)
plt.title('Cumulative COVID-19 Cases')
plt.xlabel('Date')
plt.ylabel('Cumulative Cases')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Calculate and visualize case fatality rate (CFR) = total deaths / total cases.
#
# Fixes relative to dividing the raw columns:
#   * 'Deaths' holds daily counts after the earlier differencing cell while
#     'Cases' is a running total, so the two are put on the same footing by
#     rebuilding each country's running death total with a cumulative sum
#     (the differencing step zeroed each country's first row, so that initial
#     value is not recovered);
#   * rows with 0 cases would divide by zero — they yield NaN instead;
#   * the frame stacks all countries, so the plot uses one combined value per
#     date rather than a single line running through every country in turn.
# NOTE(review): assumes the differencing cell has been run exactly once and that
# each country's rows are in chronological order — confirm.
cumulative_deaths = df.groupby('Entity')['Deaths'].cumsum()
reported_cases = df['Cases'].where(df['Cases'] > 0)
df['CFR'] = cumulative_deaths / reported_cases * 100

totals_by_date = pd.DataFrame(
    {'deaths': cumulative_deaths, 'cases': df['Cases']}
).groupby(df['Date']).sum()
global_cfr = (
    totals_by_date['deaths']
    / totals_by_date['cases'].where(totals_by_date['cases'] > 0)
    * 100
)

plt.plot(global_cfr.index, global_cfr.values)
plt.title('Case Fatality Rate (CFR)')
plt.xlabel('Date')
plt.ylabel('CFR (%)')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Visualize daily tests conducted, summed over all countries.
#
# The frame stacks the rows of every country, so plotting the raw column against
# 'Date' would draw one line that jumps back in time at each country boundary.
# Aggregate to a single value per date first (groupby also sorts the dates).
# NOTE(review): missing values were replaced with 0 at load time, so countries
# without test data contribute 0 to the daily sum.
daily_tests = df.groupby('Date')['Daily tests'].sum()

plt.plot(daily_tests.index, daily_tests.values)
plt.title('Daily COVID-19 Tests')
plt.xlabel('Date')
plt.ylabel('Tests')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Calculate and visualize tests per case ratio (daily tests / daily new cases).
#
# Fixes relative to dividing the raw columns:
#   * 'Daily tests' is a per-day figure while 'Cases' is a running total (the
#     per-country maxima shown earlier run into the millions), so the
#     denominator is the day-over-day change in cases within each country;
#   * days without new cases would divide by zero — they yield NaN instead;
#   * the frame stacks all countries, so the plot uses one combined ratio per
#     date rather than a single line running through every country in turn.
# NOTE(review): assumes each country's rows are in chronological order — confirm.
new_cases = df.groupby('Entity')['Cases'].diff().fillna(0)
df['Tests per Case'] = df['Daily tests'] / new_cases.where(new_cases > 0)

# Missing test counts were replaced with 0 at load time; only rows that report
# tests enter the combined ratio so that their cases do not dilute it.
has_tests = df['Daily tests'] > 0
daily_totals = pd.DataFrame(
    {'tests': df.loc[has_tests, 'Daily tests'], 'cases': new_cases[has_tests]}
).groupby(df.loc[has_tests, 'Date']).sum()
tests_per_case = daily_totals['tests'] / daily_totals['cases'].where(daily_totals['cases'] > 0)

plt.plot(tests_per_case.index, tests_per_case.values)
plt.title('Tests per Case Ratio')
plt.xlabel('Date')
plt.ylabel('Tests per Case')
plt.xticks(rotation=45)
plt.show()