In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# reading csv file
df = pd.read_csv(r"C:\Users\yasha\Documents\covid_19_data.csv")
df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,1/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0
1,2,1/22/2020,Beijing,Mainland China,1/22/2020 17:00,14,0,0
2,3,1/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6,0,0
3,4,1/22/2020,Fujian,Mainland China,1/22/2020 17:00,1,0,0
4,5,1/22/2020,Gansu,Mainland China,1/22/2020 17:00,0,0,0


In [3]:
df.columns

Index(['SNo', 'ObservationDate', 'Province/State', 'Country/Region',
       'Last Update', 'Confirmed', 'Deaths', 'Recovered'],
      dtype='object')

In [4]:
#  remove unused columns
df.drop(['SNo','Last Update'] , axis=1 , inplace=True)
df.head(3)

Unnamed: 0,ObservationDate,Province/State,Country/Region,Confirmed,Deaths,Recovered
0,1/22/2020,Anhui,Mainland China,1,0,0
1,1/22/2020,Beijing,Mainland China,14,0,0
2,1/22/2020,Chongqing,Mainland China,6,0,0


In [5]:
#  rename some columns for better readbility
df.rename(columns={"ObservationDate":"Date", "Province/State":"State","Country/Region":"Country"} , inplace=True)
df.head(3)

Unnamed: 0,Date,State,Country,Confirmed,Deaths,Recovered
0,1/22/2020,Anhui,Mainland China,1,0,0
1,1/22/2020,Beijing,Mainland China,14,0,0
2,1/22/2020,Chongqing,Mainland China,6,0,0


In [6]:
# check for missing values
df.isnull().sum()

Date            0
State        2465
Country         0
Confirmed       0
Deaths          0
Recovered       0
dtype: int64

In [7]:
#  removing missing values
df = df.dropna()

In [8]:
df.isnull().sum()

Date         0
State        0
Country      0
Confirmed    0
Deaths       0
Recovered    0
dtype: int64

In [9]:
# fixing date format
df["Date"] = pd.to_datetime(df["Date"],format="mixed")
df.head(3)

Unnamed: 0,Date,State,Country,Confirmed,Deaths,Recovered
0,2020-01-22,Anhui,Mainland China,1,0,0
1,2020-01-22,Beijing,Mainland China,14,0,0
2,2020-01-22,Chongqing,Mainland China,6,0,0


In [10]:
# Analysing Global trends
total_confirmed = df["Confirmed"].sum()
total_deaths = df["Deaths"].sum()
total_recovered = df["Recovered"].sum()
print(total_confirmed , total_deaths , total_recovered)

3044906 97975 1293358


In [11]:
# Country-wise summary
country_summary = df.groupby("Country")[["Confirmed","Deaths","Recovered"]].sum().reset_index()
country_summary

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,Australia,2499,40,503
1,Canada,1998,11,162
2,Denmark,4508,6,6
3,France,23878,505,72
4,Germany,17,0,0
5,Hong Kong,3507,82,1112
6,Israel,5,0,0
7,Macau,490,0,258
8,Mainland China,2958465,96559,1288455
9,Netherlands,1414,24,2


In [12]:
#Top 5 countries with confirmed cases
top_countries = country_summary.sort_values("Confirmed",ascending=False).head(5)
top_countries

Unnamed: 0,Country,Confirmed,Deaths,Recovered
8,Mainland China,2958465,96559,1288455
3,France,23878,505,72
10,Others,21260,137,2244
13,US,20204,465,239
12,UK,5557,122,116


In [13]:
# Mortality and recovery rate
country_summary['Mortality Rate (%)'] = (country_summary["Deaths"]/country_summary["Confirmed"]*100).round(2)
country_summary['Recovery Rate (%)'] = (country_summary["Recovered"]/country_summary["Confirmed"]*100).round(2)
country_summary

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Mortality Rate (%),Recovery Rate (%)
0,Australia,2499,40,503,1.6,20.13
1,Canada,1998,11,162,0.55,8.11
2,Denmark,4508,6,6,0.13,0.13
3,France,23878,505,72,2.11,0.3
4,Germany,17,0,0,0.0,0.0
5,Hong Kong,3507,82,1112,2.34,31.71
6,Israel,5,0,0,0.0,0.0
7,Macau,490,0,258,0.0,52.65
8,Mainland China,2958465,96559,1288455,3.26,43.55
9,Netherlands,1414,24,2,1.7,0.14


In [14]:
from fpdf import FPDF

# Create PDF object
pdf = FPDF()
pdf.add_page()

# Title
pdf.set_font("Arial", 'B', 16)
pdf.cell(0, 10, "COVID-19 Automated Report", ln=True, align='C')

# 1. Global Trends
pdf.set_font("Arial", 'B', 14)
pdf.cell(0, 10, "1. Global Trends", ln=True)
pdf.set_font("Arial", size=12)
pdf.cell(0, 10, f"Total Confirmed Cases: {total_confirmed}", ln=True)
pdf.cell(0, 10, f"Total Deaths: {total_deaths}", ln=True)
pdf.cell(0, 10, f"Total Recovered: {total_recovered}", ln=True)
pdf.ln(10)

# 2. Country Summary (first 10 rows for example)
pdf.set_font("Arial", 'B', 14)
pdf.cell(0, 10, "2. Country Summary (Sample)", ln=True)

# Table headers
pdf.set_font("Arial", 'B', 12)
pdf.cell(35, 10, "Country", border=1)
pdf.cell(30, 10, "Confirmed", border=1)
pdf.cell(25, 10, "Deaths", border=1)
pdf.cell(25, 10, "Recovered", border=1)
pdf.cell(35, 10, "Recovery Rate (%)", border=1)
pdf.cell(35, 10, "Mortality Rate (%)", border=1)
pdf.ln()

# Table rows 
pdf.set_font("Arial", size=12)
for row in country_summary.head(10).itertuples():
    pdf.cell(35, 10, str(row.Country), border=1)
    pdf.cell(30, 10, str(row.Confirmed), border=1)
    pdf.cell(25, 10, str(row.Deaths), border=1)
    pdf.cell(25, 10, str(row.Recovered), border=1)
    pdf.cell(35, 10, str(row._5), border=1)  # Recovery Rate (%) column
    pdf.cell(35, 10, str(row._6), border=1)  # Mortality Rate (%) column
    pdf.ln()

pdf.ln(10)

# 3. Top 5 Countries by Confirmed Cases
pdf.set_font("Arial", 'B', 14)
pdf.cell(0, 10, "3. Top 5 Countries by Confirmed Cases", ln=True)
pdf.set_font("Arial", 'B', 12)
pdf.cell(50, 10, "Country", border=1)
pdf.cell(40, 10, "Confirmed", border=1)
pdf.cell(40, 10, "Deaths", border=1)
pdf.cell(40, 10, "Recovered", border=1)
pdf.ln()

pdf.set_font("Arial", size=12)
for row in top_countries.itertuples():
    pdf.cell(50, 10, row.Country, border=1)
    pdf.cell(40, 10, str(row.Confirmed), border=1)
    pdf.cell(40, 10, str(row.Deaths), border=1)
    pdf.cell(40, 10, str(row.Recovered), border=1)
    pdf.ln()

# Save PDF
pdf.output("covid_report.pdf")


''