In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset; abort with a non-zero status if the CSV cannot be read.
try:
    df = pd.read_csv("daily-new-confirmed-covid-19-deaths-per-million-people.csv")
except (
    OSError,                    # missing file, permission problems, other I/O failures
    pd.errors.EmptyDataError,   # file exists but contains no data
    pd.errors.ParserError,      # file is not well-formed CSV
    UnicodeDecodeError,         # file is not in the expected text encoding
) as err:
    print("couldnt load the csv , try again ")
    # Report the underlying cause so the user knows what to fix.
    print(f"reason: {err}")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # installed by the `site` module for interactive use, exits with status 0,
    # and under IPython/Jupyter it may not stop the remaining statements of the
    # cell from running (NOTE(review): confirm against the IPython version used).
    raise SystemExit(1)
print("dataset loaded sucessfully")

# Normalise the frame: give the long deaths column a short name, parse the
# "Day" column as datetimes, and drop rows with no deaths value.
df = df.rename(
    columns={
        "Daily new confirmed deaths due to COVID-19 per million people (rolling 7-day average, right-aligned)": "deaths(-per million-)",
    }
)
df["Day"] = pd.to_datetime(df["Day"])
# Remove every row whose deaths column is NaN so later statistics only see real values.
df = df.dropna(subset=["deaths(-per million-)"])
print(f"dataset has been cleaned , now the shape of the dataset is {df.shape}")

# Get user input for countries.
# Split on bare commas and strip each piece afterwards, so that both
# "India,China" and "India, China" are parsed into two separate names.
raw_names = input("Enter country names separated by commas and first letter capital: ")
countries = [name.strip() for name in raw_names.split(",")]

# Build the set of entity names once; membership tests are then O(1) per name
# instead of filtering the whole DataFrame for every requested country.
known_entities = set(df["Entity"])

ValidCountries = []
for country in countries:
    if not country:
        # Blank token from a trailing or doubled comma: nothing to look up.
        continue
    if country in ValidCountries:
        # Already accepted: avoid plotting the same country twice.
        continue
    if country in known_entities:
        ValidCountries.append(country)
    else:
        print(f"\ndata for {country} not found")

# Exit if no valid countries are found
if not ValidCountries:
    print("No valid countries found in the dataset. Exiting...")
    # Raise SystemExit directly instead of calling exit(): exit() is the
    # interactive helper from the `site` module and reports status 0.
    raise SystemExit(1)

# Set up plot for time-series data: one line per selected country, with a
# marker on the day of that country's highest deaths-per-million value.
plt.figure(figsize=(12, 6))

colors = ["red", "blue", "green", "orange", "purple", "yellow", "aqua"]

# Plot data and print summary statistics for each valid country
for i, country in enumerate(ValidCountries):
    CountryData = df[df["Entity"] == country]
    # Look the deaths column up once and reuse it for the plot and the statistics.
    deaths = CountryData["deaths(-per million-)"]

    # idxmax returns an index *label*; markevery needs the *position* of that
    # row within this country's data, hence the get_loc translation.
    peak = deaths.idxmax()
    peak_day = CountryData.loc[peak, "Day"]
    peak_position = CountryData.index.get_loc(peak)

    plt.plot(
        CountryData["Day"],
        deaths,
        # Show the peak as a plain date; interpolating the Timestamp directly
        # would append a meaningless "00:00:00" to every legend entry.
        label=f"{country} (Peak: {peak_day.strftime('%Y-%m-%d')})",
        # Cycle through the palette when there are more countries than colors.
        color=colors[i % len(colors)],
        marker="o",
        markevery=[peak_position],
    )

    print(f"\n  Average Deaths per Million of {country} is : {deaths.mean():.2f}")
    print(f"  Median Deaths per Million of {country} is: {deaths.median():.2f}")
    print(f"  Max Deaths per Million of {country} is : {deaths.max():.2f}")
    print(f"  Standard Deviation of {country} is : {deaths.std():.2f}")

# Display time-series plot
plt.xlabel("Date")
plt.ylabel("Deaths per million")
plt.title("COVID-19 Deaths Per Million - Country Comparison")
plt.legend()
plt.show()

# Box plot: one box per selected country, built from that country's
# deaths-per-million values.
CountriesDeaths = []
for country in ValidCountries:
    # Select the deaths column for the rows belonging to this country.
    CountriesDeaths.append(df.loc[df["Entity"] == country, "deaths(-per million-)"])

plt.figure(figsize=(10, 6))
plt.boxplot(CountriesDeaths, tick_labels=ValidCountries)
plt.title("Box Plot of COVID-19 Deaths per Million - Country Comparison")
plt.ylabel("Deaths per Million")
plt.grid(True)
plt.show()