# **Project : Analyzing the trends of COVID-19 with Python**


#Problem Statement:
Given data about COVID-19 patients, write code to visualize the impact,
analyze the trends in the rates of infection and recovery, and predict the
number of cases expected one week into the future based on the current
trends.
#Dataset:
CSV and Excel files containing data about the number of COVID-19 confirmed
cases, deaths and recovered patients, both around the world and in India. Download Link
#Guidelines:
● Use pandas to accumulate data from multiple data files.

● Use plotly (visualization library) to create interactive visualizations.

● Use Facebook prophet library to make time series models.

● Visualize the prediction by combining these technologies.


In [1]:
# Importing required Libraries :-

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Warnings

import warnings
# Suppress all warnings for the rest of the session so that cell output is
# not interleaved with warning messages.
warnings.filterwarnings('ignore')

# Loading the Dataset :

In [2]:
# Read the cleaned COVID-19 CSV into a DataFrame; the bare name on the last
# line makes the notebook render a preview of it.
df = pd.read_csv(filepath_or_buffer="/content/covid_19_clean_complete.csv")
df

FileNotFoundError: [Errno 2] No such file or directory: '/content/covid_19_clean_complete.csv'

In [None]:
# Print a summary of the raw data: columns, dtypes and non-null counts.
df.info()

# Checking for Null values

In [None]:
# Number of missing values in each column.
df.isnull().sum()

# Dropping unnecessary ones :

In [None]:
# Dropping only the rows where "Province/State" is null is not a good idea,
# because those rows still carry data in their other fields; the
# "Province/State" column itself is removed instead.
df = df.drop(columns=['Province/State'])
df

# Renaming Required :

In [None]:
# Shorten the column name "Country/Region" to "Country" to keep later code simple.
df = df.rename({'Country/Region': 'Country'}, axis='columns')

# Duplicate Check :

In [None]:
# Count duplicated rows, i.e. rows that repeat an earlier row in every column.

df.duplicated().sum()

# Let's see the per-country totals on the latest date in the dataset :

In [None]:
# Snapshot of the most recent day in the data.  'Date' holds 'YYYY-MM-DD'
# values, so the maximum is the latest day whether the column is text or
# datetime; deriving it avoids hard-coding '2020-07-27' and keeps the cell
# correct if the CSV is refreshed.
latest_date = df['Date'].max()
top = df[df['Date'] == latest_date]

# One row per country: several rows of the same country on that day are
# added together.
top.groupby(by='Country')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()


# Seeing all confirmed cases in the dataset :

In [None]:
# Worldwide confirmed count per day: one row per date, summed over all rows
# reported for that date.
Confirmed = df.groupby('Date', as_index=False)['Confirmed'].sum()
Confirmed

# Visualize the above ones

In [None]:
# Line chart of the worldwide confirmed count over time.
plt.figure(figsize=(20, 7))
confirmed_line_options = dict(x='Date', y='Confirmed', color='Red')
sns.lineplot(data=Confirmed, **confirmed_line_options)
plt.xticks(rotation=90)  # vertical date labels so they do not overlap
plt.show()

# **Now let's find out the top 10 countries for recovered , deaths and active cases**

# 1. Recovered Cases for top 10

In [None]:
# Ten countries with the most recovered patients as of the most recent date.
# The per-day figures are running totals, so adding them up across every date
# counts the same patients once per day; only the latest snapshot is a
# meaningful per-country total.
# NOTE(review): assumes the counts in the CSV are cumulative — confirm
# against the dataset description.
latest = df[df['Date'] == df['Date'].max()]
top_10_recovered = (
    latest.groupby(by='Country')['Recovered']
    .sum()                          # combine multiple rows of one country
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
top_10_recovered

# Visualize for top-10-recovered

In [None]:
# Bar chart of the ten countries in `top_10_recovered`.
recovered_bar_options = dict(x='Country', y='Recovered')
sns.barplot(data=top_10_recovered, **recovered_bar_options)
plt.title('Top_10_Recovered countries')
plt.xticks(rotation=45)  # slanted country names for readability
plt.show()

# 2. Deaths Cases for Top-10 :

In [None]:
# Ten countries with the most deaths as of the most recent date.
# The per-day figures are running totals, so adding them up across every date
# counts the same deaths once per day; only the latest snapshot is a
# meaningful per-country total.
# NOTE(review): assumes the counts in the CSV are cumulative — confirm
# against the dataset description.
latest = df[df['Date'] == df['Date'].max()]
top_10_deaths = (
    latest.groupby(by='Country')['Deaths']
    .sum()                          # combine multiple rows of one country
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
top_10_deaths

# Visualize the above ones :

In [None]:
# Bar chart of the ten countries in `top_10_deaths`.
deaths_bar_options = dict(x='Country', y='Deaths')
sns.barplot(data=top_10_deaths, **deaths_bar_options)
plt.title("Top_10_countries with Death Rate")
plt.xticks(rotation=45)  # slanted country names for readability
plt.show()

# 3. Active Cases for Top-10 :

In [None]:
# Ten countries with the most active cases as of the most recent date.
# 'Active' is a per-day level, so adding it up across every date does not
# give a number of cases; the latest snapshot is used instead.
# NOTE(review): assumes each row holds that day's active count — confirm
# against the dataset description.
latest = df[df['Date'] == df['Date'].max()]
top_10_active = (
    latest.groupby(by='Country')['Active']
    .sum()                          # combine multiple rows of one country
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
top_10_active

# Visualize the above ones :

In [None]:
# Bar chart of the ten countries in `top_10_active`.
active_bar_options = dict(x='Country', y='Active')
sns.barplot(data=top_10_active, **active_bar_options)
plt.title("top_10 countries with Active Cases")
plt.xticks(rotation=45)  # slanted country names for readability
plt.show()

# **Let us look at the countries US, China & India specifically, for Confirmed, Deaths, Recovered and Active cases.**

In [None]:
# Daily national series for the three countries compared below.
# With 'Province/State' dropped, a country that is reported per province has
# several rows for the same date.  Plotting those rows directly makes seaborn
# draw the per-date average of the rows (plus an error band) instead of the
# national total, so the rows are summed per date first.
# Only 'Date' and the four case columns are kept; nothing else in this
# notebook reads other columns from these frames.
_case_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
US = df[df['Country'] == "US"].groupby('Date', as_index=False)[_case_columns].sum()
China = df[df['Country'] == 'China'].groupby('Date', as_index=False)[_case_columns].sum()
India = df[df['Country'] == "India"].groupby('Date', as_index=False)[_case_columns].sum()

# Confirmed Data :

In [None]:
# Date/Confirmed columns per country (plot colour noted on the right).
US_confirmed = US.loc[:, ['Date', 'Confirmed']]          # red
China_confirmed = China.loc[:, ['Date', 'Confirmed']]    # blue
India_confirmed = India.loc[:, ['Date', 'Confirmed']]    # green

# Confirmed cases of the three countries drawn on one set of axes.
plt.figure(figsize=(20, 7))
confirmed_point_options = dict(x='Date', y='Confirmed', markersize=5)
sns.pointplot(data=US_confirmed, color='red', label='US', marker='*',
              **confirmed_point_options)
sns.pointplot(data=China_confirmed, color='blue', label="China", marker='D',
              **confirmed_point_options)
sns.pointplot(data=India_confirmed, color='green', linestyle='--', label="India",
              marker='o', **confirmed_point_options)
plt.legend(title='Country', loc='best', edgecolor='black')
plt.xticks(rotation=90)  # vertical date labels so they do not overlap
plt.show()

# Insights:

* The US has the most confirmed cases, and the count keeps increasing over time.

* India's cases are also increasing, but remain below those of the US.

* China's curve stays flat: there is no visible increase in cases over time.

# Deaths Data :

In [None]:
# Date/Deaths columns per country (plot colour noted on the right).
US_death = US.loc[:, ['Date', 'Deaths']]          # red
China_death = China.loc[:, ['Date', 'Deaths']]    # blue
India_death = India.loc[:, ['Date', 'Deaths']]    # green

# Deaths of the three countries drawn as dash-dot lines on one set of axes.
plt.figure(figsize=(20, 7))
for frame, colour, country in (
    (US_death, 'red', 'US'),
    (China_death, 'blue', 'China'),
    (India_death, 'green', 'India'),
):
    sns.lineplot(data=frame, x='Date', y='Deaths', color=colour,
                 linestyle='-.', label=country)
plt.legend(title='Country', loc='best')
plt.xticks(rotation=90)  # vertical date labels so they do not overlap
plt.show()

# Insights:

* The US has the highest death count, and it keeps rising over time.
* India has fewer deaths than the US.
* China's death count stays flat, just as we saw for its confirmed cases.

# Recovered Data :

In [None]:
# Date/Recovered columns per country (plot colour noted on the right).
US_recovered = US.loc[:, ['Date', 'Recovered']]          # red
China_recovered = China.loc[:, ['Date', 'Recovered']]    # blue
India_recoverd = India.loc[:, ['Date', 'Recovered']]     # green

# Recovered cases of the three countries drawn as dash-dot lines on one set of axes.
plt.figure(figsize=(20, 7))
for frame, colour, country in (
    (US_recovered, 'red', 'US'),
    (China_recovered, 'blue', 'China'),
    (India_recoverd, 'green', 'India'),
):
    sns.lineplot(data=frame, x='Date', y='Recovered', color=colour,
                 linestyle='-.', label=country)
plt.legend(title='Country', loc='best')
plt.xticks(rotation=90)  # vertical date labels so they do not overlap
plt.show()


# Insights:

* Recovered cases in the US and India increase over time.
* China shows no visible increase in recovered cases.

# Active Cases :

In [None]:
# Date/Active columns per country (plot colour noted on the right).
US_active = US.loc[:, ['Date', 'Active']]          # red
China_active = China.loc[:, ['Date', 'Active']]    # blue
India_active = India.loc[:, ['Date', 'Active']]    # green

# Active cases of the three countries drawn on one set of axes.
plt.figure(figsize=(20, 7))
for frame, colour, country in (
    (US_active, 'red', 'US'),
    (China_active, 'blue', 'China'),
    (India_active, 'green', 'India'),
):
    sns.pointplot(data=frame, x='Date', y='Active', color=colour,
                  linestyle='-.', label=country)
plt.legend(title='Country', loc='best')
plt.xticks(rotation=90)  # vertical date labels so they do not overlap
plt.show()

# Insights:

* The US has the highest number of active cases.

* India has fewer active cases than the US.

* China's curve is flat: its active cases show no visible change.

# **Let's Forecast for next 7 Days for every category of cases**

# loading dataset again: -

In [None]:
# Read the CSV again, this time with parse_dates so that the 'Date' column
# is loaded with a datetime dtype.
data = pd.read_csv(
    filepath_or_buffer='/content/covid_19_clean_complete.csv',
    parse_dates=['Date'],
)

data

In [None]:
# Print the column summary of the re-loaded data (shows the dtype of 'Date').
data.info()

# creating different Datasets for predictions

In [None]:
# Daily worldwide totals, one two-column frame (Date, <metric>) per metric.
# The metric columns are selected BEFORE aggregating so that only numeric
# columns are summed.  Calling .sum() on the whole group first also tries to
# "sum" the text columns, which is wasted work and, depending on the pandas
# version, can fail or produce concatenated strings.
daily_totals = data.groupby(by='Date')[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum()

# dataset for "confirmed"
confirmed = daily_totals['Confirmed'].reset_index()

# dataset for "recovered"
recovered = daily_totals['Recovered'].reset_index()

# dataset for "Deaths"
deaths = daily_totals['Deaths'].reset_index()

# dataset for "Active"
active = daily_totals['Active'].reset_index()

# 1. Let us Forecast for "Confirmed Cases" :

In [None]:
# Relabel the two columns of `confirmed` as 'ds' (date) and 'y' (value),
# the names the Prophet model below is fitted on.
prophet_columns = ['ds', 'y']
confirmed.columns = prophet_columns

confirmed.head()

# installing Prophet

In [None]:
!pip install prophet

In [None]:
# importing Libraries :-

from prophet import Prophet

In [None]:
# Prophet model with default settings, used for the confirmed-cases forecast.
model = Prophet()

In [None]:
# Fit the model on the daily confirmed totals (columns 'ds' and 'y').
model.fit(confirmed)

In [None]:
# Dates to predict on, extended by 7 daily steps.
future = model.make_future_dataframe(freq='D', periods=7)

# forecasting :

In [None]:
forecast = model.predict(future)
# Show the last 7 rows (the forecast days): estimate plus upper/lower columns.
forecast_columns = ['ds', 'yhat', 'yhat_upper', 'yhat_lower']
forecast.loc[:, forecast_columns].tail(7)  # predicted

# Visualize

In [None]:
# Draw the confirmed-cases forecast with Prophet's built-in plot and title it.
model.plot(fcst=forecast)
plt.title(label="Forecast of Confirmed_Cases")
plt.show()

# Insights:

* The dots show the data points, i.e. the actual values of the dataset (confirmed).
* The part of the blue line that extends beyond the dots shows the predicted values for the next 7 days.

# 2. Let's Forecast for Recovered Cases

In [None]:
# Relabel the two columns of `recovered` as 'ds' (date) and 'y' (value),
# the names the Prophet model below is fitted on.
prophet_columns = ['ds', 'y']
recovered.columns = prophet_columns

recovered.head()

# Model & Forecast

In [None]:
# Default Prophet model fitted on the daily recovered totals.
model01 = Prophet()
model01.fit(recovered)

# Predict on dates extended by 7 daily steps and show the last 7 rows
# (estimate plus upper/lower columns).
future01 = model01.make_future_dataframe(freq='D', periods=7)
forecast01 = model01.predict(future01)
forecast01.loc[:, ['ds', 'yhat', 'yhat_upper', 'yhat_lower']].tail(7)

# visualize

In [None]:
# Draw the recovered-cases forecast with Prophet's built-in plot and title it.
model01.plot(fcst=forecast01)
plt.title(label="Forecast of Recovered_Cases")
plt.show()

# Insights:

* The dots show the data points, i.e. the actual values of the dataset (recovered).
* The part of the blue line that extends beyond the dots shows the predicted values for the next 7 days.

# 3. Let's Forecast for Death Cases

In [None]:
# Relabel the two columns of `deaths` as 'ds' (date) and 'y' (value),
# the names the Prophet model below is fitted on.
prophet_columns = ['ds', 'y']
deaths.columns = prophet_columns
deaths.head()

# Model and Forecast

In [None]:
# Default Prophet model fitted on the daily death totals.
model02 = Prophet()
model02.fit(deaths)

# Predict on dates extended by 7 daily steps and show the last 7 rows
# (estimate plus upper/lower columns).
future02 = model02.make_future_dataframe(freq='D', periods=7)
forecast02 = model02.predict(future02)
forecast02.loc[:, ['ds', 'yhat', 'yhat_upper', 'yhat_lower']].tail(7)

# Visualize

In [None]:
# Draw the death-cases forecast with Prophet's built-in plot and title it.
model02.plot(fcst=forecast02)
plt.title(label="Forecast for death cases")
plt.show()

# Insights:

* The dots show the data points, i.e. the actual values of the dataset (deaths).
* The part of the blue line that extends beyond the dots shows the predicted values for the next 7 days.

# 4. Let's Forecast for the Active Cases :

In [None]:
# Relabel the two columns of `active` as 'ds' (date) and 'y' (value),
# the names the Prophet model below is fitted on.
prophet_columns = ['ds', 'y']
active.columns = prophet_columns
active.head()

# Model and Forecast

In [None]:
# Default Prophet model fitted on the daily active totals.
model03 = Prophet()
model03.fit(active)

# Predict on dates extended by 7 daily steps and show the last 7 rows
# (estimate plus upper/lower columns).
future03 = model03.make_future_dataframe(freq='D', periods=7)
forecast03 = model03.predict(future03)
forecast03.loc[:, ['ds', 'yhat', 'yhat_upper', 'yhat_lower']].tail(7)

# Visualize

In [None]:
# Draw the active-cases forecast with Prophet's built-in plot and title it.
model03.plot(fcst=forecast03)
plt.title(label="Forecast for Active cases")
plt.show()

# Insights:

* The dots show the data points, i.e. the actual values of the dataset (active).
* The part of the blue line that extends beyond the dots shows the predicted values for the next 7 days.

# **Creating the World Map using Plotly.**

In [None]:
import plotly
import plotly.express as px

In [None]:
# Per-country totals used by the world maps, taken from the most recent date.
# The per-day figures are running totals (and 'Active' a per-day level), so
# adding them up across every date inflates each country's value by counting
# the same cases once per day; the latest snapshot is the meaningful total.
# NOTE(review): assumes the counts in the CSV are cumulative — confirm
# against the dataset description.
latest = df[df['Date'] == df['Date'].max()]
world = (
    latest.groupby(by='Country')[['Confirmed', 'Active', 'Deaths', 'Recovered']]
    .sum()                          # combine multiple rows of one country
    .reset_index()
)
world.shape

# 1. World Map Visual for Active Cases.

In [None]:
# Interactive world map coloured by the 'Active' column of `world`.
active_map_options = dict(
    locations='Country',
    locationmode='country names',
    color='Active',
    hover_name='Country',
    range_color=[1, 20000],  # fixed range of the colour scale
    color_continuous_scale='Rainbow',
    title='Countries with Active Cases',
)
figure_active = px.choropleth(world, **active_map_options)
figure_active.show()

# 2. World Map for Recovered Cases.

In [None]:
# Interactive world map coloured by the 'Recovered' column of `world`.
recovered_map_options = dict(
    locations='Country',
    locationmode='country names',
    color='Recovered',
    hover_name='Country',
    range_color=[1, 20000],  # fixed range of the colour scale
    color_continuous_scale='Darkmint',
    title='Countries with Recovered Cases',
)
figure_recovered = px.choropleth(world, **recovered_map_options)
figure_recovered.show()

# 3. World Map for Confirmed Cases.

In [None]:
# Interactive world map coloured by the 'Confirmed' column of `world`.
confirmed_map_options = dict(
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    hover_name='Country',
    range_color=[1, 20000],  # fixed range of the colour scale
    color_continuous_scale='Agsunset',
    title='Countries with Confirmed Cases',
)
figure_confirmed = px.choropleth(world, **confirmed_map_options)
figure_confirmed.show()

# 4. World Map for Deaths Cases.

In [None]:
# Interactive world map coloured by the 'Deaths' column of `world`.
deaths_map_options = dict(
    locations='Country',
    locationmode='country names',
    color='Deaths',
    hover_name='Country',
    range_color=[1, 20000],  # fixed range of the colour scale
    color_continuous_scale='haline',
    title='Countries with Deaths Cases',
)
figure_deaths = px.choropleth(world, **deaths_map_options)
figure_deaths.show()

# **The End Thanks**