In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gdp
from shapely.geometry import Point,Polygon
import descartes
from IPython.display import Markdown
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
import folium 
from folium import plugins

In [None]:
# Load the raw case table.
# The file location can be overridden through the COVID_DATA_PATH environment
# variable; when it is not set, the original local path is used.
import os

DATA_PATH = os.environ.get(
    "COVID_DATA_PATH",
    r"D:\Users\abhisv\Downloads\covid_19_clean_complete.csv",
)
# Parse 'Date' into datetimes so that grouping and plotting by date run in
# chronological order rather than in the lexicographic order of date strings.
df = pd.read_csv(DATA_PATH, parse_dates=["Date"])

In [None]:
# (number of rows, number of columns) of the loaded table.
len(df.index), len(df.columns)

In [None]:
# Print the column names, dtypes and non-null counts of the loaded table.
df.info()

In [None]:
# Render every float with two decimals in all subsequent pandas output.
pd.set_option("display.float_format", "{:.2f}".format)

In [None]:
# Preview the first five rows of the raw table.
df.head(n=5)

In [None]:
##df.columns = df.columns.str.replace(r'\n','', regex=True)

In [None]:
# Shorten the two location column names; the rest of the notebook uses the short forms.
column_aliases = {
    'Country/Region': 'Country',
    'Province/State': 'Province',
}
df.rename(columns=column_aliases, inplace=True)

In [None]:
# Show the column index after the rename.
df.columns

### Number of cases per date

In [None]:
# Per-date totals: add up the three case counters over every row sharing a date.
case_columns = ['Confirmed', 'Deaths', 'Recovered']
rows_by_date = df.groupby('Date')
df_per_day = rows_by_date[case_columns].sum()

In [None]:
# First five dates of the per-date totals.
df_per_day.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the per-date totals.
df_per_day.describe()

In [None]:
# Make IPython display the value of every top-level expression in a cell,
# not only the last one; later cells list several expressions and rely on this.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"

In [None]:
# Largest per-date total of each counter.
# Kept as three separate top-level expressions so each value is displayed.
df_per_day.loc[:, 'Confirmed'].max()
df_per_day.loc[:, 'Deaths'].max()
df_per_day.loc[:, 'Recovered'].max()

In [None]:
# Date (index label) on which each counter reaches its largest per-date total.
df_per_day.loc[:, 'Confirmed'].idxmax()
df_per_day.loc[:, 'Deaths'].idxmax()
df_per_day.loc[:, 'Recovered'].idxmax()

We can observe from the above that recovered cases are increasing; hopefully we will see the end of the coronavirus outbreak soon. This data is updated through 3/20/20.

In [None]:
# Smallest per-date total of each counter.
df_per_day.loc[:, 'Confirmed'].min()
df_per_day.loc[:, 'Deaths'].min()
df_per_day.loc[:, 'Recovered'].min()

In [None]:
# Date (index label) on which each counter has its smallest per-date total.
df_per_day.loc[:, 'Confirmed'].idxmin()
df_per_day.loc[:, 'Deaths'].idxmin()
df_per_day.loc[:, 'Recovered'].idxmin()

### Number of cases per country

In [None]:
# Highest value of each counter found in any row of a country.
rows_by_country = df.groupby('Country')
rows_by_country[['Confirmed', 'Deaths', 'Recovered']].max()

In [None]:
# Highest value of each counter per (Province, Country) pair.
location_keys = ['Province', 'Country']
rows_by_location = df.groupby(location_keys)
rows_by_location[['Confirmed', 'Deaths', 'Recovered']].max()

In [None]:
#df['Country'].value_counts()  #value_counts gives the records corresponding to each countries from starting day to current day.

In [None]:
# Which countries appear in the data, and how many distinct ones there are.
country_column = df['Country']
country_column.unique()
country_column.nunique()

In [None]:
#df['Country'].value_counts().plot(kind='bar',figsize=(25,25))

### Checking distribution on map

The first step is to convert the data to a GeoDataFrame.

In [None]:
# One shapely Point per row, built as (x=longitude, y=latitude).
points = list(map(Point, df['Long'], df['Lat']))

In [None]:
# Attach the points to the case table as its geometry column.
gdp01 = gdp.GeoDataFrame(data=df, geometry=points)

In [None]:
# Check that the geometry column was added (first two rows).
gdp01.head(n=2)

In [None]:
# Scatter of every reported location, without any base map.
points_figsize = (10, 20)
gdp01.plot(figsize=points_figsize)

In [None]:
# Base layer: low-resolution Natural Earth country polygons shipped with geopandas.
# NOTE(review): `geopandas.datasets` was removed in geopandas 1.0 — confirm the installed version.
world_shapefile = gdp.datasets.get_path('naturalearth_lowres')
world = gdp.read_file(world_shapefile)
ax = world.plot(figsize=(20, 10))
ax.axis('off')

In [None]:
# Draw all case locations and the country borders on one shared axes.
fig, ax = plt.subplots(figsize=(20, 10))
gdp01.plot(ax=ax, cmap='Oranges')
country_borders = world.geometry.boundary
country_borders.plot(ax=ax, color='Blue', edgecolor='k', linewidth=2)

In [None]:
# Rows reported for Sweden.
gdp01.loc[gdp01['Country'] == 'Sweden']

In [None]:
# Same overlay as for the whole data set, restricted to rows whose Country is 'US'.
fig, ax = plt.subplots(figsize=(20, 10))
us_rows = gdp01[gdp01['Country'] == 'US']
us_rows.plot(ax=ax, cmap='Oranges')
border_lines = world.geometry.boundary
border_lines.plot(ax=ax, color='Blue', edgecolor='k', linewidth=2)

In [None]:
# Largest per-date total of confirmed cases.
df_per_day.loc[:, 'Confirmed'].max()

In [None]:
# Print the largest per-date total of each counter with its caption.
captions = (
    ('Confirmed Cases around the globe : ', 'Confirmed'),
    ('Deaths Confirmed around the globe: ', 'Deaths'),
    ('Recovered Cases around the globe : ', 'Recovered'),
)
for caption, counter in captions:
    print(caption, df_per_day[counter].max())

In [None]:
#Day wise distribution for confirmed, death and recovered

In [None]:
# Turn the 'Date' index back into an ordinary column so it can be used as the x axis.
df_per_day = df_per_day.reset_index()

In [None]:
# Line chart of the three per-date totals against the date.
df_per_day.plot(
    x='Date',
    y=['Confirmed', 'Deaths', 'Recovered'],
    figsize=(10, 10),
    rot=30,
)


In [None]:
# Global spread over time: keep only the date and the three counters.
Date_data = df.loc[:, ['Date', 'Confirmed', 'Deaths', 'Recovered']]

In [None]:
def scatterPlotCasesOverTime(df, country):
    """Show Confirmed, Deaths and Recovered over time as three side-by-side line plots.

    Parameters
    ----------
    df : table-like
        Must provide the 'Date', 'Confirmed', 'Deaths' and 'Recovered' columns.
    country : str
        Text prepended to the figure title (may contain HTML such as ``<b>``).

    Returns
    -------
    None
        The figure is displayed with ``plot.show()``.
    """
    # (column / legend name, line colour), in left-to-right subplot order.
    series_styles = (
        ("Confirmed", "orange"),
        ("Deaths", "red"),
        ("Recovered", "green"),
    )

    plot = make_subplots(rows=1, cols=3, subplot_titles=("Confirmed", "Deaths", "Recovered"))

    for column_position, (series_name, colour) in enumerate(series_styles, start=1):
        trace = go.Scatter(
            x=df['Date'],
            y=df[series_name],
            name=series_name,
            line_color=colour,
            opacity=0.8)
        # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
        plot.add_trace(trace, row=1, col=column_position)

    plot.update_layout(template="ggplot2", title_text = country + '<b> - Spread of the nCov Over Time</b>')

    plot.show()

In [None]:
# Worldwide totals per date, with 'Date' restored as a column for plotting.
# The columns are selected with a list: selecting several columns from a groupby
# with bare comma-separated keys (a tuple) is rejected by pandas >= 2.0.
global_data_over_time = (
    Date_data
    .groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
scatterPlotCasesOverTime(global_data_over_time, "<b>Global</b>")

In [None]:
def dailyreportedcasesovertime(df,country):
    """Show three bar charts (Confirmed, Deaths, Recovered) of the values per date.

    df      -- table with 'Date', 'Confirmed', 'Deaths' and 'Recovered' columns.
    country -- text appended to each chart title.

    Every figure is shown as soon as it is built; nothing is returned.
    """
    # (column to plot, title prefix, y-axis caption), in display order.
    chart_specs = (
        ("Confirmed", "Daily count of confirmed cases in ", "Confirmed cases"),
        ("Deaths", "Daily count of Death cases in ", "Death cases"),
        ("Recovered", "Daily count of Recovered cases in ", "Recovered cases"),
    )

    for column, title_prefix, y_caption in chart_specs:
        fig = px.bar(df, x="Date", y=column)
        layout = go.Layout(
            title=go.layout.Title(text=title_prefix + country, x=0.5),
            font=dict(size=14),
            width=800,
            height=500,
            xaxis_title="Date",
            yaxis_title=y_caption)

        fig.update_layout(layout)
        fig.show()

In [None]:
# Bar charts of the worldwide per-date totals.
dailyreportedcasesovertime(df=global_data_over_time, country='all over the world')

In [None]:
# All rows reported for China.
df.loc[df['Country'] == 'China']

#### From the graphs above we can observe that confirmed cases and deaths are increasing, while recoveries are not great but are doing quite well. If confirmed cases decrease while recoveries increase, it would suggest that the treatment available for these cases is improving.

In [None]:
# China details: province name and the three counters for every Chinese row.
is_china = df['Country'] == 'China'
df_china = df.loc[is_china, ['Province', 'Confirmed', 'Deaths', 'Recovered']]

In [None]:
# First two rows of the China subset.
df_china.head(n=2)

In [None]:
# One row per province. The rest of this notebook treats the counters as running
# totals (the overall totals are read with .max()), so the per-province figure is
# the highest value reached; summing over all dates would count the same cases
# once per reporting day.
df_china = df_china.groupby('Province')[['Confirmed', 'Deaths', 'Recovered']].max()

In [None]:
# Move the current index back into a regular column.
df_china = df_china.reset_index()

In [None]:
# First two rows after restoring the index as a column.
df_china.head(n=2)

In [None]:
# Grouped bar chart of the three counters, with one group per row of df_china.
df_china.plot(
    x='Province',
    y=['Confirmed', 'Deaths', 'Recovered'],
    kind='bar',
    figsize=(10, 10),
    fontsize=15,
)

In [None]:
# Every row whose Province is Hubei.
hubei = df.loc[df['Province'] == 'Hubei']

In [None]:
# Keep only the three counters.
hubei = hubei.loc[:, ['Confirmed', 'Deaths', 'Recovered']]

In [None]:
# Reduce to a single row (a Series of the three counters).
# NOTE(review): this is the first Hubei row in file order — confirm whether the
# earliest or the latest report is the one intended for the pie chart.
hubei = hubei.iloc[0, :]

In [None]:
# Display the selected Hubei counters.
hubei

In [None]:
# Pie chart of the three Hubei counters, labelled with percentages.
hubei.plot.pie(autopct='%1.1f%%', shadow=True, startangle=10)

In [None]:
# Hubei time series, drawn with the shared three-panel helper.
# The helper creates its own figure, so no figure is built in this cell, and the
# title now spells the province name correctly.
hubei = df[df['Province'] == 'Hubei']
scatterPlotCasesOverTime(hubei, "<b>Hubei</b>")

In [None]:
# Chinese provinces other than Hubei. Both conditions are required: filtering on
# Province alone also keeps the rows of every other country.
in_china = df['Country'] == 'China'
outside_hubei = df['Province'] != 'Hubei'
rest_of_china = df.loc[in_china & outside_hubei, ['Province', 'Confirmed', 'Deaths', 'Recovered']]

In [None]:
# Per-province figures for the remaining rows. The counters are treated as running
# totals elsewhere in this notebook (read with .max()), so take the highest value
# reached per province instead of summing the same cases once per reporting day.
rest_of_china.groupby('Province')[['Confirmed', 'Deaths', 'Recovered']].max().reset_index()

In [None]:
# One bar group per province. rest_of_china still holds one row per province per
# date, so it is aggregated here first; plotting it directly would draw a bar
# group for every single row.
(
    rest_of_china
    .groupby('Province')[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .plot(kind='bar', figsize=(10, 10))
)

In [None]:
# Print the rows with more than 500 confirmed cases.
over_500_confirmed = rest_of_china['Confirmed'] > 500
print(rest_of_china.loc[over_500_confirmed])

In [None]:
# Chinese rows outside Hubei. Each comparison is parenthesised because `&` binds
# more tightly than `!=` and `==`; without the parentheses the expression becomes
# a chained comparison around `'Hubei' & df['Country']` and fails.
not_hubei = df['Province'] != 'Hubei'
is_chinese = df['Country'] == 'China'
rest_of_hubei = df.loc[not_hubei & is_chinese, ['Province', 'Confirmed', 'Deaths', 'Recovered']]