# FLATTENING OF CURVE OF CORONAVIRUS CASES

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/covid19-in-india/IndividualDetails.csv
/kaggle/input/covid19-in-india/ICMRTestingLabs.csv
/kaggle/input/covid19-in-india/population_india_census2011.csv
/kaggle/input/covid19-in-india/AgeGroupDetails.csv
/kaggle/input/covid19-in-india/StatewiseTestingDetails.csv
/kaggle/input/covid19-in-india/HospitalBedsIndia.csv
/kaggle/input/covid19-in-india/covid_19_india.csv
/kaggle/input/hospital-beds-data-india/hospital_beds.csv


# Importing required libraries

In [2]:
import plotly.graph_objects as go

# Reading confirmed cases & total number of available beds.

Here, the data for the total number of available beds all over India is taken from the below pdf in which the data is provided by **CDDEP & Princeton University** in a study published by "**Geetanjali Kapoor, Aditi Sriram, Jyoti Joshi, Ramanan Laxminarayan**".

Link: https://cddep.org/wp-content/uploads/2020/04/State-wise-estimates-of-current-beds-and-ventilators_5Apr2020-AN_gk.pdf

In [3]:
cases = pd.read_csv("/kaggle/input/covid19-in-india/covid_19_india.csv")
print(cases.shape)

beds = pd.read_csv("/kaggle/input/hospital-beds-data-india/hospital_beds.csv")
print(beds.shape)

(5686, 9)
(36, 4)


# Cleaning the given data as States name differ in a csv file.
Since the given .csv file has different names for same state like "Telangana" -> "Telangana***" or "Telengana". So, replacing the name as "Telangana" only. We have entries in state names like "Unassigned" or "Cases being reassigned to states" so, for now we are going to ignore them and removing those rows from the data.

In [4]:
cases['State/UnionTerritory'].unique()

array(['Kerala', 'Telengana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Mizoram', 'Andaman and Nicobar Islands',
       'Goa', 'Unassigned', 'Assam', 'Jharkhand', 'Arunachal Pradesh',
       'Tripura', 'Nagaland', 'Meghalaya', 'Dadar Nagar Haveli',
       'Cases being reassigned to states', 'Sikkim', 'Daman & Diu',
       'Dadra and Nagar Haveli and Daman and Diu', 'Telangana',
       'Telangana***', 'Telengana***'], dtype=object)

In [5]:
cases['State/UnionTerritory'].replace({"Telengana" : "Telangana", "Telengana***" : "Telangana",
                                        "Telangana***" : "Telangana"}, inplace = True)

cases['State/UnionTerritory'].replace({"Daman & Diu" : "Dadra and Nagar Haveli and Daman and Diu",
                                          "Dadar Nagar Haveli" : "Dadra and Nagar Haveli and Daman and Diu"},
                                         inplace = True)
cases = cases[(cases['State/UnionTerritory'] != 'Unassigned') &
                    (cases['State/UnionTerritory'] != 'Cases being reassigned to states')]
cases['State/UnionTerritory'].unique()

array(['Kerala', 'Telangana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Mizoram', 'Andaman and Nicobar Islands',
       'Goa', 'Assam', 'Jharkhand', 'Arunachal Pradesh', 'Tripura',
       'Nagaland', 'Meghalaya',
       'Dadra and Nagar Haveli and Daman and Diu', 'Sikkim'], dtype=object)

# Removing/Deleting unrequired columns for the ease of our analysis.

In [6]:
cases.Date = pd.to_datetime(cases.Date, dayfirst=True)

cases.drop(['Sno', 'Time', 'ConfirmedIndianNational', 'ConfirmedForeignNational'], axis = 1, inplace=True)
cases.head()

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed
0,2020-01-30,Kerala,0,0,1
1,2020-01-31,Kerala,0,0,1
2,2020-02-01,Kerala,0,0,2
3,2020-02-02,Kerala,0,0,3
4,2020-02-03,Kerala,0,0,3


# Calculating the total active cases per day 

In [7]:
daily_cases = cases.groupby('Date').sum().reset_index()
daily_cases['Active'] = 1

for val in daily_cases.index:
    if val != 0:
        daily_cases['Active'].loc[val] = daily_cases['Confirmed'].loc[val] - daily_cases['Cured'].loc[val-1] - daily_cases['Deaths'].loc[val-1]
    
daily_cases

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,Date,Cured,Deaths,Confirmed,Active
0,2020-01-30,0,0,1,1
1,2020-01-31,0,0,1,1
2,2020-02-01,0,0,2,2
3,2020-02-02,0,0,3,3
4,2020-02-03,0,0,3,3
...,...,...,...,...,...
207,2020-08-24,2338035,57542,3106348,769076
208,2020-08-25,2404585,58390,3167323,771746
209,2020-08-26,2467758,59449,3234474,771499
210,2020-08-27,2523771,60472,3310234,783027


# Plotting of the active cases shows us how the cases are increasing day-by-day.

In [8]:
fig = go.Figure()
fig.add_trace(go.Scatter(x = daily_cases.Date, y = daily_cases.Active, name = 'Active Cases'))

fig.update_layout(title = 'Daily Active Cases', xaxis_title = 'Time', yaxis_title = 'Count (in lakhs)')
fig.show()

# Calculating total active cases per state per day.
In this, we are trying to find the total number of active cases in a state at a particular day so to try to find and analyze the number of available beds we have in a state on a particular day and how much do we need more to accommodate the patients.

In [9]:
state_daily_cases = cases.sort_values(by=['State/UnionTerritory', 'Date']).reset_index(drop=True)
state_daily_cases['ActiveCases'] = 0

for st in sorted(cases['State/UnionTerritory'].unique()):
    df = state_daily_cases[state_daily_cases['State/UnionTerritory'] == st]
    for i in df.index:
        conf = state_daily_cases['Confirmed'].iloc[i]
        rec = state_daily_cases['Cured'].iloc[i-1]
        death = state_daily_cases['Deaths'].iloc[i-1]
            
        state_daily_cases['ActiveCases'].iloc[i] = conf - rec - death
    state_daily_cases['ActiveCases'].iloc[df.index[0]] = state_daily_cases['Confirmed'].iloc[df.index[0]]




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Plotting active cases per state per day

In [10]:
fig = go.Figure()
for st in state_daily_cases['State/UnionTerritory'].unique():
    df = state_daily_cases[state_daily_cases['State/UnionTerritory'] == st]
    fig.add_trace(go.Scatter(x = df['Date'], y = df['ActiveCases'], name = st))

fig.update_layout(title = 'Daily Active Cases', xaxis_title = 'Time', yaxis_title = 'Count (in lakhs)')
fig.show()

# Analyzing data for total available beds across India

In [11]:
total = beds.iloc[35]
print("Total beds available all over India", total)
beds.drop([35], inplace=True)
beds.head()

Total beds available all over India States                               Total
Hospital beds in public sector      713986
Hospital beds in private sector    1215706
Total hospital beds                1929692
Name: 35, dtype: object


Unnamed: 0,States,Hospital beds in public sector,Hospital beds in private sector,Total hospital beds
0,Andaman and Nicobar Islands,1075,1830,2905
1,Andhra Pradesh,23138,39397,62535
2,Arunachal Pradesh,2404,4093,6497
3,Assam,17142,29188,46330
4,Bihar,11664,19860,31524


# Calculating total number of available beds per state on a particular day after the confirmation of occupied cases.

In [12]:
beds_per_state = state_daily_cases.set_index('State/UnionTerritory').join(beds.set_index('States'))
beds_per_state['AvailableBeds'] = beds_per_state['Total hospital beds'] - beds_per_state['ActiveCases']
beds_per_state

Unnamed: 0,Date,Cured,Deaths,Confirmed,ActiveCases,Hospital beds in public sector,Hospital beds in private sector,Total hospital beds,AvailableBeds
Andaman and Nicobar Islands,2020-03-26,0,0,1,1,1075.0,1830.0,2905.0,2904.0
Andaman and Nicobar Islands,2020-03-27,0,0,1,1,1075.0,1830.0,2905.0,2904.0
Andaman and Nicobar Islands,2020-03-28,0,0,6,6,1075.0,1830.0,2905.0,2899.0
Andaman and Nicobar Islands,2020-03-29,0,0,9,9,1075.0,1830.0,2905.0,2896.0
Andaman and Nicobar Islands,2020-03-30,0,0,9,9,1075.0,1830.0,2905.0,2896.0
...,...,...,...,...,...,...,...,...,...
West Bengal,2020-08-24,108007,2794,138870,31174,78566.0,133775.0,212341.0,181167.0
West Bengal,2020-08-25,111292,2851,141837,31036,78566.0,133775.0,212341.0,181305.0
West Bengal,2020-08-26,114543,2909,144801,30658,78566.0,133775.0,212341.0,181683.0
West Bengal,2020-08-27,117857,2964,147775,30323,78566.0,133775.0,212341.0,182018.0


# DataFrame showing the capacity of patients a state can contain on a particular day

In [13]:
beds_df = beds_per_state[['Date', 'AvailableBeds']]
beds_df = pd.pivot_table(beds_per_state, values = 'AvailableBeds', index = 'Date',
                               columns = beds_per_state.index)
for st in beds_df.columns:
    val = beds[beds['States'] == st]['Total hospital beds']
    beds_df[st].fillna(int(val), inplace=True)
    
beds_df.head()

# If we want the data to be in csv format then,
# beds_df = beds_df.to_csv("beds_data.csv")

Unnamed: 0_level_0,Andaman and Nicobar Islands,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Chandigarh,Chhattisgarh,Dadra and Nagar Haveli and Daman and Diu,Delhi,Goa,...,Puducherry,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Uttar Pradesh,Uttarakhand,West Bengal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-30,2905.0,62535.0,6497.0,46330.0,31524.0,10151.0,25438.0,2322.0,65900.0,8141.0,...,9646.0,48468.0,127173.0,4216.0,209546.0,56711.0,11970.0,206108.0,23005.0,212341.0
2020-01-31,2905.0,62535.0,6497.0,46330.0,31524.0,10151.0,25438.0,2322.0,65900.0,8141.0,...,9646.0,48468.0,127173.0,4216.0,209546.0,56711.0,11970.0,206108.0,23005.0,212341.0
2020-02-01,2905.0,62535.0,6497.0,46330.0,31524.0,10151.0,25438.0,2322.0,65900.0,8141.0,...,9646.0,48468.0,127173.0,4216.0,209546.0,56711.0,11970.0,206108.0,23005.0,212341.0
2020-02-02,2905.0,62535.0,6497.0,46330.0,31524.0,10151.0,25438.0,2322.0,65900.0,8141.0,...,9646.0,48468.0,127173.0,4216.0,209546.0,56711.0,11970.0,206108.0,23005.0,212341.0
2020-02-03,2905.0,62535.0,6497.0,46330.0,31524.0,10151.0,25438.0,2322.0,65900.0,8141.0,...,9646.0,48468.0,127173.0,4216.0,209546.0,56711.0,11970.0,206108.0,23005.0,212341.0


# Visualizing the capacity of patients per state
The line graph shows the number of beds available in a state uptill now. There are 3 states - **"Maharashtra, Andhra Pradesh, Bihar"** where the line goes down representing the negative value which indicates that there is an unavailability of that much beds and we need to have that number of beds in our hospital to accommodate each and every patient. 

*This conclusion is purely based on the given data only and does not guarantee us that it is the true value of unavailable beds since we do not have the data of those patients who are in self-isolation in their home or in isolation wards(other than hospitals) or in the railway coaches provided by the Railways. If we have those data as well then we can have a clear picture of available and unavailable beds in a state at a particular day.*

In [14]:
fig = go.Figure()
for col in beds_df.columns:
    fig.add_trace(go.Scatter(x = beds_df.index, y = beds_df[col], name = col))

fig.update_layout(title = 'Number of available beds statewise', yaxis_title = 'Number of available beds')
fig.show()

If there is anything you found it wrong, please do correct me. I would be greatful to know that. If you like my notebook please do like it and am open to any kind of feed back. Thank You.