In [1]:
# libraries

from datetime import datetime
import os
import glob
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Web Scraping

In [4]:
# web scraping
# ------------
# Download the ministry page, pull the first HTML table, and turn it into
# the DataFrame `df_bs` used by the rest of the notebook.

link = 'https://www.mohfw.gov.in/'

# A timeout keeps the cell from hanging forever, and raise_for_status()
# stops us from silently parsing an HTTP error page.
req = requests.get(link, timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.content, "html.parser")

# The first <table> on the page is the one scraped here.
table = soup.find_all('table')[0]
rows = table.find_all('tr')

# One list of cell texts per <tr>.
row_list = [[cell.text for cell in tr.find_all('td')] for tr in rows]

# Build the frame ONCE, after all rows are collected (it used to be rebuilt
# on every loop iteration). The first row supplies the column names; the
# last row is dropped, as before (presumably a totals/footer row -- confirm).
df_bs = pd.DataFrame(row_list[1:-1], columns=row_list[0])

# The serial-number column carries no information once rows are indexed.
df_bs = df_bs.drop(columns='S. No.')
df_bs.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Indian National),Total Confirmed cases ( Foreign National ),Cured/Discharged,Death
0,Andhra Pradesh,1,0,0,0
1,Delhi,7,0,2,1
2,Haryana,0,14,0,0
3,Karnataka,6,0,0,1
4,Kerala,22,0,3,0


# Data Cleaning

In [5]:
# date-time information
# ---------------------
# Stamp every scraped row with the calendar date of this run.

# `now` is kept as a top-level name: the saving cell uses it for the file name.
now = datetime.now()

date_format = "%m/%d/%Y"

# Format to text and parse back so the column holds datetimes with the
# time-of-day dropped.
run_date_text = pd.Series(now.strftime(date_format), index=df_bs.index)
df_bs['Date'] = pd.to_datetime(run_date_text, format=date_format)
df_bs.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Indian National),Total Confirmed cases ( Foreign National ),Cured/Discharged,Death,Date
0,Andhra Pradesh,1,0,0,0,2020-03-16
1,Delhi,7,0,2,1,2020-03-16
2,Haryana,0,14,0,0,2020-03-16
3,Karnataka,6,0,0,1,2020-03-16
4,Kerala,22,0,3,0,2020-03-16


In [11]:
# latitude and longitude information
# ----------------------------------
# Attach a (latitude, longitude) pair to each row by looking up its
# 'Name of State / UT' value. Names missing from the table map to NaN.

# state name -> (latitude, longitude); keys must match the scraped names exactly
_state_coords = {
    'Delhi': (28.7041, 77.1025),
    'Haryana': (29.0588, 76.0856),
    'Kerala': (10.8505, 76.2711),
    'Rajasthan': (27.0238, 74.2179),
    'Telengana': (18.1124, 79.0193),
    'Uttar Pradesh': (26.8467, 80.9462),
    'Union Territory of Ladakh': (34.2996, 78.2932),
    'Tamil Nadu': (11.1271, 78.6569),
    'Union Territory of Jammu and Kashmir': (33.7782, 76.5762),
    'Punjab': (31.1471, 75.3412),
    'Karnataka': (15.3173, 75.7139),
    'Maharashtra': (19.7515, 75.7139),
    'Andhra Pradesh': (15.9129, 79.7400),
    'Uttarakhand': (30.0668, 79.0193),
}

# Per-axis lookup dicts, derived from the single table above.
lat = {state: pair[0] for state, pair in _state_coords.items()}
long = {state: pair[1] for state, pair in _state_coords.items()}

state_names = df_bs['Name of State / UT']
df_bs['Latitude'] = state_names.map(lat)
df_bs['Longitude'] = state_names.map(long)

df_bs.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Indian National),Total Confirmed cases ( Foreign National ),Cured/Discharged,Death,Date,Latitude,Longitude
0,Andhra Pradesh,1,0,0,0,2020-03-16,15.9129,79.74
1,Delhi,7,0,2,1,2020-03-16,28.7041,77.1025
2,Haryana,0,14,0,0,2020-03-16,29.0588,76.0856
3,Karnataka,6,0,0,1,2020-03-16,15.3173,75.7139
4,Kerala,22,0,3,0,2020-03-16,10.8505,76.2711


In [13]:
# Count missing values per column; a non-zero Latitude/Longitude count
# means a state name had no entry in the coordinate lookup dicts.
df_bs.isnull().sum()

Name of State / UT                            0
Total Confirmed cases (Indian National)       0
Total Confirmed cases ( Foreign National )    0
Cured/Discharged                              0
Death                                         0
Date                                          0
Latitude                                      0
Longitude                                     0
dtype: int64

# Saving data

In [6]:
# saving data
# -----------
# Write today's snapshot to '<file_loc>/<YYYY_MM_DD>.csv'.

file_name = now.strftime("%Y_%m_%d")+'.csv'
file_loc = r'C:\Users\imdevskp\Desktop\covid_india'

# Join with a path separator so the CSV lands INSIDE the folder; plain
# string concatenation produced '...\Desktop\covid_india<date>.csv' next
# to it. Create the folder first so the write cannot fail on a missing
# directory.
os.makedirs(file_loc, exist_ok=True)
df_bs.to_csv(os.path.join(file_loc, file_name), index=False)

df_bs.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Indian National),Total Confirmed cases ( Foreign National ),Cured,Death,Date,Latitude,Longitude
0,Delhi,6,0,0,0,2020-03-13,28.7041,77.1025
1,Haryana,0,14,0,0,2020-03-13,29.0588,76.0856
2,Kerala,19,0,3,0,2020-03-13,10.8505,76.2711
3,Rajasthan,1,2,0,0,2020-03-13,27.0238,74.2179
4,Telengana,1,0,0,0,2020-03-13,18.1124,79.0193
