In [1]:
# libraries

from datetime import datetime
import os
import re
import glob
import requests 
import pandas as pd
from bs4 import BeautifulSoup

# Web Scapping

In [2]:
# web scrapping

link = 'https://www.mohfw.gov.in/'
req = requests.get(link)
soup = BeautifulSoup(req.content, "html.parser")

thead = soup.find_all('thead')[-1]
# print(thead)
head = thead.find_all('tr')

tbody = soup.find_all('tbody')[-1]
body = tbody.find_all('tr')

# print(rows)

head_rows = []
body_rows = []

for tr in head:
    td = tr.find_all(['th', 'td'])
    row = [i.text for i in td]
    head_rows.append(row)
    
for tr in body:
    td = tr.find_all(['th', 'td'])
    row = [i.text for i in td]
    body_rows.append(row)
    
# print(head_rows)
    
df_bs = pd.DataFrame(body_rows[:len(body_rows)-3], columns=head_rows[0])
    
df_bs.drop('S. No.', axis=1, inplace=True)
df_bs.head(36)

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 111 foreign Nationals),Cured/Discharged/Migrated,Death
0,Andaman and Nicobar Islands,33,11,0.0
1,Andhra Pradesh,1097,231,31.0
2,Arunachal Pradesh,1,1,0.0
3,Assam,36,19,1.0
4,Bihar,251,46,2.0
5,Chandigarh,30,17,0.0
6,Chhattisgarh,37,32,0.0
7,Delhi,2625,869,54.0
8,Goa,7,7,0.0
9,Gujarat,3071,282,133.0


# Cleaning data

In [3]:
# date-time information
# ---------------------

now  = datetime.now()
df_bs['Date'] = now.strftime("%m/%d/%Y") 
df_bs['Date'] = pd.to_datetime(df_bs['Date'], format='%m/%d/%Y')
df_bs['Name of State / UT'] = df_bs['Name of State / UT'].str.replace('#', '')
df_bs.head(36)

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 111 foreign Nationals),Cured/Discharged/Migrated,Death,Date
0,Andaman and Nicobar Islands,33,11,0.0,2020-04-26
1,Andhra Pradesh,1097,231,31.0,2020-04-26
2,Arunachal Pradesh,1,1,0.0,2020-04-26
3,Assam,36,19,1.0,2020-04-26
4,Bihar,251,46,2.0,2020-04-26
5,Chandigarh,30,17,0.0,2020-04-26
6,Chhattisgarh,37,32,0.0,2020-04-26
7,Delhi,2625,869,54.0,2020-04-26
8,Goa,7,7,0.0,2020-04-26
9,Gujarat,3071,282,133.0,2020-04-26


In [4]:
df_bs['Name of State / UT'].unique()

array(['Andaman and Nicobar Islands', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chandigarh',
       'Chhattisgarh', 'Delhi', 'Goa', 'Gujarat', 'Haryana',
       'Himachal Pradesh', 'Jammu and Kashmir', 'Jharkhand', 'Karnataka',
       'Kerala', 'Ladakh', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Odisha', 'Puducherry', 'Punjab',
       'Rajasthan', 'Tamil Nadu', 'Telengana', 'Tripura', 'Uttarakhand',
       'Uttar Pradesh', 'West Bengal', '26917*'], dtype=object)

In [5]:
# latitude and longitude information
# ----------------------------------

lat = {'Delhi':28.7041, 'Haryana':29.0588, 'Kerala':10.8505, 'Rajasthan':27.0238,
       'Telengana':18.1124, 'Uttar Pradesh':26.8467, 'Ladakh':34.2996, 'Tamil Nadu':11.1271,
       'Jammu and Kashmir':33.7782, 'Punjab':31.1471, 'Karnataka':15.3173, 'Maharashtra':19.7515,
       'Andhra Pradesh':15.9129, 'Odisha':20.9517, 'Uttarakhand':30.0668, 'West Bengal':22.9868, 
       'Puducherry': 11.9416, 'Chandigarh': 30.7333, 'Chhattisgarh':21.2787, 'Gujarat': 22.2587, 
       'Himachal Pradesh': 31.1048, 'Madhya Pradesh': 22.9734, 'Bihar': 25.0961, 'Manipur':24.6637, 
       'Mizoram':23.1645, 'Goa': 15.2993, 'Andaman and Nicobar Islands': 11.7401, 'Assam' : 26.2006, 
       'Jharkhand': 23.6102, 'Arunachal Pradesh': 28.2180, 'Tripura': 23.9408, 'Nagaland': 26.1584, 
       'Meghalaya' : 25.4670}

long = {'Delhi':77.1025, 'Haryana':76.0856, 'Kerala':76.2711, 'Rajasthan':74.2179,
        'Telengana':79.0193, 'Uttar Pradesh':80.9462, 'Ladakh':78.2932, 'Tamil Nadu':78.6569,
        'Jammu and Kashmir':76.5762, 'Punjab':75.3412, 'Karnataka':75.7139, 'Maharashtra':75.7139,
        'Andhra Pradesh':79.7400, 'Odisha':85.0985, 'Uttarakhand':79.0193, 'West Bengal':87.8550, 
        'Puducherry': 79.8083, 'Chandigarh': 76.7794, 'Chhattisgarh':81.8661, 'Gujarat': 71.1924, 
        'Himachal Pradesh': 77.1734, 'Madhya Pradesh': 78.6569, 'Bihar': 85.3131, 'Manipur':93.9063, 
        'Mizoram':92.9376, 'Goa': 74.1240, 'Andaman and Nicobar Islands': 92.6586, 'Assam' : 92.9376, 
        'Jharkhand': 85.2799, 'Arunachal Pradesh': 94.7278, 'Tripura': 91.9882, 'Nagaland': 94.5624,
        'Meghalaya' : 91.3662}

df_bs['Latitude'] = df_bs['Name of State / UT'].map(lat)
df_bs['Longitude'] = df_bs['Name of State / UT'].map(long)

df_bs.head(36)

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 111 foreign Nationals),Cured/Discharged/Migrated,Death,Date,Latitude,Longitude
0,Andaman and Nicobar Islands,33,11,0.0,2020-04-26,11.7401,92.6586
1,Andhra Pradesh,1097,231,31.0,2020-04-26,15.9129,79.74
2,Arunachal Pradesh,1,1,0.0,2020-04-26,28.218,94.7278
3,Assam,36,19,1.0,2020-04-26,26.2006,92.9376
4,Bihar,251,46,2.0,2020-04-26,25.0961,85.3131
5,Chandigarh,30,17,0.0,2020-04-26,30.7333,76.7794
6,Chhattisgarh,37,32,0.0,2020-04-26,21.2787,81.8661
7,Delhi,2625,869,54.0,2020-04-26,28.7041,77.1025
8,Goa,7,7,0.0,2020-04-26,15.2993,74.124
9,Gujarat,3071,282,133.0,2020-04-26,22.2587,71.1924


In [6]:
df_bs.isna().sum()

Name of State / UT                                          0
Total Confirmed cases (Including 111 foreign Nationals)     0
Cured/Discharged/Migrated                                   0
Death                                                       1
Date                                                        0
Latitude                                                    1
Longitude                                                   1
dtype: int64

# Saving data

In [7]:
# saving data
# -----------

file_name = now.strftime("covid_data")+'.csv'
file_loc = 'C:\\Users\\AAKASH SHARMA\\Desktop\\covid datasets\\'
df_bs.to_csv(file_loc + file_name, index=False)

df_bs.head(36)

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 111 foreign Nationals),Cured/Discharged/Migrated,Death,Date,Latitude,Longitude
0,Andaman and Nicobar Islands,33,11,0.0,2020-04-26,11.7401,92.6586
1,Andhra Pradesh,1097,231,31.0,2020-04-26,15.9129,79.74
2,Arunachal Pradesh,1,1,0.0,2020-04-26,28.218,94.7278
3,Assam,36,19,1.0,2020-04-26,26.2006,92.9376
4,Bihar,251,46,2.0,2020-04-26,25.0961,85.3131
5,Chandigarh,30,17,0.0,2020-04-26,30.7333,76.7794
6,Chhattisgarh,37,32,0.0,2020-04-26,21.2787,81.8661
7,Delhi,2625,869,54.0,2020-04-26,28.7041,77.1025
8,Goa,7,7,0.0,2020-04-26,15.2993,74.124
9,Gujarat,3071,282,133.0,2020-04-26,22.2587,71.1924


In [8]:
import pandas as pd
import folium

In [9]:
cmap=folium.Map(location=[20.5937,78.9629],zoom_start=5)

In [10]:
dataset = pd.read_csv("covid_data.csv")

In [11]:
dataset.head()

Unnamed: 0,Name of State / UT,Total Confirmed cases (Including 111 foreign Nationals),Cured/Discharged/Migrated,Death,Date,Latitude,Longitude
0,Andaman and Nicobar Islands,27,11,0,2020-04-25,11.7401,92.6586
1,Andhra Pradesh,955,145,29,2020-04-25,15.9129,79.74
2,Arunachal Pradesh,1,1,0,2020-04-25,28.218,94.7278
3,Assam,36,19,1,2020-04-25,26.2006,92.9376
4,Bihar,223,46,2,2020-04-25,25.0961,85.3131


In [12]:
dataset.columns

Index(['Name of State / UT',
       'Total Confirmed cases (Including 111 foreign Nationals) ',
       'Cured/Discharged/Migrated', 'Death', 'Date', 'Latitude', 'Longitude'],
      dtype='object')

In [13]:
place = dataset[[ 'Latitude', 'Longitude']]

In [14]:
type(dataset)

pandas.core.frame.DataFrame

In [15]:
type(place)

pandas.core.frame.DataFrame

In [16]:
place = place.values.tolist()

In [17]:
place

[[11.7401, 92.6586],
 [15.9129, 79.74],
 [28.218000000000004, 94.7278],
 [26.2006, 92.9376],
 [25.0961, 85.3131],
 [30.7333, 76.7794],
 [21.2787, 81.8661],
 [28.7041, 77.1025],
 [15.2993, 74.124],
 [22.2587, 71.1924],
 [29.0588, 76.0856],
 [31.1048, 77.1734],
 [33.7782, 76.5762],
 [23.6102, 85.2799],
 [15.3173, 75.7139],
 [10.8505, 76.2711],
 [34.2996, 78.2932],
 [22.9734, 78.6569],
 [19.7515, 75.7139],
 [24.6637, 93.9063],
 [25.467, 91.3662],
 [23.1645, 92.9376],
 [20.9517, 85.0985],
 [11.9416, 79.8083],
 [31.1471, 75.3412],
 [27.0238, 74.2179],
 [11.1271, 78.6569],
 [18.1124, 79.0193],
 [23.9408, 91.9882],
 [30.0668, 79.0193],
 [26.8467, 80.9462],
 [22.9868, 87.855]]

In [18]:
type(place)

list

In [19]:
def lw(i,color):
    folium.Marker(location=point,
    popup=dataset['Name of State / UT'][i],
    icon=folium.Icon(color=color,icon='tint',icon_color="white")
                 ).add_to(cmap)

In [20]:
i=0
for point in place:
    lw(i,'red')
    i+=1

In [21]:
cmap

In [170]:
folium.LayerControl().add_to(cmap)

<folium.map.LayerControl at 0x237fba95a88>

In [171]:
cmap

In [188]:
dataset.columns


Index(['Name of State / UT',
       'Total Confirmed cases (Including 111 foreign Nationals) ',
       'Cured/Discharged/Migrated', 'Death', 'Date', 'Latitude', 'Longitude'],
      dtype='object')

In [208]:
places2= dataset[['Name of State / UT','Death']]
places2

Unnamed: 0,Name of State / UT,Death
0,Andaman and Nicobar Islands,0
1,Andhra Pradesh,29
2,Arunachal Pradesh,0
3,Assam,1
4,Bihar,2
5,Chandigarh,0
6,Chhattisgarh,0
7,Delhi,53
8,Goa,0
9,Gujarat,127


In [206]:
folium.Choropleth(
    geo_data="https://github.com/mickeykedia/India-Maps/blob/master/India_Administrative_Maps/states/india_states.geojson",
    name='choropleth',
    data=places2,
    columns=['Name of State / UT','Death'],
#    key_on='feature.id',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate (%)'
).add_to(m)

#folium.LayerControl().add_to(m)

m

JSONDecodeError: Expecting value: line 7 column 1 (char 6)

In [248]:
dataset.columns

Index(['Name of State / UT',
       'Total Confirmed cases (Including 111 foreign Nationals) ',
       'Cured/Discharged/Migrated', 'Death', 'Date', 'Latitude', 'Longitude'],
      dtype='object')

In [249]:

import pandas as pd


url = 'https://github.com/mickeykedia/India-Maps/blob/master/India_Administrative_Maps/states'
state_geo = f'{url}/india_states.geojson'
#state_unemployment = f'{url}/US_Unemployment_Oct2012.csv'
state_data = pd.read_csv("covid_data.csv")

m = folium.Map(location=[48, -102], zoom_start=3)

folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=state_data,
    columns=['Name of State / UT', 'Cured/Discharged/Migrated'],
  #  key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate (%)'
).add_to(m)

folium.LayerControl().add_to(m)

m

JSONDecodeError: Expecting value: line 7 column 1 (char 6)

In [235]:

url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
state_geo = f'{url}/india_states.geojson'
state_unemployment = f'{url}/US_Unemployment_Oct2012.csv'
s = pd.read_csv(state_unemployment)

In [237]:
type(s)

pandas.core.frame.DataFrame

In [238]:
s.head()

Unnamed: 0,State,Unemployment
0,AL,7.1
1,AK,6.8
2,AZ,8.1
3,AR,7.2
4,CA,10.1


In [239]:
s.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 2 columns):
State           50 non-null object
Unemployment    50 non-null float64
dtypes: float64(1), object(1)
memory usage: 928.0+ bytes


In [246]:
s.values
feature.id

NameError: name 'feature' is not defined