In [1]:
import requests
from bs4 import BeautifulSoup
import json
import logging
import sys

In [2]:
# Dedicated logger for this notebook; DEBUG lets every record through to the handlers.
logger = logging.getLogger(name="districts-scraping")
logger.setLevel(logging.DEBUG)

In [3]:
# File logging for the scraper: records at DEBUG and above go to
# ./districts-scraping.log with level, timestamp, source location and message.
logFileFormatter = logging.Formatter(
    # Plain string: the %(...)s fields are filled in by logging itself, so no f-string is needed.
    fmt="%(levelname)s %(asctime)s (%(relativeCreated)d) \t %(pathname)s F%(funcName)s L%(lineno)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
fileHandler = logging.FileHandler(filename='./districts-scraping.log')
fileHandler.setFormatter(logFileFormatter)
fileHandler.setLevel(level=logging.DEBUG)

# Re-running this cell would otherwise stack a second handler on the same file
# and write every record twice, so drop any earlier handler for this file first.
for existing_handler in list(logger.handlers):
    if (
        isinstance(existing_handler, logging.FileHandler)
        and existing_handler.baseFilename == fileHandler.baseFilename
    ):
        logger.removeHandler(existing_handler)
        existing_handler.close()

logger.addHandler(fileHandler)

In [4]:
# Download the state-wise district listing page; the HTML is parsed in later cells.
# The timeout keeps the cell from hanging indefinitely if the server never answers.
r = requests.get(
    "https://www.thecivilindia.com/pages/governence/districts.html",
    timeout=30,
)
if r.status_code == 200:
    logger.info("Scraped State wise District info successfully.")
else:
    # Pass the status code through a %s placeholder: without one, logging cannot
    # format the record and reports a formatting failure instead of the error.
    logger.error("There was an error: %s", r.status_code)

In [5]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(markup=r.text, features="html.parser")

In [6]:
# Locate the first <div class="three_quarter">; later cells read the table rows from it.
tq_div = soup.find("div", class_="three_quarter")
if tq_div is None:
    # Fail here with a clear message rather than with an AttributeError on None downstream.
    logger.error("Could not find div.three_quarter in the downloaded page")
    raise ValueError("div with class 'three_quarter' not found in the downloaded page")

In [7]:
# Collect every table row (<tr>) found inside the container div.
districts_data = tq_div.select(selector="tr")

In [8]:
# Drop the first row (presumably the table header - TODO confirm against the page).
# Rebuilding the list from tq_div instead of deleting in place keeps this cell
# idempotent: re-running it no longer removes one more row each time, and an
# empty result yields an empty list instead of an IndexError.
districts_data = tq_div.select("tr")[1:]

In [9]:
# Accumulator filled by the row loop: state name -> list of district names.
district_state_mapping: dict = dict()

In [10]:
# Walk the table rows and group district names under the most recent state heading.
# A row whose second cell contains an <h2> starts a new state; any other row is
# a district belonging to the state seen last. Names are stripped and lower-cased.

# Start from an empty mapping so re-running this cell does not append duplicates.
district_state_mapping.clear()
state_name = None  # set once the first state heading row has been seen

for data in districts_data:
    dist_data = data.find_all("td")
    if len(dist_data) < 2:
        # Rows without a second cell carry no state or district name.
        logger.warning("Skipping row with fewer than two cells")
        continue

    name_cell = dist_data[1]
    state_heading = name_cell.find("h2")
    if state_heading is not None:
        state_name = state_heading.get_text().strip().lower()
        continue

    if state_name is None:
        # A district row before any heading has no state to attach to.
        logger.warning("Skipping district row found before any state heading")
        continue

    dist_name = name_cell.get_text().strip().lower()
    district_state_mapping.setdefault(state_name, []).append(dist_name)
    logger.info(f"Added {dist_name} to {state_name} successfuly")

In [11]:
# Persist the state -> districts mapping as JSON in the working directory.
with open("stateWiseDistrict.json", mode="w") as json_file:
    json.dump(obj=district_state_mapping, fp=json_file)
    logger.info("Data saved to json file succesfully!")