# Setting Up Required Libraries
Import the libraries required for fetching and handling the different data formats.

In [2]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

# Ensure all libraries are imported correctly
print("Libraries imported successfully.")

Libraries imported successfully.


# Handle ERLDC API Response
Make a request to the ERLDC API endpoint, parse the response (the endpoint returns JSON, not XML), and extract the relevant data.

In [9]:
# Handle the ERLDC API response (the endpoint returns JSON, not XML)
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context

# Suppress the InsecureRequestWarning that urllib3 emits for HTTPS requests made
# without certificate verification (this notebook sets `session.verify = False`)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# Create a custom adapter to handle SSL
class CustomAdapter(HTTPAdapter):
    """HTTPAdapter that builds its connection pool with a relaxed SSL context.

    The context has option bit 0x4 set (OpenSSL's SSL_OP_LEGACY_SERVER_CONNECT,
    which permits connecting to servers lacking secure-renegotiation support)
    and has hostname checking switched off.
    """

    # Numeric value of OpenSSL's SSL_OP_LEGACY_SERVER_CONNECT option
    _LEGACY_SERVER_CONNECT = 0x4

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager, passing the relaxed context as ``ssl_context``.

        All positional and keyword arguments are forwarded unchanged to
        ``HTTPAdapter.init_poolmanager``; any caller-supplied ``ssl_context``
        keyword is overwritten.
        """
        ssl_context = create_urllib3_context()
        ssl_context.options |= self._LEGACY_SERVER_CONNECT
        ssl_context.check_hostname = False
        kwargs["ssl_context"] = ssl_context
        return super().init_poolmanager(*args, **kwargs)


# Create a session whose HTTPS traffic goes through the custom adapter
session = requests.Session()
# Certificate verification is disabled for every request made with this session.
# This is insecure; it is only acceptable here because the target hosts are known.
session.verify = False
session.mount("https://", CustomAdapter())

# Define the URL for the ERLDC region-statistics API (the response body is JSON)
erldc_api_url = "https://app.erldc.in/api/LiveDataScheduler/Get/RegionStatistics"

# Send a GET request to the URL. requests applies no timeout by default, so an
# explicit one keeps the cell from hanging forever if the server never answers.
response = session.get(erldc_api_url, timeout=30)

In [15]:
# Show the decoded JSON body of the ERLDC response
display(response.json())

{'StatId': 438940,
 'Date': '040325',
 'Time': '181458',
 'Freq': '49.92',
 'RevNo': '81',
 'DemandMet': '21657.85156',
 'DSMMet': '593.084351'}

In [11]:
# Decode the JSON body of the response into a Python dict
erldc_data_json = response.json()

# Copy the fields of interest into a new dict, in alphabetical key order
erldc_data = {
    field: erldc_data_json[field]
    for field in ("DSMMet", "Date", "DemandMet", "Freq", "RevNo", "StatId", "Time")
}

# Print one "key: value" line per field
for key, value in erldc_data.items():
    print(f"{key}: {value}")

DSMMet: 593.084351
Date: 040325
DemandMet: 21657.85156
Freq: 49.92
RevNo: 81
StatId: 438940
Time: 181458


# Process JSON API Response
Make a request to the JSON API endpoint, parse the JSON response, and extract meaningful data from its structure.

In [16]:
# Process JSON API Response

# Define the URL for the JSON API endpoint
json_api_url = "https://www.srldc.in/indexPageDataInEvery5min"

# Make a GET request to the JSON API endpoint using the existing session.
# requests applies no timeout by default, so an explicit one keeps the cell
# from hanging forever if the server never answers.
response = session.get(json_api_url, timeout=30)

In [17]:
# HTTP status code of the SRLDC response
response.status_code

200

In [24]:
# Pretty-print the decoded JSON body with 4-space indentation
print(json.dumps(response.json(), indent=4))

{
    "localDate": {
        "year": 2025,
        "month": "MARCH",
        "chronology": {
            "id": "ISO",
            "calendarType": "iso8601"
        },
        "era": "CE",
        "leapYear": false,
        "dayOfMonth": 4,
        "monthValue": 3,
        "dayOfWeek": "TUESDAY",
        "dayOfYear": 63
    },
    "localDateForUpdate": 1741093200000,
    "dsmRate": 454.07,
    "sRDemand": 53530.32,
    "allIndiaDemand": 206687.81,
    "frequency": 49.97,
    "revisionNo": null,
    "sRDemandForIndex": 53530,
    "dsmRateForIndex": 206687
}


In [7]:
# Print each top-level field of the JSON body as "key: value"; nested objects
# (e.g. localDate) are printed as Python dicts rather than being flattened
for key, value in response.json().items():
    print(f"{key}: {value}")

localDate: {'year': 2025, 'month': 'MARCH', 'chronology': {'id': 'ISO', 'calendarType': 'iso8601'}, 'era': 'CE', 'leapYear': False, 'dayOfMonth': 4, 'monthValue': 3, 'dayOfWeek': 'TUESDAY', 'dayOfYear': 63}
localDateForUpdate: 1741092900000
dsmRate: 454.07
sRDemand: 53210.93
allIndiaDemand: 205507.84
frequency: 49.97
revisionNo: None
sRDemandForIndex: 53210
dsmRateForIndex: 205507


# Web Scraping Setup
Configure BeautifulSoup for HTML parsing and make a request to the target website.

Newnr: https://newnr.nrldc.in/

In [None]:
# Web Scraping Setup

# Define the URL for the target website
newnr_url = "https://newnr.nrldc.in/"

# Make a GET request to the target website. requests applies no timeout by
# default, so an explicit one keeps the cell from hanging forever if the
# server never answers.
response = session.get(newnr_url, verify=False, timeout=30)

In [29]:
# HTTP status code of the NRLDC home page response
response.status_code

200

In [32]:
# response.text is the response body decoded to a Python str
type(response.text)

str

In [None]:
# Print the raw HTML source of the page.
# Alternative: display(HTML(response.text)) would embed the rendered HTML content
# in the Jupyter notebook (requires `from IPython.display import HTML`).
print(response.text)

<!-- JavaScript for handling external link clicks -->

<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>NRLDC</title>

    <link rel="shortcut icon" href="https://newnr.nrldc.in/images/favicon.ico" type="image/x-icon">
    <link rel="icon" href="https://newnr.nrldc.in/images/favicon.ico" type="image/x-icon">

    <meta name="csrf_token" content="QTXqEygwNCTdBQaUTXwAOwkWtSpM0fS45Fr4TV1j" />
    <!-- Google Font: Source Sans Pro -->
    <link rel="stylesheet"
        href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,400i,700&display=fallback">
    <!-- Font Awesome -->
    <link rel="stylesheet" href="https://newnr.nrldc.in/css/fontawesome.min.css">
    <!-- icheck bootstrap -->
    <link rel="stylesheet" href="https://newnr.nrldc.in/css/icheck-bootstrap.min.css">
    <link rel="stylesheet" href="https://newnr.nrldc.in/css/bootstrap-datepicker.css">
    <link rel="st

In [38]:
# Parse the HTML content using BeautifulSoup

# Create a BeautifulSoup object using Python's built-in "html.parser" backend
soup = BeautifulSoup(response.text, "html.parser")

# Print the <title> element of the webpage (the whole tag, not just its text)
print(soup.title)

<title>NRLDC</title>


In [None]:
# Find all <h2 class="m-0"> elements
h2_elements = soup.find_all("h2", class_="m-0")

# Print the text content of each heading
for h2 in h2_elements:
    print(h2.text)

Frequency
All India Demand
NR Demand
Peak Demand Today
NR Generation
NR RE Generation
Current Sch Revision
Last Updated On


In [None]:
# Find all <div class="content"> elements
content_divs = soup.find_all("div", class_="content")

# Show the matched elements; besides the h2/p metric blocks, the list also
# contains divs built from h4/span elements
display(content_divs)

[<div class="content">
 <h2 class="m-0">Frequency</h2>
 <p>49.89 Hz</p>
 </div>,
 <div class="content">
 <h2 class="m-0">All India Demand</h2>
 <p>207411 MW</p>
 </div>,
 <div class="content">
 <h2 class="m-0">NR Demand</h2>
 <p>54584 MW</p>
 </div>,
 <div class="content">
 <h2 class="m-0">Peak Demand Today</h2>
 <p>61114 MW</p>
 </div>,
 <div class="content">
 <h2 class="m-0">NR Generation</h2>
 <p>61575 MW</p>
 </div>,
 <div class="content">
 <h2 class="m-0">NR RE Generation</h2>
 <p>817 MW</p>
 </div>,
 <div class="content">
 <h2 class="m-0">Current Sch Revision</h2>
 <p>120</p>
 </div>,
 <div class="content">
 <h2 class="m-0">Last Updated On</h2>
 <p>04-03-25 18:35:00</p>
 </div>,
 <div class="content">
 <h4></h4>
 <span>06 Jan 2025</span>
 </div>,
 <div class="content">
 <h4></h4>
 <span>06 Jan 2025</span>
 </div>,
 <div class="content">
 <h4></h4>
 <span>06 Jan 2025</span>
 </div>,
 <div class="content">
 <h4></h4>
 <span>06 Jan 2025</span>
 </div>,
 <div class="content">
 <h4></

In [52]:
# Print "heading: value" for each content div that holds both an
# <h2 class="m-0"> heading and a <p> value; other divs are skipped
for content_div in content_divs:
    h2, p = content_div.find("h2", class_="m-0"), content_div.find("p")
    if not (h2 and p):
        continue
    print(f"{h2.text}: {p.text}")

Frequency: 49.89 Hz
All India Demand: 207411 MW
NR Demand: 54584 MW
Peak Demand Today: 61114 MW
NR Generation: 61575 MW
NR RE Generation: 817 MW
Current Sch Revision: 120
Last Updated On: 04-03-25 18:35:00


# Another website setup 
WRLDC: https://wrldc.in/content/English/index.aspx

In [65]:
# Define the URL for the target website
wrldc_url = "https://wrldc.in/content/English/index.aspx"

# Make a GET request to the target website. requests applies no timeout by
# default, so an explicit one keeps the cell from hanging forever if the
# server never answers.
wrldc_response = session.get(wrldc_url, verify=False, timeout=30)

# Show the HTTP status code as the cell output
wrldc_response.status_code

200

After manually inspecting the HTML response, I found that the required label–value pairs are in `<div class="box">` elements.

In [66]:
# Create a BeautifulSoup object from the WRLDC page
soup = BeautifulSoup(wrldc_response.text, "html.parser")

# Find all the <div class="box"> elements
box_elements = soup.find_all("div", class_="box")
# Show the matched elements as the cell output
box_elements

[<div class="box">
 <span>
                                                         date time
                                                     </span> <strong id="dataDateTime">
                                                         04 Mar 2025 19:04
                                                     </strong>
 </div>,
 <div class="box">
 <span>
                                                         Demand
                                                     </span> <strong id="dataDemand">
                                                         71005
                                                     </strong>
 </div>,
 <div class="box">
 <span>
                                                         Frequency
                                                     </span> <strong id="dataFrequency">
                                                         49.89
                                                     </strong>
 </div>,
 <div class="box" style="display:none;">


In [67]:
# Collect the label/value pair of every box, keyed by the id of its <strong> element
data_dict = {}

# Loop through each box element
for box in box_elements:
    span_element = box.find("span")
    strong_element = box.find("strong")

    # A box without a <strong> element carries no value, so nothing is recorded
    if strong_element is None:
        continue

    # Tolerate a missing <span>: keep the value with an empty label instead of
    # raising AttributeError on `None.text`
    span_text = span_element.text.strip() if span_element is not None else ""

    # Get the ID and value (the id is None when <strong> has no id attribute)
    data_id = strong_element.get("id")
    data_value = strong_element.text.strip()
    data_dict[data_id] = {"label": span_text, "value": data_value}

# Convert to a DataFrame (one row per id) for better visualization
df = pd.DataFrame.from_dict(data_dict, orient="index")
display(df)

Unnamed: 0,label,value
dataDateTime,date time,04 Mar 2025 19:04
dataDemand,Demand,71005
dataFrequency,Frequency,49.89
dataDeviationRate,Deviation Rate (Rs/Unit),8
dataRenewable,Renewable,6400
dataRevision,Revision nos,122


In [74]:
# Select the <div class="box"> elements from the parsed page
box_elements = soup.find_all("div", class_="box")

# Print each label left-aligned in a 25-character column, followed by its value
for box in box_elements:
    span_element = box.find("span")
    strong_element = box.find("strong")

    # A box without a <strong> element carries no value, so it is skipped
    if strong_element is None:
        continue

    # Tolerate a missing <span>: print the value with an empty label instead of
    # raising AttributeError on `None.text`
    span_text = span_element.text.strip() if span_element is not None else ""
    data_value = strong_element.text.strip()
    print(f"{span_text:<25}: {data_value}")

date time                : 04 Mar 2025 19:04
Demand                   : 71005
Frequency                : 49.89
Deviation Rate (Rs/Unit) : 8
Renewable                : 6400
Revision nos             : 122
