In [46]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Screener.in company pages to scrape, in the order the rows will appear
urls = [
    'https://www.screener.in/company/VOLTAS/consolidated/',
    'https://www.screener.in/company/BLUESTARCO/consolidated/',
    'https://www.screener.in/company/CROMPTON/consolidated/',
    'https://www.screener.in/company/ORIENTELEC/',
    'https://www.screener.in/company/HAVELLS/consolidated/',
    'https://www.screener.in/company/SYMPHONY/consolidated/',
    'https://www.screener.in/company/WHIRLPOOL/',
]

# One accumulator per output column. The generator creates a separate empty
# list for every name (a chained `a = b = []` would alias them instead).
(
    company_names,
    reserves_list,
    borrowings_list,
    total_liabilities_list,
    fixed_assets_list,
    investments_list,
    total_assets_list,
) = ([] for _ in range(7))

# Lower-cased label substrings of the balance-sheet rows we keep. Order matters:
# a row is assigned to the FIRST entry whose text occurs in its label.
_BALANCE_SHEET_FIELDS = (
    'reserves',
    'borrowings',
    'total liabilities',
    'fixed assets',
    'investments',
    'total assets',
)


def _company_name_from_url(url):
    """Return the upper-cased company segment of a company page URL.

    Works on the URL path only, so a missing trailing slash, a query string
    or a fragment does not shift which segment is picked. A trailing
    ``consolidated`` segment is skipped.
    """
    from urllib.parse import urlsplit

    segments = [part for part in urlsplit(url).path.split('/') if part]
    if segments and segments[-1].lower() == 'consolidated':
        segments.pop()
    return segments[-1].upper() if segments else ''


def _parse_number(text):
    """Convert cell text such as ``'1,234'`` to float; None if not numeric."""
    try:
        return float(text.replace(',', ''))
    except ValueError:
        return None


def _iter_balance_sheet_rows(soup):
    """Yield every <tr> inside the tables of the ``balance-sheet`` section."""
    for section in soup.find_all('section', id='balance-sheet'):
        for holder in section.find_all('div', class_='responsive-holder fill-card-width'):
            for table in holder.find_all('table'):
                for tbody in table.find_all('tbody'):
                    yield from tbody.find_all('tr')


# Function to scrape a single URL
def scrape_company_data(url, timeout=30):
    """Scrape the latest balance-sheet figures for one company page.

    Downloads ``url``, walks the rows of the ``balance-sheet`` section and,
    for each tracked row label, keeps the value found in the row's last cell.
    The results are appended to the module-level lists ``company_names``,
    ``reserves_list``, ``borrowings_list``, ``total_liabilities_list``,
    ``fixed_assets_list``, ``investments_list`` and ``total_assets_list``
    (exactly one entry each per call). A figure that is absent from the page
    or not numeric is recorded as ``None``.

    Args:
        url: Company page URL; the company name is taken from its path.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        None. The function works through its side effects on the lists.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    import warnings

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # Keep the best-effort behaviour (the row is still recorded), but make
        # the failure visible instead of silently producing empty values.
        warnings.warn(
            f"{url} returned HTTP {response.status_code}; "
            "balance-sheet values may be missing",
            stacklevel=2,
        )
    soup = BeautifulSoup(response.content, 'html.parser')

    # Latest value per tracked field; stays None when the row is never seen.
    values = dict.fromkeys(_BALANCE_SHEET_FIELDS)

    for row in _iter_balance_sheet_rows(soup):
        cells = row.find_all('td')
        if len(cells) < 2:
            continue

        # First column is the row label; last column is the value that is kept.
        label = cells[0].get_text(strip=True).lower()
        field = next((name for name in _BALANCE_SHEET_FIELDS if name in label), None)
        if field is not None:
            values[field] = _parse_number(cells[-1].get_text(strip=True))

    # Append everything together so all lists always grow by one entry per call.
    company_names.append(_company_name_from_url(url))
    reserves_list.append(values['reserves'])
    borrowings_list.append(values['borrowings'])
    total_liabilities_list.append(values['total liabilities'])
    fixed_assets_list.append(values['fixed assets'])
    investments_list.append(values['investments'])
    total_assets_list.append(values['total assets'])

# Scrape every company page; each call appends one entry to each list
for url in urls:
    scrape_company_data(url)

# Pair each column title with its accumulated values and build the table
df = pd.DataFrame(
    dict(
        zip(
            [
                'Company',
                'Reserves',
                'Borrowings',
                'Total Liabilities',
                'Fixed Assets',
                'Investments',
                'Total Assets',
            ],
            [
                company_names,
                reserves_list,
                borrowings_list,
                total_liabilities_list,
                fixed_assets_list,
                investments_list,
                total_assets_list,
            ],
        )
    )
)

# Last expression of the cell: renders the table as the notebook output
df

Unnamed: 0,Company,Reserves,Borrowings,Total Liabilities,Fixed Assets,Investments,Total Assets
0,VOLTAS,5787.0,744.0,11994.0,548.0,3508.0,11994.0
1,BLUESTARCO,2569.0,243.0,6611.0,1165.0,267.0,6611.0
2,CROMPTON,2871.0,683.0,6082.0,3197.0,689.0,6082.0
3,ORIENTELEC,618.0,112.0,1451.0,236.0,37.0,1451.0
4,HAVELLS,7384.0,303.0,12433.0,3988.0,20.0,12433.0
5,SYMPHONY,735.0,170.0,1210.0,327.0,373.0,1210.0
6,WHIRLPOOL,3098.0,223.0,5358.0,942.0,597.0,5358.0


In [47]:
# Write the scraped table to CSV without the DataFrame index column.
# NOTE(review): the file name does not describe this balance-sheet data;
# confirm it is intentional before other code starts depending on it.
df.to_csv(path_or_buf="Pokemon_Item_Inventory.csv", index=False)