In [None]:
import calendar
import os
import time
import traceback
from datetime import datetime
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

In [1]:
def download_revenue_profit(code, name, quarters=range(55, 109)):
    """
    Scrape the BSE results pages of one company and write the extracted
    figures to ``Data/Revenue/<code>.csv`` under the current working
    directory (one row per quarter page that could be parsed).

    Parameters
    ----------
    code : string
        security code of the company.
    name : string
        security id of the company.
    quarters : iterable of int, optional
        values substituted into the ``qtr`` query parameter of the results
        URL, one page request per value. Defaults to ``range(55, 109)``.

    Methods:
    --------

    create_driver : creates the chrome driver.

    pick_columns : chooses the table columns that hold one metric.

    download : extracts the data from the pages and saves to a csv file.

    Notes
    -----
    A page that fails to load or parse is reported with a traceback and
    skipped; the remaining quarters are still processed. The browser is
    always shut down, even when scraping fails.
    """
    path = os.path.join(os.getcwd(), "Data", "Revenue")
    os.makedirs(path, exist_ok=True)

    columns = ["security code", "security name", 'revenue',
               'income', 'expenditure', 'profit', 'eps', "year", "quartile"]
    metrics = ['revenue', 'income', 'expenditure', 'profit', 'eps']
    eps_basic = "basic for discontinued & continuing operation"
    eps_diluted = "diluted for discontinued & continuing operation"

    def create_driver():
        """
        Creates a headless Chrome Driver.

        Returns
        --------
        driver : driver
            chrome web driver.
        """
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument("--headless")
        chromeOptions.add_experimental_option(
            "prefs", {"download.default_directory": path})
        # NOTE(review): passing the driver path positionally is the
        # Selenium 3 calling convention -- confirm against the installed
        # Selenium version.
        return webdriver.Chrome(
            ChromeDriverManager().install(), options=chromeOptions)

    def pick_columns(cols, metric):
        """
        Return the (lower-cased) column labels that hold ``metric``.

        Labels containing the metric name are the fallback; a more specific
        label is preferred when the table has one.
        """
        fallback = [c for c in cols if metric in c]
        if metric == "income":
            preferred = [c for c in fallback if c == "total income"]
        elif metric == "profit":
            preferred = [c for c in fallback if c == "net profit"]
        elif metric == "expenditure":
            preferred = [c for c in cols if "expenses" in c]
        elif metric == "eps":
            preferred = [c for c in cols
                         if eps_basic in c or eps_diluted in c]
        elif metric == "revenue":
            preferred = [c for c in cols if "sales" in c]
        else:
            preferred = []
        return preferred or fallback

    def download():
        """
        extracts the data from the pages and saves to a csv file.
        """
        # Rows are collected in a list and turned into a frame once;
        # DataFrame.append no longer exists in pandas 2.x.
        records = []
        for q in quarters:
            try:
                url = "https://www.bseindia.com/corporates/results.aspx?Code=" + \
                    str(code) + "&Company=" + str(name) + \
                    "&qtr=" + str(q) + "&RType=D"
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, "lxml")

                node = soup.find(
                    "table", attrs={"id": "ContentPlaceHolder1_tbl_typeID"})
                if node is None:
                    # The page has no results table for this qtr value.
                    continue

                table = pd.read_html(StringIO(str(node)))[0]
                # Keep the label column and the first value column, then
                # turn the labels into (lower-cased) column headers.
                table = table[[0, 1]].dropna().transpose()
                table.columns = [c.lower() for c in table.iloc[0]]
                table = table[1:].drop(columns=["description"])

                date = pd.to_datetime(table.iloc[0]["date begin"])
                row = {
                    "security name": name,
                    "security code": code,
                    "year": date.year,
                    "quartile": (date.month - 1) // 3 + 1,
                }
                cols = list(table.columns)
                for metric in metrics:
                    try:
                        chosen = pick_columns(cols, metric)
                        row[metric] = table[chosen].values[0][0]
                    except Exception:
                        # Metric not present on this page: leave it blank.
                        row[metric] = ""
                        traceback.print_exc()
                records.append(row)
            except Exception:
                # Best effort: report the failure and move on to the next
                # quarter. KeyboardInterrupt is deliberately not caught.
                traceback.print_exc()
        code_df = pd.DataFrame(records, columns=columns)
        code_df.to_csv(os.path.join(path, str(code) + ".csv"), index=False)

    driver = create_driver()
    try:
        download()
    finally:
        # Always release the browser process, even if scraping failed.
        driver.quit()

In [2]:
def download_from_money_control(code):
    """
    Scrape quarterly EPS values from moneycontrol.com for one company and
    use them to fill missing ``eps`` values in ``Data/Revenue/<code>.csv``
    (resolved against the current working directory).

    Parameters
    ----------
    code : string
        security code of the company; typed into the site's search box and
        used as the name of the revenue csv file.

    Methods:
    --------

    create_driver : creates the chrome driver.

    download : collects (year, quartile, eps) rows from the result pages.

    modify_save : merges the scraped eps into the revenue csv file.

    Notes
    -----
    Every row of the existing revenue file is kept; an ``eps`` already
    present in the file takes precedence over the scraped value.
    """
    # Same directory that download_revenue_profit writes its csv files to.
    revenuepath = os.path.join(os.getcwd(), "Data", "Revenue")
    eps_columns = ["year", "quartile", "eps"]

    def create_driver():
        """
        Creates a Chrome Driver (headless mode is not enabled here).

        Returns
        --------
        driver : driver
            chrome web driver.
        """
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_experimental_option(
            "prefs", {"download.default_directory": revenuepath})
        # NOTE(review): passing the driver path positionally is the
        # Selenium 3 calling convention -- confirm against the installed
        # Selenium version.
        return webdriver.Chrome(
            ChromeDriverManager().install(), options=chromeOptions)

    def download():
        """
        Walks the quarterly result pages and collects the eps values.

        Returns
        --------
        df : pandas.DataFrame
            columns "year", "quartile" and "eps"; eps is NaN where the
            cell text is not a number.
        """
        url = "https://www.moneycontrol.com/"
        # The home page is requested twice with a short pause in between.
        # NOTE(review): presumably the first request lands on an
        # interstitial page -- confirm.
        driver.get(url)
        time.sleep(1)
        driver.get(url)

        search = driver.find_element(By.XPATH, '//*[@id="search_str"]')
        search.clear()
        search.send_keys(str(code))
        search.send_keys(Keys.RETURN)

        quaterly = "/html/body/div[10]/div[2]/div[2]/div[2]/div[18]/div[2]/div/div[2]/div[2]/div[2]/div[2]/div/ul/li[3]/a"
        myurl = driver.find_element(By.XPATH, quaterly).get_attribute("href")

        table_rows = "/html/body/section/div[2]/div/div[2]/div[2]/div/div[2]/div/div[1]/table/tbody"
        header_row = table_rows + "/tr[1]"
        # NOTE(review): row 34 is assumed to be the EPS row -- verify
        # against the live page layout.
        values_row = table_rows + "/tr[34]"
        datetime_format = "%b '%y"

        records = []
        for i in range(1, 12):
            # Page number i is inserted in front of the URL fragment.
            driver.get(myurl.replace("#", "/" + str(i) + "#"))
            head = driver.find_element(By.XPATH, header_row)
            values = driver.find_element(By.XPATH, values_row)
            date_cells = head.find_elements(By.TAG_NAME, "td")[1:-1]
            eps_cells = values.find_elements(By.TAG_NAME, "td")[1:-1]
            for date_cell, eps_cell in zip(date_cells, eps_cells):
                date = datetime.strptime(
                    date_cell.text.strip(), datetime_format)
                try:
                    # Thousands separators would make float() fail.
                    eps = float(eps_cell.text.replace(",", ""))
                except ValueError:
                    eps = np.nan
                records.append({
                    "year": date.year,
                    "quartile": (date.month - 1) // 3 + 1,
                    "eps": eps,
                })
        return pd.DataFrame(records, columns=eps_columns)

    def modify_save(df):
        """
        Fills missing eps values of the revenue csv file from ``df`` and
        writes the file back in place.
        """
        if df.empty:
            # Nothing was scraped: leave the revenue file untouched.
            return
        csv_path = os.path.join(revenuepath, str(code) + ".csv")
        rev = pd.read_csv(csv_path)
        # One row per quarter, so the merge cannot multiply revenue rows.
        scraped = df.drop_duplicates(subset=["year", "quartile"])
        # Left merge: quarters without a scraped value must not be dropped
        # from the file that is overwritten below.
        mod = pd.merge(rev, scraped, on=["year", "quartile"], how="left")
        # Non-numeric eps text is treated as missing.
        existing = pd.to_numeric(mod["eps_x"], errors="coerce")
        fetched = pd.to_numeric(mod["eps_y"], errors="coerce")
        mod["eps"] = existing.fillna(fetched)
        mod = mod.drop(columns=["eps_x", "eps_y"])
        mod.to_csv(csv_path, index=False)

    driver = create_driver()
    try:
        df = download()
    finally:
        # Always release the browser process, even if scraping failed.
        driver.quit()
    modify_save(df)

In [None]:
# download_revenue_profit("500002","ABB India Limited")

In [None]:
# download_from_money_control("500002")