In [3]:
import requests
import pandas as pd
import concurrent.futures
import time
from bs4 import BeautifulSoup

class IpoScrape:
    """Scrape the Yahoo Finance IPO calendar for every day in a date range.

    One URL is built per calendar day between ``start_date`` and ``end_date``
    (both inclusive). ``scrape()`` fetches those pages with a thread pool and
    stores one list of row dictionaries per URL in ``ipo_info``;
    ``get_dataframe()`` flattens that into a single pandas DataFrame.
    """

    # Keys of each scraped record, in the order the table cells are read.
    _COLUMNS = ('Symbol', 'Name', 'Exchange', 'Date', 'PriceRange', 'Price',
                'Currency', 'Shares', 'Actions')
    # Pause taken by a worker before each request (presumably to throttle the
    # scraper -- confirm before lowering it).
    _REQUEST_DELAY_SECONDS = 2
    # Upper bound on how long a single HTTP request may wait for the server;
    # without it a stalled connection would block a worker thread forever.
    _REQUEST_TIMEOUT_SECONDS = 30
    # Number of pages fetched concurrently.
    _MAX_WORKERS = 5

    def __init__(self, start_date: str, end_date: str) -> None:
        """
        Build the list of dates and urls to scrape.

        Args:
            start_date: First day of the range, in any format accepted by
                ``pandas.date_range`` (e.g. ``'2022-07-01'``).
            end_date: Last day of the range (inclusive), same format.
        """
        # Tag name and CSS class used to locate the IPO table in each page.
        self.type = 'table'
        self.classtype = 'W(100%)'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
        }
        self.baseurl = 'https://finance.yahoo.com/calendar/ipo?day='
        self.dates = self.__update_dates(start_date, end_date)
        self.urls = self.__update_urls()
        # Filled by scrape(): one list of record dicts per url, in url order.
        self.ipo_info = []

    def __update_dates(self, start_date: str, end_date: str) -> list:
        """
        Private method to build the list of dates to be scraped.

        Returns:
            One ``'YYYY-MM-DD'`` string per day from ``start_date`` to
            ``end_date``, both inclusive.
        """
        return [str(day.date()) for day in pd.date_range(start=start_date, end=end_date)]

    def __update_urls(self) -> list:
        """
        Private method to build the list of urls to be scraped
        (``baseurl`` followed by each entry of ``dates``).
        """
        return [self.baseurl + day for day in self.dates]

    def __scrape_ipo_info(self, url: str) -> list:
        """
        Private method to scrape IPO information from a single url.

        Args:
            url: Calendar page to download.

        Returns:
            A list of dictionaries keyed by ``_COLUMNS``, one per table row
            that has enough cells; an empty list when the page contains no
            matching table.

        Raises:
            requests.exceptions.RequestException: On connection failure,
                timeout, or a non-success HTTP status.
        """
        time.sleep(self._REQUEST_DELAY_SECONDS)
        with requests.Session() as session:
            session.headers.update(self.headers)
            response = session.get(url, timeout=self._REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

        ipo_table = soup.find(self.type, {'class': self.classtype})
        if ipo_table is None:
            return []

        results = []
        for row in ipo_table.find_all('tr'):
            cells = row.find_all('td')
            # Header rows have no <td>; rows with too few cells cannot be
            # mapped onto the expected columns, so skip them rather than
            # letting an IndexError abort the whole scrape.
            if len(cells) < len(self._COLUMNS):
                continue
            # zip stops at the last expected column, ignoring extra cells.
            results.append({column: cell.text for column, cell in zip(self._COLUMNS, cells)})
        return results

    def scrape(self) -> None:
        """
        Method to scrape IPO information from all urls using threads.

        Stores one list of records per url in ``ipo_info``, in the same
        order as ``urls``. An exception raised while fetching any url
        propagates to the caller and ``ipo_info`` is left unchanged.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=self._MAX_WORKERS) as executor:
            results = list(executor.map(self.__scrape_ipo_info, self.urls))
        self.ipo_info = results

    def get_results(self) -> list:
        """
        Method to return the scraped IPO information
        (a list with one list of record dicts per url).
        """
        return self.ipo_info

    @property
    def urls_(self) -> list:
        """
        Property returning the list of urls to be scraped.
        """
        return self.urls

    def get_dataframe(self) -> pd.DataFrame:
        """
        Convert the scraped results to a single pandas DataFrame.

        Returns:
            One row per scraped record, with the columns listed in
            ``_COLUMNS``. When nothing has been scraped the frame is empty
            but still carries those columns.
        """
        records = [record for page_records in self.ipo_info for record in page_records]
        return pd.DataFrame(records, columns=list(self._COLUMNS))


In [4]:
# Build a scraper for the chosen date range, then download every calendar
# page (this performs network requests).
ipodata = IpoScrape(start_date='2022-07-01', end_date='2022-07-02')
ipodata.scrape()

In [6]:
# Show the scraped records as a DataFrame (the value of this last expression is the cell output).
ipodata.get_dataframe()

Unnamed: 0,Symbol,Name,Exchange,Date,PriceRange,Price,Currency,Shares,Actions
0,ARKO.JK,PT Arkora Hydro Tbk,Jakarta,"Jun 20, 2022",0.02 - 0.02,300.0,IDR,579900000,Priced
1,CHEM.JK,PT Chemstar Indonesia Tbk,Jakarta,"Jun 20, 2022",0.01 - 0.01,150.0,IDR,500000000,Priced
2,TRGU.JK,PT Cerestar Indonesia Tbk,Jakarta,"Jun 17, 2022",0.01 - 0.01,210.0,IDR,1500000000,Priced
3,SWID.JK,PT Saraswanti Indoland Develop,Jakarta,"Jun 17, 2022",0.01 - 0.01,200.0,IDR,340000000,Priced
4,ALTE.L,Alteration Earth PLC,LSE,"Jun 17, 2022",0.12 - 0.12,0.1,GBP,9000000,Priced
5,ALTE.L,Alteration Earth PLC,LSE,"Jun 17, 2022",0.05 - 0.05,0.04,GBP,8999998,Priced
6,OCN.AX,Oceana Lithium Ltd,ASX,"Apr 04, 2022",0.15 - 0.15,0.2,AUD,30000000,Priced
7,GEV.PR,GEVORKYAN as,Prague,"Jun 01, 2022",10.79 - 13.14,248.0,CZK,2932800,Priced
