# **Scraping PSE End of Day Data**

The goal of this project is to automate scraping end-of-day data for Blue Chip and Class A stocks in the Philippine Stock Exchange Index (PSEI). The data is sourced from Investagrams, a platform that provides stock market analytics and historical data.

#### *Companies whose data is scraped*

1. Ayala Corporation - **AC**
2. Aboitiz Equity Ventures, Inc. - **AEV**
3. Alliance Global Group, Inc. - **AGI**
4. Ayala Land, Inc. - **ALI**
5. Aboitiz Power Corporation - **AP**
6. BDO Unibank, Inc. - **BDO**
7. Belle Corporation - **BEL**
8. Bloomberry Resorts Corporation - **BLOOM**
9. Bank of the Philippine Islands - **BPI**
10. Cosco Capital, Inc. - **COSCO**
11. Century Properties Group, Inc. - **CPG**
12. DMCI Holdings, Inc. - **DMC**
13. D&L Industries, Inc. - **DNL**
14. Empire East Land Holdings, Inc. - **ELI**
15. East West Banking Corporation - **EW**
16. First Gen Corporation - **FGEN**
17. Globe Telecom, Inc. - **GLO**
18. GT Capital Holdings, Inc. - **GTCAP**
19. International Container Terminal Services, Inc. - **ICT**
20. Jollibee Foods Corporation - **JFC**
21. JG Summit Holdings, Inc. - **JGS**
22. LT Group, Inc. - **LTG**
23. Metropolitan Bank & Trust Company - **MBT**
24. Megaworld Corporation - **MEG**
25. Manila Electric Company (MERALCO) - **MER**
26. Manila Water Company, Inc. - **MWC**
27. Megawide Construction Corporation - **MWIDE**
28. Nickel Asia Corporation - **NIKL**
29. Oriental Vision Mining Philippines Corp. - **OV**
30. Philippine Business Bank, Inc. - **PBB**
31. Petron Corporation - **PCOR**
32. Puregold Price Club, Inc. - **PGOLD**
33. Philippine National Bank - **PNB**
34. Philex Mining Corporation - **PX**
35. Rizal Commercial Banking Corporation - **RCB**
36. RFM Corporation - **RFM**
37. Robinsons Land Corporation - **RLC**
38. Robinsons Retail Holdings, Inc. - **RRHI**
39. Semirara Mining and Power Corporation - **SCC**
40. Security Bank Corporation - **SECB**
41. Shang Properties, Inc. - **SHNG**
42. SM Investments Corporation - **SM**
43. San Miguel Corporation - **SMC**
44. SM Prime Holdings, Inc. - **SMPH**
45. PLDT Inc. - **TEL**
46. Union Bank of the Philippines - **UBP**
47. Universal Robina Corporation - **URC**
48. Vista Land & Lifescapes, Inc. - **VLL**


In [1]:
import asyncio
from pathlib import Path

import nest_asyncio
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

# Patch asyncio up front; presumably needed so coroutines can be driven to
# completion from inside the notebook's already-running event loop.
nest_asyncio.apply()

In [None]:
# Ticker symbols to scrape, in alphabetical order; each one is appended to the
# Investagrams stock URL by the scraping loop.
pse_stocks = [
    'AC', 'AEV', 'AGI', 'ALI', 'AP', 'BDO', 'BEL', 'BLOOM',
    'BPI', 'COSCO', 'CPG', 'DMC', 'DNL', 'ELI', 'EW', 'FGEN',
    'GLO', 'GTCAP', 'ICT', 'JFC', 'JGS', 'LTG', 'MBT', 'MEG',
    'MER', 'MWC', 'MWIDE', 'NIKL', 'OV', 'PBB', 'PCOR', 'PGOLD',
    'PNB', 'PX', 'RCB', 'RFM', 'RLC', 'RRHI', 'SCC', 'SECB',
    'SHNG', 'SM', 'SMC', 'SMPH', 'TEL', 'UBP', 'URC', 'VLL',
]

In [None]:
async def scraping_data(url):
    """Render a stock page in headless Chromium and return its historical table.

    The page at ``url`` is opened, the "Historical" tab link is clicked, and
    once ``#assetInfoDivContainer`` is present the rendered HTML is parsed
    with BeautifulSoup.

    Args:
        url: Address of the stock page to load.

    Returns:
        The ``<table>`` element found inside ``div#stockHistoricalData``, or
        ``None`` when the page could not be scraped or that element is absent.
        Failures are reported with ``print`` and never raised to the caller.
    """
    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()

                await page.goto(url)
                await page.click("a.nav-link[data-ng-click=\"switchTab('Historical')\"]")
                # Fixed pause after the tab switch before waiting on the container.
                await asyncio.sleep(2)
                await page.wait_for_selector("#assetInfoDivContainer", timeout=20000)

                rendered_html = await page.content()
            finally:
                # Close the browser while the Playwright context is still
                # alive; closing it after the context has exited is too late.
                await browser.close()

        soup = BeautifulSoup(rendered_html, "html.parser")

    except Exception as e:
        # Best-effort scrape: report the problem and signal failure with None
        # instead of dereferencing a missing document below.
        print(f"An error occurred: {e}")
        return None

    container = soup.find('div', {'id': 'stockHistoricalData'})
    if container is None:
        return None
    return container.find('table')

def scrape_with_playwright(url):
    """Start ``scraping_data(url)`` in whichever way the current context allows.

    Args:
        url: Address of the stock page to scrape.

    Returns:
        If an event loop is already running in this thread, an
        ``asyncio.Task`` wrapping the scrape, which the caller must still
        drive to completion. Otherwise the scrape is run to completion here
        and its result is returned directly.
    """
    try:
        # get_running_loop() avoids the deprecated implicit loop creation of
        # asyncio.get_event_loop() when no loop is running.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so block until the scrape finishes.
        return asyncio.run(scraping_data(url))
    return loop.create_task(scraping_data(url))


# Column names, in the order the cells appear in each row of the historical table.
_EOD_COLUMNS = (
    'Date', 'Close', 'Change', 'Change Percent', 'Volume', 'Shares',
    'Net Foreign', 'Low', 'High', 'Open', 'Trades',
)
# Price columns that are converted from comma-formatted text to float.
_PRICE_COLUMNS = ('Close', 'Low', 'High', 'Open')


def _parse_eod_rows(table):
    """Return one ``{column: text}`` dict per complete row in ``table``'s tbody."""
    tbody = table.find('tbody')
    if tbody is None:
        return []

    rows = []
    for trow in tbody.find_all('tr'):
        cells = trow.find_all('td')
        if len(cells) < len(_EOD_COLUMNS):
            # Rows without a full set of cells cannot be mapped to the columns.
            continue
        rows.append({name: cell.getText() for name, cell in zip(_EOD_COLUMNS, cells)})
    return rows


# to_csv does not create missing directories, so make sure the target exists.
Path('data').mkdir(parents=True, exist_ok=True)

for pstocks in pse_stocks:
    try:
        table_html = scrape_with_playwright(f'https://www.investagrams.com/Stock/PSE:{pstocks}')
        if isinstance(table_html, asyncio.Future):
            # A Task is returned when a loop is already running (notebook);
            # driving it from here relies on the nest_asyncio.apply() call
            # made when the notebook's imports were executed.
            table_html = asyncio.run(table_html)
    except Exception as e:
        # One failing symbol must not abort the whole batch.
        print(f"Skipping {pstocks}: scraping failed ({e})")
        continue

    if table_html is None:
        print(f"Skipping {pstocks}: historical data table not found")
        continue

    end_of_data_list = _parse_eod_rows(table_html)
    if not end_of_data_list:
        print(f"Skipping {pstocks}: no end-of-day rows found")
        continue

    df = pd.DataFrame(end_of_data_list)
    for column in _PRICE_COLUMNS:
        # Strip thousands separators before converting to float.
        df[column] = df[column].replace(',', '', regex=True).astype(float)
    df['Date'] = pd.to_datetime(df['Date'])

    df.to_csv(f'data/{pstocks}.csv')