# Scraping Market Info in Argentina using Selenium and lxml

In [1]:
import datetime
import re

import investpy
import lxml
import lxml.html
import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

#For Options
def options_chain():
    """Scrape the stock-options table published by Allaria Ledesma.

    Downloads ``https://www.allaria.com.ar/es/Opcion#0`` and reads every
    other row (1st, 3rd, 5th, ...) of the ``tableOpcionesAcciones`` table.

    Every numeric cell except the strike is parsed by deleting all ``.`` and
    ``,`` characters and dividing the result by 100.
    NOTE(review): this presumably relies on the site always printing exactly
    two decimals -- confirm, in particular for the quantity columns.

    A row with a missing cell or an unparseable number is skipped as a whole,
    so all columns of the result always have the same length and later rows
    are still read.

    Returns:
        pandas.DataFrame with the columns Ticker, Stock, Type, Expiration,
        Strike, Last price, Bid Q, Bid price, Ask price, Ask Q, Volume(Q)
        and Volume($); one record per successfully parsed row.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (this includes hitting the 30 second timeout).
    """
    columns = ['Ticker', 'Stock', 'Type', 'Expiration', 'Strike',
               'Last price', 'Bid Q', 'Bid price', 'Ask price', 'Ask Q',
               'Volume(Q)', 'Volume($)']

    def first(row, rel_path):
        # IndexError here means the cell is absent from this row.
        return row.xpath(rel_path)[0]

    def scaled(text, strip_pattern):
        # Drop separators, then shift two decimals back into place.
        return float(re.sub(strip_pattern, "", text)) / 100

    def volume(text):
        # An empty volume cell is reported as 0.
        digits = re.sub(r"[\r\n., ]", "", text)
        if digits == '':
            return 0
        return float(digits) / 100

    # Data from Allaria Ledesma. The timeout keeps a stalled connection from
    # blocking the caller forever.
    page = requests.get('https://www.allaria.com.ar/es/Opcion#0', timeout=30)
    doc = lxml.html.fromstring(page.content)

    # Fetch the rows once and use row-relative paths afterwards instead of
    # re-running a document-wide XPath for every single cell.
    rows = doc.xpath('//*[@id="tableOpcionesAcciones"]/tbody/tr')

    records = []
    # Only the odd-numbered rows (1-based) are read.
    # NOTE(review): presumably the even rows are non-data rows -- confirm.
    for row in rows[::2]:
        try:
            record = {
                'Ticker': first(row, 'td[2]/a/strong/text()'),
                'Stock': re.sub(r"[,. ]", "", first(row, 'td[1]/text()')),
                'Type': re.sub(r" ", "", first(row, 'td[3]/text()')),
                'Expiration': re.sub(r" ", "", first(row, 'td[4]/text()')),
                'Strike': float(re.sub(r"[\r\n ]", "",
                                       first(row, 'td[5]/text()'))),
                'Last price': scaled(first(row, 'td[6]/strong/text()'),
                                     r"[,. ]"),
                'Bid Q': scaled(first(row, 'td[8]/text()'), r"[.,]"),
                'Bid price': scaled(first(row, 'td[9]/text()'), r"[.,]"),
                'Ask price': scaled(first(row, 'td[10]/text()'), r"[.,]"),
                'Ask Q': scaled(first(row, 'td[11]/text()'), r"[.,]"),
                'Volume(Q)': volume(first(row, 'td[12]/text()')),
                'Volume($)': volume(first(row, 'td[13]/text()')),
            }
        except (IndexError, ValueError):
            # Missing cell or non-numeric text: drop this row only.
            continue
        records.append(record)

    return pd.DataFrame(records, columns=columns)

In [2]:
#For local Stocks and Cedears historical data
def get_historical(stock, start, end=None, frecuency='Daily'):
    """Fetch historical prices for an Argentine stock or CEDEAR via investpy.

    Args:
        stock: symbol passed to investpy as ``stock``.
        start: first date of the range, passed as ``from_date``. The default
            ``end`` is formatted as ``dd/mm/YYYY``.
            NOTE(review): presumably ``start`` must use the same format --
            confirm against the investpy documentation.
        end: last date of the range, passed as ``to_date``. When omitted (or
            ``None``) today's date is used, computed at call time so that a
            long-running session does not keep the date on which the
            function was defined.
        frecuency: value passed to investpy as ``interval``.

    Returns:
        Whatever ``investpy.get_stock_historical_data`` returns for the
        request.
    """
    if end is None:
        end = datetime.datetime.today().strftime('%d/%m/%Y')

    return investpy.get_stock_historical_data(stock=stock,
                                              country='argentina',
                                              from_date=start,
                                              to_date=end,
                                              interval=frecuency)

In [3]:
#For Interest Rates
def interest_rates(driver_path='/Users/federicoglancszpigel/Desktop/chromedriver',
                   max_attempts=5):
    """Scrape interest rates from El Cronista's "tasas" page.

    Opens ``https://www.cronista.com/MercadosOnline/tasas.html`` in headless
    Chrome and reads the 2nd, 3rd and 4th ``div`` inside the element with id
    ``mercados-list``: the first ``li`` is taken as the rate name and the
    third ``li`` as its value.

    Values are parsed by deleting every ``.`` and ``,`` and dividing by 100.
    NOTE(review): presumably relies on the site printing exactly two
    decimals -- confirm.

    Args:
        driver_path: path of the chromedriver executable.
        max_attempts: how many times the page load is tried before giving up.

    Returns:
        pandas.DataFrame with the columns 'Rate name' and 'Rate value'. An
        entry that is missing from the page or whose value is not numeric is
        skipped.

    Raises:
        RuntimeError: if the browser is not on the expected URL after
            ``max_attempts`` load attempts.
    """
    web_page = 'https://www.cronista.com/MercadosOnline/tasas.html'

    option = Options()
    option.add_argument('headless')
    browser = webdriver.Chrome(driver_path, options=option)

    def load(url):
        # Bounded retry: an unbounded loop never ends when the site
        # redirects or stays unreachable.
        last_error = None
        for _ in range(max_attempts):
            if browser.current_url == url:
                return
            try:
                browser.get(url)
            except WebDriverException as err:
                last_error = err
        if browser.current_url != url:
            message = 'Could not load {} after {} attempts'.format(
                url, max_attempts)
            raise RuntimeError(message) from last_error

    records = []
    try:
        load(web_page)
        for t in range(2, 5):
            base = '//*[@id="mercados-list"]/div[' + str(t) + ']/ul/'
            try:
                name = browser.find_element(By.XPATH, base + 'li[1]').text
                raw_value = browser.find_element(By.XPATH, base + 'li[3]').text
                value = float(re.sub(r"[.,]", "", raw_value)) / 100
            except (NoSuchElementException, ValueError):
                # Keep name and value in step: skip the entry entirely.
                continue
            records.append({'Rate name': name, 'Rate value': value})
    finally:
        # quit() also stops the chromedriver process; runs on errors too.
        browser.quit()

    return pd.DataFrame(records, columns=['Rate name', 'Rate value'])

In [4]:
#CEDEAR's Chain
def cedears_chain(driver_path='/Users/federicoglancszpigel/Desktop/chromedriver',
                  max_attempts=5):
    """Scrape CEDEAR quotes and join them with conversion-ratio data.

    Two pages are visited with one headless Chrome session:

    1. The invertironline.com CEDEAR quotes page. The 4th option of the
       ``cotizaciones_length`` selector is clicked first
       (NOTE(review): presumably "show all rows" -- confirm), then the rows
       of the ``cotizaciones`` table are read one by one.
    2. ``sinelefantesblancos.com.ar/inversiones/CEDEAR.php``, from whose
       second table (rows 2 onwards) the ticker, conversion rate and
       dividend frequency columns are read.

    Reading a table stops at the first row index for which an expected cell
    does not exist (at most rows up to index 999 are read). A quote row
    whose numbers cannot be parsed is skipped, so all columns keep the same
    length. Numbers are parsed by deleting every ``.`` and ``,`` and
    dividing by 100.
    NOTE(review): presumably relies on exactly two printed decimals.

    Args:
        driver_path: path of the chromedriver executable.
        max_attempts: how many times each page load is tried.

    Returns:
        pandas.DataFrame: the quotes (Ticker, Company, Last Price, Change,
        Open, High, Low, Volume($)) merged on 'Ticker' with Conv.Rate and
        Dividend Freq. ``pd.merge`` is called with its default join, so
        only tickers present on both pages are kept.

    Raises:
        RuntimeError: if a page is not reached after ``max_attempts`` tries.
    """
    quotes_url = 'https://www.invertironline.com/mercado/cotizaciones/argentina/acciones/cedears'
    ratios_url = 'http://www.sinelefantesblancos.com.ar/inversiones/CEDEAR.php'

    option = Options()
    option.add_argument('headless')
    browser = webdriver.Chrome(driver_path, options=option)

    def load(url):
        # Bounded retry: an unbounded loop never ends when the site
        # redirects or stays unreachable.
        last_error = None
        for _ in range(max_attempts):
            if browser.current_url == url:
                return
            try:
                browser.get(url)
            except WebDriverException as err:
                last_error = err
        if browser.current_url != url:
            message = 'Could not load {} after {} attempts'.format(
                url, max_attempts)
            raise RuntimeError(message) from last_error

    def cell_text(xpath):
        return browser.find_element(By.XPATH, xpath).text

    def hundredths(text):
        return float(re.sub(r"[,.]", "", text)) / 100

    quotes = []
    ratios = []
    try:
        load(quotes_url)
        browser.find_element(
            By.XPATH,
            '//*[@id="cotizaciones_length"]/label/select/option[4]').click()

        for t in range(1, 1000):
            row = '//*[@id="cotizaciones"]/tbody/tr[' + str(t) + ']'
            try:
                ticker = cell_text(row + '/td[1]/a/b')
                company = cell_text(row + '/td[1]/a/span')
                price = cell_text(row + '/td[2]')
                variation = cell_text(row + '/td[3]')
                p_open = cell_text(row + '/td[8]')
                p_low = cell_text(row + '/td[9]')
                p_high = cell_text(row + '/td[10]')
                volume_money = cell_text(row + '/td[12]')
            except NoSuchElementException:
                # No such row/cell: treated as the end of the table.
                break
            try:
                quotes.append({'Ticker': ticker,
                               'Company': company,
                               'Last Price': hundredths(price),
                               'Change': variation,
                               'Open': hundredths(p_open),
                               'High': hundredths(p_high),
                               'Low': hundredths(p_low),
                               'Volume($)': hundredths(volume_money)})
            except ValueError:
                # Non-numeric cell: drop this row, keep reading the rest.
                continue

        load(ratios_url)
        for t in range(2, 1000):
            row = '/html/body/table[2]/tbody/tr[' + str(t) + ']'
            try:
                ratios.append({'Ticker': cell_text(row + '/td[3]'),
                               'Conv.Rate': cell_text(row + '/td[7]'),
                               'Dividend Freq': cell_text(row + '/td[8]')})
            except NoSuchElementException:
                break
    finally:
        # quit() also stops the chromedriver process; runs on errors too.
        browser.quit()

    quotes_frame = pd.DataFrame(quotes, columns=['Ticker', 'Company',
                                                 'Last Price', 'Change',
                                                 'Open', 'High', 'Low',
                                                 'Volume($)'])
    more_info = pd.DataFrame(ratios, columns=['Ticker', 'Conv.Rate',
                                              'Dividend Freq'])

    return pd.merge(quotes_frame, more_info, on='Ticker')

In [5]:
#Local Stocks Chain
def stocks_chain(driver_path='/Users/federicoglancszpigel/Desktop/chromedriver',
                 max_attempts=5):
    """Scrape local stock quotes from invertironline.com ("panel general").

    Opens the page in headless Chrome, clicks the 4th option of the
    ``cotizaciones_length`` selector (NOTE(review): presumably "show all
    rows" -- confirm) and reads the ``cotizaciones`` table row by row.

    Reading stops at the first row index for which an expected cell does
    not exist; at most rows 1 to 199 are read. A row whose numbers cannot be
    parsed is skipped, so all columns keep the same length. Numbers are
    parsed by deleting every ``.`` and ``,`` and dividing by 100.
    NOTE(review): presumably relies on exactly two printed decimals.

    Args:
        driver_path: path of the chromedriver executable.
        max_attempts: how many times the page load is tried.

    Returns:
        pandas.DataFrame with the columns Ticker, Company, Last Price,
        Change, Open, High, Low and Volume($).

    Raises:
        RuntimeError: if the page is not reached after ``max_attempts``
            tries.
    """
    web_page = 'https://www.invertironline.com/mercado/cotizaciones/argentina/acciones/panel-general'

    option = Options()
    option.add_argument('headless')
    browser = webdriver.Chrome(driver_path, options=option)

    def load(url):
        # Bounded retry: an unbounded loop never ends when the site
        # redirects or stays unreachable.
        last_error = None
        for _ in range(max_attempts):
            if browser.current_url == url:
                return
            try:
                browser.get(url)
            except WebDriverException as err:
                last_error = err
        if browser.current_url != url:
            message = 'Could not load {} after {} attempts'.format(
                url, max_attempts)
            raise RuntimeError(message) from last_error

    def cell_text(xpath):
        return browser.find_element(By.XPATH, xpath).text

    def hundredths(text):
        return float(re.sub(r"[,.]", "", text)) / 100

    quotes = []
    try:
        load(web_page)
        browser.find_element(
            By.XPATH,
            '//*[@id="cotizaciones_length"]/label/select/option[4]').click()

        for t in range(1, 200):
            row = '//*[@id="cotizaciones"]/tbody/tr[' + str(t) + ']'
            try:
                ticker = cell_text(row + '/td[1]/a/b')
                company = cell_text(row + '/td[1]/a/span')
                price = cell_text(row + '/td[2]')
                variation = cell_text(row + '/td[3]')
                p_open = cell_text(row + '/td[8]')
                p_low = cell_text(row + '/td[9]')
                p_high = cell_text(row + '/td[10]')
                volume_money = cell_text(row + '/td[12]')
            except NoSuchElementException:
                # No such row/cell: treated as the end of the table.
                break
            try:
                quotes.append({'Ticker': ticker,
                               'Company': company,
                               'Last Price': hundredths(price),
                               'Change': variation,
                               'Open': hundredths(p_open),
                               'High': hundredths(p_high),
                               'Low': hundredths(p_low),
                               'Volume($)': hundredths(volume_money)})
            except ValueError:
                # Non-numeric cell: drop this row, keep reading the rest.
                continue
    finally:
        # The session is always shut down, including on errors.
        browser.quit()

    return pd.DataFrame(quotes, columns=['Ticker', 'Company', 'Last Price',
                                         'Change', 'Open', 'High', 'Low',
                                         'Volume($)'])

In [6]:
#Bonds Chain
def bonds_chain(driver_path='/Users/federicoglancszpigel/Desktop/chromedriver',
                max_attempts=5):
    """Scrape the bond table published at byma.com.ar/bonos-iamc.

    Opens the page in Chrome and reads the ``dataBonos`` table from its
    second row onwards (at most up to row 199). Reading stops at the first
    row index for which an expected cell does not exist. A row whose
    numbers cannot be parsed is skipped, so all columns keep the same
    length.

    Numeric cells have their ``,`` characters removed before conversion; a
    cell containing only ``-`` is reported as 0.

    Args:
        driver_path: path of the chromedriver executable.
        max_attempts: how many times the page load is tried.

    Returns:
        pandas.DataFrame with the columns Bond, Ticker, Accrued Interest,
        Last traded, Technical Value, Parity, Price, YTM, VR, MD and
        Category.

    Raises:
        RuntimeError: if the browser is not on the expected URL after
            ``max_attempts`` load attempts.
        selenium.common.exceptions.WebDriverException: errors raised while
            loading the page are not caught here.
    """
    web_page = 'https://www.byma.com.ar/bonos-iamc/'

    def to_number(text):
        if text == '-':
            return 0
        return float(re.sub(r"[,]", "", text))

    def cell_text(xpath):
        return browser.find_element(By.XPATH, xpath).text

    # No 'headless' argument is set, so Chrome opens a visible window.
    option = Options()
    browser = webdriver.Chrome(driver_path, options=option)

    records = []
    try:
        # Bounded retry: an unbounded loop never ends when the site
        # redirects to a different URL.
        for _ in range(max_attempts):
            if browser.current_url == web_page:
                break
            browser.get(web_page)
            # Element lookups wait up to 10 seconds from here on.
            browser.implicitly_wait(10)
        if browser.current_url != web_page:
            raise RuntimeError('Could not load {} after {} attempts'.format(
                web_page, max_attempts))

        for t in range(2, 200):
            row = '//*[@id="dataBonos"]/tbody/tr[' + str(t) + ']'
            try:
                bond = cell_text(row + '/td[1]')
                ticker = cell_text(row + '/td[2]')
                accrued_interest = cell_text(row + '/td[3]')
                last_traded = cell_text(row + '/td[4]')
                technical_v = cell_text(row + '/td[6]')
                parity = cell_text(row + '/td[7]')
                price = cell_text(row + '/td[8]')
                ytm = cell_text(row + '/td[9]')
                vr = cell_text(row + '/td[10]')
                md = cell_text(row + '/td[12]')
                category = cell_text(row + '/td[11]')
            except NoSuchElementException:
                # No such row/cell: treated as the end of the table.
                break
            try:
                records.append({'Bond': bond,
                                'Ticker': ticker,
                                'Accrued Interest': to_number(accrued_interest),
                                'Last traded': last_traded,
                                'Technical Value': to_number(technical_v),
                                'Parity': to_number(parity),
                                'Price': to_number(price),
                                'YTM': to_number(ytm),
                                'VR': to_number(vr),
                                'MD': to_number(md),
                                'Category': category})
            except ValueError:
                # Non-numeric cell: drop this row, keep reading the rest.
                continue
    finally:
        # quit() also stops the chromedriver process; runs on errors too.
        browser.quit()

    return pd.DataFrame(records, columns=['Bond', 'Ticker',
                                          'Accrued Interest', 'Last traded',
                                          'Technical Value', 'Parity',
                                          'Price', 'YTM', 'VR', 'MD',
                                          'Category'])