In [4]:
import pandas as pd
import numpy as np
import os
import time
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from selenium import webdriver
from selenium.webdriver.common.by import By
import re
from selenium.webdriver.common.keys import Keys

# Chrome configuration shared by every Selenium session opened in this notebook.
chrome_options = webdriver.ChromeOptions()

# Experimental options: switch off the automation extension and exclude the
# "enable-automation" switch.
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

# Command-line switches (kept in their original relative order).
chrome_options.add_argument('disable-infobars')
chrome_options.add_argument('--disable-notifications')
chrome_options.add_argument('--disable-popup-blocking')

In [7]:
class IL_index_symbol_extractor:
    """Scrape the constituents of a Tel Aviv index and translate them to tickers.

    Pipeline (see ``get_TESA_english_symbols``):

    1. read the Hebrew security names of the index from bizportal.co.il;
    2. look every name up on iknowfirst.co.il;
    3. look the names that are still non-ASCII up on il.investing.com;
    4. merge both passes and normalise a few ticker spellings.

    Every helper is a ``staticmethod``, so it can be called on the class or on
    an instance. The Selenium helpers rely on the module-level names
    ``webdriver``, ``By``, ``time`` and ``chrome_options``.
    """

    # NOTE(review): machine-specific absolute path. Override this attribute
    # (``IL_index_symbol_extractor.WEB_DRIVER_PATH = ...``) on other machines.
    WEB_DRIVER_PATH = '/Users/edenshrian/Desktop/Documents/Eden Shrian/Eden/Chrome Driver/chromedriver'
    FIRST_LOOKUP_URL = "https://iknowfirst.co.il/symbols"
    SECOND_LOOKUP_URL = "https://il.investing.com/equities/"
    # Index size -> digit spliced into the bizportal composition URL.
    INDEX_URL_CODES = {35: 4, 90: 6, 125: 3}
    # Text shown in the first result cell of the first site when nothing matches.
    NO_MATCH_TEXT = 'לא נמצאה התאמה'

    def __init__(self):
        # Placeholders; both are overwritten by ``get_symbols``.
        self.index_num_of_stocks = 0
        self.symbols = 0

    def get_symbols(self, index_num_of_stocks, path_to_save):
        """Resolve the tickers of one index, save them as CSV and return them.

        Args:
            index_num_of_stocks: index size; must be a key of ``INDEX_URL_CODES``.
            path_to_save: destination passed to ``DataFrame.to_csv``.

        Returns:
            The list of symbols, also stored on ``self.symbols``.
        """
        self.index_num_of_stocks = index_num_of_stocks
        self.symbols = IL_index_symbol_extractor.get_TESA_english_symbols(self.index_num_of_stocks)
        df = pd.DataFrame(self.symbols, columns=[f'Symbol TESA {self.index_num_of_stocks}'])
        df.to_csv(path_to_save)

        return self.symbols

    @staticmethod
    def _is_ascii(s):
        """Return True when ``s`` consists of ASCII characters only."""
        try:
            s.encode(encoding='utf-8').decode('ascii')
        except UnicodeDecodeError:
            return False
        return True

    @staticmethod
    def isEnglish(s):
        """Return ``s`` when it contains non-ASCII characters, otherwise ``None``.

        Despite the name this is a "keep if not English" filter; the contract
        is preserved because ``find_hebrew_names_in_List`` semantics rely on it.
        """
        if IL_index_symbol_extractor._is_ascii(s):
            return None
        return s

    @staticmethod
    def get_TESA_hebrew_symbols(index_number_in_tel_aviv):
        """Download the Hebrew security names of the requested index.

        Raises:
            KeyError: if the index is not a key of ``INDEX_URL_CODES``.
        """
        codes = IL_index_symbol_extractor.INDEX_URL_CODES
        if index_number_in_tel_aviv not in codes:
            # Same exception type as the plain dict lookup, but with a message
            # that names the supported values.
            raise KeyError(
                f"unsupported index {index_number_in_tel_aviv!r}; expected one of {sorted(codes)}"
            )

        df = pd.read_html(f"https://www.bizportal.co.il/capitalmarket/indices/indexcomposition/333{codes[index_number_in_tel_aviv]}3333")
        tel_aviv_symbols = list(df[0]['שם הנייר'].values)

        return IL_index_symbol_extractor.constraint_1(tel_aviv_symbols)

    @staticmethod
    def _rename_first(symbols, renames):
        """Replace, in place, the first occurrence of each ``old`` name by ``new``."""
        for old, new in renames:
            if old in symbols:
                symbols[symbols.index(old)] = new
        return symbols

    @staticmethod
    def constraint_1(tel_aviv_symbols):
        """Apply the hard-coded fix-ups to the scraped Hebrew names.

        One name is shortened and two names are removed (first occurrence
        only). The list is modified in place and also returned.
        """
        IL_index_symbol_extractor._rename_first(
            tel_aviv_symbols, (("תדיראן גרופ", 'תדיראן'),)
        )
        for unwanted in ("הבורסה לניע בתא", "פתאל החזקות"):
            if unwanted in tel_aviv_symbols:
                tel_aviv_symbols.remove(unwanted)

        return tel_aviv_symbols

    @staticmethod
    def convert_symbols_from_hebrew_to_english(tel_aviv_symbols):
        """Translate Hebrew security names to tickers using both lookup sites.

        Names the second site cannot resolve keep their Hebrew spelling in the
        result, mirroring what ``convert_first_symbols`` does for unmatched
        names. The replacements are kept positionally aligned with the
        unresolved names: pairing only the successful lookups with the Hebrew
        positions would shift every later ticker onto the wrong row and leave
        the last positions without a replacement.
        """
        symbols_partly = IL_index_symbol_extractor.convert_first_symbols(tel_aviv_symbols)
        hebrew_names = IL_index_symbol_extractor.find_hebrew_names_in_List(symbols_partly)
        tickers = IL_index_symbol_extractor._lookup_remaining_symbols(hebrew_names)

        replacements = []
        for name, ticker in zip(hebrew_names, tickers):
            if ticker is None:
                replacements.append(name)
            else:
                replacements.extend(IL_index_symbol_extractor.correct_symbol_names([ticker]))

        filter_dict = IL_index_symbol_extractor.get_filter_dict(symbols_partly, replacements)

        return IL_index_symbol_extractor.get_list_of_symbols_in_english(symbols_partly, filter_dict)

    @staticmethod
    def get_TESA_english_symbols(index_number_in_tel_aviv):
        """Return the tickers of the requested index (scrape, then translate)."""
        tel_aviv_symbols = IL_index_symbol_extractor.get_TESA_hebrew_symbols(index_number_in_tel_aviv)

        return IL_index_symbol_extractor.convert_symbols_from_hebrew_to_english(tel_aviv_symbols)

    @staticmethod
    def _open_driver(url):
        """Start Chrome with the shared options and load ``url``.

        The caller owns the returned driver and must ``quit()`` it.
        """
        # NOTE(review): ``executable_path`` is the Selenium 3 spelling; Selenium 4
        # deprecates it in favour of a ``Service`` object. Confirm the installed
        # version before upgrading.
        driver = webdriver.Chrome(
            executable_path=IL_index_symbol_extractor.WEB_DRIVER_PATH,
            options=chrome_options,
        )
        try:
            driver.get(url)
        except Exception:
            # Do not leave a browser running when the page cannot be loaded.
            driver.quit()
            raise
        return driver

    @staticmethod
    def convert_first_symbols(tel_aviv_symbols):
        """First pass: look every name up on ``FIRST_LOOKUP_URL``.

        Returns a list parallel to the input holding the text of the first
        result cell, or the original name when the site reports no match.
        ``constraint_2`` is applied to the result. An empty input returns
        ``[]`` without starting a browser.
        """
        if len(tel_aviv_symbols) == 0:
            return []

        search_box = "/html/body/div[2]/div[3]/div/div/div/div[1]/div/div/div/div[1]/div/div[2]/div/div[2]/label/input"
        first_cell = "/html/body/div[2]/div[3]/div/div/div/div[1]/div/div/div/div[1]/div/div[2]/div/table/tbody/tr/td[1]"

        symbols_in_english = []
        driver = IL_index_symbol_extractor._open_driver(IL_index_symbol_extractor.FIRST_LOOKUP_URL)
        try:
            for symbol in tel_aviv_symbols:
                # The element is located again after clear(), as in the
                # original flow, in case the page re-renders the input.
                driver.find_element(By.XPATH, search_box).clear()
                driver.find_element(By.XPATH, search_box).send_keys(symbol)
                # NOTE(review): only the first table row is read, and without
                # any wait. The saved TA-35 output lists 'FIBI.TA' twice, which
                # may be a partial-name match picked up here -- confirm.
                symbol_in_english = driver.find_element(By.XPATH, first_cell).text
                if symbol_in_english == IL_index_symbol_extractor.NO_MATCH_TEXT:
                    symbols_in_english.append(symbol)
                else:
                    symbols_in_english.append(symbol_in_english)
        finally:
            # Always release the browser, even when a lookup raises.
            driver.quit()

        return IL_index_symbol_extractor.constraint_2(symbols_in_english)

    @staticmethod
    def constraint_2(symbols_in_english):
        """Shorten four hard-coded Hebrew names left over from the first pass.

        Only the first occurrence of each name is replaced. The list is
        modified in place and also returned.
        """
        renames = (
            ("או פי סי אנרגיה", 'או.פי.סי'),
            ("אשטרום קבוצה", 'אשטרום'),
            ("עזריאלי קבוצה", 'עזריאלי'),
            ("ישראמקו יהש", 'ישראמקו'),
        )

        return IL_index_symbol_extractor._rename_first(symbols_in_english, renames)

    @staticmethod
    def find_hebrew_names_in_List(symbols_in_english):
        """Return, in order, the entries that contain non-ASCII characters."""
        return [
            symbol for symbol in symbols_in_english
            if not IL_index_symbol_extractor._is_ascii(symbol)
        ]

    @staticmethod
    def _lookup_remaining_symbols(res):
        """Second pass: look every name up on ``SECOND_LOOKUP_URL``.

        Returns a list parallel to ``res`` holding ``'<text>.TA'`` for a
        successful lookup and ``None`` for a failed one. An empty input
        returns ``[]`` without starting a browser.
        """
        names = list(res)
        if not names:
            return []

        search_box = "/html/body/div[5]/header/div[1]/div/div[3]/div[1]/input"
        search_button = "/html/body/div[5]/header/div[1]/div/div[3]/div[1]/label"
        first_hit = "/html/body/div[5]/section/div/div[2]/div[2]/div[1]/a/span[2]"

        tickers = []
        driver = IL_index_symbol_extractor._open_driver(IL_index_symbol_extractor.SECOND_LOOKUP_URL)
        try:
            for name in names:
                try:
                    driver.find_element(By.XPATH, search_box).clear()
                    driver.find_element(By.XPATH, search_box).send_keys(name)
                    time.sleep(2.0)
                    driver.find_element(By.XPATH, search_button).click()
                    time.sleep(2.0)
                    found = driver.find_element(By.XPATH, first_hit).text
                except Exception:
                    # Best effort: a failed lookup is recorded, not fatal.
                    # ``Exception`` (not a bare except) lets Ctrl-C through.
                    tickers.append(None)
                else:
                    tickers.append(found + '.TA')
        finally:
            driver.quit()

        return tickers

    @staticmethod
    def convert_remaining_symbols(res):
        """Look the remaining Hebrew names up on the second site.

        Returns:
            ``(symbols, symbols_to_find)``: the resolved ``'<text>.TA'``
            strings and the names whose lookup failed, each in input order.
            The two lists are NOT positionally aligned with ``res``.
        """
        names = list(res)
        tickers = IL_index_symbol_extractor._lookup_remaining_symbols(names)

        symbols = [ticker for ticker in tickers if ticker is not None]
        symbols_to_find = [name for name, ticker in zip(names, tickers) if ticker is None]

        return symbols, symbols_to_find

    @staticmethod
    def intersection(lst1, lst2):
        """Return the values of ``lst1`` that also occur in ``lst2`` (order kept)."""
        return [value for value in lst1 if value in lst2]

    @staticmethod
    def correct_symbol_names(symbols):
        """Rewrite over-long entries as ``symbol[2:6] + '.TA'``.

        Entries of at most 8 characters are returned unchanged.
        NOTE(review): presumably strips decoration the second site adds around
        the ticker -- confirm against real page output.
        """
        return [
            symbol[2:6] + '.TA' if len(symbol) > 8 else symbol
            for symbol in symbols
        ]

    @staticmethod
    def get_filter_dict(symbols_after_first_link, symbols_after_second):
        """Map the position of each non-ASCII entry to its replacement.

        ``symbols_after_second`` is paired positionally with the non-ASCII
        entries of ``symbols_after_first_link``, so it must hold exactly one
        item per such entry, in the same order.
        """
        indexes = [
            i for i, symbol in enumerate(symbols_after_first_link)
            if not IL_index_symbol_extractor._is_ascii(symbol)
        ]

        return dict(zip(indexes, symbols_after_second))

    @staticmethod
    def change_p_in_symbol(symbol):
        """Turn a trailing ``p`` in the part before the first dot into ``-L``.

        ``'DEDRp.TA'`` becomes ``'DEDR-L.TA'``. Symbols without a dot, empty
        strings and symbols with several dots are handled without raising and
        without losing the text after the first dot.
        """
        head, dot, tail = symbol.partition('.')
        if head.endswith('p'):
            return head[:-1] + '-L' + dot + tail

        return symbol

    @staticmethod
    def get_list_of_symbols_in_english(symbols_in_english, filter_dict):
        """Merge both lookup passes into the final symbol list.

        ASCII entries are kept. Every non-ASCII entry at position ``i`` is
        replaced by ``filter_dict[i]``. ``change_p_in_symbol`` is then applied
        to every entry and the first ``'PHOE1.TA'`` is renamed ``'PHOE.TA'``.

        Raises:
            KeyError: if a non-ASCII entry has no replacement in ``filter_dict``.
        """
        symbols = []
        for i, symbol in enumerate(symbols_in_english):
            if IL_index_symbol_extractor._is_ascii(symbol):
                symbols.append(symbol)
            else:
                symbols.append(filter_dict[i])

        symbols = [IL_index_symbol_extractor.change_p_in_symbol(symbol) for symbol in symbols]
        if "PHOE1.TA" in symbols:
            symbols[symbols.index("PHOE1.TA")] = 'PHOE.TA'

        return symbols

In [8]:
# One extractor instance is reused for every index.
IL_symbolizer = IL_index_symbol_extractor()
optional_indexes = [35,90,125]

# Resolve each index in turn; get_symbols also writes one CSV per index.
symbols = []
indexes_symbols_dict = {}
for index in optional_indexes:
    symbol_list = IL_symbolizer.get_symbols(
        index,
        f"/Users/edenshrian/Desktop/Documents/Eden Shrian/Eden/Projects/Israel/TA{index}/symbols_{index}.csv",
    )
    symbols += [symbol_list]
    # index -> list of symbols, in the same order as optional_indexes
    indexes_symbols_dict[index] = symbol_list

In [9]:
# Display the symbols collected for index 35.
indexes_symbols_dict[35]

['OPCE.TA',
 'OPK.TA',
 'ORA.TA',
 'ICL.TA',
 'ESLT.TA',
 'ALHE.TA',
 'ELTR.TA',
 'AMOT.TA',
 'ENRG.TA',
 'ARPT.TA',
 'ASHG.TA',
 'BEZQ.TA',
 'BIG.TA',
 'FIBI.TA',
 'DSCT.TA',
 'PHOE.TA',
 'HARL.TA',
 'ILCO.TA',
 'TSEM.TA',
 'TEVA.TA',
 'FIBI.TA',
 'LPSN.TA',
 'MVNE.TA',
 'MZTF.TA',
 'MTRN.TA',
 'MLSR.TA',
 'NVMI.TA',
 'NICE.TA',
 'AZRG.TA',
 'POLI.TA',
 'PRGO.TA',
 'SAE.TA',
 'STRS.TA',
 'SKBN.TA',
 'SPEN.TA']

In [10]:
# Display the symbols collected for index 90.
indexes_symbols_dict[90]

['AUGN.TA',
 'AUDC.TA',
 'AZRM.TA',
 'IES.TA',
 'IDIN.TA',
 'INRM.TA',
 'ALLT.TA',
 'ELLO.TA',
 'ASPF.TA',
 'ELCO.TA',
 'ELCRE.TA',
 'ECP.TA',
 'ELWS.TA',
 'ENLT.TA',
 'ENOG.TA',
 'APLP.TA',
 'AFRE.TA',
 'AQUA.TA',
 'EQTL.TA',
 'ARGO.TA',
 'BVC.TA',
 'BONS.TA',
 'ORL.TA',
 'BCNV.TA',
 'GZT.TA',
 'GILT.TA',
 'GNCL.TA',
 'GNRS.TA',
 'DORL.TA',
 'DIPL.TA',
 'DLEKG.TA',
 'DEDR-L.TA',
 'DLEA.TA',
 'DELT.TA',
 'DLTI.TA',
 'DIMRI.TA',
 'DANE.TA',
 'DNYA.TA',
 'ILDC.TA',
 'ONE.TA',
 'VRDS.TA',
 'HLAN.TA',
 'TRX.TA',
 'YHNF.TA',
 'ISCD.TA',
 'ISCN.TA',
 'ISRA-L.TA',
 'ISRS.TA',
 'CLIS.TA',
 'LVPR.TA',
 'MGDL.TA',
 'MDTR.TA',
 'MGOR.TA',
 'MGIC.TA',
 'MTRX.TA',
 'DIFI.TA',
 'MLTM.TA',
 'MMHD.TA',
 'MNRT.TA',
 'ISHI.TA',
 'MAXO.TA',
 'NYAX.TA',
 'NVLG.TA',
 'NOFR.TA',
 'NTML.TA',
 'HAP.TA',
 'PTBL.TA',
 'NFTA.TA',
 'SMT.TA',
 'SPNS.TA',
 'SLARL.TA',
 'CEL.TA',
 'ARAD.TA',
 'FOX.TA',
 'FORTY.TA',
 'PZOL.TA',
 'FIBIH.TA',
 'PLSN.TA',
 'PTNR.TA',
 'PERI.TA',
 'CGEN.TA',
 'CAMT.TA',
 'KEN.TA',
 'KRUR

In [11]:
# Display the symbols collected for index 125.
indexes_symbols_dict[125]

['OPCE.TA',
 'AUGN.TA',
 'AUDC.TA',
 'OPK.TA',
 'ORA.TA',
 'AZRM.TA',
 'ICL.TA',
 'IES.TA',
 'IDIN.TA',
 'INRM.TA',
 'ESLT.TA',
 'ALLT.TA',
 'ELLO.TA',
 'ALHE.TA',
 'ASPF.TA',
 'ELCO.TA',
 'ELTR.TA',
 'ELCRE.TA',
 'ECP.TA',
 'ELWS.TA',
 'AMOT.TA',
 'ENLT.TA',
 'ENOG.TA',
 'ENRG.TA',
 'APLP.TA',
 'AFRE.TA',
 'AQUA.TA',
 'EQTL.TA',
 'ARGO.TA',
 'ARPT.TA',
 'ASHG.TA',
 'BVC.TA',
 'BONS.TA',
 'ORL.TA',
 'BEZQ.TA',
 'BIG.TA',
 'FIBI.TA',
 'BCNV.TA',
 'GZT.TA',
 'GILT.TA',
 'GNCL.TA',
 'GNRS.TA',
 'DORL.TA',
 'DSCT.TA',
 'DIPL.TA',
 'DLEKG.TA',
 'DEDR-L.TA',
 'DLEA.TA',
 'DELT.TA',
 'DLTI.TA',
 'DIMRI.TA',
 'DANE.TA',
 'DNYA.TA',
 'ILDC.TA',
 'PHOE.TA',
 'HARL.TA',
 'ONE.TA',
 'VRDS.TA',
 'ILCO.TA',
 'HLAN.TA',
 'TSEM.TA',
 'TEVA.TA',
 'TRX.TA',
 'YHNF.TA',
 'ISCD.TA',
 'ISCN.TA',
 'ISRA-L.TA',
 'ISRS.TA',
 'CLIS.TA',
 'FIBI.TA',
 'LVPR.TA',
 'LPSN.TA',
 'MVNE.TA',
 'MGDL.TA',
 'MDTR.TA',
 'MGOR.TA',
 'MGIC.TA',
 'MZTF.TA',
 'MTRX.TA',
 'MTRN.TA',
 'DIFI.TA',
 'MLSR.TA',
 'MLTM.TA',
 'MMHD.T