In [13]:
import pandas as pd
import numpy as np
import bs4 as bs
import urllib.request
import json
import time

# Display every column of a DataFrame instead of truncating wide frames.
pd.set_option('display.max_columns', None)

In [178]:
def define_url(ticker, periods, financialgroup='XI_29'): #XI_29, UFRS, UFRS_K
    """Build the isyatirim.com.tr ``MaliTablo`` request URL for one company.

    Parameters
    ----------
    ticker : str
        Inserted as the ``companyCode`` query parameter.
    periods : sequence of str
        At least four labels such as ``'2024/4'``. The first four characters
        of a label are sent as the year; the last character (a digit) is
        multiplied by three and sent as the period, so ``'2024/4'`` becomes
        ``year=2024`` / ``period=12``. Only the first four labels are used.
    financialgroup : str, optional
        Inserted as the ``financialGroup`` query parameter.

    Returns
    -------
    str
        The complete request URL.
    """
    endpoint = "https://www.isyatirim.com.tr/_layouts/15/IsYatirim.Website/Common/Data.aspx/"
    pieces = [
        endpoint,
        "MaliTablo?companyCode={}&exchange=TRY&financialGroup={}".format(ticker, financialgroup),
    ]

    # The query carries exactly four (year, period) slots, numbered from 1.
    for slot in range(1, 5):
        label = periods[slot - 1]
        year = label[:4]
        period_code = str(int(label[-1]) * 3)
        pieces.append("&year{0}={1}&period{0}={2}".format(slot, year, period_code))

    return "".join(pieces)

In [179]:
def get_data(url, timeout=30):
    """Download ``url`` and decode the response body as JSON.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float, optional
        Seconds ``urlopen`` may block on the connection before giving up.
        Defaults to 30 so an unresponsive server cannot stall the caller
        indefinitely.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the response body.

    Raises
    ------
    Exception
        Errors from ``urlopen`` (for example ``urllib.error.URLError`` or a
        timeout) and from ``json.loads`` (``ValueError`` on a non-JSON body)
        are not caught here and propagate to the caller.
    """
    # The context manager closes the response even if reading fails, so the
    # underlying connection is not leaked across many consecutive requests.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()

    return json.loads(body)

In [180]:
def get_financials(ticker, periods, financialgroup='XI_29'):
    """Fetch one batch of four periods for a company as a DataFrame.

    The request URL comes from ``define_url`` and is downloaded with
    ``get_data``; every element of the response's ``'value'`` list becomes
    one row.

    Parameters
    ----------
    ticker : str
        Company code passed to ``define_url``.
    periods : sequence of str
        Four period labels; label ``i`` names the column that is filled from
        the response field ``'value<i+1>'``.
    financialgroup : str, optional
        Financial group passed to ``define_url``.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the response's ``'value'`` list is empty. Otherwise a
        frame with the columns ``itemCode`` and ``itemDescTr`` followed by one
        column per available period.
    """
    rows = get_data(define_url(ticker, periods, financialgroup))['value']

    # Nothing came back for any of the requested periods.
    if rows == []:
        return None

    # A period counts as available when the FIRST row carries a non-null
    # value for it; periods failing that check get no column at all.
    first_row = rows[0]
    available_slots = [
        slot for slot in range(4)
        if first_row['value{}'.format(slot + 1)] is not None
    ]

    columns = {
        'itemCode': [row['itemCode'] for row in rows],
        'itemDescTr': [row['itemDescTr'] for row in rows],
    }
    for slot in available_slots:
        value_key = 'value{}'.format(slot + 1)
        columns[periods[slot]] = [row[value_key] for row in rows]

    return pd.DataFrame(columns)

In [181]:
def get_all_hist_financials(ticker, period_lists, financialgroup='XI_29', return_error_flag=True, verbose=True,
                            max_retries=5, request_delay=2, retry_delay=2):
    """Download every batch in ``period_lists`` and merge them into one frame.

    Batches are requested in order with ``get_financials`` and outer-merged on
    ``itemCode`` / ``itemDescTr``. Requesting stops at the first batch for
    which ``get_financials`` returns ``None``. If any exception is raised, the
    whole download restarts from the first batch.

    Parameters
    ----------
    ticker : str
        Company code passed to ``get_financials``.
    period_lists : iterable of sequence of str
        Batches of period labels, each passed to ``get_financials``.
    financialgroup : str, optional
        Financial group passed to ``get_financials``.
    return_error_flag : bool, optional
        When true return ``(df, error_flag)``, otherwise only ``df``.
    verbose : bool, optional
        Print each completed batch and, on failure, the exception that
        triggered the retry.
    max_retries : int, optional
        Number of restarts allowed after the first failed attempt.
    request_delay : float, optional
        Seconds to sleep after each successfully merged batch.
    retry_delay : float, optional
        Seconds to sleep before restarting after a failure, so a server that
        is rejecting requests is not hit again immediately.

    Returns
    -------
    pandas.DataFrame or (pandas.DataFrame, bool)
        The merged frame (empty, with only the two key columns, when the first
        batch has no data). ``error_flag`` is ``True`` when every attempt
        failed; in that case the frame holds whatever the last attempt had
        merged before it failed.
    """
    error_flag = False
    failures = 0

    while True:
        # Each attempt starts from an empty frame so batches merged by a
        # failed attempt are never merged a second time.
        df = pd.DataFrame(columns=['itemCode', 'itemDescTr'])
        try:
            for count, periods in enumerate(period_lists):
                temp_df = get_financials(ticker, periods, financialgroup)

                # Stop requesting once a batch has no data for any period.
                if temp_df is None:
                    break

                df = df.merge(temp_df, on=['itemCode', 'itemDescTr'], how='outer')

                time.sleep(request_delay)
                if verbose:
                    print(count, periods)
            break
        # Best-effort by design: any failure (typically a connection error)
        # restarts the download instead of aborting the caller's loop.
        except Exception as exc:
            failures += 1
            if verbose:
                print(ticker, "-> attempt", failures, "failed:", repr(exc))
            if failures > max_retries:
                error_flag = True
                break
            time.sleep(retry_delay)

    if return_error_flag:
        return df, error_flag
    return df

In [192]:
# Load the stock list workbook; its 'Hisse' and 'Hacim (TL)' columns are used
# to build ticker_list.
# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# where the file exists at exactly this location.
isyatırım_df = pd.read_excel("/Users/dorukcanga/Downloads/tumhisse-3.xlsx")

In [193]:
# Tickers from the 'Hisse' column, ordered by 'Hacim (TL)' with the largest first.
ticker_list = isyatırım_df.sort_values('Hacim (TL)', ascending=False).Hisse.tolist()

# The spreadsheet cells carry padding spaces and a zero-width space (U+200B).
# Remove both unconditionally: matching the zero-width space only after one
# exact run of spaces would leave it in the ticker whenever the padding length
# differs, and the ticker is later used in the request URL and the file name.
ticker_list = [raw.replace('\u200b', '').replace(' ', '') for raw in ticker_list]

In [194]:
# Request batches, newest year first (2024 down to 2008). Each batch holds the
# four labels of one year in the order 'YYYY/4', 'YYYY/3', 'YYYY/2', 'YYYY/1'.
period_lists = [
    ['{}/{}'.format(year, period_no) for period_no in (4, 3, 2, 1)]
    for year in range(2024, 2007, -1)
]

In [195]:
# Same layout as the full period list but without the 2024 batch: batches run
# from 2023 down to 2008, each ordered 'YYYY/4', 'YYYY/3', 'YYYY/2', 'YYYY/1'.
period_lists2 = [
    ['{}/{}'.format(year, period_no) for period_no in (4, 3, 2, 1)]
    for year in range(2023, 2007, -1)
]

In [196]:
# Tickers that the download loops request with financial group 'UFRS_K'
# instead of the default 'XI_29'.
ufrs_list = """
    YKBNK ISCTR AKBNK GARAN ALTIN VAKBN HALKB SKBNK
    TSKB  TURSG ANSGR KTLEV AKGRT ALBRK ISFIN VAKFN
    ITTFH AGESA RAYSG ULUFA ANHYT DOCO  ICBCT GLCVY
    CRDFA SEKFK IDEAS GARFA QNBFB BRKVY KLNMA ISBTR
    LIDFA QNBFL ISATR ISKUR
""".split()

In [197]:
# Tickers whose download ended with error_flag set; the download loops append
# to this list, so re-running this cell clears the record.
error_list = []

In [198]:
# Directory that receives one '<ticker>.csv' per company. The file name is
# appended by plain string concatenation, so the trailing slash is required.
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
save_path = "/Users/dorukcanga/Desktop/vid/data/bist/historical_financials/"

In [199]:
# Subset of tickers for a separate download run; it holds the same 36 tickers,
# in the same order, as ufrs_list.
ticker_list2 = """
    YKBNK ISCTR AKBNK GARAN ALTIN VAKBN HALKB SKBNK
    TSKB  TURSG ANSGR KTLEV AKGRT ALBRK ISFIN VAKFN
    ITTFH AGESA RAYSG ULUFA ANHYT DOCO  ICBCT GLCVY
    CRDFA SEKFK IDEAS GARFA QNBFB BRKVY KLNMA ISBTR
    LIDFA QNBFL ISATR ISKUR
""".split()

In [17]:
%%time

# Download the historical statements of every ticker in ticker_list and write
# each result to '<save_path><ticker>.csv'.
for ticker in ticker_list:
    
    # Tickers in ufrs_list are requested with the 'UFRS_K' group, all others
    # with 'XI_29'.
    financialgroup = 'UFRS_K' if ticker in ufrs_list else 'XI_29'
    
    # First attempt: the full period_lists (starting with the 2024 batch).
    df, error_flag = get_all_hist_financials(ticker, period_lists, financialgroup=financialgroup, return_error_flag=True, verbose=False)
    # deneme_no ("deneme" is Turkish for "attempt") records which period list
    # produced df: 1 = period_lists, 2 = period_lists2.
    deneme_no = 1
    # get_all_hist_financials stops at the first batch without data, so a
    # company with no 2024 data yields an empty frame. Retry with
    # period_lists2, which starts at 2023. error_flag is overwritten by this
    # second call.
    if len(df) == 0:
        df, error_flag = get_all_hist_financials(ticker, period_lists2, financialgroup=financialgroup, return_error_flag=True, verbose=False)
        deneme_no = 2
    
    # Failed downloads are only recorded; nothing is written for them.
    if error_flag:
        error_list.append(ticker)
        print(ticker, "-> Error!")
    else:
        # NOTE(review): df is written even when both attempts returned an
        # empty frame - confirm that an empty CSV is intended in that case.
        df.to_csv(save_path+ticker+'.csv', index=False)
        print(ticker, "-> Saved! Deneme:", deneme_no)
        
    # Pause between companies, in addition to the per-batch sleep inside
    # get_all_hist_financials.
    time.sleep(10)

THYAO -> Saved! Deneme: 1
TUPRS -> Saved! Deneme: 1
YKBNK -> Saved! Deneme: 2
ISCTR -> Saved! Deneme: 2
KCHOL -> Saved! Deneme: 1
AKBNK -> Saved! Deneme: 2
BORLS -> Saved! Deneme: 1
EREGL -> Saved! Deneme: 1
BRSAN -> Saved! Deneme: 1
PETKM -> Saved! Deneme: 1
BIMAS -> Saved! Deneme: 1
GUBRF -> Saved! Deneme: 1
GARAN -> Saved! Deneme: 2
ODAS -> Saved! Deneme: 1
KONTR -> Saved! Deneme: 1
ASELS -> Saved! Deneme: 1
ASTOR -> Saved! Deneme: 1
SISE -> Saved! Deneme: 1
MIATK -> Saved! Deneme: 1
FROTO -> Saved! Deneme: 1
PGSUS -> Saved! Deneme: 1
TOASO -> Saved! Deneme: 1
SAHOL -> Saved! Deneme: 1
SASA -> Saved! Deneme: 1
KOZAL -> Saved! Deneme: 2
EKGYO -> Saved! Deneme: 1
TCELL -> Saved! Deneme: 1
ALTIN -> Saved! Deneme: 2
ZOREN -> Saved! Deneme: 1
VAKBN -> Saved! Deneme: 2
FENER -> Saved! Deneme: 2
KRDMD -> Saved! Deneme: 1
EUPWR -> Saved! Deneme: 1
INGRM -> Saved! Deneme: 1
ULKER -> Saved! Deneme: 1
HEKTS -> Saved! Deneme: 1
GESAN -> Saved! Deneme: 1
YEOTK -> Saved! Deneme: 1
OYAKC -> Saved!

In [200]:
%%time

# Same download procedure as the loop over ticker_list, applied to
# ticker_list2. Every ticker in ticker_list2 is also in ufrs_list, so these
# requests all use the 'UFRS_K' group.
for ticker in ticker_list2:
    
    # 'UFRS_K' for tickers in ufrs_list, otherwise 'XI_29'.
    financialgroup = 'UFRS_K' if ticker in ufrs_list else 'XI_29'
    
    # First attempt: the full period_lists (starting with the 2024 batch).
    df, error_flag = get_all_hist_financials(ticker, period_lists, financialgroup=financialgroup, return_error_flag=True, verbose=False)
    # deneme_no ("deneme" is Turkish for "attempt") records which period list
    # produced df: 1 = period_lists, 2 = period_lists2.
    deneme_no = 1
    # An empty frame means the first batch had no data; retry with
    # period_lists2, which starts at 2023. error_flag is overwritten by this
    # second call.
    if len(df) == 0:
        df, error_flag = get_all_hist_financials(ticker, period_lists2, financialgroup=financialgroup, return_error_flag=True, verbose=False)
        deneme_no = 2
    
    # Failed downloads are only recorded; nothing is written for them.
    if error_flag:
        error_list.append(ticker)
        print(ticker, "-> Error!")
    else:
        # Overwrites any CSV of the same ticker written by an earlier run.
        df.to_csv(save_path+ticker+'.csv', index=False)
        print(ticker, "-> Saved! Deneme:", deneme_no)
        
    # Pause between companies, in addition to the per-batch sleep inside
    # get_all_hist_financials.
    time.sleep(10)

YKBNK -> Saved! Deneme: 1
ISCTR -> Saved! Deneme: 1
AKBNK -> Saved! Deneme: 1
GARAN -> Saved! Deneme: 1
ALTIN -> Saved! Deneme: 2
VAKBN -> Saved! Deneme: 1
HALKB -> Saved! Deneme: 1
SKBNK -> Saved! Deneme: 1
TSKB -> Saved! Deneme: 1
TURSG -> Saved! Deneme: 1
ANSGR -> Saved! Deneme: 1
KTLEV -> Saved! Deneme: 2
AKGRT -> Saved! Deneme: 1
ALBRK -> Saved! Deneme: 1
ISFIN -> Saved! Deneme: 2
VAKFN -> Saved! Deneme: 2
ITTFH -> Saved! Deneme: 2
AGESA -> Saved! Deneme: 1
RAYSG -> Saved! Deneme: 2
ULUFA -> Saved! Deneme: 2
ANHYT -> Saved! Deneme: 1
DOCO -> Saved! Deneme: 2
ICBCT -> Saved! Deneme: 1
GLCVY -> Saved! Deneme: 2
CRDFA -> Saved! Deneme: 2
SEKFK -> Saved! Deneme: 2
IDEAS -> Saved! Deneme: 2
GARFA -> Saved! Deneme: 2
QNBFB -> Saved! Deneme: 1
BRKVY -> Saved! Deneme: 2
KLNMA -> Saved! Deneme: 1
ISBTR -> Saved! Deneme: 1
LIDFA -> Saved! Deneme: 2
QNBFL -> Saved! Deneme: 2
ISATR -> Saved! Deneme: 1
ISKUR -> Saved! Deneme: 2
CPU times: user 5.27 s, sys: 688 ms, total: 5.96 s
Wall time: 17mi

In [201]:
error_list

[]