# Generate financial time series (FTS) data

In [1]:
import re
import sys
import warnings
import pandas as pd # type: ignore
import yfinance as yf # type: ignore

sys.path.append('../modules')
import get_financial_time_series as get_fts # type: ignore

# Notebook-wide settings: silence every warning category, switch off the pandas
# chained-assignment check, and show all DataFrame columns when displaying.
warnings.simplefilter("ignore")
pd.set_option("mode.chained_assignment", None)
pd.set_option("display.max_columns", None)

## Global variables

In [2]:
# Folder layout (relative paths) shared by the cells below.
_input_root = "../input_files"
input_path_raw = "{}/raw_data".format(_input_root)
input_path_processed = "{}/processed_data".format(_input_root)
input_path_data_dictionary = "{}/data_dictionary".format(_input_root)
output_path = "../output_files"

# Date window of the download; the generation date is also embedded in the
# name of the processed CSV file.
start_date = "2000-01-01"
input_generation_date = "2024-08-21"

## Market tickers and components

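To add a new market, first create a table in CSV format listing the ticker assigned in Yahoo Finance to each stock of that market along with its real name (columns ```ticker``` and ```ticker_name```), and save it as ```tickers_<key>.csv``` in _input_files/data_dictionary_ (see that folder for examples). Then add to ```stock_index_list``` a pair ```(key, value)``` where:
* ```key```**:** is the identifier used in the name of the CSV file (e.g. ```gspc``` for ```tickers_gspc.csv```) and,
* ```value```**:** is the list ```[market ticker, market name, country, market type]``` describing the market (e.g. ```["^GSPC", "Standard and Poor's 500 - S&P 500", "United States", "Developed"]```).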

In [3]:
# Standard and Poor's 500 - S&P 500 (United States)
# Source https://en.wikipedia.org/wiki/List_of_S%26P_500_companies
# "2000-01-01" - "2022-12-31"
# GEV: Data doesn't exist for startDate = 946702800, endDate = 1672462800
# KVUE: Data doesn't exist for startDate = 946702800, endDate = 1672462800
# SOLV: Data doesn't exist for startDate = 946702800, endDate = 1672462800
# VLTO: Data doesn't exist for startDate = 946702800, endDate = 1672462800

# Mexico Price and Quote Index (Mexico)
# source https://es.finance.yahoo.com/quote/%5EMXX/components/?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAAICc-QDC-sGwCChRyZU_MzY5IZIpLgF6EOVbbq-P7L_F6E0ur0Zz-MAwE1zXvNewk1qXCrCh4tU6A48XLFTPTaxgwdtcHisQnftQhKrJjlhLFjzRswWW8ybr08epnyjiWfra8DQjOdPTIIBrNB86THdyO7MnRmHpizHkFyNPL1Np
# "2005-01-03" - "2022-12-31"

# DAX Performance Index (Germany)
# source https://markets.businessinsider.com/index/components/dax
# "2001-01-01" - "2022-12-31"

# Registry of the markets to process.
#   key   -> suffix of the constituents file "tickers_<key>.csv"
#   value -> [market ticker, market name, country, market type]
stock_index_list = {
    "buxbd": ["^BUX.BD", "Budapest Stock Index", "Hungary", "Emergent"],
    "case30": ["^CASE30", "Cairo and Alexandria Stock Exchange", "Egypt", "Emergent"],
    "gdat": ["GD.AT", "Athens Stock Exchange", "Greece", "Emergent"],
    "gdaxi": ["^GDAXI", "DAX Performance Index", "Germany", "Developed"],
    "gspc": ["^GSPC", "Standard and Poor's 500 - S&P 500", "United States", "Developed"],
    "icolcapcl": ["ICOLCAP.CL", "iShares MSCI COLCAP Stock Fund", "Colombia", "Emergent"],
    "ipsa": ["^IPSA", "Chilean stock market index", "Chile", "Emergent"],
    "jkse": ["^JKSE", "Jakarta Stock Exchange Composite index", "Indonesia", "Emergent"],
    "mxx": ["^MXX", "Mexico Price and Quote Index", "Mexico", "Emergent"],
    "nsei": ["^NSEI", "National Stock Exchange of India", "India", "Emergent"],
}

# Merge all tickers and markets.
# For every entry of `stock_index_list`, the constituents file
# "tickers_<key>.csv" is read from the data dictionary folder and tagged with
# the market-level fields stored in the entry's value list.
market_ticker_tables = [
    pd.read_csv(
        "{}/tickers_{}.csv".format(input_path_data_dictionary, stock_name),
        low_memory = False
    ).assign(
        market_ticker = stock_dict[0],
        market_name = stock_dict[1],
        market_country = stock_dict[2],
        market_type = stock_dict[3]
    )
    for stock_name, stock_dict in stock_index_list.items()
]

# `df` is only ever bound to the merged DataFrame (never to the temporary
# list), and ignore_index gives it a unique 0..n-1 row index instead of
# carrying over the row labels of each individual CSV.
df = pd.concat(market_ticker_tables, ignore_index = True)

## All markets data

After merging all markets into a single DataFrame, all the data is downloaded and merged into a single DataFrame to take full advantage of function parallelization.

In [4]:
# Download the time series market by market; each per-market frame is tagged
# with its market information and collected in the `df_all_markets` list.
df_all_markets = []
for market_ticker, df_local in df.groupby("market_ticker", sort = False):
    print("#####----------------------------------- {} -----------------------------------#####".format(market_ticker.upper()))

    # Market-level attributes, taken from the first row of this market's tickers
    market_info = {
        "market_ticker": market_ticker,
        "market_name": df_local["market_name"].iloc[0],
        "market_country": df_local["market_country"].iloc[0],
        "market_type": df_local["market_type"].iloc[0],
    }

    # Mapping {ticker: ticker_name} handed to the download helper
    list_stock_indexes = dict(zip(df_local["ticker"], df_local["ticker_name"]))

    # Daily series between the global start date and the generation date
    df_market = get_fts.process_financial_time_series(
        ticker_dict = list_stock_indexes,
        initial_date = start_date,
        final_date = input_generation_date,
        interval = "1d"
    )

    # Attach the market information as constant columns
    for info_column, info_value in market_info.items():
        df_market[info_column] = info_value

    df_all_markets.append(df_market)

    print("################################################## DONE ##################################################")

# Merge the per-market frames, move the market-level columns to the front and
# store the result among the processed inputs so it can be reused without
# reprocessing.
df_all_markets = pd.concat(df_all_markets)

leading_columns = ["market_ticker", "market_name", "market_country", "market_type"]
for position, column_name in enumerate(leading_columns):
    df_all_markets.insert(position, column_name, df_all_markets.pop(column_name))

# File name carries the generation date without dashes (YYYYMMDD)
generation_tag = re.sub("-", "", input_generation_date)
all_markets_file = "{}/df_all_markets_{}.csv".format(input_path_processed, generation_tag)
df_all_markets.to_csv(all_markets_file, index = False)

#####----------------------------------- ^BUX.BD -----------------------------------#####
----------------------------------- YAHOO FINANCE DATA -----------------------------------

- Download 4IG.BD with initial 5088 rows and 3340 rows after profiling
- Processed 4IG.BD : 4IG NYRT
- Download AKKO.BD with initial 3461 rows and 2463 rows after profiling
- Processed AKKO.BD : AKKO INVEST NYRT
- Download ALTEO.BD with initial 3039 rows and 1619 rows after profiling
- Processed ALTEO.BD : ALTEO ENERGY SERVICES PUBLIC LIMITED COMPANY
- Download ANY.BD with initial 4772 rows and 3910 rows after profiling
- Processed ANY.BD : ANY BIZTONSÁGI NYOMDA NYRT.
- Download APPENINN.BD with initial 3621 rows and 3138 rows after profiling
- Processed APPENINN.BD : APPENINN VAGYONKEZELO HOLDING SA



1 Failed download:
['CIG.BD']: YFInvalidPeriodError("%ticker%: Period 'max' is invalid, must be one of ['1d', '5d']")


- Download AUTOWALLIS.BD with initial 2846 rows and 1651 rows after profiling
- Processed AUTOWALLIS.BD : AUTOWALLIS PLC
- Download GSPARK.BD with initial 6222 rows and 5154 rows after profiling
- Processed GSPARK.BD : FIRST TRUST HORIZON MANAGED VOL
- Download MTELEKOM.BD with initial 6319 rows and 5678 rows after profiling
- Processed MTELEKOM.BD : MAGYAR TELEKOM
- Download MASTERPLAST.BD with initial 3174 rows and 2198 rows after profiling
- Processed MASTERPLAST.BD : MASTERPLAST SHARE
- Download MOL.BD with initial 5316 rows and 5041 rows after profiling
- Processed MOL.BD : MOL MAGYAR OLAJ- ÉS GÁZIPARI SA
- Download OPUS.BD with initial 4251 rows and 2909 rows after profiling
- Processed OPUS.BD : OPUS GLOBAL NYRT
- Download OTP.BD with initial 5754 rows and 5491 rows after profiling
- Processed OTP.BD : OTP BANK NYRT
- Download PANNERGY.BD with initial 4275 rows and 3539 rows after profiling
- Processed PANNERGY.BD : PANNERGY PLC
- Download RICHTER.BD with initial 5400 rows and 5


1 Failed download:
['AUTO.CA']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download CCAP.CA with initial 3644 rows and 3157 rows after profiling
- Processed CCAP.CA : CITADEL CAPITAL
- Download CIEB.CA with initial 5128 rows and 3655 rows after profiling
- Processed CIEB.CA : CREDIT AGRICOLE EGYPT SAE
- Download CIRA.CA with initial 4869 rows and 2475 rows after profiling
- Processed CIRA.CA : CAIRO FOR INVESTMENT AND REAL ESTATE DEVELOPMENT SAE
- Download CLHO.CA with initial 1911 rows and 1618 rows after profiling
- Processed CLHO.CA : CLHO.CA
- Download COMI.CA with initial 5986 rows and 4683 rows after profiling
- Processed COMI.CA : COMMERCIAL INTERNATIONAL BANK EGYPT
- Download EAST.CA with initial 5982 rows and 3995 rows after profiling
- Processed EAST.CA : EASTERN TOBACCO
- Download EFID.CA with initial 2312 rows and 1507 rows after profiling
- Processed EFID.CA : EFID.CA
- Download EKHO.CA with initial 5585 rows and 3844 rows after profiling
- Processed EKHO.CA : EGYPTIAN KUWAITI HOLDING
- Download EKHOA.CA with initial 706 rows and 640 rows after


1 Failed download:
['MNHD.CA']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download ORAS.CA with initial 735 rows and 51 rows after profiling
- Processed ORAS.CA : ORAS.CA
- Download ORWE.CA with initial 5983 rows and 4472 rows after profiling
- Processed ORWE.CA : ORWE.CA
- Download PHDC.CA with initial 4039 rows and 3468 rows after profiling
- Processed PHDC.CA : PALM HILLS DEVELOPMENTS
- Download RMDA.CA with initial 1135 rows and 995 rows after profiling
- Processed RMDA.CA : RMDA.CA
- Download SKPC.CA with initial 4793 rows and 4140 rows after profiling
- Processed SKPC.CA : SIDI KERIR PETROCHEMICALS
- Download SWDY.CA with initial 4545 rows and 3925 rows after profiling
- Processed SWDY.CA : ELSWEDY ELECTRIC
- Download TALM.CA with initial 804 rows and 546 rows after profiling
- Processed TALM.CA : TALM.CA
- Download TMGH.CA with initial 4156 rows and 3618 rows after profiling
- Processed TMGH.CA : TALAAT MOUSTAFA GROUP
################################################## DONE ##################################################
#####---------------------


1 Failed download:
['AESGENER.SN']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download AGUAS-A.SN with initial 6234 rows and 4940 rows after profiling
- Processed AGUAS-A.SN : AGUAS ANDINAS S.A.
- Download ANDINA-B.SN with initial 6232 rows and 5524 rows after profiling
- Processed ANDINA-B.SN : EMBOTELLADORA ANDINA S.A.
- Download BCI.SN with initial 4158 rows and 4062 rows after profiling
- Processed BCI.SN : BANCO DE CRÉDITO E INVERSIONES
- Download BSANTANDER.SN with initial 5973 rows and 5488 rows after profiling
- Processed BSANTANDER.SN : BANCO SANTANDER-CHILE
- Download CAP.SN with initial 6234 rows and 5706 rows after profiling
- Processed CAP.SN : CAP S.A.
- Download CCU.SN with initial 6234 rows and 5731 rows after profiling
- Processed CCU.SN : COMPAÑÍA CERVECERÍAS UNIDAS S.A.
- Download CENCOSUD.SN with initial 6234 rows and 5052 rows after profiling
- Processed CENCOSUD.SN : CENCOSUD S.A.
- Download CHILE.SN with initial 4158 rows and 4021 rows after profiling
- Processed CHILE.SN : BANCO DE CHILE
- Download CMPC.SN with initial 6234 rows and 571


1 Failed download:
['ITAUCORP.SN']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download LTM.SN with initial 6234 rows and 5714 rows after profiling
- Processed LTM.SN : LATAM AIRLINES GROUP S.A.
- Download MALLPLAZA.SN with initial 1509 rows and 1471 rows after profiling
- Processed MALLPLAZA.SN : PLAZA S.A.
- Download PARAUCO.SN with initial 6234 rows and 5027 rows after profiling
- Processed PARAUCO.SN : PARQUE ARAUCO S.A.
- Download RIPLEY.SN with initial 4794 rows and 4629 rows after profiling
- Processed RIPLEY.SN : RIPLEY CORP S.A.
- Download SECURITY.SN with initial 6234 rows and 4248 rows after profiling
- Processed SECURITY.SN : GRUPO SECURITY S.A.
- Download SONDA.SN with initial 4453 rows and 4280 rows after profiling
- Processed SONDA.SN : SONDA S.A.
- Download SQM-B.SN with initial 6234 rows and 5862 rows after profiling
- Processed SQM-B.SN : SOCIEDAD QUÍMICA Y MINERA DE CHILE S.A.
- Download VAPORES.SN with initial 6232 rows and 5179 rows after profiling
- Processed VAPORES.SN : COMPAÑÍA SUD AMERICANA DE VAPORES S.A.
#############################


1 Failed download:
['SQBI.JK']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download SQMI.JK with initial 4967 rows and 2143 rows after profiling
- Processed SQMI.JK : SQMI.JK
- Download SULI.JK with initial 5513 rows and 3275 rows after profiling
- Processed SULI.JK : SULI.JK
- Download TFCO.JK with initial 4664 rows and 1055 rows after profiling
- Processed TFCO.JK : TFCO.JK
- Download TRST.JK with initial 5951 rows and 2815 rows after profiling
- Processed TRST.JK : TRST.JK
- Download UNIC.JK with initial 5957 rows and 2126 rows after profiling
- Processed UNIC.JK : UNIC.JK
- Download VOKS.JK with initial 5575 rows and 2678 rows after profiling
- Processed VOKS.JK : VOKS.JK
################################################## DONE ##################################################
#####----------------------------------- ^MXX -----------------------------------#####
----------------------------------- YAHOO FINANCE DATA -----------------------------------

- Download AC.MX with initial 5505 rows and 4138 rows after profiling
- Processed AC.MX : ARCA CONTINE


1 Failed download:
['AMXL.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download ASURB.MX with initial 6060 rows and 4451 rows after profiling
- Processed ASURB.MX : GRUPO AEROPORTUARIO DEL SURESTE, S.A.B. DE C.V.
- Download BBAJIOO.MX with initial 1814 rows and 1794 rows after profiling
- Processed BBAJIOO.MX : BANCO DEL BAJÍO, S.A., INSTITUCIÓN DE BANCA MÚLTIPLE
- Download BIMBOA.MX with initial 6257 rows and 6021 rows after profiling
- Processed BIMBOA.MX : GRUPO BIMBO, S.A.B. DE C.V.
- Download BOLSAA.MX with initial 4066 rows and 3982 rows after profiling
- Processed BOLSAA.MX : BOLSA MEXICANA DE VALORES, S.A.B. DE C.V.
- Download CEMEXCPO.MX with initial 6257 rows and 6043 rows after profiling
- Processed CEMEXCPO.MX : CEMEX, S.A.B. DE C.V.
- Download CUERVO.MX with initial 1895 rows and 1871 rows after profiling
- Processed CUERVO.MX : BECLE, S.A.B. DE C.V.
- Download FEMSAUBD.MX with initial 6257 rows and 6103 rows after profiling
- Processed FEMSAUBD.MX : FOMENTO ECONÓMICO MEXICANO, S.A.B. DE C.V.
- Download GAPB.MX with initial 4652 rows and 45


1 Failed download:
['IENOVA.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download KIMBERA.MX with initial 6257 rows and 6037 rows after profiling
- Processed KIMBERA.MX : KIMBERLY-CLARK DE MÉXICO, S.A.B. DE C.V.



1 Failed download:
['KOFL.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')

1 Failed download:
['LIVEPOLC1.MX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (1d 2000-01-01 -> 2024-08-21)')


- Download LABB.MX with initial 4063 rows and 3956 rows after profiling
- Processed LABB.MX : GENOMMA LAB INTERNACIONAL, S.A.B. DE C.V.
$LIVEPOLC1.MX: possibly delisted; No price data found  (1d 2000-01-01 -> 2024-08-21)
- Download MEGACPO.MX with initial 4215 rows and 4074 rows after profiling
- Processed MEGACPO.MX : MEGACABLE HOLDINGS, S.A.B. DE C.V.



1 Failed download:
['MEXCHEM.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download OMAB.MX with initial 4454 rows and 4371 rows after profiling
- Processed OMAB.MX : GRUPO AEROPORTUARIO DEL CENTRO NORTE, S.A.B. DE C.V.



1 Failed download:
['PEOLES.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download PINFRA.MX with initial 6257 rows and 4828 rows after profiling
- Processed PINFRA.MX : PROMOTORA Y OPERADORA DE INFRAESTRUCTURA, S.A.B. DE C.V.



1 Failed download:
['SITESB1.MX']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


- Download TLEVISACPO.MX with initial 6201 rows and 5877 rows after profiling
- Processed TLEVISACPO.MX : GRUPO TELEVISA, S.A.B.
################################################## DONE ##################################################
#####----------------------------------- ^NSEI -----------------------------------#####
----------------------------------- YAHOO FINANCE DATA -----------------------------------

- Download ADANIENT.NS with initial 5497 rows and 5421 rows after profiling
- Processed ADANIENT.NS : ADANI ENTERPRISES LIMITED
- Download APOLLOHOSP.NS with initial 5495 rows and 5451 rows after profiling
- Processed APOLLOHOSP.NS : APOLLO HOSPITALS ENTERPRISE LIMITED
- Download BAJAJ-AUTO.NS with initial 5498 rows and 5406 rows after profiling
- Processed BAJAJ-AUTO.NS : BAJAJ AUTO LIMITED
- Download BAJAJFINSV.NS with initial 5464 rows and 5386 rows after profiling
- Processed BAJAJFINSV.NS : BAJAJ FINSERV LTD.
- Download BAJFINANCE.NS with initial 5495 rows and 5428 rows af