# In [None]:
#!/usr/bin/env python
"""
Create two CSVs:
  1) selected_funds.csv – mapping for the codes in codes.txt
  2) all_tefas_funds.csv – every fund on TEFAS (takes ~3-4 min)

Requires: requests, beautifulsoup4, pandas, tqdm
> pip install requests beautifulsoup4 pandas tqdm
"""

import re, csv, time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import pandas as pd

# HTTP headers passed to every requests.get() call in this script.
# NOTE(review): presumably a browser-like User-Agent is needed for the site
# to answer scripted requests — confirm.
HEAD = {"User-Agent": "Mozilla/5.0"}

# ------------------------------- helpers
def parse_header(h2_text: str):
    """
    Split a fund page header into ``(company, fund_name)``.

    'AGESA HAYAT VE EMEKLİLİK A.Ş. OKS STANDART EMEKLİLİK YATIRIM FONU' becomes
    ('AGESA HAYAT VE EMEKLİLİK A.Ş.', 'OKS STANDART EMEKLİLİK YATIRIM FONU').

    If the header has no ' A.Ş. ' marker, the first two words are taken as the
    company and everything after them as the fund name.  Headers with fewer
    than three words give an empty fund name (and an empty company for an
    empty header) instead of raising IndexError.

    Parameters
    ----------
    h2_text : str
        Raw header text; surrounding whitespace is ignored.

    Returns
    -------
    tuple[str, str]
        ``(company, fund_name)``.
    """
    text = h2_text.strip()
    m = re.split(r"\s+A\.Ş\.\s+", text, maxsplit=1)
    if len(m) == 2:
        # re.split consumed the marker, so put it back on the company part
        return m[0] + " A.Ş.", m[1]
    # fallback – first two words as company; split(None, ...) collapses runs
    # of whitespace so repeated spaces never produce empty "words"
    words = text.split(None, 2)
    company = " ".join(words[:2])
    name = words[2] if len(words) > 2 else ""
    return company, name

def fetch_fund(code: str):
    """
    Download the analysis page for fund `code` and return (company, name).

    The pair is obtained by running parse_header() on the text of the first
    <h2> element of the page.  (None, None) is returned when the page has no
    <h2>.  An HTTP error status raises via Response.raise_for_status().
    """
    response = requests.get(
        f"https://www.tefas.gov.tr/FonAnaliz.aspx?FonKod={code}",
        headers=HEAD,
        timeout=15,
    )
    response.raise_for_status()

    # the fund header is expected in the first <h2> of the document
    header = BeautifulSoup(response.text, "html.parser").find("h2")
    if header:
        company, name = parse_header(header.get_text(strip=True))
        return company, name
    return None, None

# ------------------------------- 1) selected list
# Fund codes written to selected_funds.csv.  The whitespace-separated literal
# is split into a list of code strings.
# NOTE(review): the module docstring mentions codes.txt, but the codes are
# embedded inline here and no file is read — confirm which is intended.
codes = """AAJ ABE ACV AE1 AE2 AE3 AE4 AEA AEB AEC AEG AEH AEI AEK AEL AEN AEP AER AET AEU AEY AEZ AFH AFJ AFP AG1 AG2 AG3 AG4 AGA AGB AGD AGE AGG AGH AGM AGT AH0 AH1 AH2 AH3 AH4 AH5 AH6 AH8 AH9 AHB AHC AHJ AHL AIE AIP AJA AJB AJC AJF AJG AJH AJP AJR AJT AJV AJY AJZ ALI ALJ ALR ALS ALU ALZ AMG AMR AMY AMZ ANE ANG ANJ ANK ANP ANS AO1 AO2 APG ATE ATK AUG AVB AVD AVE AVG AVH AVJ AVK AVL AVN AVP AVR AVU AVY AYJ AZA AZD AZH AZK AZL AZS AZY BBH BEE BEF BEH BEI BEK BEO BGE BGK BHS BHT BKB BNA BNB BNK BNL BNS BNZ BPC BPE BPF BPG BPH BPI BPJ BPK BPL BPN BPO BPR BPS BPU BZY CFA CFB CFC CFD CFE CFK CFY CGE CGG CHA CHC CHD CHG CHH CHI CHK CHL CHM CHN CHO CHS CHT EHG EIE EIF EIG EIH EIK EST FEA FEF FEI FEN FEO FER FES FET FGF FGH FIC FIE FIG FIH FII FIK FIM FIR FIU FIV FIY FIZ FJG FYL FYN FYU FYY GCK GCN GCS GCT GCV GCY GDV GEA GED GEF GEG GEH GEK GEL GES GEU GEV GFH GHA GHD GHE GHF GHG GHH GHI GHJ GHK GHL GHM GHN GHO GHP GHT GHU GHV GHY GHZ GKB GRA HEA HEB HEC HED HEE HEG HEI HEK HEL HEP HER HES HET HHB HHE HHG HHM HHN HHY HS1 IEA IEB IEE IEF IEG IEH IEK IER IGE KEA KEB KED KEF KEG KEH KEK KES KET KEY KEZ KJM KKS KOA KOE KOS KTZ MEA MHA MHB MHC MHD MHE MHG MHH MHI MHK MHL MHM MHN MHO MHR MHS MHT MHU MHV MHY MHZ MZL MZN MZP NHA NHM NHN PRC PRS RZM RZN TBJ TJY TKV TML TYJ VEB VED VEE VEG VEH VEI VEK VEL VEO VEP VER VES VET VEU VEV VEY VGA VGB VGC VGD VGE VGF VGG VGH VGK VGP VGT VGY VGZ VKE VKJ VVA VVD VVE VVM VVU VVZ VYB YZD ZHB ZHD ZHE ZHF ZHG""".split()

# Look up every code in `codes` and write the results to selected_funds.csv.
# A failed lookup is reported and recorded with empty company/name columns so
# that one bad code does not abort the whole run.
rows = []
print("Downloading selected funds …")
for code in tqdm(codes, ncols=80):
    try:
        company, name = fetch_fund(code)
    except Exception as e:
        # tqdm.write() prints without clobbering the active progress bar,
        # which a plain print() inside the loop would do
        tqdm.write(f"{code}: {e}")
        company = name = None
    rows.append((code, company, name))
pd.DataFrame(rows, columns=["kod", "portfoy_sirketi", "fon_adi"]).to_csv(
    "selected_funds.csv", index=False, encoding="utf-8"
)

# ------------------------------- 2) TEFAS full list
def get_all_codes():
    """
    Scrape the master list of fund codes from the FONANALIZ page.

    Every anchor whose href starts with 'FonAnaliz.aspx?FonKod=' contributes
    the code that follows 'FonKod='.  Anchors whose code does not match
    [A-Z0-9]+ are skipped instead of raising AttributeError.

    Returns
    -------
    list[str]
        Unique fund codes, sorted.

    Raises
    ------
    requests.HTTPError
        If the page answers with an error status, so an error page is not
        silently turned into an empty catalogue.
    """
    url = "https://www.tefas.gov.tr/FONANALIZ.ASPX"
    resp = requests.get(url, headers=HEAD, timeout=15)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    # local name kept distinct from the module-level `codes` list
    found = set()      # duplicates exist
    for a in soup.select("a[href^='FonAnaliz.aspx?FonKod=']"):
        m = re.search(r"FonKod=([A-Z0-9]+)", a["href"])
        if m:
            found.add(m.group(1))
    return sorted(found)

# Look up every code returned by get_all_codes() and write the results to
# all_tefas_funds.csv.  A failed lookup is reported (not silently dropped)
# and recorded with empty company/name columns.
print("Downloading entire TEFAS catalogue …")
all_rows = []
for code in tqdm(get_all_codes(), ncols=80):
    try:
        company, name = fetch_fund(code)
    except Exception as e:
        # report the failure, as the selected-funds loop does; tqdm.write()
        # prints without clobbering the active progress bar
        tqdm.write(f"{code}: {e}")
        company = name = None
    all_rows.append((code, company, name))

pd.DataFrame(all_rows, columns=["kod", "portfoy_sirketi", "fon_adi"]).to_csv(
    "all_tefas_funds.csv", index=False, encoding="utf-8"
)

print("\nDone!  ➜  selected_funds.csv   &   all_tefas_funds.csv  created.")
