TÜFE dosyasını oku

CPI processing

In [None]:
import pandas as pd

def _split_period_value(line):
    """Split one raw ``period,value`` record into its two text fields.

    A value wrapped in double quotes is read up to its closing quote, so a
    decimal comma inside it (``"7,93"``) is kept rather than being mistaken
    for a field separator. An unquoted value ends at the next comma.
    """
    period, _, rest = str(line).partition(",")
    rest = rest.strip()
    if rest.startswith('"') and '"' in rest[1:]:
        value = rest[1:].split('"', 1)[0]
    else:
        value = rest.split(",", 1)[0]
    return period.strip().strip('"'), value.strip().strip('"')


def process_data(input_file_path, output_file_path="cpi_data_eom.csv",
                 start_date="2006-01-01", end_date="2024-12-31"):
    """Convert raw ``MM-YYYY,value`` CPI records into a month-end dated CSV.

    The input holds one comma-separated record per row (first row is a
    header and is skipped). Each period is parsed with ``%m-%Y`` and moved to
    the last day of its month; each value is parsed as a float, accepting
    either ``.`` or ``,`` as the decimal mark.

    Args:
        input_file_path: Source file. Paths ending in ``.csv``/``.txt`` are
            read as plain text lines; anything else is read with
            ``pd.read_excel`` taking only the first column.
        output_file_path: Destination CSV (``Date`` index, ``Annual_CPI``).
        start_date: Inclusive lower bound on ``Date``; ``None`` disables it.
        end_date: Inclusive upper bound on ``Date``; ``None`` disables it.

    Returns:
        The filtered DataFrame that was written, indexed by ``Date``.

    Raises:
        ValueError: If a period does not match ``%m-%Y`` or a value is not
            a number.
    """
    # --- 1) Read the raw one-record-per-row strings.
    if str(input_file_path).lower().endswith((".csv", ".txt")):
        # utf-8-sig transparently drops a BOM if the export has one.
        with open(input_file_path, encoding="utf-8-sig") as handle:
            raw_lines = pd.Series(handle.read().splitlines(), dtype=object)
    else:
        raw_lines = pd.read_excel(
            input_file_path, header=None, usecols=[0], dtype=str
        ).squeeze("columns")

    raw_lines = raw_lines.dropna()
    raw_lines = raw_lines[raw_lines.str.strip() != ""]
    records = raw_lines.iloc[1:]  # first remaining row is the header

    # --- 2) Split every record into period / value text.
    # Splitting on *every* comma would cut a quoted "7,93" down to "7";
    # the helper keeps quoted values whole.
    pairs = [_split_period_value(line) for line in records]
    parsed = pd.DataFrame(pairs, columns=["Period_Raw", "CPI_Raw"])

    # --- 3) Convert to typed columns.
    parsed["Date"] = (
        pd.to_datetime(parsed["Period_Raw"], format="%m-%Y")
        + pd.offsets.MonthEnd(0)  # roll forward to the month's last day
    )
    parsed["Annual_CPI"] = (
        parsed["CPI_Raw"].str.replace(",", ".", regex=False).astype(float)
    )
    parsed = parsed.sort_values(by="Date")

    # --- 4) Keep only the requested (inclusive) date window.
    keep = pd.Series(True, index=parsed.index)
    if start_date is not None:
        keep &= parsed["Date"] >= pd.to_datetime(start_date)
    if end_date is not None:
        keep &= parsed["Date"] <= pd.to_datetime(end_date)

    # --- 5) Write and return the result.
    result = parsed.loc[keep, ["Date", "Annual_CPI"]].set_index("Date")
    result.to_csv(output_file_path)
    return result

if __name__ == '__main__':
    # Turn the raw TÜFE workbook into the month-end CSV used downstream.
    process_data(
        "tcmb_data/tüfe.xlsx",
        output_file_path="tcmb_data/cpi_annual_eom_2006-2024.csv",
    )

In [13]:
import pandas as pd
from pathlib import Path
import re

def clean_price_file(
        file_path: str | Path,
        date_col: int | str | None = None,   # None → ilk sütun
        price_col: int | str | None = None,  # None → ikinci sütun
        n_rows: int | None = 227,
        date_format: str | None = None,      # ör. "%Y-%m"
        csv_sep: str = ","
) -> pd.DataFrame:
    fp = Path(file_path)

    # --------- 1) OKU
    df_raw = (pd.read_excel(fp, header=0)
              if fp.suffix.lower() in {".xlsx", ".xls"}
              else pd.read_csv(fp, header=0))

    if n_rows is not None:
        df_raw = df_raw.iloc[:n_rows]

    # --------- 2) KOLON SEÇ
    date_col  = 0 if date_col  is None else date_col
    price_col = 1 if price_col is None else price_col

    date_series  = df_raw.iloc[:, date_col]  if isinstance(date_col,  int) else df_raw[date_col]
    price_series = df_raw.iloc[:, price_col] if isinstance(price_col, int) else df_raw[price_col]

    df = pd.DataFrame({"date": date_series, "price": price_series})

    # --------- 3) DÖNÜŞÜMLER
    # 3a) Tarih → datetime64, sonra ay sonuna kaydır
    df["date"] = pd.to_datetime(df["date"], format=date_format, errors="coerce") \
                   .add(pd.offsets.MonthEnd(0))            # ay sonu

    # 3b) Fiyat string → float
    def _clean_number(s: str) -> str:
        if pd.isna(s):
            return ""
        s = str(s).strip()
        s = re.sub(r"[^\d,.\-]", "", s)         # harf vs. sil
        # binlik ayraç "." varsa kaldır, ondalık "," → "."
        if "," in s:
            whole, frac = s.split(",", 1)
            whole = whole.replace(".", "")
            s = whole + "." + frac
        else:
            s = s.replace(".", "")              # "1.234" → "1234"
        return s
    df["price"] = pd.to_numeric(df["price"].map(_clean_number), errors="coerce")

    # --------- 4) TEMİZLE
    df = df.dropna(subset=["date", "price"]).reset_index(drop=True)

    # --------- 5) SADECE date & price KALSIN
    df = df[["date", "price"]]

    # --------- 6) CSV'YE YAZ
    out_csv = fp.with_stem(fp.stem + "_clean").with_suffix(".csv")
    df.to_csv(out_csv, index=False, sep=csv_sep)
    print(f"✓ Temiz dosya kaydedildi → {out_csv}")

    return df


In [None]:
# Clean every monthly TCMB source workbook; each call writes "<stem>_clean.csv"
# next to its input, and df_clean ends up holding the last cleaned frame.
for source_file in (
    "tcmb_data/TCMB_altın_rezerv.xlsx",
    "tcmb_data/TCMB_döviz_rezerv.xlsx",
    "tcmb_data/avrupa_brent_petrol.xlsx",
    "tcmb_data/cari_işlemler_hesabı.xlsx",
):
    df_clean = clean_price_file(source_file, date_format="%Y-%m")

✓ Temiz dosya kaydedildi → tcmb_data\TCMB_altın_rezerv_clean.csv
✓ Temiz dosya kaydedildi → tcmb_data\TCMB_döviz_rezerv_clean.csv
✓ Temiz dosya kaydedildi → tcmb_data\avrupa_brent_petrol_clean.csv
✓ Temiz dosya kaydedildi → tcmb_data\cari_işlemler_hesabı_clean.csv
