## Récupération et préparation des données


In [None]:
import matplotlib as mpl
import seaborn as sns

# Show which plotting-library versions the kernel actually loaded.
for label, module in (("matplotlib =", mpl), ("seaborn    =", sns)):
    print(label, module.__version__)



matplotlib = 3.5.2
seaborn    = 0.13.2


In [None]:
from typing import List, Dict
import os
import re
import numpy as np
import pandas as pd
from fredapi import Fred
 
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from skfin.dataloaders.cache import CacheManager

  from .autonotebook import tqdm as notebook_tqdm


Type de plt : <class 'module'>


AttributeError: 'RcParams' object has no attribute '_get'

In [2]:

# Parameters
# The FRED API key is read from the environment only: a credential must never
# be committed as a fallback value inside the notebook. When the variable is
# unset the key is the empty string, which the validation below rejects.
FRED_API_KEY = os.getenv("FRED_API_KEY", "").strip()

# --- Time window ---
DATE_START = pd.Timestamp("1982-01-01")
# September 2025 included -> roll the first day of the month forward to month end
DATE_END = (pd.Timestamp("2025-09-01") + pd.offsets.MonthEnd(1))

# ISO-formatted bounds, as passed to the FRED download calls
date_start_str = DATE_START.strftime("%Y-%m-%d")
date_end_str   = DATE_END.strftime("%Y-%m-%d")

def _is_valid_fred_key(key: str) -> bool:
    return bool(re.fullmatch(r"[a-z0-9]{32}", key))

# Fail fast on a malformed key, before any network call is attempted.
if not _is_valid_fred_key(FRED_API_KEY):
    # Only the length is reported: echoing the key itself would write the
    # credential into the notebook's saved output.
    raise ValueError(
        "FRED_API_KEY invalide: il faut exactement 32 caractères alphanumériques en minuscules. "
        f"Reçu: {len(FRED_API_KEY)} caractère(s). "
        "Corrige la clé (souvent un espace en trop)."
    )

# FRED series to download. Every identifier appears exactly once: a repeated
# id would produce a duplicate metadata row and a redundant download.
series_ids = [
    "FEDFUNDS",
    "DGS1","DGS2","DGS5","DGS7","DGS10","DGS3MO",
    "T10Y2Y","T10Y3M",
    "NFCI","M2SL","CPIAUCSL","UNRATE",
    "INDPRO","MICH","PPIACO","CPIENGSL","BUSLOANS",
    "MPRIME","CPILFESL","TCU","UMCSENT","NASDAQCOM"
]

# FRED client (fredapi) built with FRED_API_KEY; used below for the metadata
# lookups and the series downloads.
fred = Fred(api_key=FRED_API_KEY)

def last_valid_in_bucket(s: pd.Series):
    """Return the last non-NaN value of ``s``, or NaN when there is none.

    Used as the per-month aggregation function when resampling.
    """
    valid = s.dropna()
    if valid.empty:
        return np.nan
    return valid.iloc[-1]

# Look up the official FRED frequency of every requested series
meta_rows = []
for sid in series_ids:
    try:
        row = {
            "series_id": sid,
            "frequency": str(fred.get_series_info(sid).get("frequency", "")).strip(),
        }
    except Exception as e:
        # Metadata lookup failed: record the series as UNKNOWN, which does not
        # start with "monthly" and therefore lands in the high-frequency group.
        row = {"series_id": sid, "frequency": "UNKNOWN", "error": repr(e)}
    meta_rows.append(row)

meta = pd.DataFrame(meta_rows)

# Split the ids on whether the reported frequency starts with "monthly"
is_monthly = meta["frequency"].str.lower().str.startswith("monthly")
series_id_col = meta["series_id"]
monthly_ids: List[str] = series_id_col[is_monthly].tolist()
hi_freq_ids: List[str] = series_id_col[~is_monthly].tolist()

# --- Step 2: download the series and build the two DataFrames ---
# 2a) Series that FRED reports as monthly
monthly_series: Dict[str, pd.Series] = {}
for sid in monthly_ids:
    try:
        raw = fred.get_series(
            sid,
            observation_start=date_start_str,
            observation_end=date_end_str,
        )
        raw.index = pd.to_datetime(raw.index)
        # Already monthly: re-stamp on month end, then clip to the window as a safeguard
        month_end = raw.resample("M").last()
        monthly_series[sid] = month_end.loc[DATE_START:DATE_END].rename(sid)
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining series
        print(f"[WARN] {sid}: {e}")

# One column per successfully downloaded series; empty frame if none succeeded
if monthly_series:
    df_monthly_native = pd.concat(monthly_series.values(), axis=1).sort_index()
else:
    df_monthly_native = pd.DataFrame()

# --- Step 2b: higher-frequency series -> end-of-month values (within the window) ---
hi_freq_series: Dict[str, pd.Series] = {}
for sid in hi_freq_ids:
    try:
        raw = fred.get_series(
            sid,
            observation_start=date_start_str,
            observation_end=date_end_str,
        )
        raw.index = pd.to_datetime(raw.index)
        # Month-end aggregation: keep the last valid observation of each month
        per_month = raw.resample("M").apply(last_valid_in_bucket)
        hi_freq_series[sid] = per_month.loc[DATE_START:DATE_END].rename(sid)
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining series
        print(f"[WARN] {sid}: {e}")

# One column per successfully downloaded series; empty frame if none succeeded
if hi_freq_series:
    df_monthly_eom_from_highfreq = pd.concat(hi_freq_series.values(), axis=1).sort_index()
else:
    df_monthly_eom_from_highfreq = pd.DataFrame()

# --- Merge both frames column-wise, then re-apply the date window as a final safeguard ---
fred_all = pd.concat([df_monthly_native, df_monthly_eom_from_highfreq], axis=1)
fred_all = fred_all.sort_index()
fred_all = fred_all.loc[DATE_START:DATE_END]


In [3]:
# Preview the first and last five rows with the notebook's rich display
# rather than print(), matching the display() calls used in the inspection cell.
display(fred_all.head(5))
display(fred_all.tail(5))

            FEDFUNDS    M2SL  CPIAUCSL  UNRATE   INDPRO  MICH  PPIACO  \
1982-01-31     13.22  1770.4      94.4     8.6  48.7877   5.1    99.7   
1982-02-28     14.78  1774.5      94.7     8.9  49.7839   5.2    99.8   
1982-03-31     14.68  1786.5      94.7     9.0  49.4477   4.2    99.6   
1982-04-30     14.94  1803.9      95.0     9.3  48.9913   4.7    99.6   
1982-05-31     14.45  1815.4      95.9     9.4  48.6669   3.5    99.8   

            CPIENGSL  BUSLOANS  MPRIME  ...   DGS1   DGS2   DGS5   DGS7  \
1982-01-31     100.6  354.8455   15.75  ...  14.04  14.24  14.24  14.23   
1982-02-28      98.0  360.7137   16.56  ...  14.27  14.44  14.12  14.08   
1982-03-31      96.6  363.9746   16.50  ...  14.30  14.52  14.39  14.37   
1982-04-30      94.2  371.0565   16.50  ...  13.67  13.99  13.90  13.88   
1982-05-31      95.7  375.9823   16.50  ...  12.93  13.62  13.76  13.86   

            DGS10  DGS3MO  T10Y2Y  T10Y3M     NFCI  NASDAQCOM  
1982-01-31  14.14   13.08   -0.10    1.06  1.8

In [None]:
# Local cache directory, created on first run if it does not exist yet.
CACHE_DIR = Path("data")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Cache manager from skfin, pointed at the same directory.
cm = CacheManager(cache_dir=CACHE_DIR)

# NOTE(review): FILENAME already contains CACHE_DIR, and CACHE_DIR is also
# given to CacheManager as cache_dir. Confirm that save_to_cache does not
# prepend cache_dir a second time (which would target data/data/...).
FILENAME = CACHE_DIR / "FRED_ALL.parquet"

# Persist the merged FRED frame (a single DataFrame, fred_all) via the cache manager.
cm.save_to_cache(fred_all, FILENAME)

In [7]:
# Initial inspection of the merged frame
print("Shape :", fred_all.shape)
print("\nAperçu :")
display(fred_all.head())

print("\nInfo générale :")
fred_all.info()

print("\nStatistiques descriptives :")
display(fred_all.describe().T)

# Duplicate check. On a time series a duplicate is a repeated date, not a
# repeated set of values: DataFrame.duplicated() ignores the index, so two
# distinct months with identical readings would be flagged and one dropped.
# The check is therefore done on the index.
n_duplicates = fred_all.index.duplicated().sum()
print(f"\nNombre de lignes dupliquées : {n_duplicates}")

if n_duplicates > 0:
    # Keep the first row of every repeated date
    fred_all = fred_all[~fred_all.index.duplicated(keep="first")]
    print("Doublons supprimés.")

# Missing values: absolute count, restricted to columns that have any
print("\nValeurs manquantes par colonne :")
missing = fred_all.isna().sum()
display(missing[missing > 0])

# Missing values: percentage per column, largest first
print("\nPourcentage de valeurs manquantes :")
display((fred_all.isna().mean() * 100).sort_values(ascending=False))

# Heatmap of the NaN mask (rows = dates, columns = series)
sns.heatmap(fred_all.isna(), cbar=False)
plt.title("Carte des valeurs manquantes")
plt.show()

# Column dtypes
print("\nTypes de données :")
print(fred_all.dtypes)


def _to_numeric_if_possible(column: pd.Series) -> pd.Series:
    """Convert ``column`` to a numeric dtype, or return it unchanged on failure.

    Same result as ``pd.to_numeric(column, errors='ignore')`` without relying
    on the deprecated ``errors='ignore'`` option.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Convert every column that can be parsed as numeric; leave the others as they are
fred_all = fred_all.apply(_to_numeric_if_possible)

# Check that the frame is indexed by dates
if isinstance(fred_all.index, pd.DatetimeIndex):
    print("\nIndex temporel détecté ")
    print(f"Fréquence estimée : {pd.infer_freq(fred_all.index)}")
else:
    # The original literal began with "\ ", an invalid escape sequence; a
    # newline is used instead, like the other messages in this cell.
    print("\nPas d'index temporel, vérifie la colonne 'date' ou équivalent.")

Shape : (525, 23)

Aperçu :


Unnamed: 0,FEDFUNDS,M2SL,CPIAUCSL,UNRATE,INDPRO,MICH,PPIACO,CPIENGSL,BUSLOANS,MPRIME,...,DGS1,DGS2,DGS5,DGS7,DGS10,DGS3MO,T10Y2Y,T10Y3M,NFCI,NASDAQCOM
1982-01-31,13.22,1770.4,94.4,8.6,48.7877,5.1,99.7,100.6,354.8455,15.75,...,14.04,14.24,14.24,14.23,14.14,13.08,-0.1,1.06,1.89649,188.39
1982-02-28,14.78,1774.5,94.7,8.9,49.7839,5.2,99.8,98.0,360.7137,16.56,...,14.27,14.44,14.12,14.08,14.03,13.0,-0.41,1.03,1.99715,179.43
1982-03-31,14.68,1786.5,94.7,9.0,49.4477,4.2,99.6,96.6,363.9746,16.5,...,14.3,14.52,14.39,14.37,14.18,13.99,-0.34,0.19,2.10028,175.65
1982-04-30,14.94,1803.9,95.0,9.3,48.9913,4.7,99.6,94.2,371.0565,16.5,...,13.67,13.99,13.9,13.88,13.87,13.15,-0.12,0.72,2.25169,184.7
1982-05-31,14.45,1815.4,95.9,9.4,48.6669,3.5,99.8,95.7,375.9823,16.5,...,12.93,13.62,13.76,13.86,13.71,11.97,0.09,1.74,2.57979,178.54



Info générale :
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 525 entries, 1982-01-31 to 2025-09-30
Freq: M
Data columns (total 23 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   FEDFUNDS   525 non-null    float64
 1   M2SL       525 non-null    float64
 2   CPIAUCSL   525 non-null    float64
 3   UNRATE     524 non-null    float64
 4   INDPRO     524 non-null    float64
 5   MICH       525 non-null    float64
 6   PPIACO     524 non-null    float64
 7   CPIENGSL   525 non-null    float64
 8   BUSLOANS   525 non-null    float64
 9   MPRIME     525 non-null    float64
 10  CPILFESL   525 non-null    float64
 11  TCU        524 non-null    float64
 12  UMCSENT    525 non-null    float64
 13  DGS1       525 non-null    float64
 14  DGS2       525 non-null    float64
 15  DGS5       525 non-null    float64
 16  DGS7       525 non-null    float64
 17  DGS10      525 non-null    float64
 18  DGS3MO     525 non-null    float64
 19  T10Y2Y

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
FEDFUNDS,525.0,3.945086,3.226451,0.05,0.98,4.1,5.8,14.94
M2SL,525.0,8164.966476,5984.594968,1770.4,3414.5,6079.7,11576.6,22212.5
CPIAUCSL,525.0,191.114091,60.397558,94.4,142.3,185.1,236.918,324.368
UNRATE,524.0,5.970802,1.825936,3.4,4.6,5.6,7.1,14.8
INDPRO,524.0,83.969242,18.096866,46.8713,64.335625,91.0977,99.45175,104.2115
MICH,525.0,3.17981,0.731158,0.4,2.8,3.0,3.3,6.6
PPIACO,524.0,158.638338,47.920488,99.3,118.675,139.75,197.225,280.251
CPIENGSL,525.0,164.927421,65.9062,82.1,103.1,142.9,217.488,332.281
BUSLOANS,525.0,1266.440331,743.640166,354.8455,630.8649,1016.2395,1730.8096,3035.7977
MPRIME,525.0,6.743048,2.868167,3.25,4.0,7.15,8.5,16.56



Nombre de lignes dupliquées : 0

Valeurs manquantes par colonne :


UNRATE    1
INDPRO    1
PPIACO    1
TCU       1
dtype: int64


Pourcentage de valeurs manquantes :


TCU          0.190476
UNRATE       0.190476
INDPRO       0.190476
PPIACO       0.190476
DGS1         0.000000
NFCI         0.000000
T10Y3M       0.000000
T10Y2Y       0.000000
DGS3MO       0.000000
DGS10        0.000000
DGS7         0.000000
DGS5         0.000000
DGS2         0.000000
FEDFUNDS     0.000000
UMCSENT      0.000000
M2SL         0.000000
CPILFESL     0.000000
MPRIME       0.000000
BUSLOANS     0.000000
CPIENGSL     0.000000
MICH         0.000000
CPIAUCSL     0.000000
NASDAQCOM    0.000000
dtype: float64

AttributeError: 'RcParams' object has no attribute '_get'

In [None]:

# Replace the DGS3MO column by the spread FEDFUNDS - DGS3MO.
# NOTE(review): `df` is not defined in any cell visible above (the merged frame
# is named `fred_all`) — confirm where `df` comes from before relying on this cell.
# NOTE(review): the first statement adds the column to the existing `df` object
# in place, and the cell cannot be re-run as is: once DGS3MO has been dropped,
# df["DGS3MO"] raises KeyError.
df["FEDFUNDS_minus_DGS3MO"] = df["FEDFUNDS"] - df["DGS3MO"]
df = df.drop(columns=["DGS3MO"])


  from .autonotebook import tqdm as notebook_tqdm
