In [1]:
import pandas as pd 
import numpy as np

In [3]:
df=pd.read_csv("Natixis.csv",sep=";")
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df['Cours de Natixis'] = (
    df['Cours de Natixis']
    .astype(str)                       # transformer en string pour manipuler
    .str.replace("€", "", regex=False) # enlever symbole euro si présent
    .str.replace(" ", "", regex=False) # enlever les espaces
    .str.replace(",", ".", regex=False) # remplacer virgule par point
)

df['Cours de Natixis'] = pd.to_numeric(df['Cours de Natixis'], errors='coerce')

In [5]:
df['Rendement'] = (
    df['Cours de Natixis'].shift(-1) - df['Cours de Natixis']
) / df['Cours de Natixis']

In [7]:
df_sorted = df.sort_values(by="Rendement", ascending=True)

In [15]:
df_sorted["Gain"] = 0
df_sorted["Perte"] = 0

df_sorted.loc[df_sorted["Rendement"] >= 0, "Gain"] = df_sorted["Rendement"]
df_sorted.loc[df_sorted["Rendement"] < 0, "Perte"] = df_sorted["Rendement"]

In [17]:
df_sorted.head()

Unnamed: 0,Date,Cours de Natixis,Rendement,Gain,Perte
377,2016-06-23,4.15,-0.171325,0.0,-0.171325
344,2016-05-09,4.386,-0.069083,0.0,-0.069083
1015,2018-12-18,4.453,-0.063328,0.0,-0.063328
357,2016-05-26,4.752,-0.061448,0.0,-0.061448
404,2016-08-01,3.643,-0.060664,0.0,-0.060664


In [19]:
df_sorted.tail()

Unnamed: 0,Date,Cours de Natixis,Rendement,Gain,Perte
163,2015-08-24,5.405,0.064755,0.064755,0.0
402,2016-07-28,3.425,0.075912,0.075912,0.0
325,2016-04-12,4.437,0.078431,0.078431,0.0
590,2017-04-21,5.793,0.090281,0.090281,0.0
1022,2018-12-31,4.119,,0.0,0.0


In [23]:
import math

gains = df_sorted["Gain"][df_sorted["Gain"] > 0].values
pertes = np.abs(df_sorted["Perte"][df_sorted["Perte"] < 0].values)  # valeur absolue

# Trier les séries en ordre croissant (nécessaire pour la formule)
gains_sorted = np.sort(gains)
pertes_sorted = np.sort(pertes)

def pickands_estimator(data_sorted, k):
    """Calcule l'estimateur de Pickands pour une série triée (croissante).
       Retourne np.nan si 4*k >= n (condition d'utilisation)."""
    n = len(data_sorted)
    if 4 * k >= n or k <= 0:
        return np.nan
    # indices : Python 0-based, correspondances avec la notation théorique
    x1 = data_sorted[n - k]      # X_{n-k+1}
    x2 = data_sorted[n - 2*k]    # X_{n-2k+1}
    x4 = data_sorted[n - 4*k]    # X_{n-4k+1}
    # protéger contre division par zéro ou valeurs non valides
    denom = (x2 - x4)
    numer = (x1 - x2)
    if denom <= 0 or numer <= 0:
        return np.nan
    xi_hat = (1.0 / np.log(2.0)) * np.log(numer / denom)
    return xi_hat

# --- Définition des k selon sqrt(n) et ln(n) ---
def k_grid_by_root_and_log(n, multipliers_root=[0.5,1,2], multipliers_log=[1,2,3]):
    """Construit une liste de k basée sur sqrt(n) et ln(n) en appliquant quelques multiplicateurs.
       Renvoie une sorted unique list d'entiers k >= 1."""
    k_set = set()
    sqrt_n = math.sqrt(n)
    ln_n = math.log(n) if n > 1 else 1.0
    for m in multipliers_root:
        k = int(max(1, round(m * sqrt_n)))
        k_set.add(k)
    for m in multipliers_log:
        k = int(max(1, round(m * ln_n)))
        k_set.add(k)
    # ajouter quelques valeurs voisines pour diagnostic
    for delta in (-2, -1, 1, 2):
        for base in list(k_set):
            kk = base + delta
            if kk >= 1:
                k_set.add(kk)
    k_list = sorted(k_set)
    return k_list

# Préparer k pour chaque queue (doit tenir compte de la taille n)
n_g = len(gains_sorted)
n_p = len(pertes_sorted)

k_vals_gains = k_grid_by_root_and_log(n_g, multipliers_root=[0.5,1,2], multipliers_log=[1,2])
k_vals_pertes = k_grid_by_root_and_log(n_p, multipliers_root=[0.5,1,2], multipliers_log=[1,2])

# Filtrer k pour respecter 4k < n (sinon np.nan apparaîtra mais on peut nettoyer)
k_vals_gains = [k for k in k_vals_gains if 4*k < n_g]
k_vals_pertes = [k for k in k_vals_pertes if 4*k < n_p]

# --- Calcul des estimateurs ---
xi_gains = [pickands_estimator(gains_sorted, k) for k in k_vals_gains]
xi_pertes = [pickands_estimator(pertes_sorted, k) for k in k_vals_pertes]

# --- Présentation synthétique sous forme de DataFrame ---
df_gains = pd.DataFrame({"k": k_vals_gains, "xi_pickands": xi_gains})
df_pertes = pd.DataFrame({"k": k_vals_pertes, "xi_pickands": xi_pertes})

print("Gains (taille n = {}) :".format(n_g))
print(df_gains.to_string(index=False))
print("\nPertes (val.abs) (taille n = {}) :".format(n_p))
print(df_pertes.to_string(index=False))

Gains (taille n = 498) :
 k  xi_pickands
 3     0.187453
 4     0.265881
 5     0.617418
 6     0.577234
 7    -0.514279
 8    -0.200566
 9     0.037896
10    -0.221849
11    -0.494691
12    -0.481445
13    -0.163641
14    -0.160102
15    -0.375464
19    -0.187875
20     0.269286
21     0.481509
22     0.719551
23     0.734107
24     0.841917
25     0.770575
42    -0.428528
43    -0.411161
44    -0.579568
45    -0.544749
46    -0.587489
47    -0.637922
48    -0.695223

Pertes (val.abs) (taille n = 519) :
 k  xi_pickands
 3    -0.288217
 4    -0.114751
 5    -0.594509
 6    -0.508972
 7    -0.681349
 8    -0.657596
 9    -0.696109
10    -0.224965
11    -0.375021
12    -0.586676
13    -0.334516
14    -0.264639
15    -0.158809
16     0.096698
20     0.174917
21     0.308702
22     0.408191
23     0.390648
24     0.359029
25     0.374371
26     0.200996
43     0.013559
44     0.007342
45     0.038555
46     0.147013
47     0.270476
48     0.228684
49     0.261319
