In [175]:
import numpy as np
import pandas as pd

def load_data(file_path):
    # Charger les données : [temps, prix]
    data = pd.read_csv(file_path, skiprows=4)

    data.columns = ["Date", "Price"]

    #data["Date"] = pd.to_datetime(data["Date"], format="%m/%d/%Y")

    data = data.sort_values(by="Date")

    return data.to_numpy()

def select_sample(data, time_start, time_end):
    # Sélectionne l’échantillon principal
    mask = (data[:,0] >= time_start) & (data[:,0] <= time_end)
    return data[mask]

data = load_data("WTI_Spot_Price_daily.csv")

sample = select_sample(data, "2010-01-01", "2019-12-31")

In [176]:
import numpy as np
import pandas as pd

def load_data(file_path):
    # Charger les données : [temps, prix]
    data = pd.read_csv(file_path, skiprows=4)
    
    data.columns = ["Date", "Price"]
    
    # Convertir la colonne "Date" en datetime64 de NumPy
    data['Date'] = pd.to_datetime(data['Date'], format="%m/%d/%Y").values.astype('datetime64[D]')
    
    data = data.sort_values(by="Date")
    
    return data.to_numpy()

def select_sample(data, time_start, time_end):
    # Convertir time_start et time_end en datetime64
    time_start = np.datetime64(pd.to_datetime(time_start, format="%m/%d/%Y"))
    time_end = np.datetime64(pd.to_datetime(time_end, format="%m/%d/%Y"))
    
    # Sélectionne l’échantillon principal
    mask = (data[:,0] >= time_start) & (data[:,0] <= time_end)

    sample_data = data[mask]
    sample_data[:,0] = np.linspace(1, len(sample_data), len(sample_data))


    return sample_data.astype(float)

def generate_subintervals(data):
    """
    Génère les sous-intervalles suivant la logique du pseudo-code.
    - On calcule delta comme le max entre ((time_end - time_start)*0.75/21 jours) et 21 jours.
    - subinterval_end varie de time_end à time_end - 6 semaines (42 jours) par pas de 7 jours.
    - subinterval_start varie de time_start à time_end-(time_end-time_start)/4 par pas de delta.
    """

    time_start = data[0,0]
    time_end = data[-1,0]

    three_weeks = 21.0  # jours
    six_weeks = 42.0
    one_week = 7.0
    total_days = (time_end - time_start)
    delta = max((total_days * 0.75) / three_weeks, three_weeks)

    subintervals = []
    # Pour simplifier, on considère data[:,0] en jours continus
    for sub_end in np.arange(time_end, time_end - six_weeks, -one_week):
        for sub_start in np.arange(time_start, time_end - total_days/4, delta):
            mask = (data[:,0] >= sub_start) & (data[:,0] <= sub_end)
            sub_data = data[mask]            
            if len(sub_data) > 0:
                subintervals.append((sub_start, sub_end, sub_data))
    return subintervals




In [170]:
from numba import jit

def initialize_population(param_bounds, population_size):
    low = [v[0] for v in param_bounds.values()]
    high = [v[1] for v in param_bounds.values()]
    return np.random.uniform(low, high, size=(population_size, len(param_bounds)))

def selection(population, fitness):
    # Sélection par tournoi simple
    selected = []
    for _ in range(len(population)):
        i, j = np.random.choice(len(population), 2, replace=False)
        selected.append(population[i] if fitness[i] < fitness[j] else population[j])
    return np.array(selected)

def crossover(parents, prob):
    offspring = []
    for i in range(0, len(parents), 2):
        p1, p2 = parents[i], parents[i+1]
        if np.random.rand() < prob:
            cp = np.random.randint(1, len(p1))
            child1 = np.concatenate((p1[:cp], p2[cp:]))
            child2 = np.concatenate((p2[:cp], p1[cp:]))
            offspring += [child1, child2]
        else:
            offspring += [p1, p2]
    return np.array(offspring)

def mutate(offspring, prob, param_bounds):
    keys = list(param_bounds.keys())
    for i in range(len(offspring)):
        if np.random.rand() < prob:
            mp = np.random.randint(len(param_bounds))
            low, high = param_bounds[keys[mp]]
            offspring[i, mp] = np.random.uniform(low, high)
    return offspring

def immigration_operation(populations, fitness_values):
    # Meilleur de la pop m remplace le pire de la pop m+1
    for m in range(len(populations) - 1):
        f = fitness_values[m]
        best_idx = np.argmin(f)
        best_chrom = populations[m][best_idx]

        # Calculer fitness pour population m+1
        f_next = fitness_values[m+1]
        worst_idx = np.argmax(f_next)
        populations[m+1][worst_idx] = best_chrom
    return populations

def calculate_f_g(t_c, t, alpha, omega, phi):
    """
    Calculate the vectors f_j and g_j for a given t_c, alpha, omega, and phi.
    """
    
    f = (t_c - t) ** alpha
    g = f * np.cos(omega * np.log(t_c - t) + phi)
    return f, g

def predict_price(t, A, B, C, t_c, alpha, omega, phi):
    """
    Predict price using the LPPL model.
    
    Parameters:
    - t : ndarray : Time points
    - A, B, C : float : Linear parameters
    - t_c : float : Critical time
    - alpha : float : Power-law exponent
    - omega : float : Angular frequency
    - phi : float : Phase

    Returns:
    - predicted : ndarray : Predicted prices
    """
    # Calculate f and g
    f = (t_c - t) ** alpha
    g = f * np.cos(omega * np.log(t_c - t) + phi)
    
    # Predicted price using the LPPL model
    predicted = A + B * f + C * g
    
    return predicted

def calculate_linear_parameters(t, y, t_c, alpha, omega, phi):
    """
    Calculate the linear parameters A, B, and C using the LPPL model.
    """
    # Calculate f and g
    f, g = calculate_f_g(t_c, t, alpha, omega, phi)
    
    # Construct the V matrix
    V = np.column_stack((np.ones_like(f), f, g))
        
    # Compute the normal equations
    # (V_T @ V)^(-1) @ (V_T @ y)
    params = np.linalg.inv(V.T @ V) @ (V.T @ y)  # A, B, C
    
    return params  # [A, B, C]

def RSS(chromosome, data):

    t = np.array(data[:,0])
    y = np.array(data[:,1])


    t_c, alpha, omega, phi = chromosome

    A, B, C = calculate_linear_parameters(t, y, t_c, alpha, omega, phi)

    # Predicted price using the LPPL model
    predicted = predict_price(t, A, B, C, t_c, alpha, omega, phi)
    
    # Residual Sum of Squares
    rss = np.sum((y - predicted) ** 2)

    return rss

def calculate_fitness(population, data):
    fitness = np.empty(len(population))
    for i in range(len(population)):
        fitness[i] = RSS(population[i], data)
    return fitness


In [177]:
# Exemple d'utilisation
file_path = 'WTI_Spot_Price_daily.csv'
data = load_data(file_path)

time_start = "04/01/2003"  # date de début (exemple)
time_end = "11/14/2016"    # date de fin (exemple)

sample = select_sample(data, time_start, time_end)

subintervals = generate_subintervals(sample)

num_populations = 10
population_size = 100
MaxGen = 500
StopGen = 50
selection_probability = 0.9

# Paramètres globaux
best_solutions = []

# Boucle sur les sous-intervalles
for (sub_start, sub_end, sub_data) in subintervals:
    # Définir les bornes des paramètres pour le sous-intervalle actuel
    param_bounds = {
        "t_c": (sub_end, sub_end + 365*10),  # 10 ans après la fin du sample en jours
        "omega": (0, 40),
        "phi": (0, 2*np.pi),
        "alpha": (0.1, 0.9)
    }

    # Définir le crossover et mutation probability par population
    crossover_prob = np.random.uniform(0.001, 0.05, size=num_populations)
    mutation_prob = np.random.uniform(0.001, 0.05, size=num_populations)

    # Initialiser les populations
    populations = [initialize_population(param_bounds, population_size) for _ in range(num_populations)]

    # Calculer fitness initial
    fitness_values = []
    bestObjV = np.inf
    bestChrom = None

    for m in range(num_populations):
        fit = calculate_fitness(populations[m], sub_data)
        fitness_values.append(fit)
        # Minimum fitness pour cette pop
        local_min = np.min(fit)
        if local_min < bestObjV:
            bestObjV = local_min
            bestChrom = populations[m][np.argmin(fit)]

    # gen, gen0
    gen = 1
    gen0 = 0

    # Boucle du MPGA
    while gen0 < StopGen and gen <= MaxGen:

        print(f"Generation: {gen} | Best RSS: {bestObjV}")
        # Opérations génétiques
        new_populations = []
        new_fitness_values = []

        for m in range(num_populations):
            # Sélection
            fit = fitness_values[m]
            selected = selection(populations[m], fit)
            # Crossover
            offspring = crossover(selected, crossover_prob[m])
            # Mutation
            mutated = mutate(offspring, mutation_prob[m], param_bounds)
            new_populations.append(mutated)

        # Immigration
        populations = immigration_operation(new_populations, fitness_values)

        # Recalculer fitness
        fitness_values = []
        for m in range(num_populations):
            fit = np.array([RSS(ch, sub_data) for ch in populations[m]])
            fitness_values.append(fit)

        # Trouver le meilleur global du loop courant
        newbestObjV = np.inf
        for m in range(num_populations):
            local_min = np.min(fitness_values[m])
            if local_min < newbestObjV:
                newbestObjV = local_min
                newbestChrom = populations[m][np.argmin(fitness_values[m])]


        if newbestObjV < bestObjV:
            bestObjV = newbestObjV
            bestChrom = newbestChrom
            gen0 = 0
        else:
            gen0 += 1

        gen += 1

    # Sauvegarder le résultat pour ce sous-intervalle
    best_solutions.append((sub_start, sub_end, bestObjV, bestChrom))



Generation: 1 | Best RSS: 855410.5364557912
Generation: 2 | Best RSS: 855410.5364557912
Generation: 3 | Best RSS: 855410.5364557912
Generation: 4 | Best RSS: 855410.5364557912
Generation: 5 | Best RSS: 855410.5364557912
Generation: 6 | Best RSS: 855410.5364557912
Generation: 7 | Best RSS: 850769.2339469279
Generation: 8 | Best RSS: 850769.2339469279
Generation: 9 | Best RSS: 850769.2339469279
Generation: 10 | Best RSS: 850769.2339469279
Generation: 11 | Best RSS: 843671.0214954212
Generation: 12 | Best RSS: 843671.0214954212
Generation: 13 | Best RSS: 843671.0214954212
Generation: 14 | Best RSS: 842626.2516803108
Generation: 15 | Best RSS: 842626.2516803108
Generation: 16 | Best RSS: 842626.2516803108
Generation: 17 | Best RSS: 842626.2516803108
Generation: 18 | Best RSS: 842626.2516803108
Generation: 19 | Best RSS: 842626.2516803108
Generation: 20 | Best RSS: 842626.2516803108
Generation: 21 | Best RSS: 842626.2516803108
Generation: 22 | Best RSS: 842626.2516803108
Generation: 23 | Be

  params = np.linalg.inv(V.T @ V) @ (V.T @ y)  # A, B, C


Generation: 1 | Best RSS: 843517.968548886
Generation: 2 | Best RSS: 843517.968548886
Generation: 3 | Best RSS: 843517.968548886
Generation: 4 | Best RSS: 843517.968548886
Generation: 5 | Best RSS: 843517.968548886
Generation: 6 | Best RSS: 833847.5925519131
Generation: 7 | Best RSS: 833847.5925519131
Generation: 8 | Best RSS: 833847.5925519131
Generation: 9 | Best RSS: 833847.5925519131
Generation: 10 | Best RSS: 833847.5925519131
Generation: 11 | Best RSS: 833847.5925519131
Generation: 12 | Best RSS: 833847.5925519131
Generation: 13 | Best RSS: 833847.5925519131
Generation: 14 | Best RSS: 833847.5925519131
Generation: 15 | Best RSS: 833847.5925519131
Generation: 16 | Best RSS: 833847.5925519131
Generation: 17 | Best RSS: 833847.5925519131
Generation: 18 | Best RSS: 833847.5925519131
Generation: 19 | Best RSS: 833847.5925519131
Generation: 20 | Best RSS: 832812.9177291319
Generation: 21 | Best RSS: 832812.9177291319
Generation: 22 | Best RSS: 832812.9177291319
Generation: 23 | Best RS

In [193]:
data = best_solutions

# Convertir la liste de tuples en DataFrame
df = pd.DataFrame(data, columns=["sub_start", "sub_end", "bestObjV", "bestChjson"])

# Séparer l'array de la colonne 4 en 4 colonnes distinctes
df[['t_c', 'omega', 'phi', 'alpha']] = pd.DataFrame(df['bestChjson'].tolist(), index=df.index)

# Supprimer la colonne originale 'ArrayCol' si nécessaire
df.drop(columns=['bestChjson'], inplace=True)

df.to_csv("results.csv", index=False)

In [None]:
# Exemple d'utilisation
file_path = 'WTI_Spot_Price_daily.csv'
data = load_data(file_path)

time_start = "04/01/2003"  # date de début (exemple)
time_end = "11/14/2016"    # date de fin (exemple)

sample = select_sample(data, time_start, time_end)

subintervals = generate_subintervals(sample)

num_populations = 5
population_size = 100
MaxGen = 100
StopGen = 10
selection_probability = 0.9

# Paramètres globaux
best_solutions = []

# Boucle sur les sous-intervalles
for (sub_start, sub_end, sub_data) in subintervals:
    # Définir les bornes des paramètres pour le sous-intervalle actuel
    param_bounds = {
        "t_c": (sub_end, sub_end + 365*10),  # 10 ans après la fin du sample en jours
        "omega": (0, 40),
        "phi": (0, 2*np.pi),
        "alpha": (0.1, 0.9)
    }

    # Définir le crossover et mutation probability par population
    crossover_prob = np.random.uniform(0.001, 0.05, size=num_populations)
    mutation_prob = np.random.uniform(0.001, 0.05, size=num_populations)

    # Initialiser les populations
    populations = [initialize_population(param_bounds, population_size) for _ in range(num_populations)]

    # Calculer fitness initial
    fitness_values = []
    bestObjV = np.inf
    bestChrom = None

    for m in range(num_populations):
        fit = calculate_fitness(populations[m], sub_data)
        fitness_values.append(fit)
        # Minimum fitness pour cette pop
        local_min = np.min(fit)
        if local_min < bestObjV:
            bestObjV = local_min
            bestChrom = populations[m][np.argmin(fit)]

    # gen, gen0
    gen = 1
    gen0 = 0

    # Boucle du MPGA
    while gen0 < StopGen and gen <= MaxGen:
        # Opérations génétiques
        new_populations = []
        new_fitness_values = []

        for m in range(num_populations):
            # Sélection
            fit = fitness_values[m]
            selected = selection(populations[m], fit)
            # Crossover
            offspring = crossover(selected, crossover_prob[m])
            # Mutation
            mutated = mutate(offspring, mutation_prob[m], param_bounds)
            new_populations.append(mutated)

        # Immigration
        populations = immigration_operation(new_populations, fitness_values)

        # Recalculer fitness
        fitness_values = []
        for m in range(num_populations):
            print(f"Calculating fitness for population {m}...")
            fit = np.array([RSS(ch, sub_data) for ch in populations[m]])
            fitness_values.append(fit)

        # Trouver le meilleur global du loop courant
        current_best = np.inf
        for m in range(num_populations):
            local_min = np.min(fitness_values[m])
            if local_min < current_best:
                current_best = local_min

        if current_best < bestObjV:
            bestObjV = current_best
            gen0 = 0
        else:
            gen0 += 1

        gen += 1

    # Sauvegarder le résultat pour ce sous-intervalle
    best_solutions.append((sub_start, sub_end, bestObjV, bestChrom))


In [172]:
best_solutions

[(1.0,
  3427.0,
  837986.5726893762,
  array([3.78267668e+03, 5.58786971e-01, 3.81780106e-01, 3.22489512e-01])),
 (123.35714285714286,
  3427.0,
  848532.9596357916,
  array([5.41435390e+03, 2.04611505e-01, 2.27445319e+00, 8.60069319e-01])),
 (245.71428571428572,
  3427.0,
  777469.596100403,
  array([5.27859515e+03, 1.17867027e+00, 1.35788563e+00, 4.13169915e-01])),
 (368.07142857142856,
  3427.0,
  809648.142023571,
  array([6.79343461e+03, 1.11376129e+00, 2.98972261e+00, 6.34144207e-01])),
 (490.42857142857144,
  3427.0,
  762954.4788408489,
  array([3.99532106e+03, 1.13196876e+00, 2.61846949e+00, 5.47548651e-01])),
 (612.7857142857143,
  3427.0,
  739191.4137826505,
  array([4.12520304e+03, 6.04555501e-01, 9.49566551e-01, 2.84510017e-01])),
 (735.1428571428571,
  3427.0,
  729885.5881738679,
  array([5.26401989e+03, 7.08469353e-01, 3.96112834e+00, 4.23793194e-01])),
 (857.5,
  3427.0,
  753549.9994574463,
  array([4.74171776e+03, 3.65353667e-01, 6.04093193e+00, 8.79963728e-01])),


In [None]:
sub_data = sample

bestChrom = [4000, 0.5, 6.0, 3.14]

RSS(bestChrom, sub_data)

(3427,)
4000 0.5 6.0 3.14


TypeError: loop of ufunc does not support argument 0 of type int which has no callable log method

In [118]:
np.log(np.array(sample[:,1]))

TypeError: loop of ufunc does not support argument 0 of type float which has no callable log method

In [130]:
import numpy as np

# Exemple de tableau NumPy avec des valeurs positives
sample = sample.astype('float64')

# Appliquer np.log sur la deuxième colonne
log_values = np.log(sample[:, 1])
print(log_values)

[3.38371207 3.35165694 3.36901848 ... 3.79818219 3.770229   3.76792166]


In [126]:
sample

# Exemple de tableau NumPy avec des valeurs positives
sample_test = np.array([[1, 2], [2, 3], [3, 4]])
sample_test


array([[1, 2],
       [2, 3],
       [3, 4]])

In [129]:
sample.astype('float64')

array([[1.000e+00, 2.948e+01],
       [2.000e+00, 2.855e+01],
       [3.000e+00, 2.905e+01],
       ...,
       [3.425e+03, 4.462e+01],
       [3.426e+03, 4.339e+01],
       [3.427e+03, 4.329e+01]])

In [64]:
len(subintervals)

126

In [37]:
time_end = 9806
time_start = 0

three_weeks = 21.0  # jours
six_weeks = 42.0
one_week = 7.0
total_days = (time_end - time_start)
delta = max((total_days * 0.75) / three_weeks, three_weeks)

subintervals = []
# Pour simplifier, on considère data[:,0] en jours continus
for sub_end in np.arange(time_end, time_end - six_weeks, -one_week):
    for sub_start in np.arange(time_start, time_end - total_days/4, delta):
        sub_start = np.floor(sub_start)
        sub_end = np.floor(sub_end)

        sub_data = data[(data[:,0] >= sub_start) & (data[:,0] <= sub_end)]

0.0 9806.0
350.2142857142857 9806.0
700.4285714285714 9806.0
1050.642857142857 9806.0
1400.857142857143 9806.0
1751.0714285714287 9806.0
2101.285714285714 9806.0
2451.5 9806.0
2801.714285714286 9806.0
3151.9285714285716 9806.0
3502.1428571428573 9806.0
3852.357142857143 9806.0
4202.571428571428 9806.0
4552.785714285715 9806.0
4903.0 9806.0
5253.214285714286 9806.0
5603.428571428572 9806.0
5953.642857142857 9806.0
6303.857142857143 9806.0
6654.071428571428 9806.0
7004.285714285715 9806.0
0.0 9799.0
350.2142857142857 9799.0
700.4285714285714 9799.0
1050.642857142857 9799.0
1400.857142857143 9799.0
1751.0714285714287 9799.0
2101.285714285714 9799.0
2451.5 9799.0
2801.714285714286 9799.0
3151.9285714285716 9799.0
3502.1428571428573 9799.0
3852.357142857143 9799.0
4202.571428571428 9799.0
4552.785714285715 9799.0
4903.0 9799.0
5253.214285714286 9799.0
5603.428571428572 9799.0
5953.642857142857 9799.0
6303.857142857143 9799.0
6654.071428571428 9799.0
7004.285714285715 9799.0
0.0 9792.0
350.2

In [41]:
data

array([[Timestamp('1986-01-02 00:00:00'), 25.56],
       [Timestamp('1986-01-03 00:00:00'), 26.0],
       [Timestamp('1986-01-06 00:00:00'), 26.53],
       ...,
       [Timestamp('2024-12-05 00:00:00'), 68.58],
       [Timestamp('2024-12-06 00:00:00'), 68.58],
       [Timestamp('2024-12-09 00:00:00'), 68.65]], dtype=object)

In [40]:
len(subintervals)

126