In [5]:
import numpy as np

In [8]:
# Demo: clipping at a lower bound of 0 replaces the negative entries with 0.
x = np.asarray([2, 3, 1, -2, -1, 4])
np.clip(x, 0, None)

array([2, 3, 1, 0, 0, 4])

In [3]:
def cost_function(X, y, model, risk_reward_ratio = 2, leverage = 2, transaction_cost=0.01):
    """Compute the net profit of trading on ``model``'s predictions.

    For every sample the position size is ``leverage`` divided by the
    probability the model assigns to the class it predicted. A correct
    prediction adds ``exposure * y[i] * risk_reward_ratio`` to the profit, a
    wrong one subtracts ``exposure * (1 - y[i]) * risk_reward_ratio``, where
    ``exposure = position_size * X[i]['Price']``. Transaction costs
    (``exposure * transaction_cost``) are accumulated for every trade and
    subtracted at the end.

    Parameters:
    X: Samples; must be accepted by ``model.predict`` / ``model.predict_proba``
        and support ``X[i]['Price']``.
    y: True labels, indexable by position (the formulas treat them as 0/1).
    model: Object exposing ``predict(X)`` and ``predict_proba(X)``.
    risk_reward_ratio (float): Multiplier applied to both gains and losses.
    leverage (float): Numerator of the position-size formula.
    transaction_cost (float): Cost per trade as a fraction of the exposure.

    Returns:
    float: Total profit minus total transaction costs.
    """
    y_pred = model.predict(X)
    # The probabilities do not depend on the loop index: compute them once
    # instead of re-running the model for every trade.
    probabilities = model.predict_proba(X)

    total_cost = 0
    total_profit = 0

    for i in range(len(y_pred)):
        # Probability of the predicted class: column 1 when class 1 is
        # predicted, column 0 otherwise.
        if y_pred[i] == 1:
            predicted_class_proba = probabilities[i][1]
        else:
            predicted_class_proba = probabilities[i][0]

        position_size = leverage / predicted_class_proba
        exposure = position_size * X[i]['Price']

        # Winning trade: book the potential reward; losing trade: book the
        # potential loss.
        if y_pred[i] == y[i]:
            total_profit += exposure * y[i] * risk_reward_ratio
        else:
            total_profit -= exposure * (1 - y[i]) * risk_reward_ratio

        # Transaction cost is paid on every trade, won or lost.
        total_cost += exposure * transaction_cost

    # The original computed this value but never returned it.
    return total_profit - total_cost

Pour que la fonction de coût puisse être optimisée avec GridSearchCV, il faut la définir sous la forme d'une fonction qui prend comme arguments les hyperparamètres à optimiser et les données d'entraînement. Voici un exemple de code qui illustre cela :

In [2]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

def custom_loss(y_true, y_pred, risk_reward_ratio, leverage):
    """Score predictions as a trading gain (higher is better).

    The element-wise difference ``y_pred - y_true`` is split into its negative
    part (potential loss) and its positive part, which is multiplied by
    ``risk_reward_ratio`` (potential gain). When more than half of the
    predictions exceed the true values, the gains are further multiplied by
    ``leverage``.

    Parameters:
    y_true (array-like): Reference values.
    y_pred (array-like): Predicted values.
    risk_reward_ratio (float): Multiplier applied to the positive differences.
    leverage (float): Extra multiplier applied to the gains when the share of
        ``y_pred > y_true`` is above 0.5.

    Returns:
    The sum of the (possibly leveraged) gains plus the sum of the losses.
    """
    # Accept lists / Series as well as ndarrays: the ndarray .clip(min=, max=)
    # keywords used below are not available on other containers.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    diff = y_pred - y_true

    # Potential loss if the stop loss is hit (non-positive part of diff)
    potential_loss = diff.clip(max = 0)

    # Potential gain if the take profit is hit (non-negative part of diff)
    potential_gain = diff.clip(min = 0) * risk_reward_ratio

    # Apply leverage to the gains when the winning share is high
    proba_win = (y_pred > y_true).mean()
    if proba_win > 0.5:
        # Not in-place: `*=` on an integer array with a float leverage raises
        # a casting error.
        potential_gain = potential_gain * leverage

    # Final gain (losses are already negative, hence the addition)
    return potential_gain.sum() + potential_loss.sum()


# Scorer built from custom_loss with fixed risk_reward_ratio=2 and leverage=1;
# a larger score is treated as better.
scorer = make_scorer(
    custom_loss,
    greater_is_better=True,
    risk_reward_ratio=2,
    leverage=1,
)

def optimize_cost_function(X_train, y_train, risk_reward_ratio, leverage):
    """Cross-validated mean of ``custom_loss`` for a random forest classifier.

    Parameters:
    X_train: Training features passed to ``cross_val_score``.
    y_train: Training targets passed to ``cross_val_score``.
    risk_reward_ratio (float): Forwarded to ``custom_loss`` through the scorer.
    leverage (float): Forwarded to ``custom_loss`` through the scorer.

    Returns:
    float: Mean of the 6-fold cross-validation scores.
    """
    # Local import: the notebook's visible cells never import the classifier.
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier()

    # Build the scorer from the arguments so that they actually influence the
    # score; the original used a module-level scorer with fixed values and
    # ignored both parameters.
    param_scorer = make_scorer(custom_loss, greater_is_better=True,
                               risk_reward_ratio=risk_reward_ratio,
                               leverage=leverage)

    # No explicit model.fit() beforehand: cross_val_score fits its own clone
    # of the estimator on every fold.
    scores = cross_val_score(model, X_train, y_train,
                             cv = 6, scoring = param_scorer)

    return scores.mean()

In [4]:
from sklearn.model_selection import GridSearchCV

# Candidate values explored for each parameter of the custom loss.
param_grid = dict(
    risk_reward_ratio=[1, 2, 3],
    leverage=[1, 3, 5, 10],
)

In [None]:
# Grid search over `param_grid`, scored with custom_loss, using 5-fold CV on
# all available cores.
# NOTE(review): `model`, `X_train` and `y_train` are not defined in the visible
# cells — confirm they exist before this cell runs.
# NOTE(review): custom_loss declares `risk_reward_ratio` and `leverage` as
# required parameters, but this scorer supplies neither, so scoring is expected
# to fail with a TypeError — confirm.
# NOTE(review): GridSearchCV applies the grid keys to the estimator; unless
# `model` exposes `risk_reward_ratio` / `leverage` parameters, those keys will
# presumably be rejected — verify.
estimator = GridSearchCV(
    model, param_grid = param_grid,
    scoring = make_scorer(custom_loss, greater_is_better=True),
    cv = 5,
    n_jobs = -1
)

estimator.fit(X_train, y_train)

# Display the best hyperparameters and the best score
print("Meilleurs hyperparamètres : ", estimator.best_params_)
print("Meilleur score : ", estimator.best_score_)

## Avec le critère de Kelly

In [None]:
import numpy as np

def kelly_position_size(prob_win, prob_loss, reward_to_risk_ratio, capital):
    """
    Compute the position size given by the Kelly criterion.

    The Kelly fraction is ``(p * b - q) / b`` with ``p = prob_win``,
    ``q = prob_loss`` and ``b = reward_to_risk_ratio``; it is negative when
    ``p * b < q``.

    Parameters:
    prob_win (float): Probability of winning.
    prob_loss (float): Probability of losing.
    reward_to_risk_ratio (float): Reward-to-risk ratio (must be non-zero).
    capital (float): Available capital.

    Returns:
    float: The Kelly fraction multiplied by ``capital``, i.e. an amount in the
        same unit as ``capital`` (not a percentage).
    """
    b = reward_to_risk_ratio
    p = prob_win
    q = prob_loss

    # The original assigned `Kelly_fraction` but read `kelly_fraction`, which
    # raised NameError on every call.
    kelly_fraction = ((p * b) - q) / b
    position_size = kelly_fraction * capital

    return position_size

def compute_cost(predictions, actual, prob_win, reward_to_risk_ratio, capital):
    """Net gain of the predictions divided by the Kelly stake times capital.

    The element-wise product ``actual * predictions`` is split into its
    non-negative part (scaled by ``reward_to_risk_ratio``) and its non-positive
    part; their sums are added to form the net gain. The result is that net
    gain divided by ``kelly_position_size(...) * capital``.
    """
    signed_outcome = actual * predictions

    gains = signed_outcome.clip(min = 0) * reward_to_risk_ratio
    losses = signed_outcome.clip(max = 0)
    net_gain = np.sum(gains) + np.sum(losses)

    # Losing probability is taken as the complement of prob_win.
    stake = kelly_position_size(prob_win, 1 - prob_win, reward_to_risk_ratio, capital)

    # NOTE(review): the stake returned by kelly_position_size is already
    # multiplied by capital, so this denominator scales with capital squared
    # and is zero whenever the Kelly fraction is zero — confirm this is intended.
    return net_gain / (stake * capital)
    

In [None]:
from sklearn.model_selection import GridSearchCV

def cost_function_wrapper(params):
    """Evaluate ``compute_cost`` for one ``(prob_win, reward_to_risk_ratio, capital)`` triple.

    ``predictions`` and ``actual`` are read from the enclosing namespace and
    must be defined before this function is called.

    Parameters:
    params: Iterable of exactly three values, unpacked as
        ``prob_win, reward_to_risk_ratio, capital``.

    Returns:
    The value returned by ``compute_cost``.
    """
    prob_win, reward_to_risk_ratio, capital = params
    # The original passed the truncated name `reward_to_risk_rat` and omitted
    # the required `capital` argument.
    return compute_cost(predictions, actual, prob_win, reward_to_risk_ratio, capital)

# Define the parameters to optimize
param_grid = {
    'prob_win': [0.5, 0.6, 0.7, 0.8, 0.9],
    'reward_to_risk_ratio': [1, 2, 3, 4, 5],
    'capital': [1000, 5000, 10000, 50000]
}

# Create the GridSearchCV object
# NOTE(review): `estimator=None` gives GridSearchCV nothing to fit or clone, and
# a callable passed as `scoring` is presumably invoked as
# scorer(estimator, X, y), whereas cost_function_wrapper takes a single `params`
# tuple — this cell is not expected to run as written; confirm and consider an
# explicit loop over the parameter combinations instead.
grid_search = GridSearchCV(estimator=None, param_grid=param_grid, scoring=cost_function_wrapper)

# Run the grid search
# NOTE(review): `X_train` / `y_train` are not defined in the visible cells.
grid_search.fit(X_train, y_train)

# Display the best parameters found
print(grid_search.best_params_)

## Critère de Kelly avec levier

In [None]:
def kelly_criterion_leverage(proba_win, risk_reward_ratio, leverage):
    """Kelly fraction scaled by a leverage factor.

    The Kelly fraction is ``(p * (b + 1) - 1) / b`` with ``p = proba_win`` and
    ``b = risk_reward_ratio``; it is negative when ``p * (b + 1) < 1``.

    Parameters:
    proba_win (float): Probability of winning.
    risk_reward_ratio (float): Reward-to-risk ratio (must be non-zero).
    leverage (float): Multiplier applied to the Kelly fraction.

    Returns:
    float: ``kelly_fraction * leverage``.
    """
    kelly_fraction = (proba_win * (risk_reward_ratio + 1) - 1) / risk_reward_ratio
    position_size = kelly_fraction * leverage

    # The original computed position_size but fell off the end of the function,
    # so every call returned None.
    return position_size

# Espérance de gain

In [None]:
def pps_cout(y_true, y_pred, equity, kelly_factor, prev_gain, prev_losses):
    """Cost combining the mean position-weighted error and a quadratic penalty.

    Parameters:
    y_true (np.ndarray): Reference values; NaNs are ignored by the percentiles.
    y_pred (np.ndarray): Predicted values.
    equity (float): When equal to 0 the position size is forced to 0;
        otherwise the value itself is not used.
    kelly_factor (float): Multiplier applied to the position-size formula.
    prev_gain (float): Previous gains, used with ``prev_losses`` to compute a
        win ratio when their sum is positive.
    prev_losses (float): Previous losses.

    Returns:
    float: ``-mean(potential_gain) + 0.5 * mean(potential_loss ** 2)``, where
        NaNs in ``potential_loss`` are replaced by 0 before squaring.
    """
    # Share of samples where the prediction exceeds the true value.
    proba_win = (y_pred > y_true).mean()
    proba_loss = 1 - proba_win
    # |5th percentile| / |95th percentile| of y_true, ignoring NaNs.
    risk_reward_ratio = abs(np.nanpercentile(y_true, 5)) / abs(np.nanpercentile(y_true, 95))

    if equity == 0:
        position_size = 0
    elif prev_gain + prev_losses > 0:
        # The original body read `prev_gains`, a name that is not a parameter
        # (NameError); the signature's `prev_gain` is used instead.
        win_ratio = prev_gain / (prev_gain + prev_losses)
        position_size = kelly_factor * (2 * win_ratio * proba_win - proba_loss) / risk_reward_ratio
    else:
        position_size = kelly_factor * (2 * proba_win - proba_loss) / risk_reward_ratio

    # NOTE(review): gain and loss are computed from the same expression, as in
    # the original — confirm whether they were meant to differ.
    potential_gain = (y_pred - y_true) * position_size
    potential_loss = (y_pred - y_true) * position_size

    # The original was missing the `*` after 0.5, which is a syntax error.
    return -potential_gain.mean() + 0.5 * np.square(np.nan_to_num(potential_loss)).mean()

In [None]:
# Candidate values for the Kelly multiplier.
kelly_factor_values = [0.1, 0.2, 0.3, 0.4, 0.5]

# Candidate values for the previous gains / losses.
prev_gains_values = [0, 10, 20, 30, 40]
prev_losses_values = [0, 10, 20, 30, 40]

# NOTE(review): the grid key is 'prev_gains' while pps_cout's signature names
# the parameter `prev_gain` — confirm which spelling is intended.
param_grid = {'kelly_factor' : kelly_factor_values,
              'prev_gains' : prev_gains_values,
              'prev_losses' : prev_losses_values}

# NOTE(review): `estimator=None` gives GridSearchCV nothing to fit, and a
# callable `scoring` is presumably invoked as scorer(estimator, X, y), which
# does not match pps_cout(y_true, y_pred, equity, kelly_factor, ...) — this
# cell is not expected to run as written; confirm.
grid_search = GridSearchCV(estimator=None, param_grid=param_grid, cv=5, scoring=pps_cout)

# NOTE(review): `X_train` / `y_train` are not defined in the visible cells, and
# it is unclear that `equity=100` would reach pps_cout through fit() — verify.
grid_search.fit(X_train, y_train, equity=100)

# Display the best hyperparameters and their score
print("Meilleurs hyperparamètres : ", grid_search.best_params_)
print("Score : ", -grid_search.best_score_)