### Imports

In [1]:
from natixis.deep_model import ExNet
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score, average_precision_score


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


### Checking class repartition

In [2]:
data = pd.read_csv('../data/new_dataset.csv')
print(len(data[data['Signal']==0]))
print(len(data[data['Signal']==1]))
print(len(data[data['Signal']==2]))

98903
101346
192096


### Load model (make sure it is the same parameters as in main.py)

In [3]:
# Build params
seed=0
n_experts = 5
spec_weight = 7.7e-4
entropy_weight = 4.2e-2
expert_architecture = [32, 32]
embedding_size = 32
dropout_rates = {"input": 0.1, "hidden": 0.5}
weight_decay = {"l1": 0.0, "l2": 0.0}
gamma = 2.5

# Fit params
n_epochs = 10
patience = 20
batch_size = 1024
learning_rate = 7.8e-4
optimizer = "nadam"
lookahead = True

# ===== Preparing data =====
data = pd.read_csv("../data/new_dataset.csv")
active_investors = data["company_short_name"].unique()
investor_mapping = dict(zip(active_investors, range(len(active_investors))))

data["investor_encoding"] = data["company_short_name"].apply(
    lambda x: investor_mapping[x]
)

n_investors = np.unique(data.investor_encoding.values).shape[0]

# Splitting data
indexes = np.arange(data.shape[0])
train_idx, test_idx = train_test_split(indexes, test_size=0.2, random_state=seed)
train_idx, val_idx = train_test_split(train_idx, test_size=0.2, random_state=seed)

features = list(data.columns[4:-1])

# Removing irrelevant columns - date, encoding, target & splits.
data = data[["Deal_Date", "investor_encoding", "Signal"] + features]

train_data = data.iloc[train_idx]
val_data = data.iloc[val_idx]
test_data = data.iloc[test_idx]

train_data_ = (
    train_data[features].values.astype(np.float32),
    train_data['investor_encoding'].values.astype(np.int32),
    pd.get_dummies(train_data.Signal).values.astype(np.float32),
)

val_data_ = (
    val_data[features].values.astype(np.float32),
    val_data['investor_encoding'].values.astype(np.int32),
    pd.get_dummies(val_data.Signal).values.astype(np.float32),
)

test_data_ = (
    test_data[features].values.astype(np.float32),
    test_data['investor_encoding'].values.astype(np.int32),
    pd.get_dummies(test_data.Signal).values.astype(np.float32),
)

model = ExNet(
    n_feats=len(features),
    output_dim=3,
    n_experts=n_experts,
    expert_architecture=expert_architecture,
    n_investors=n_investors,
    embedding_size=embedding_size,
    dropout_rates=dropout_rates,
    weight_decay={"l1": 0.0, "l2": 0.0},
    spec_weight=spec_weight,
    entropy_weight=entropy_weight,
    gamma=gamma,
    name=f"exnet",
)

model.fake_call()
model.load_weights('../models/exnet.h5')

### Calculating metrics

In [4]:
# Accuracy (correct prediction of signal for a given row)
predictions = model.predict(test_data_[0:2])
truth = np.argmax(test_data_[2], axis=1)

accuracy = accuracy_score(truth, np.argmax(predictions, axis=1))
print(accuracy)

0.7949891039773669


In [5]:
# Average Precision score
train_pred = model.predict(train_data_[0:2])
val_pred = model.predict(val_data_[0:2])
test_pred = model.predict(test_data_[0:2])

train_score = average_precision_score(train_data.Signal.values, train_pred)
val_score = average_precision_score(val_data.Signal.values, val_pred)
test_score = average_precision_score(test_data.Signal.values, test_pred)

print(
    f"train ap: {100*train_score:.2f} - val ap: {100*val_score:.2f} - test ap: {100*test_score:.2f}"
)

train ap: 82.40 - val ap: 81.56 - test ap: 81.75


In [6]:
# Client prediction
correct_buyers = 0
correct_sellers = 0
total_buyers = 0
total_sellers = 0

for i in range(len(test_data_[1])):
    # For buyers
    if sum(test_data_[2][i] == np.array([0., 0., 1.])) == 3:
        total_buyers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        prediction = np.argmax(model.predict(to_pred)[:,2])
        if truth == prediction:
            correct_buyers += 1

    if sum(test_data_[2][i] == np.array([0., 1., 0.])) == 3:
        total_sellers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        prediction = np.argmax(model.predict(to_pred)[:,1])
        if truth == prediction:
            correct_sellers += 1

print("Correct predictions for buyers: ", correct_buyers/total_buyers)
print("Correct predictions for sellers: ", correct_sellers/total_sellers)
print("Total correct predictions: ", (correct_buyers+correct_sellers)/(total_buyers+total_sellers))

Correct predictions for buyers:  0.3198589467679622
Correct predictions for sellers:  0.23321363569285258
Total correct predictions:  0.2899517794077696


In [7]:
# Top 3 clients prediction
correct_buyers = 0
correct_sellers = 0
total_buyers = 0
total_sellers = 0

for i in range(len(test_data_[1])):
    # For buyers
    if sum(test_data_[2][i] == np.array([0., 0., 1.])) == 3:
        total_buyers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top3_indices = np.argsort(model.predict(to_pred)[:, 2])[-3:]
        if truth in top3_indices:
            correct_buyers += 1

    if sum(test_data_[2][i] == np.array([0., 1., 0.])) == 3:
        total_sellers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top3_indices = np.argsort(model.predict(to_pred)[:, 1])[-3:]
        if truth in top3_indices:
            correct_sellers += 1

print("Correct predictions for buyers in top 3: ", correct_buyers/total_buyers)
print("Correct predictions for sellers in top 3: ", correct_sellers/total_sellers)
print("Total correct predictions in top 3: ", (correct_buyers+correct_sellers)/(total_buyers+total_sellers))

Correct predictions for buyers in top 3:  0.5182409832239998
Correct predictions for sellers in top 3:  0.40607014609670916
Total correct predictions in top 3:  0.4795232273838631


In [8]:
# Top 5 clients prediction
correct_buyers = 0
correct_sellers = 0
total_buyers = 0
total_sellers = 0

for i in range(len(test_data_[1])):
    # For buyers
    if sum(test_data_[2][i] == np.array([0., 0., 1.])) == 3:
        total_buyers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top5_indices = np.argsort(model.predict(to_pred)[:, 2])[-5:]
        if truth in top3_indices:
            correct_buyers += 1

    if sum(test_data_[2][i] == np.array([0., 1., 0.])) == 3:
        total_sellers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top5_indices = np.argsort(model.predict(to_pred)[:, 1])[-5:]
        if truth in top3_indices:
            correct_sellers += 1

print("Correct predictions for buyers in top 5: ", correct_buyers/total_buyers)
print("Correct predictions for sellers in top 5: ", correct_sellers/total_sellers)
print("Total correct predictions in top 5: ", (correct_buyers+correct_sellers)/(total_buyers+total_sellers))

Correct predictions for buyers in top 5:  0.5299089895506521
Correct predictions for sellers in top 5:  0.4122681883024251
Total correct predictions in top 5:  0.4893031784841076


In [9]:
# Top 10 clients prediction
correct_buyers = 0
correct_sellers = 0
total_buyers = 0
total_sellers = 0

for i in range(len(test_data_[1])):
    # For buyers
    if sum(test_data_[2][i] == np.array([0., 0., 1.])) == 3:
        total_buyers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top10_indices = np.argsort(model.predict(to_pred)[:, 2])[-10:]
        if truth in top3_indices:
            correct_buyers += 1

    if sum(test_data_[2][i] == np.array([0., 1., 0.])) == 3:
        total_sellers += 1
        feats = test_data_[0][i]
        truth = test_data_[1][i]
        invests = np.arange(89, dtype='int32')
        feats_copied = np.tile(feats, (89, 1))
        to_pred = (feats_copied, invests)

        top10_indices = np.argsort(model.predict(to_pred)[:, 1])[-10:]
        if truth in top3_indices:
            correct_sellers += 1

print("Correct predictions for buyers in top 10: ", correct_buyers/total_buyers)
print("Correct predictions for sellers in top 10: ", correct_sellers/total_sellers)
print("Total correct predictions in top 10: ", (correct_buyers+correct_sellers)/(total_buyers+total_sellers))

Correct predictions for buyers in top 10:  0.5299089895506521
Correct predictions for sellers in top 10:  0.4122681883024251
Total correct predictions in top 10:  0.4893031784841076
