In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, \
    confusion_matrix, roc_auc_score

In [2]:
path = '../RFQ_Data_Challenge_HEC.csv'
data = pd.read_csv(path)
data.head()

  data = pd.read_csv(path)


Unnamed: 0,Deal_Date,ISIN,cusip,Instrument,Sales_Name,Sales_Initial,company_short_name,B_Price,B_Side,Total_Requested_Volume,...,MidYTM,YTWDate,SpreadvsBenchmarkMid,MidASWSpread,MidZSpread,GSpreadMid,MidModifiedDuration,MidConvexity,MidEffectiveDuration,MidEffectiveConvexity
0,2022-09-15,DE000A14J587,D8397TBT2,TKAGR 25/2/2025 2.500,Blanca Tailpied,BLA,Bnp Reunion,0.0,NATIXIS SELL,77551,...,5.57551,,417.014771,286.230682,295.597916,430.414978,2.23491,8.41587,2.11347,6.82874
1,2022-09-15,DE000A14J587,D8397TBT2,TKAGR 25/2/2025 2.500,Blanca Tailpied,BLA,Cegi Ett,0.0,NATIXIS SELL,8163,...,5.57551,,417.014771,286.230682,295.597916,430.414978,2.23491,8.41587,2.11347,6.82874
2,2022-09-15,DE000A14J587,D8397TBT2,TKAGR 25/2/2025 2.500,Blanca Tailpied,BLA,Qbe,0.0,NATIXIS SELL,20408,...,5.57551,,417.014771,286.230682,295.597916,430.414978,2.23491,8.41587,2.11347,6.82874
3,2022-09-15,DE000A14J587,D8397TBT2,TKAGR 2.5% 25 FEB 2025,Blanca Tailpied,BLA,Cegi Ett,0.0,NATIXIS SELL,16327,...,5.57551,,417.014771,286.230682,295.597916,430.414978,2.23491,8.41587,2.11347,6.82874
4,2022-09-15,DE000A14J587,D8397TBT2,TKAGR 2.5% 25 FEB 2025,Constantin Durie,CON,Scp Laureau-Jeannerot,0.0,NATIXIS SELL,244898,...,5.57551,,417.014771,286.230682,295.597916,430.414978,2.23491,8.41587,2.11347,6.82874


In [3]:
def preprocess_dataframe(df):
    """
    Preprocesses the input DataFrame with the following steps:
    1. Converts 'Deal_Date', 'maturity', columns to datetime.
    2. Converts 'B_Side' column to boolean.
    3. Converts 'B_Price' and 'Total_Requested_Volume' integers.
    4. Fills null values in 'Tier', 'AssumedMaturity'.
    5. Converts 'Frequency' feature values into integers.
    6. Drops the unused 'Cusip' column.
    Parameters:
    - df (DataFrame): Input DataFrame.

    Returns:
    - DataFrame: Processed DataFrame.
    """

    df = df.copy()

    # Drop null values only for columns below the threshold
    columns_to_delete_null_values = [
        'MidYTM', 'Coupon', 'Ccy', 'cusip',
        'cdcissuerShortName', 'Frequency', 'MidPrice', 'cdcissuer',
        'company_short_name', 'BloomIndustrySubGroup', 'B_Price',
        'Total_Traded_Volume_Natixis', 'B_Side',
        'Total_Traded_Volume_Away', 'Total_Requested_Volume',
        'Total_Traded_Volume', 'Type', 'Maturity', 'ISIN', 'Deal_Date']
    df = df.dropna(subset=columns_to_delete_null_values)

    df['Maturity'] = pd.to_datetime(df['Maturity'])


    # Convert 'B_Price', 'Total_Requested_Volume', 'Frequency' to integers
    df['Frequency'] = df['Frequency'].str.replace('M', '')
    numerical_columns = ['B_Price', 'Total_Requested_Volume', 'Frequency']
    df.dropna(subset=numerical_columns, inplace=True)
    for column in numerical_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce').astype(int)

    # Fix the error in the B_Price column
    df = df[df['B_Price'] >= 20]

    # Replace NaT with null values in the 'Maturity' column
    df['maturity'].replace({pd.NaT: np.nan}, inplace=True)

    # Convert 'Deal_Date', 'maturity', 'AssumedMaturity', 'YTWDate' to datetime
    df['Deal_Date'] = pd.to_datetime(df['Deal_Date'])
    df['maturity'] = pd.to_datetime(df['maturity'], errors='coerce',                
                                    format='%Y-%m-%d %H:%M:%S.%f')
    df['AssumedMaturity'] = pd.to_datetime(df['AssumedMaturity'],
                                           errors='coerce')
    df['YTWDate'] = pd.to_datetime(df['YTWDate'], errors='coerce')

    # Add year, month, day for clustering
    df['Year_dealdate'] = df['Deal_Date'].dt.year
    df['Month_dealdate'] = df['Deal_Date'].dt.month
    df['Day_dealdate'] = df['Deal_Date'].dt.day
    df['Year_maturity'] = df['Maturity'].dt.year
    df['Month_maturity'] = df['Maturity'].dt.month
    df['Day_maturity'] = df['Maturity'].dt.day


    # Delete maturities smaller than 2021 (as deal dates start in 2021)
    df = df[df['Maturity'].dt.year >= 2021]

    # Compute the number of days between maturity and deal date
    df['Days_to_Maturity'] = (df['Maturity'] - df['Deal_Date']).dt.days

    # Replace null values in 'AssumedMaturity' with values from 'Maturity'
    df['AssumedMaturity'] = df['AssumedMaturity'].fillna(df['Maturity'])

    # Convert 'B_Side' column to boolean (1 for ' BUY', 0 for ' SELL')
    df = df[df['B_Side'].isin(['NATIXIS SELL', 'NATIXIS BUY'])]
    df['B_Side'] = df['B_Side'].replace({'NATIXIS BUY': 1, 'NATIXIS SELL': 0})

    # Convert null values of 'Tier'
    df['Tier'].fillna('UNKNOWN', inplace=True)

    # Lower string names
    df['Sales_Name'] = df['Sales_Name'].str.lower()
    df['company_short_name'] = df['company_short_name'].str.lower()

    # Drop unused columns
    columns_to_drop = ['maturity','Cusip']
    df.drop(columns=columns_to_drop, inplace=True)

    return df

In [4]:
# Apply preprocessing function

df = preprocess_dataframe(data)

In [5]:
def column_encoding(df):

    """
    Perform column encoding and data transformations on the input DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame containing financial data.

    Returns:
    pd.DataFrame: Transformed DataFrame with column encoding.
    """

    # List of columns to delete
    columns_to_del = ['cusip', 'Instrument', 'Sales_Name', 'Sales_Initial',
                      'Total_Traded_Volume_Natixis',
                      'Total_Traded_Volume_Away',
                      'Total_Traded_Volume', 'cdcissuer',
                      'Tier']

    df.drop(columns=columns_to_del, axis=1, inplace=True)

    # Transform 'Ccy' to 'is_euro' boolean column
    df['is_euro'] = (df['Ccy'] == 'EUR').astype(int)

    # Transform 'Type' to 'is_fixed' boolean column
    df['is_fixed'] = (df['Type'] == 'Fixed').astype(int)

    # Drop the original 'Ccy' and 'Type' columns
    df = df.drop(['Ccy', 'Type'], axis=1, errors='ignore')

    # Ordinal encoding for 'Rating_Fitch'
    rating_mapping = {
        'AAA': 22,
        'AA+': 21,
        'AA': 20,
        'AA-': 19,
        'A+': 18,
        'A': 17,
        'A-': 16,
        'BBB+': 15,
        'BBB': 14,
        'BBB-': 13,
        'BB+': 12,
        'BB': 11,
        'BB-': 10,
        'B+': 9,
        'B': 8,
        'B-': 7,
        'CCC+': 6,
        'CCC': 5,
        'CCC-': 4,
        'CC': 3,
        'C': 2,
        'WD': 1,
        'D': 0,
        'NR': np.nan
    }

    rating_mapping_moodys = {
        'Aaa': 22,
        'Aa1': 21,
        'Aa2': 20,
        '(P)Aa2': 20,
        'Aa3': 19,
        '(P)Aa3': 19,
        'A1': 18,
        '(P)A1': 18,
        'A2': 17,
        '(P)A2': 17,
        'A3': 16,
        '(P)A3': 16,
        'Baa1': 15,
        '(P)Baa1': 15,
        'Baa2': 14,
        '(P)Baa2': 14,
        'Baa3': 13,
        'Ba1': 12,
        'Ba2': 11,
        'Ba3': 10,
        'B1': 9,
        'B2': 8,
        'B3': 7,
        'Caa1': 6,
        'Caa2': 5,
        'Caa3': 4,
        'Ca': 2.5,
        'C': 0
    }

    df['Rating_Fitch_encoded'] = df['Rating_Fitch'].map(rating_mapping)
    df['Rating_SP_encoded'] = df['Rating_SP'].map(rating_mapping)
    df['Rating_Moodys_encoded'] = df['Rating_Moodys'].map(
        rating_mapping_moodys
        )

    # Create a unique Rating that averages the 3 Ratings
    df['Rating'] = df[['Rating_Fitch_encoded', 'Rating_SP_encoded',
                       'Rating_Moodys_encoded']].mean(axis=1)
    df.drop(columns=['Rating_Fitch', 'Rating_SP',
                     'Rating_Moodys'], axis=1, inplace=True)

    # List of countries to encode
    encode_countries = ['ITALY', 'FRANCE', 'GERMANY', 'NETHERLANDS', 'BELGIUM']

    # Use the apply function with a lambda function to update the 'country'
    df['Country'] = df['Country'].apply(
        lambda x: x if x in encode_countries else 'Other')

    df = pd.get_dummies(df,
                        columns=['Classification', 'Country'], prefix='Class')

    # Convert booleans to numeric in df
    def convert_boolean_to_binary(df):
        for column in df.columns:
            # Check if the column contains boolean values
            if df[column].dtype == bool:
                # Convert boolean to binary integer (True -> 1, False -> 0)
                df[column] = df[column].astype(int)
        return df

    df = convert_boolean_to_binary(df)

    return df

In [6]:
# Apply encoding function
df = column_encoding(df)

In [7]:
df.sort_values(by=['Deal_Date'], inplace=True)
df.head()

Unnamed: 0,Deal_Date,ISIN,company_short_name,B_Price,B_Side,Total_Requested_Volume,BloomIndustrySector,BloomIndustryGroup,BloomIndustrySubGroup,cdcissuerShortName,...,Class_Mortgage Finance,Class_Technology,Class_Telecommunications Services,Class_Utilities,Class_BELGIUM,Class_FRANCE,Class_GERMANY,Class_ITALY,Class_NETHERLANDS,Class_Other
566145,2021-09-16,XS2379486884,groupe bpce,99,0,81633,Financial,Diversified Finan Serv,Finance-Other Services,INVENTIVGL,...,0,0,0,0,0,0,0,0,0,1
490605,2021-09-16,XS2236362849,groupe bpce,101,1,81633,Industrial,Electronics,Electronic Compo-Misc,FOXFAREAST,...,0,1,0,0,0,0,0,0,0,1
341068,2021-09-17,XS1751178499,groupe bpce,100,1,81633,Financial,Real Estate,Real Estate Oper/Develop,CNTYGDHD,...,0,0,0,0,0,0,0,0,0,1
210758,2021-09-23,XS2225678312,gegc,99,1,408163,Consumer,Non-cyclical,Food,Food-Flour&Grain,...,0,0,0,0,0,0,0,0,0,1
478979,2021-09-29,XS1165659514,bnp paribas,98,0,122449,Financial,Investment Companies,Investment Companies,HUARONGFIN,...,0,0,0,0,0,0,0,0,0,1


## Modelling

In [9]:
# Encode categorical target variable
label_encoder = LabelEncoder()
df['y_encoded'] = label_encoder.fit_transform(df['company_short_name'])

# Split the data into features (X) and target (y) for Buy and Sell side
X = df.drop(['company_short_name', 'y_encoded',
             'Deal_Date', 'Maturity', 'YTWDate', 'AssumedMaturity'], axis=1)

y_buy = df.loc[df['B_Side'] == 1, 'y_encoded']
y_sell = df.loc[df['B_Side'] == 0, 'y_encoded']

for col in X.select_dtypes(include=['object']).columns:
    X[col] = label_encoder.fit_transform(X[col])

X_buy = X[X['B_Side'] == 1]
X_sell = X[X['B_Side'] == 0]

In [10]:
# Split the data into training and testing sets
X_train_buy, X_test_buy, y_train_buy, y_test_buy = \
    train_test_split(X_buy, y_buy, test_size=0.2, random_state=42)
X_train_sell, X_test_sell, y_train_sell, y_test_sell = \
    train_test_split(X_sell, y_sell, test_size=0.2, random_state=42)

## XGBoost

#### For Buy Side

In [11]:
# Define the XGBoost classifier with handling categorical variables
xgb_classifier = xgb.XGBClassifier(objective='multi:softmax',
                                   num_class=len(df['company_short_name']
                                                .unique()),
                                   eval_metric='mlogloss')

# Train the classifier
xgb_classifier.fit(X_train_buy, y_train_buy)

# Make predictions on the testing set
y_pred = xgb_classifier.predict(X_test_buy)

# Evaluate the model
accuracy = accuracy_score(y_test_buy, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_buy, y_pred))

# Get class probabilities for calculating AUC
y_probs = xgb_classifier.predict_proba(X_test_buy)

# Calculate AUC score
auc_score = roc_auc_score(y_test_buy, y_probs, multi_class='ovr')
print(f'AUC Score: {auc_score:.2f}')

Accuracy: 0.37
Classification Report:
               precision    recall  f1-score   support

           0       0.10      0.02      0.04       135
           1       0.00      0.00      0.00        51
           2       0.26      0.23      0.24       147
           3       0.15      0.07      0.10        68
           4       0.00      0.00      0.00        38
           5       0.44      0.41      0.42       324
           6       0.16      0.08      0.11       121
           7       0.00      0.00      0.00        17
           8       0.21      0.15      0.18       145
           9       0.00      0.00      0.00        25
          10       0.51      0.56      0.53       317
          11       0.20      0.12      0.15        43
          12       0.51      0.49      0.50       368
          13       0.11      0.05      0.07       104
          14       0.10      0.06      0.08        16
          15       0.19      0.09      0.12       171
          16       0.13      0.10      0.1

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


AUC Score: 0.86


#### For Sell Side

In [12]:
# Train the classifier
xgb_classifier.fit(X_train_sell, y_train_sell)

# Make predictions on the testing set
y_pred = xgb_classifier.predict(X_test_sell)

# Evaluate the model
accuracy = accuracy_score(y_test_sell, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_sell,
                                                        y_pred))

# Get class probabilities for calculating AUC
y_probs = xgb_classifier.predict_proba(X_test_sell)

# Calculate AUC score
auc_score = roc_auc_score(y_test_sell, y_probs, multi_class='ovr')
print(f'AUC Score: {auc_score:.2f}')

Accuracy: 0.38
Classification Report:
               precision    recall  f1-score   support

           0       0.04      0.01      0.01       118
           1       0.09      0.03      0.04       107
           2       0.27      0.29      0.28       189
           3       0.19      0.13      0.16        82
           4       0.27      0.11      0.15       112
           5       0.31      0.30      0.31       369
           6       0.12      0.05      0.07       169
           7       0.27      0.14      0.19        21
           8       0.19      0.17      0.18       131
           9       0.44      0.48      0.46        81
          10       0.46      0.48      0.47       365
          11       0.26      0.25      0.26       158
          12       0.42      0.32      0.36       371
          13       0.14      0.04      0.06        81
          14       0.07      0.04      0.05        25
          15       0.26      0.12      0.16       194
          16       0.23      0.26      0.2

## CatBoost


#### For Buy Side

In [13]:
# Define the CatBoost classifier with handling categorical variables
catboost_classifier = CatBoostClassifier(iterations=100,
                                         loss_function='MultiClass',
                                         eval_metric='Accuracy')

# Train the classifier
catboost_classifier.fit(X_train_buy, y_train_buy)

# Make predictions on the testing set
y_pred = catboost_classifier.predict(X_test_buy)

# Evaluate the model
accuracy = accuracy_score(y_test_buy, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_buy,
                                                        y_pred))

# Get class probabilities for calculating AUC
y_probs = catboost_classifier.predict_proba(X_test_buy)

auc_score = roc_auc_score(y_test_buy, y_probs, multi_class='ovr')
print(f'AUC Score: {auc_score:.2f}')

Learning rate set to 0.5
0:	learn: 0.2458244	total: 1.24s	remaining: 2m 2s
1:	learn: 0.1043505	total: 2.18s	remaining: 1m 47s
2:	learn: 0.2072596	total: 3.04s	remaining: 1m 38s
3:	learn: 0.1890940	total: 3.94s	remaining: 1m 34s
4:	learn: 0.1402306	total: 4.82s	remaining: 1m 31s
5:	learn: 0.1806769	total: 5.67s	remaining: 1m 28s
6:	learn: 0.2304095	total: 6.57s	remaining: 1m 27s
7:	learn: 0.1710164	total: 7.46s	remaining: 1m 25s
8:	learn: 0.2120789	total: 8.32s	remaining: 1m 24s
9:	learn: 0.1571089	total: 9.21s	remaining: 1m 22s
10:	learn: 0.1732280	total: 10.1s	remaining: 1m 21s
11:	learn: 0.2169751	total: 11s	remaining: 1m 20s
12:	learn: 0.1821733	total: 11.9s	remaining: 1m 19s
13:	learn: 0.2201769	total: 12.9s	remaining: 1m 19s
14:	learn: 0.1862553	total: 13.9s	remaining: 1m 19s
15:	learn: 0.1952336	total: 15.1s	remaining: 1m 19s
16:	learn: 0.2062364	total: 16.2s	remaining: 1m 18s
17:	learn: 0.1915917	total: 17.2s	remaining: 1m 18s
18:	learn: 0.1715886	total: 18.2s	remaining: 1m 17s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


AUC Score: 0.64


#### For Sell Side

In [14]:
# Train the classifier
catboost_classifier.fit(X_train_sell, y_train_sell)

# Make predictions on the testing set 
y_pred = catboost_classifier.predict(X_test_sell)

# Evaluate the model
accuracy = accuracy_score(y_test_sell, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_sell, y_pred))

# Get class probabilities for calculating AUC
y_probs = catboost_classifier.predict_proba(X_test_sell)

auc_score = roc_auc_score(y_test_sell, y_probs, multi_class='ovr')  
print(f'AUC Score: {auc_score:.2f}')

Learning rate set to 0.5
0:	learn: 0.3332460	total: 2.06s	remaining: 3m 23s
1:	learn: 0.1126664	total: 3.47s	remaining: 2m 50s
2:	learn: 0.2489519	total: 4.92s	remaining: 2m 38s
3:	learn: 0.2801577	total: 6.33s	remaining: 2m 32s
4:	learn: 0.1456454	total: 7.8s	remaining: 2m 28s
5:	learn: 0.2951604	total: 9.24s	remaining: 2m 24s
6:	learn: 0.2103057	total: 10.8s	remaining: 2m 23s
7:	learn: 0.1627870	total: 12.4s	remaining: 2m 22s
8:	learn: 0.2858858	total: 14.1s	remaining: 2m 22s
9:	learn: 0.2488849	total: 15.7s	remaining: 2m 21s
10:	learn: 0.2632111	total: 17.4s	remaining: 2m 20s
11:	learn: 0.1207710	total: 18.9s	remaining: 2m 18s
12:	learn: 0.2735582	total: 20.6s	remaining: 2m 18s
13:	learn: 0.2723517	total: 22.2s	remaining: 2m 16s
14:	learn: 0.1554868	total: 24.1s	remaining: 2m 16s
15:	learn: 0.2549786	total: 25.7s	remaining: 2m 15s
16:	learn: 0.2753802	total: 27.4s	remaining: 2m 13s
17:	learn: 0.2578182	total: 29s	remaining: 2m 11s
18:	learn: 0.1556940	total: 30.8s	remaining: 2m 11s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       118
           1       0.00      0.00      0.00       107
           2       0.00      0.02      0.01       189
           3       0.00      0.00      0.00        82
           4       0.00      0.00      0.00       112
           5       0.02      0.02      0.02       369
           6       0.00      0.00      0.00       169
           7       0.00      0.00      0.00        21
           8       0.00      0.00      0.00       131
           9       0.04      0.01      0.02        81
          10       0.07      0.03      0.04       365
          11       0.00      0.00      0.00       158
          12       0.01      0.00      0.00       371
          13       0.00      0.00      0.00        81
          14       0.00      0.00      0.00        25
          15       0.00      0.00      0.00       194
          16       0.09      0.03      0.05      1603
   

## LightGBM

#### For Buy Side

In [15]:
# Define the LightGBM classifier with handling categorical variables
lgb_classifier = lgb.LGBMClassifier(objective='multiclass',
                                    num_class=len(df['company_short_name']
                                                  .unique()),
                                    metric='multi_logloss')

# Train the classifier
lgb_classifier.fit(X_train_buy, y_train_buy)

# Make predictions on the testing set
y_pred = lgb_classifier.predict(X_test_buy)

# Evaluate the model
accuracy = accuracy_score(y_test_buy, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_buy, y_pred))

# Get class probabilities for calculating AUC
y_probs = lgb_classifier.predict_proba(X_test_buy)

auc_score = roc_auc_score(y_test_buy, y_probs, multi_class='ovr')
print(f'AUC Score: {auc_score:.2f}')

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4281
[LightGBM] [Info] Number of data points in the train set: 90886, number of used features: 50
[LightGBM] [Info] Start training from score -5.206761
[LightGBM] [Info] Start training from score -5.872184
[LightGBM] [Info] Start training from score -5.177085
[LightGBM] [Info] Start training from score -5.891908
[LightGBM] [Info] Start training from score -6.534559
[LightGBM] [Info] Start training from score -4.387388
[LightGBM] [Info] Start training from score -5.264629
[LightGBM] [Info] Start training from score -7.154681
[LightGBM] [Info] Start training from score -4.854917
[LightGBM] [Info] Start training from score -7.126902
[LightGBM] [Info] Start training from score -4.239579
[LightGBM] [Info] Start training from score -6.197005
[LightGBM] [Info] Start training from score -4.077824
[LightGBM

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


AUC Score: 0.50


#### For Sell Side

In [16]:
# Train the classifier
lgb_classifier.fit(X_train_sell, y_train_sell)

# Make predictions on the testing set
y_pred = lgb_classifier.predict(X_test_sell)

# Evaluate the model
accuracy = accuracy_score(y_test_sell, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', classification_report(y_test_sell, y_pred))

# Get class probabilities for calculating AUC
y_probs = lgb_classifier.predict_proba(X_test_sell)

auc_score = roc_auc_score(y_test_sell, y_probs, multi_class='ovr')
print(f'AUC Score: {auc_score:.2f}')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013651 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4259
[LightGBM] [Info] Number of data points in the train set: 164104, number of used features: 50
[LightGBM] [Info] Start training from score -5.851277
[LightGBM] [Info] Start training from score -5.953816
[LightGBM] [Info] Start training from score -5.355393
[LightGBM] [Info] Start training from score -6.086677
[LightGBM] [Info] Start training from score -5.830312
[LightGBM] [Info] Start training from score -4.889429
[LightGBM] [Info] Start training from score -5.341299
[LightGBM] [Info] Start training from score -7.745576
[LightGBM] [Info] Start training from score -5.754427
[LightGBM] [Info] Start training from score -6.194125
[LightGBM] [Info] Start training from score -4.677850
[LightGBM] [Info] Start training from score -5.46290

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


AUC Score: 0.50
