In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, precision_score, recall_score, average_precision_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier


import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

In [2]:
data = pd.read_csv('./src/train.csv')
data.shape

(400000, 87)

In [3]:
data.head()

Unnamed: 0.1,Unnamed: 0,ID_PRODUCT,ID_SELLER,ID_SELLER_COUNTRY,SELLER_GEO_1,SELLER_GEO_2,SELLER_GEO_3,DEPOSIT_PRICE,INSERTION_PRICE,PRICE,...,TOTAL_TRK_1D,TOTAL_TRK_7D,TOTAL_TRK_30D,NB_DROP_SELLER_1D,NB_DROP_SELLER_7D,NB_DROP_SELLER_30D,NB_WIDTHDRAW_SELLER_1D,NB_WIDTHDRAW_SELLER_7D,NB_WIDTHDRAW_SELLER_30D,LABEL
0,0,f398701175db97ad9f9ae4f061a8c7d7ef4da505708f0b...,904fc91a25b0630028eaaf0941b228a62f9341eadde903...,1253e9373e781b7500266caa55150e08e210bc8cd8cc70...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,cf62a64b8a54fd96e70623b69429a70e1ba0e0ef9b502c...,5a9cf672c8be6b5ab9546a2fb49b06dd81a4e364c86ed0...,78.0,78.0,67.0,...,0,8,26,0,0,0,0,0,0,0
1,1,7e0544c102ef705f3939dacb080bb23686355879c3ac77...,216fce1cec515e792bd2d5aa5c68ac84d8118ad11823fb...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,258.0,262.0,118.0,...,39,154,612,2,25,123,0,5,5,0
2,2,fe9ca89ffb93396c469674056158d6ddfe10e94efb3807...,562a34b067f011d9736069d692be44aeb624a7d8b6eba6...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,478.0,478.0,232.0,...,12,75,113,1,11,16,0,2,2,0
3,3,66832d4bbd55c568753a6ec237f8aa213c0cf55929a544...,90c4e50ebbc3dd146dc2852b2b8d428c23fb4eebfee02e...,56f4da26ed956730309fa1488611ee0f13b0ac95ebb1bc...,2099c82f0bcc1c13c9ecc9dd8848c23916cf0eea8f7eef...,9b202ecbc6d45c6d8901d989a918878397a3eb9d00e8f4...,49dca65f362fee401292ed7ada96f96295eab1e589c52e...,350.473498,354.473498,287.256198,...,4,6,8,0,0,4,0,0,1,0
4,4,72a3d1f2ed0a526408159da9bb5bb584790eed9ff6d074...,adb633b0e58e3969d4dc099e4b8beb734282f3bcbbd77d...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,91.0,91.0,72.0,...,18,91,373,2,8,33,0,0,4,0


# EDA

## CORRELATIONS

In [10]:
corr = data.corr()
label_corr = abs(corr['LABEL'].sort_values(ascending=False))
label_corr_best = label_corr[:20]
label_corr_best



LABEL                                 1.000000
NB_DAYS_SINCE_LAST_SOLD               0.130014
NB_DAYS_SINCE_LAST_ORDER              0.130011
NB_DAYS_SINCE_SELLER_REPLY_TO_MMAO    0.116948
NB_DAYS_SINCE_LAST_PUBLISHED          0.112946
NB_DAYS_SINCE_LAST_DEPOSITED          0.104858
NB_DAYS_SINCE_LAST_SESSION            0.097066
NB_DAYS_SINCE_LAST_BS_CHAT            0.084662
TOTAL_SELLER_CANCELLED                0.065889
FLAG2                                 0.062977
NON_RECEIVED_PCT                      0.062277
TOTAL_NON_RECEIVED                    0.056298
RECO_PRICE                            0.049686
PRICE                                 0.047094
NB_DAYS_SINCE_LAST_LIKES              0.043861
TOTAL_MMAO_TIMEOUT_7D                 0.034946
TOTAL_MMAO_TIMEOUT_1D                 0.031764
TOTAL_MMAO_TIMEOUT_30D                0.028987
TIME_ONLINE                           0.027985
MMAO_NB                               0.022464
Name: LABEL, dtype: float64

In [12]:
label_corr_best.index

Index(['LABEL', 'NB_DAYS_SINCE_LAST_SOLD', 'NB_DAYS_SINCE_LAST_ORDER',
       'NB_DAYS_SINCE_SELLER_REPLY_TO_MMAO', 'NB_DAYS_SINCE_LAST_PUBLISHED',
       'NB_DAYS_SINCE_LAST_DEPOSITED', 'NB_DAYS_SINCE_LAST_SESSION',
       'NB_DAYS_SINCE_LAST_BS_CHAT', 'TOTAL_SELLER_CANCELLED', 'FLAG2',
       'NON_RECEIVED_PCT', 'TOTAL_NON_RECEIVED', 'RECO_PRICE', 'PRICE',
       'NB_DAYS_SINCE_LAST_LIKES', 'TOTAL_MMAO_TIMEOUT_7D',
       'TOTAL_MMAO_TIMEOUT_1D', 'TOTAL_MMAO_TIMEOUT_30D', 'TIME_ONLINE',
       'MMAO_NB'],
      dtype='object')

In [13]:
columns_tokeep = ['LABEL', 'NB_DAYS_SINCE_LAST_SOLD', 'NB_DAYS_SINCE_LAST_ORDER',
       'NB_DAYS_SINCE_SELLER_REPLY_TO_MMAO', 'NB_DAYS_SINCE_LAST_PUBLISHED',
       'NB_DAYS_SINCE_LAST_DEPOSITED', 'NB_DAYS_SINCE_LAST_SESSION',
       'NB_DAYS_SINCE_LAST_BS_CHAT', 'TOTAL_SELLER_CANCELLED', 'FLAG2',
       'NON_RECEIVED_PCT', 'TOTAL_NON_RECEIVED', 'RECO_PRICE', 'PRICE',
       'NB_DAYS_SINCE_LAST_LIKES', 'TOTAL_MMAO_TIMEOUT_7D',
       'TOTAL_MMAO_TIMEOUT_1D', 'TOTAL_MMAO_TIMEOUT_30D', 'TIME_ONLINE',
       'MMAO_NB']

## VIZ

In [7]:
data.LABEL.value_counts()
# UNBALANCED

0    371713
1     28287
Name: LABEL, dtype: int64

In [9]:
# dataset avec que les annulations
data_canceled = data[data.LABEL == 1]

In [10]:
data_canceled

(28287, 87)

In [11]:
data_canceled.head()

Unnamed: 0.1,Unnamed: 0,ID_PRODUCT,ID_SELLER,ID_SELLER_COUNTRY,SELLER_GEO_1,SELLER_GEO_2,SELLER_GEO_3,DEPOSIT_PRICE,INSERTION_PRICE,PRICE,...,TOTAL_TRK_1D,TOTAL_TRK_7D,TOTAL_TRK_30D,NB_DROP_SELLER_1D,NB_DROP_SELLER_7D,NB_DROP_SELLER_30D,NB_WIDTHDRAW_SELLER_1D,NB_WIDTHDRAW_SELLER_7D,NB_WIDTHDRAW_SELLER_30D,LABEL
23,23,8373ef1944ebc531dde1f93e4f43515a921428e4a8e79f...,226100bf6e7d45eab72df76f52487ed7507a916bfacf4b...,9b871512327c09ce91dd649b3f96a63b7408ef267c8cc5...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,e7d529decf2effa45e405e83edd0d9b4b82f6ad2a1b95d...,8d23a6e37e0a6431a8f1b43a91026dcff51170a89a6512...,176.418605,176.418605,148.028007,...,0,4,4,0,0,0,0,0,0,1
24,24,d069f7afaf58fc5d803167d93d0a4bba2a4ab1766787f9...,9cf57e4b0935e5326b9120ce5026e555a6e750bd704168...,1253e9373e781b7500266caa55150e08e210bc8cd8cc70...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,cf62a64b8a54fd96e70623b69429a70e1ba0e0ef9b502c...,5a9cf672c8be6b5ab9546a2fb49b06dd81a4e364c86ed0...,27.0,23.0,27.0,...,0,0,4,0,0,0,0,0,0,1
29,29,50da26abb01449d336af4a0d55da8c3f50ffbfa2dc55e1...,cbb545c8e2ee3f42bd3cf4ad72a21fab9ece99ae12d11e...,9b871512327c09ce91dd649b3f96a63b7408ef267c8cc5...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,e7d529decf2effa45e405e83edd0d9b4b82f6ad2a1b95d...,8d23a6e37e0a6431a8f1b43a91026dcff51170a89a6512...,83.560284,83.560284,61.952026,...,7,22,85,0,2,13,0,0,0,1
51,51,f61c10597f5743e57e005801a7516ffd97a190c038f8f9...,caf5e3de91854f68e49b52e470a54063b2002d3fbd3b92...,1253e9373e781b7500266caa55150e08e210bc8cd8cc70...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,cf62a64b8a54fd96e70623b69429a70e1ba0e0ef9b502c...,5a9cf672c8be6b5ab9546a2fb49b06dd81a4e364c86ed0...,43.0,43.0,39.0,...,4,16,138,0,16,16,0,0,0,1
52,52,00b110c7196240a79fee563ec98e38b0a4823b6b8c147d...,73c4fa05d0cb6479e2e20dab4a8e48e48200715488529b...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,452.0,452.0,402.0,...,3,13,40,0,0,0,0,0,0,1


In [None]:
data[''].unique()

In [None]:
data_ratio = (data['XX'].value_counts(normalize=True)*100).rename_axis('XX').reset_index(name='counts')

In [None]:
fig = px.pie(data_ratio,
             values='counts',
             names='XX', 
             width= 1000,
             title='Proportion'
             )
fig.update_traces(textposition = 'outside', textfont_size = 15)             
fig.update_layout(title_x = 0.5, 
                  margin=dict(l=50,r=50,b=50,t=50,pad=4), 
                  template = 'plotly_dark'
                  )    
fig.show()

In [None]:
fig = px.violin(data, 
             x="xx",
             title= "distribution"
             )

fig.update_layout(title_x = 0.5,
                  autosize = True,
                  width=800,
                  height=500,
                  margin=dict(l=50,r=50,b=50,t=50,pad=4),
                  yaxis_title = 'age',
                  yaxis = {'visible': False},
                  xaxis_title = 'age difference',
                  template = 'plotly_dark'
                  )                              
fig.show()

In [None]:
data[''] = data[''].apply(lambda x: 'XX' if x > 0 else 'YY')

In [None]:
fig = px.histogram(data, x="",
                   title = 'TITLE',
                   color = '',
                   barmode ='group',
                   template='plotly_dark'
                   )     
fig.show()

In [None]:
fig = px.histogram(data, x = "XX",
                   title = 'TITLE',
                   color = 'nn',
                   barmode ='group',
                   width= 1000,
                   height = 600,
                   text_auto = True
                  )       
fig.update_traces(textposition = 'outside', textfont_size = 15)
fig.update_layout(title_x = 0.5,
                  margin=dict(l=50,r=50,b=50,t=50,pad=4),
                  yaxis = {'visible': False}, 
                  xaxis = {'visible': True}, 
                  xaxis_title = '',
                  template = 'plotly_dark'
                  )
fig.update_xaxes(tickfont_size=15)                     
fig.show()

# PREPROCESSING

In [12]:
data.head()

Unnamed: 0.1,Unnamed: 0,ID_PRODUCT,ID_SELLER,ID_SELLER_COUNTRY,SELLER_GEO_1,SELLER_GEO_2,SELLER_GEO_3,DEPOSIT_PRICE,INSERTION_PRICE,PRICE,...,TOTAL_TRK_1D,TOTAL_TRK_7D,TOTAL_TRK_30D,NB_DROP_SELLER_1D,NB_DROP_SELLER_7D,NB_DROP_SELLER_30D,NB_WIDTHDRAW_SELLER_1D,NB_WIDTHDRAW_SELLER_7D,NB_WIDTHDRAW_SELLER_30D,LABEL
0,0,f398701175db97ad9f9ae4f061a8c7d7ef4da505708f0b...,904fc91a25b0630028eaaf0941b228a62f9341eadde903...,1253e9373e781b7500266caa55150e08e210bc8cd8cc70...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,cf62a64b8a54fd96e70623b69429a70e1ba0e0ef9b502c...,5a9cf672c8be6b5ab9546a2fb49b06dd81a4e364c86ed0...,78.0,78.0,67.0,...,0,8,26,0,0,0,0,0,0,0
1,1,7e0544c102ef705f3939dacb080bb23686355879c3ac77...,216fce1cec515e792bd2d5aa5c68ac84d8118ad11823fb...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,258.0,262.0,118.0,...,39,154,612,2,25,123,0,5,5,0
2,2,fe9ca89ffb93396c469674056158d6ddfe10e94efb3807...,562a34b067f011d9736069d692be44aeb624a7d8b6eba6...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,478.0,478.0,232.0,...,12,75,113,1,11,16,0,2,2,0
3,3,66832d4bbd55c568753a6ec237f8aa213c0cf55929a544...,90c4e50ebbc3dd146dc2852b2b8d428c23fb4eebfee02e...,56f4da26ed956730309fa1488611ee0f13b0ac95ebb1bc...,2099c82f0bcc1c13c9ecc9dd8848c23916cf0eea8f7eef...,9b202ecbc6d45c6d8901d989a918878397a3eb9d00e8f4...,49dca65f362fee401292ed7ada96f96295eab1e589c52e...,350.473498,354.473498,287.256198,...,4,6,8,0,0,4,0,0,1,0
4,4,72a3d1f2ed0a526408159da9bb5bb584790eed9ff6d074...,adb633b0e58e3969d4dc099e4b8beb734282f3bcbbd77d...,eb624dbe56eb6620ae62080c10a273cab73ae8eca98ab1...,7e3a78f9aa16d14453f363271db2973b903b3949684f0f...,09fbaf8891f39040036484a565bfc3f832713ce3f2d22b...,7a1ca4ef7515f7276bae7230545829c27810c9d9e98ab2...,91.0,91.0,72.0,...,18,91,373,2,8,33,0,0,4,0


In [18]:
features_exclu =['ID_PRODUCT','ID_SELLER','ID_SELLER_COUNTRY','SELLER_GEO_1','SELLER_GEO_2','SELLER_GEO_3']

In [55]:
data = pd.read_csv('train.csv',index_col=0)

In [56]:
data = data.drop(['ID_CONDITION','DEPOSIT_DEVICE','ID_COLOUR','ID_PATTERN','CURRENCY','ID_SUB_SUBCATEGORY','ID_MODEL','ID_MATERIAL','LANGUAGE','ID_UNIVERSE','ID_CATEGORY','ID_PRODUCT','ID_SELLER','ID_SELLER_COUNTRY','SELLER_GEO_1','SELLER_GEO_2','SELLER_GEO_3','SEGMENT','BRAND_GROUP','ID_BRAND','ID_PAGE','ID_SITE'],axis=1)

In [57]:
data.head()

Unnamed: 0,DEPOSIT_PRICE,INSERTION_PRICE,PRICE,RECO_PRICE,TIME_ONLINE,MMAO_NB,FLAG1,SELLER_AGE,SELLER_FROM_FIRST_SELL_TO_INVOICE,FLAG2,...,TOTAL_TRK_1D,TOTAL_TRK_7D,TOTAL_TRK_30D,NB_DROP_SELLER_1D,NB_DROP_SELLER_7D,NB_DROP_SELLER_30D,NB_WIDTHDRAW_SELLER_1D,NB_WIDTHDRAW_SELLER_7D,NB_WIDTHDRAW_SELLER_30D,LABEL
0,78.0,78.0,67.0,52.0,108.916667,5,0,108,107.0,0,...,0,8,26,0,0,0,0,0,0,0
1,258.0,262.0,118.0,94.0,142.083333,1,0,3705,3676.0,0,...,39,154,612,2,25,123,0,5,5,0
2,478.0,478.0,232.0,2.0,11.083333,4,1,399,393.0,0,...,12,75,113,1,11,16,0,2,2,0
3,350.473498,354.473498,287.256198,0.0,121.166667,0,1,1080,126.0,0,...,4,6,8,0,0,4,0,0,1,0
4,91.0,91.0,72.0,86.0,11.875,3,0,2356,1694.0,0,...,18,91,373,2,8,33,0,0,4,0


In [58]:
features = data.columns.values[:-1]
target = data.columns.values[-1]

In [59]:
X = data.loc[:, features]
Y = data.loc[:, target]

In [60]:
X

Unnamed: 0,DEPOSIT_PRICE,INSERTION_PRICE,PRICE,RECO_PRICE,TIME_ONLINE,MMAO_NB,FLAG1,SELLER_AGE,SELLER_FROM_FIRST_SELL_TO_INVOICE,FLAG2,...,NB_DAYS_SINCE_LAST_SESSION,TOTAL_TRK_1D,TOTAL_TRK_7D,TOTAL_TRK_30D,NB_DROP_SELLER_1D,NB_DROP_SELLER_7D,NB_DROP_SELLER_30D,NB_WIDTHDRAW_SELLER_1D,NB_WIDTHDRAW_SELLER_7D,NB_WIDTHDRAW_SELLER_30D
0,78.000000,78.000000,67.000000,52.0,108.916667,5,0,108,107.0,0,...,2.791667,0,8,26,0,0,0,0,0,0
1,258.000000,262.000000,118.000000,94.0,142.083333,1,0,3705,3676.0,0,...,0.791667,39,154,612,2,25,123,0,5,5
2,478.000000,478.000000,232.000000,2.0,11.083333,4,1,399,393.0,0,...,0.625000,12,75,113,1,11,16,0,2,2
3,350.473498,354.473498,287.256198,0.0,121.166667,0,1,1080,126.0,0,...,0.166667,4,6,8,0,0,4,0,0,1
4,91.000000,91.000000,72.000000,86.0,11.875000,3,0,2356,1694.0,0,...,0.541667,18,91,373,2,8,33,0,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,136.793597,136.793597,139.652614,156.0,20.583333,4,0,226,192.0,0,...,7.166667,0,4,31,0,0,0,0,0,0
399996,248.000000,252.000000,192.000000,,12.166667,0,0,267,241.0,0,...,0.916667,36,133,521,2,6,23,0,0,4
399997,288.000000,292.000000,208.000000,140.0,2.000000,1,0,3593,2978.0,0,...,0.833333,17,49,217,0,0,2,0,0,1
399998,72.000000,72.000000,63.000000,43.0,22.125000,7,0,542,533.0,0,...,0.000000,9,42,130,0,0,8,0,0,0


In [96]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42, stratify=Y)

In [97]:
My_list = [*range(1,63)]

In [94]:
Mylist = [0,1,2,3,4,5,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,56,57,58,59,60,61]

In [98]:
numeric_features = My_list
categorical_features = []

num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(drop='first'))
])
preprocess = ColumnTransformer(
    transformers=[
        ('num', num_transformer, numeric_features),
        ('cat', cat_transformer, categorical_features)
    ])
X_train = preprocess.fit_transform(X_train)

## Baseline

In [99]:
classifier = LogisticRegression(max_iter=100000) 
classifier.fit(X_train, Y_train)

LogisticRegression(max_iter=100000)

In [100]:
Y_train_pred = classifier.predict(X_train)

In [101]:
X_test = preprocess.transform(X_test)

In [102]:
Y_test_pred = classifier.predict(X_test)

In [103]:
print("f1-score on train set : ", f1_score(Y_train, Y_train_pred))
print("f1-score on test set : ", f1_score(Y_test, Y_test_pred))
print("precision-score on train set : ", precision_score(Y_train, Y_train_pred))
print("precision-score on test set : ", precision_score(Y_test, Y_test_pred))
print("average-precision-score on train set : ", average_precision_score(Y_train, Y_train_pred))
print("average-precision-score on test set : ", average_precision_score(Y_test, Y_test_pred))

f1-score on train set :  0.04324278220819651
f1-score on test set :  0.041402908468776735
precision-score on train set :  0.6190476190476191
precision-score on test set :  0.6436170212765957
average-precision-score on train set :  0.08300344892210088
average-precision-score on test set :  0.08296660059651194


## GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
grid={"C":[10,20,30,40,50,100,200],'tol':[0.01,0.02,0.03,0.04,0.05,0.06]}
logreg=LogisticRegression()
grid_logreg=GridSearchCV(logreg,grid,cv=10)
grid_logreg.fit(X_train,Y_train)

print("tuned hpyerparameters :",grid_logreg.best_params_)
print("accuracy :",grid_logreg.best_score_)

## Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
cm = confusion_matrix(Y_test, Y_test_pred, normalize="true")
_ , ax = plt.subplots()
ax.set(title="Normalized Confusion Matrix on Test set")
disp = ConfusionMatrixDisplay(cm).plot(ax=ax)

## Feature importance

In [None]:
from sklearn.feature_selection import chi2
feature_importance = chi2(X, y)
feature_importance 

In [None]:
feature_ranking = pd.DataFrame(columns=X.columns, data=feature_importance, 
                               index=["Chi2 Score", "P-value"]).transpose().sort_values("Chi2 Score")
feature_ranking = feature_ranking.reset_index().rename({'index': 'feature'}, axis = 1)
feature_ranking

In [None]:
px.bar(feature_ranking, y = 'feature', x = 'Chi2 Score')