# Import Packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from IPython.display import Image
import pydot
from six import StringIO
from sklearn.tree import export_graphviz
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Initialize The Data Source

In [None]:
# Mount Google Drive under /content/gdrive; the CSV paths read further down
# in this notebook live below that mount point.  Requires the Colab runtime.
from google.colab import drive
 
drive.mount("/content/gdrive")

# Read Train Dataset

In [None]:
train_data = pd.read_csv("/content/gdrive/My Drive/data/train_ver2.csv")

# Train Data Filtering

In [None]:
# Keep only the rows whose 'fecha_dato' is one of the two snapshot dates.
snapshot_dates = ['2015-05-28', '2016-05-28']
train_data = train_data[train_data['fecha_dato'].isin(snapshot_dates)]

In [None]:
train_data.head()

In [None]:
train_data.info()

In [None]:
train_data.columns

# Read Test Dataset

In [None]:
test_data = pd.read_csv("/content/gdrive/My Drive/data/test_ver2.csv")

In [None]:
test_data.head()

In [None]:
test_data.columns

In [None]:
test_data.info()

# Train Data Backup

In [None]:
# Take a real, independent copy.  Building a DataFrame from another DataFrame
# does not copy the underlying data by default, so the "backup" would share
# memory with train_data and in-place writes could reach both frames.
train_df = train_data.copy()

In [None]:
train_df.head()

# Test Data Backup

In [None]:
# Take a real, independent copy.  Building a DataFrame from another DataFrame
# does not copy the underlying data by default, so the "backup" would share
# memory with test_data and in-place writes could reach both frames.
test_df = test_data.copy()

In [None]:
test_df.head()

# Remove NAN Values

In [None]:
train_df.columns

In [None]:
test_df.columns

In [None]:
test_df.info()

In [None]:
# For both frames: remove the two columns listed below, then discard every
# row that still contains a missing value.
unused_columns = ['conyuemp', 'ult_fec_cli_1t']
for frame in (train_df, test_df):
  frame.drop(columns=unused_columns, inplace=True)
  frame.dropna(inplace=True)

In [None]:
train_df.info()

In [None]:
test_df.info()

# Train Data Analysing

In [None]:
initial_products_label = ['ind_ahor_fin_ult1','ind_aval_fin_ult1','ind_cco_fin_ult1','ind_cder_fin_ult1','ind_cno_fin_ult1','ind_ctju_fin_ult1','ind_ctma_fin_ult1','ind_ctop_fin_ult1','ind_ctpp_fin_ult1','ind_deco_fin_ult1','ind_deme_fin_ult1','ind_dela_fin_ult1','ind_ecue_fin_ult1','ind_fond_fin_ult1','ind_hip_fin_ult1','ind_plan_fin_ult1','ind_pres_fin_ult1','ind_reca_fin_ult1','ind_tjcr_fin_ult1','ind_valo_fin_ult1','ind_viv_fin_ult1','ind_nomina_ult1','ind_nom_pens_ult1','ind_recibo_ult1']

In [None]:
products_df = train_df.loc[:, initial_products_label]

In [None]:
products_df[products_df['ind_nomina_ult1'] == 1]

In [None]:
products_df.drop(['ind_nomina_ult1', 'ind_nom_pens_ult1'], inplace=True, axis=1)

In [None]:
products_df

In [None]:
products_label = products_df.columns

In [None]:
# One integer code per product column: 0 .. len(products_label) - 1.
products = list(range(len(products_label)))

In [None]:
# Lookup table: product column name -> integer code at the same position.
products_dict = dict(zip(products_label, products))

In [None]:
products_df.info()

In [None]:
products_df.describe()

In [None]:
# Re-code every ownership flag from 1 to its product's integer code, so that
# a row-wise maximum over these columns identifies a product.
# The loop has to include the first column: skipping it left raw 1s there,
# which cannot be told apart from code 1 (the second product).
# NOTE(review): code 0 still coincides with "flag not set", so a customer who
# holds only the first product and a customer who holds none both yield 0.
for label in products_label:
  products_df[label] = products_df[label].replace(1, products_dict[label])

In [None]:
products_df['target'] = products_df.max(axis=1)

In [None]:
np.unique(products_df['target'])

In [None]:
len(np.unique(products_df['target']))

In [None]:
len(products_label)

# Add Train Targets

In [None]:
# Start the feature frame from an independent copy of train_df.  The
# DataFrame-from-DataFrame constructor shares the underlying data, so the
# in-place edits made to x_train_df later could otherwise alter train_df.
x_train_df = train_df.copy()

In [None]:
x_train_df.drop(initial_products_label, inplace=True, axis=1)

In [None]:
x_train_df['target'] = products_df['target']

In [None]:
# Start the feature frame from an independent copy of test_df.  The
# DataFrame-from-DataFrame constructor shares the underlying data, so the
# in-place edits made to x_test_df later could otherwise alter test_df.
x_test_df = test_df.copy()

# Data PreProcessing

In [None]:
x_train_df.head()

In [None]:
x_train_df.describe()

In [None]:
x_train_df.info()

In [None]:
x_train_df.isnull().any()

In [None]:
x_test_df.isnull().any()

In [None]:
x_train_df['fecha_dato'] = pd.to_datetime(x_train_df["fecha_dato"])
x_test_df['fecha_dato'] = pd.to_datetime(x_test_df["fecha_dato"])

In [None]:
x_train_df['fecha_alta'] = pd.to_datetime(x_train_df["fecha_alta"])
x_test_df['fecha_alta'] = pd.to_datetime(x_test_df["fecha_alta"])

In [None]:
x_train_df.dtypes

In [None]:
x_test_df.dtypes

In [None]:
# Clean 'age': render as text, remove embedded spaces, parse as integer.
# The intermediate Series keeps its name because it is deleted explicitly
# further down.
train_age_df = (
  x_train_df['age']
  .astype(str)
  .str.replace(" ", "")
  .astype(int)
)

x_train_df['age'] = train_age_df

In [None]:
# Clean 'age': render as text, remove embedded spaces, parse as integer.
# The intermediate Series keeps its name because it is deleted explicitly
# further down.
test_age_df = (
  x_test_df['age']
  .astype(str)
  .str.replace(" ", "")
  .astype(int)
)

x_test_df['age'] = test_age_df

In [None]:
train_new_columns = ['transactional_date', 'customer_code', 'employee_index', 'country_of_residence', 'gender', 'age', 'dof_contract',
                'new_customer_index', 'seniority', 'customer_type', 'cust_type_at_beg', 'cust_relation_at_beg', 'residency_index', 'foreign_index',
                'channel_used', 'decreased_index', 'address_type', 'province_code', 'province_name', 'activity_index', 'gross_income', 'segmentation', 'target'
                ]

test_new_columns = ['transactional_date', 'customer_code', 'employee_index', 'country_of_residence', 'gender', 'age', 'dof_contract',
                'new_customer_index', 'seniority', 'customer_type', 'cust_type_at_beg', 'cust_relation_at_beg', 'residency_index', 'foreign_index',
                'channel_used', 'decreased_index', 'address_type', 'province_code', 'province_name', 'activity_index', 'gross_income', 'segmentation'
                ]

In [None]:
# beg -> beginning
# cust -> customer

x_train_df.columns = train_new_columns

In [None]:
x_test_df.columns = test_new_columns

In [None]:
x_train_df.describe()

In [None]:
x_train_df.info()

In [None]:
x_train_df['employee_index'].unique()

In [None]:
x_test_df['employee_index'].unique()

In [None]:
labelEncoder = LabelEncoder()

In [None]:
labelEncoder.fit_transform(x_train_df['employee_index'])
labelEncoder.fit_transform(x_test_df['employee_index'])

In [None]:
np.unique(labelEncoder.fit_transform(x_train_df['employee_index']))

In [None]:
# Fit the encoder once on the categories of BOTH frames, then apply that one
# mapping to each.  Fitting per frame numbers each frame's own sorted category
# set, so the same category could get different integers in train and test.
labelEncoder.fit(pd.concat([x_train_df['employee_index'], x_test_df['employee_index']]))
x_train_df['employee_index'] = labelEncoder.transform(x_train_df['employee_index'])
x_test_df['employee_index'] = labelEncoder.transform(x_test_df['employee_index'])

In [None]:
x_train_df['new_customer_index'] = x_train_df['new_customer_index'].astype(int)
x_test_df['new_customer_index'] = x_test_df['new_customer_index'].astype(int)

In [None]:
x_train_df['customer_type'] = x_train_df['customer_type'].astype(int)
x_test_df['customer_type'] = x_test_df['customer_type'].astype(int)

In [None]:
x_train_df['cust_type_at_beg'] = x_train_df['cust_type_at_beg'].astype(float)
x_test_df['cust_type_at_beg'] = x_test_df['cust_type_at_beg'].astype(float)

In [None]:
x_train_df['cust_type_at_beg'] = x_train_df['cust_type_at_beg'].apply(int)
x_test_df['cust_type_at_beg'] = x_test_df['cust_type_at_beg'].apply(int)

In [None]:
x_train_df.drop(['address_type'], inplace=True, axis=1)
x_test_df.drop(['address_type'], inplace=True, axis=1)

In [None]:
x_train_df.info()

In [None]:
x_train_df['province_code'] = x_train_df['province_code'].apply(int)

In [None]:
x_train_df['activity_index'] = x_train_df['activity_index'].apply(int)

In [None]:
x_test_df['province_code'] = x_test_df['province_code'].apply(int)
x_test_df['activity_index'] = x_test_df['activity_index'].apply(int)

In [None]:
x_train_df.info()

In [None]:
# Integer-encode the remaining categorical and date columns.
# For every column present in both frames the encoder is fitted once on the
# combined train + test values, so a given category (or date) maps to the
# same integer in both frames.  Fitting per frame numbers the categories
# independently, which makes the encoded train and test features disagree
# whenever the two frames do not contain exactly the same set of values.
shared_label_columns = [
  'country_of_residence', 'gender', 'cust_relation_at_beg', 'residency_index',
  'foreign_index', 'decreased_index', 'segmentation', 'province_name',
  'transactional_date', 'dof_contract', 'channel_used',
]

for column in shared_label_columns:
  labelEncoder.fit(pd.concat([x_train_df[column], x_test_df[column]]))
  x_train_df[column] = labelEncoder.transform(x_train_df[column])
  x_test_df[column] = labelEncoder.transform(x_test_df[column])

# 'products' exists only in the training frame: the 'target' values
# re-numbered to consecutive integers.
x_train_df['products'] = labelEncoder.fit_transform(x_train_df['target'])

In [None]:
x_train_df['seniority'] = x_train_df['seniority'].apply(str)
x_train_df['seniority'] = x_train_df['seniority'].str.replace(" ", "")
x_train_df['seniority'] = x_train_df['seniority'].apply(int)
train_seniority_df = x_train_df.loc[:, 'seniority']

In [None]:
x_test_df['seniority'] = x_test_df['seniority'].apply(str)
x_test_df['seniority'] = x_test_df['seniority'].str.replace(" ", "")
x_test_df['seniority'] = x_test_df['seniority'].apply(int)
test_seniority_df = x_test_df.loc[:, 'seniority']

In [None]:
# Replace the -999999 placeholder in 'seniority' with the truncated mean of
# the genuine values.
# * The write goes through .loc on the frame itself; assigning into a Series
#   pulled out of the frame earlier is not guaranteed to update the frame.
# * Placeholder rows are excluded from the mean so the sentinel cannot drag
#   the fill value down.
# * Nothing is done when there is no placeholder, or when every row is a
#   placeholder (no genuine value to average).
for frame in (x_train_df, x_test_df):
  is_placeholder = frame['seniority'] == -999999
  if is_placeholder.any() and not is_placeholder.all():
    fill_value = int(frame.loc[~is_placeholder, 'seniority'].mean())
    frame.loc[is_placeholder, 'seniority'] = fill_value

In [None]:
x_train_df['target'] = x_train_df['target'].astype(int)

In [None]:
del train_age_df
del test_age_df
del train_seniority_df
del test_seniority_df

In [None]:
# Bucket 'age', 'seniority' and 'gross_income' of the training frame into
# small integer codes.
# * Assignment goes through .loc on the frame.  The chained form
#   frame[col][mask] = value triggers SettingWithCopyWarning and silently does
#   nothing when pandas copy-on-write is active.
# * Intervals are half-open [low, high).  For the integer columns this matches
#   the previous inclusive bounds exactly; for the float income it closes the
#   gaps the old bounds left (e.g. 19999.5 matched neither "<= 19999" nor
#   ">= 20000").
# * Masks are computed from a snapshot of the raw column, so a freshly written
#   code can never be re-binned by a later interval.
# * Values outside every interval are left unchanged.
# * Seniority codes skip 2 (0, 1, 3, ..., 13); kept so the numbering stays
#   identical to the one applied to the test frame.
train_binning = {
  'age': (
    [0, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 201],
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  ),
  'seniority': (
    [0, 21, 41, 61, 81, 101, 121, 141, 161, 181, 201, 221, 241, 261],
    [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
  ),
  'gross_income': (
    [0, 20000, 40000, 60000, 80000, 100000, 120000, 140000, np.inf],
    [0, 1, 2, 3, 4, 5, 6, 7],
  ),
}

for column, (edges, codes) in train_binning.items():
  raw_values = x_train_df[column].copy()
  for low, high, code in zip(edges[:-1], edges[1:], codes):
    x_train_df.loc[(raw_values >= low) & (raw_values < high), column] = code

In [None]:
x_train_df['gross_income'] = x_train_df['gross_income'].apply(int)

In [None]:
# Bucket 'age' and 'seniority' of the test frame into small integer codes.
# * Assignment goes through .loc on the frame.  The chained form
#   frame[col][mask] = value triggers SettingWithCopyWarning and silently does
#   nothing when pandas copy-on-write is active.
# * Intervals are half-open [low, high), which for these integer columns is
#   the same as the previous inclusive bounds.
# * Masks are computed from a snapshot of the raw column, so a freshly written
#   code can never be re-binned by a later interval.
# * Values outside every interval are left unchanged.
# * Seniority codes skip 2 (0, 1, 3, ..., 13); kept so the numbering stays
#   identical to the one applied to the training frame.
test_binning = {
  'age': (
    [0, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 201],
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  ),
  'seniority': (
    [0, 21, 41, 61, 81, 101, 121, 141, 161, 181, 201, 221, 241, 261],
    [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
  ),
}

for column, (edges, codes) in test_binning.items():
  raw_values = x_test_df[column].copy()
  for low, high, code in zip(edges[:-1], edges[1:], codes):
    x_test_df.loc[(raw_values >= low) & (raw_values < high), column] = code

In [None]:
x_test_df['gross_income'].unique()

In [None]:
x_test_df['gross_income'] = x_test_df['gross_income'].str.lstrip()

In [None]:
x_test_df['gross_income'].unique()

In [None]:
x_test_df.info()

In [None]:
x_test_df.drop(x_test_df[x_test_df['gross_income'] == 'NA'].index, inplace=True)

In [None]:
x_test_df['gross_income'].unique()

In [None]:
x_test_df['gross_income'] = x_test_df['gross_income'].apply(float)

In [None]:
# Bucket the test frame's 'gross_income' into codes 0..7 (20000-wide bands,
# everything from 140000 upwards in the last one).
# * Assignment goes through .loc on the frame instead of the chained
#   frame[col][mask] = value form, which is not guaranteed to write back.
# * Intervals are half-open [low, high), so fractional incomes such as
#   19999.5 no longer fall between "<= 19999" and ">= 20000" and stay unbinned.
# * Masks come from a snapshot of the raw column; negative values are left
#   unchanged.
income_edges = [0, 20000, 40000, 60000, 80000, 100000, 120000, 140000, np.inf]
raw_income = x_test_df['gross_income'].copy()
for income_code, (low, high) in enumerate(zip(income_edges[:-1], income_edges[1:])):
  x_test_df.loc[(raw_income >= low) & (raw_income < high), 'gross_income'] = income_code

In [None]:
x_train_df['gross_income'] = x_train_df['gross_income'].apply(int)

In [None]:
x_train_df['gross_income'].unique()

In [None]:
x_train_df['gross_income'][x_train_df['gross_income'] == 39999] = 1
x_train_df['gross_income'][x_train_df['gross_income'] == 59999] = 2
x_train_df['gross_income'][x_train_df['gross_income'] == 79999] = 3
x_train_df['gross_income'][x_train_df['gross_income'] == 99999] = 4
x_train_df['gross_income'][x_train_df['gross_income'] == 119999] = 5

In [None]:
x_train_df['gross_income'].unique()

In [None]:
x_test_df['gross_income'] = x_test_df['gross_income'].apply(int)

In [None]:
x_test_df['gross_income'].unique()

In [None]:
x_test_df['gross_income'][x_test_df['gross_income'] == 39999] = 1
x_test_df['gross_income'][x_test_df['gross_income'] == 59999] = 2
x_test_df['gross_income'][x_test_df['gross_income'] == 79999] = 3
x_test_df['gross_income'][x_test_df['gross_income'] == 99999] = 4
x_test_df['gross_income'][x_test_df['gross_income'] == 119999] = 5

In [None]:
x_test_df['gross_income'].unique()

In [None]:
x_train_df['customer_type'] = labelEncoder.fit_transform(x_train_df['customer_type'])
x_test_df['customer_type'] = labelEncoder.fit_transform(x_test_df['customer_type'])

In [None]:
train_customer_code_df = x_train_df.loc[:, 'customer_code']
test_customer_code_df = x_test_df.loc[:, 'customer_code']

In [None]:
x_train_df.info()

In [None]:
x_test_df.info()

# Splitting DataFrame

In [None]:
# Feature frames for the models.
# 'products' is derived from 'target' (its label-encoded form), so it has to
# be removed together with 'target': leaving it in hands the answer to the
# model and gives x_train one more column than x_test.
dropped_features = ['customer_code', 'channel_used', 'province_code', 'province_name']
x_train = x_train_df.drop(['target', 'products'] + dropped_features, axis=1)
x_test = x_test_df.drop(dropped_features, axis=1)

In [None]:
y_train = x_train_df.loc[:, 'target'].values

In [None]:
plt.scatter(x_train['employee_index'], np.arange(len(x_train)))

In [None]:
plt.scatter(y_train, np.arange(len(y_train)))

In [None]:
x_train = x_train.values
x_test = x_test.values

# Random Forest v.0.0.1

In [None]:
def rfc_model(x_data, y_data, max_depth=6, max_features="auto", criterion='gini', max_leaf_nodes=None, random_state=0, 
              n_jobs=None, accuracy_feature_name=""):
  """Fit a RandomForestClassifier and print its accuracy and class report.

  The model is trained on (x_data, y_data) and then scored on that same data,
  so the printed figures are training-set scores, not a held-out estimate.

  Args:
    x_data: feature matrix passed to ``fit`` and ``predict``.
    y_data: labels passed to ``fit`` and used as the reference for scoring.
    max_depth, criterion, max_leaf_nodes, random_state, n_jobs: forwarded
      unchanged to ``RandomForestClassifier``.
    max_features: forwarded to ``RandomForestClassifier``; the legacy value
      ``"auto"`` is translated to ``"sqrt"`` (see below).
    accuracy_feature_name: free-text label printed in front of the scores.

  Returns:
    The fitted ``RandomForestClassifier``.
  """
  # "auto" is no longer accepted by current scikit-learn releases; for a
  # classifier it was an alias of "sqrt", so substitute the equivalent value.
  if isinstance(max_features, str) and max_features == "auto":
    max_features = "sqrt"

  clf_model = RandomForestClassifier(max_depth=max_depth, max_features=max_features,
                                     criterion=criterion, max_leaf_nodes=max_leaf_nodes,
                                     random_state=random_state, n_jobs=n_jobs)
  clf_model.fit(x_data, y_data)
  y_pred = clf_model.predict(x_data)
  print("Accuracy:", accuracy_feature_name, '-->', accuracy_score(y_data, y_pred))
  print(accuracy_feature_name, '-->', classification_report(y_data, y_pred))
  return clf_model

In [None]:
rfc_model(x_train, y_train, max_depth=7, random_state=8, criterion='entropy', accuracy_feature_name="Max Depth = " + str(7) + ", Random State = " + str(8))

# KNN v.0.0.1

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
knn_model = KNeighborsClassifier(n_neighbors=100, algorithm='ball_tree', p=1)

In [None]:
knn_model.fit(x_train, y_train)

In [None]:
y_pred = knn_model.predict(x_train)

In [None]:
print("Accuracy:", accuracy_score(y_train, y_pred))
print(classification_report(y_train, y_pred))

In [None]:
out_df = knn_model.kneighbors(x_train, return_distance=False)

In [None]:
out_df

# Model - V.0.0.2 Training -- Filter New Products

In [None]:
import xgboost as xgb

In [None]:
x_train = x_train_df.drop(['target', 'customer_code', 'channel_used', 'province_code', 'products', 'province_name'], axis=1).values

# Booster configuration for a multi-class model that outputs one probability
# per class.
# * 'verbosity': 0 replaces the former 'silent': 1, which current XGBoost
#   releases no longer recognise.
# * 'num_class' is taken from the product list rather than hard-coded, because
#   the prediction columns are later indexed by the product codes.
param = {
  'objective': 'multi:softprob',
  'eta': 0.8,
  'max_depth': 6,
  'verbosity': 0,
  'num_class': len(products_label),
  'eval_metric': "mlogloss",
  'min_child_weight': 12,
  'subsample': 0.85,
  'colsample_bytree': 0.9,
  'seed': 10,
}
num_rounds = 20

# Kept for any later cell that still refers to it; training uses the dict.
plst = list(param.items())
xgtrain = xgb.DMatrix(x_train, label=y_train)
model = xgb.train(param, xgtrain, num_rounds)

# Model - V.0.0.2 Testing -- Filter New Products

In [None]:
xgtest = xgb.DMatrix(x_test)
preds = model.predict(xgtest)

In [None]:
len(preds[0, :])

In [None]:
preds.shape

In [None]:
preds[0, :]

In [None]:
sorted(preds[0, :], reverse=True)

In [None]:
train_customer_code = train_customer_code_df.values

In [None]:
len(train_customer_code)

In [None]:
# For each scored customer, keep the 8 products with the highest predicted
# probability (best first).
# `preds` was produced from x_test, so row i belongs to the i-th TEST
# customer; keying the result by the train customer codes attached the
# recommendations to unrelated customers.
test_customer_code = test_customer_code_df.values

out_df = {}
for customer_code, probabilities in zip(test_customer_code, preds):
  # product column name -> predicted probability for this customer
  pred_products = {label: probabilities[code] for label, code in zip(products_label, products)}
  out_df[customer_code] = sorted(pred_products, key=pred_products.get, reverse=True)[:8]

In [None]:
out_df