# Import libraries

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from joblib import dump

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from  sklearn.pipeline  import  Pipeline
from  sklearn  import  set_config
set_config(display='diagram')

In [7]:
from sklearn.neural_network import MLPClassifier
from sklearn.multioutput import MultiOutputClassifier

# Custom functions

In [8]:
def dataset_parameters(df,target):
    """Group the feature columns of ``df`` by dtype family.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both the feature columns and the target column(s).
    target : label or list of labels
        Column(s) removed from ``df`` before the features are classified;
        forwarded unchanged to ``DataFrame.drop(columns=...)``.

    Returns
    -------
    tuple
        ``(categorical_features, numerical_features, date_features,
        all_features, target)``. The first three are lists of column names in
        their original column order, ``all_features`` is the concatenation
        numerical + categorical + date, and ``target`` is returned as given.

    Notes
    -----
    Dtypes are classified with the ``pandas.api.types`` predicates rather than
    by comparing against a fixed list of dtype names, so every integer/float
    width (and nullable numeric dtypes) counts as numerical and
    ``datetime64[ns]`` (with or without timezone) counts as a date.
    Columns whose dtype fits none of the three families (e.g. timedelta) are
    left out of every list.
    """
    dtype_checks = pd.api.types
    categorical_features, numerical_features, date_features = [], [], []

    # Iterating over ``dtypes`` (instead of ``df[col].dtype``) also works when
    # column labels are duplicated.
    for column, dtype in df.drop(columns=target).dtypes.items():
        if dtype_checks.is_datetime64_any_dtype(dtype):
            date_features.append(column)
        elif (
            # bool must be tested before numeric: pandas treats bool as numeric.
            dtype_checks.is_bool_dtype(dtype)
            or dtype_checks.is_object_dtype(dtype)
            or dtype_checks.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        ):
            categorical_features.append(column)
        elif dtype_checks.is_numeric_dtype(dtype):
            numerical_features.append(column)

    all_features = numerical_features + categorical_features + date_features
    return categorical_features, numerical_features, date_features, all_features, target

# Load and set up data

In [9]:
# Read the pickled object stored in 'reuters_df.pkl' and bind it to `df`.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
with open('reuters_df.pkl', mode='rb') as pickle_file:
    df = pickle.load(pickle_file)

In [10]:
# Show the loaded frame as the cell output (a quick visual sanity check).
df

Unnamed: 0,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,...,feature241,feature242,feature243,label1,label2,label3,label4,label5,label6,label7
0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,False,False,False,True,False,False,False
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,False,False,False,True,False,False,True
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,False,True,False,False,False,False,False
3,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,False,False,True,False,False,False,False
4,1.0,0.0,1.0,0.0,0.0,0.0,7.0,5.0,2.0,0.0,...,0.0,0.0,0.0,False,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,False,False,False,False,True,False,False
1996,1.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,3.0,1.0,...,0.0,0.0,0.0,False,True,False,False,False,False,False
1997,1.0,1.0,1.0,1.0,4.0,4.0,0.0,0.0,4.0,4.0,...,0.0,0.0,0.0,True,False,False,False,False,False,False
1998,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,False,False,False,True,False,False,False


In [11]:
# Names of the seven target columns: 'label1' ... 'label7'.
labels = [f'label{index}' for index in range(1, 8)]

In [12]:
# Split the non-label columns of `df` into dtype-based feature groups.
(
    categorical_features,
    numerical_features,
    date_features,
    all_features,
    target,
) = dataset_parameters(df, labels)

In [13]:
# Feature matrix: every column that was assigned to a feature group.
X = df.loc[:, all_features]

In [14]:
# Target matrix: one column per label listed in `target`.
y = df.loc[:, target]

In [15]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=76,
)

# Set up pipeline, train and test with best hyperparameters

In [16]:
# One-hot encode the categorical columns.
# handle_unknown='ignore' makes a category that was not present during fit
# encode as all zeros at transform/predict time instead of raising an error,
# so the saved pipeline keeps working on new data.
categorical_transformer = Pipeline(steps=[
    ('one' , OneHotEncoder(handle_unknown='ignore'))
])

In [17]:
# Numeric columns go through two steps in order:
#   'scale'     -> StandardScaler (per-feature standardisation)
#   'normalize' -> Normalizer (per-sample rescaling)
numerical_transformer = Pipeline([
    ('scale', StandardScaler()),
    ('normalize', Normalizer()),
])

In [18]:
# Route each feature group to its own transformer.
# NOTE(review): columns in neither list (e.g. date features) are not passed to
# any transformer here -- presumably dropped by ColumnTransformer's default
# `remainder`; confirm this is intended.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [21]:
# Full model: preprocessing followed by a MultiOutputClassifier wrapping an
# MLPClassifier, so every label column is handled by the wrapped estimator.
#
# random_state is fixed (same seed value as the train/test split) so that the
# network's random initialisation -- and therefore the fitted model and its
# score -- is repeatable across "Restart & Run All".
#
# NOTE(review): with the default solver and early_stopping left at its
# default, scikit-learn documents `momentum`, `power_t` and
# `validation_fraction` as unused; they are kept as-is -- confirm whether
# they are intentional leftovers from the hyperparameter search.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', MultiOutputClassifier(MLPClassifier(alpha=0.01, batch_size=60, beta_1=0.6,
              hidden_layer_sizes=(100, 100, 100), max_iter=50, momentum=0.0,
              power_t=1, shuffle=False, tol=0.001, validation_fraction=0.3,
              random_state=76)))
])

In [22]:
# Fit the preprocessing steps and the classifier on the training split.
pipeline.fit(X_train, y=y_train)



In [23]:
# Evaluate on the held-out split using the final estimator's `score` method.
# NOTE(review): for multi-label targets this is presumably exact-match
# (subset) accuracy across all labels -- confirm against the sklearn docs.
pipeline.score(X_test, y=y_test)

0.66

# Save pipeline to disk

In [24]:
# Persist the fitted pipeline (preprocessing + model) to 'pipeline.joblib'.
dump(pipeline, filename='pipeline.joblib')

['pipeline.joblib']