In [30]:
# Data manipulation
from seaborn import load_dataset
import numpy as np
import pandas as pd
pd.options.display.precision = 4
pd.options.mode.chained_assignment = None  

# Machine learning pipeline
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn import set_config
set_config(display="diagram")

# Load data
columns = ['alive', 'class', 'embarked', 'who', 'alone', 'adult_male']
df = load_dataset('titanic').drop(columns=columns)
df['deck'] = df['deck'].astype('object')
print(df.shape)
df.head()

(891, 9)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,deck,embark_town
0,0,3,male,22.0,1,0,7.25,,Southampton
1,1,1,female,38.0,1,0,71.2833,C,Cherbourg
2,1,3,female,26.0,0,0,7.925,,Southampton
3,1,1,female,35.0,1,0,53.1,C,Southampton
4,0,3,male,35.0,0,0,8.05,,Southampton


In [31]:
SEED = 42
TARGET = 'survived'
FEATURES = df.columns.drop(TARGET)

NUMERICAL = df[FEATURES].select_dtypes('number').columns
print(f"Numerical features: {', '.join(NUMERICAL)}")

CATEGORICAL = pd.Index(np.setdiff1d(FEATURES, NUMERICAL))
print(f"Categorical features: {', '.join(CATEGORICAL)}")

Numerical features: pclass, age, sibsp, parch, fare
Categorical features: deck, embark_town, sex


### 1. –ù–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã–π –ø–æ–¥—Ö–æ–¥

In [32]:
# Impute numerical variables with mean
df_num_imputed = df[NUMERICAL].fillna(df[NUMERICAL].mean())
# Normalise numerical variables
df_num_scaled = df_num_imputed.subtract(df_num_imputed.min(), axis=1)\
                              .divide(df_num_imputed.max()-df_num_imputed.min(), axis=1)

# Impute categorical variables with a constant
df_cat_imputed = df[CATEGORICAL].fillna('missing')
# One-hot-encode categorical variables
df_cat_encoded = pd.get_dummies(df_cat_imputed, drop_first=True)

# Merge data
df_preprocessed = df_num_scaled.join(df_cat_encoded)
df_preprocessed.head()

Unnamed: 0,pclass,age,sibsp,parch,fare,deck_B,deck_C,deck_D,deck_E,deck_F,deck_G,deck_missing,embark_town_Queenstown,embark_town_Southampton,embark_town_missing,sex_male
0,1.0,0.2712,0.125,0.0,0.0142,False,False,False,False,False,False,True,False,True,False,True
1,0.0,0.4722,0.125,0.0,0.1391,False,True,False,False,False,False,False,False,False,False,False
2,1.0,0.3214,0.0,0.0,0.0155,False,False,False,False,False,False,True,False,True,False,False
3,0.0,0.4345,0.125,0.0,0.1036,False,True,False,False,False,False,False,False,True,False,False
4,1.0,0.4345,0.0,0.0,0.0157,False,False,False,False,False,False,True,False,True,False,True


–ú—ã –≤–º–µ–Ω–∏–ª–∏ –Ω–µ–¥–æ—Å—Ç–∞—é—â–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è, –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–ª–∏ —á–∏—Å–ª–æ–≤—ã–µ –ø–µ—Ä–µ–º–µ–Ω–Ω—ã–µ –æ—Ç 0 –¥–æ 1 –∏ –∑–∞–∫–æ–¥–∏—Ä–æ–≤–∞–ª–∏ –∫–∞—Ç–µ–≥–æ—Ä–∏–∞–ª—å–Ω—ã–µ –ø–µ—Ä–µ–º–µ–Ω–Ω—ã–µ –º–µ—Ç–æ–¥–æ–º –≥–æ—Ä—è—á–µ–≥–æ –∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏—è. –ü–æ—Å–ª–µ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏ –¥–∞–Ω–Ω—ã–µ —Å–µ–∫—Ü–∏–æ–Ω–∏—Ä—É—é—Ç—Å—è –∏ –ø–æ–¥–≥–æ–Ω—è–µ—Ç—Å—è –º–æ–¥–µ–ª—å:

In [33]:
# Partition data
X_train, X_test, y_train, y_test = train_test_split(df_preprocessed, df[TARGET], 
                                                    test_size=.2, random_state=SEED, 
                                                    stratify=df[TARGET])

# Train a model
model = LogisticRegression()
model.fit(X_train, y_train)

–•–æ—Ä–æ—à–æ, –¥–∞–≤–∞–π—Ç–µ –ø—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ–º, —á—Ç–æ –Ω–µ —Ç–∞–∫ —Å —ç—Ç–∏–º –ø–æ–¥—Ö–æ–¥–æ–º:
   
‚óºÔ∏è –í–º–µ–Ω–µ–Ω–∏–µ: —á–∏—Å–ª–æ–≤—ã–µ –ø–µ—Ä–µ–º–µ–Ω–Ω—ã–µ —Å–ª–µ–¥—É–µ—Ç –≤–º–µ–Ω—è—Ç—å —Å–æ —Å—Ä–µ–¥–Ω–∏–º –∑–Ω–∞—á–µ–Ω–∏–µ–º –æ–±—É—á–∞—é—â–∏—Ö –¥–∞–Ω–Ω—ã—Ö, –∞ –Ω–µ —Å–æ –≤—Å–µ–º–∏ –¥–∞–Ω–Ω—ã–º–∏.   
‚óºÔ∏è –ú–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏–µ: –º–∏–Ω–∏–º–∞–ª—å–Ω–æ–µ –∏ –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–µ –∑–Ω–∞—á–µ–Ω–∏—è —Å–ª–µ–¥—É–µ—Ç —Ä–∞—Å—Å—á–∏—Ç—ã–≤–∞—Ç—å –Ω–∞ –æ—Å–Ω–æ–≤–µ –¥–∞–Ω–Ω—ã—Ö —Ç—Ä–µ–Ω–∏—Ä–æ–≤.    
   .
‚óºÔ∏è –ö–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–µ: –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ —Å–ª–µ–¥—É–µ—Ç –æ–ø—Ä–µ–¥–µ–ª—è—Ç—å –Ω–∞ –æ—Å–Ω–æ–≤–µ –æ–±—É—á–∞—é—â–∏—Ö –¥–∞–Ω–Ω—ã—Ö. –ö—Ä–æ–º–µ —Ç–æ–≥–æ, –¥–∞–∂–µ –µ—Å–ª–∏ –¥–∞–Ω–Ω—ã–µ —Å–µ–∫—Ü–∏–æ–Ω–∏—Ä–æ–≤–∞–Ω—ã –¥–æ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏, –≥–æ—Ä—è—á–µ–µ –∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–µ —Å –ø–æ–º–æ—â—å—é pd.get_dummies(X_train)–∏ pd.get_dummies(X_test)–º–æ–∂–µ—Ç –ø—Ä–∏–≤–µ—Å—Ç–∏ –∫ –ø—Ä–æ—Ç–∏–≤–æ—Ä–µ—á–∏–≤—ã–º –¥–∞–Ω–Ω—ã–º –æ–±—É—á–µ–Ω–∏—è –∏ —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è (—Ç. –µ. —Å—Ç–æ–ª–±—Ü—ã –º–æ–≥—É—Ç —Ä–∞–∑–ª–∏—á–∞—Ç—å—Å—è –≤ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –æ—Ç –∫–∞—Ç–µ–≥–æ—Ä–∏–π –≤ –æ–±–æ–∏—Ö –Ω–∞–±–æ—Ä–∞—Ö –¥–∞–Ω–Ω—ã—Ö). –ü–æ—ç—Ç–æ–º—É pd.get_dummies()–Ω–µ —Å–ª–µ–¥—É–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –≥–æ—Ä—è—á–µ–µ –∫–æ–¥–∏—Ä–æ–≤–∞–Ω–∏–µ –ø—Ä–∏ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–µ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –º–æ–¥–µ–ª–∏   .

üí° –î–∞–Ω–Ω—ã–µ –∏—Å–ø—ã—Ç–∞–Ω–∏–π —Å–ª–µ–¥—É–µ—Ç —Å–æ—Ö—Ä–∞–Ω–∏—Ç—å –¥–æ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏. –õ—é–±–∞—è —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞, —Ç–∞–∫–∞—è –∫–∞–∫ —Å—Ä–µ–¥–Ω–µ–µ, –º–∏–Ω–∏–º–∞–ª—å–Ω–æ–µ –∏ –º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–µ –∑–Ω–∞—á–µ–Ω–∏–µ, –∏—Å–ø–æ–ª—å–∑—É–µ–º–∞—è –¥–ª—è –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏, –¥–æ–ª–∂–Ω–∞ –±—ã—Ç—å –ø–æ–ª—É—á–µ–Ω–∞ –∏–∑ –æ–±—É—á–∞—é—â–∏—Ö –¥–∞–Ω–Ω—ã—Ö. –í –ø—Ä–æ—Ç–∏–≤–Ω–æ–º —Å–ª—É—á–∞–µ –≤–æ–∑–Ω–∏–∫–Ω–µ—Ç –ø—Ä–æ–±–ª–µ–º–∞ —É—Ç–µ—á–∫–∏ –¥–∞–Ω–Ω—ã—Ö.

–¢–µ–ø–µ—Ä—å –¥–∞–≤–∞–π—Ç–µ –æ—Ü–µ–Ω–∏–º –º–æ–¥–µ–ª—å. –ú—ã –±—É–¥–µ–º –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å ROC-AUC –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –º–æ–¥–µ–ª–∏. –ú—ã —Å–æ–∑–¥–∞–¥–∏–º —Ñ—É–Ω–∫—Ü–∏—é, –∫–æ—Ç–æ—Ä–∞—è –≤—ã—á–∏—Å–ª—è–µ—Ç ROC-AUC, –ø–æ—Å–∫–æ–ª—å–∫—É –æ–Ω–∞ –±—É–¥–µ—Ç –ø–æ–ª–µ–∑–Ω–∞ –¥–ª—è –æ—Ü–µ–Ω–∫–∏ –ø–æ—Å–ª–µ–¥—É—é—â–∏—Ö –ø–æ–¥—Ö–æ–¥–æ–≤:

In [34]:
def calculate_roc_auc(model_pipe, X, y):
    """Calculate roc auc score. 
    
    Parameters:
    ===========
    model_pipe: sklearn model or pipeline
    X: features
    y: true target
    """
    y_proba = model_pipe.predict_proba(X)[:,1]
    return roc_auc_score(y, y_proba)
  
print(f"Train ROC-AUC: {calculate_roc_auc(model, X_train, y_train):.4f}")
print(f"Test ROC-AUC: {calculate_roc_auc(model, X_test, y_test):.4f}")

Train ROC-AUC: 0.8669
Test ROC-AUC: 0.8329


‚ùî2. –ü—Ä–∞–≤–∏–ª—å–Ω—ã–π –ø–æ–¥—Ö–æ–¥, –Ω–æ‚Ä¶
–°–Ω–∞—á–∞–ª–∞ –º—ã —Ä–∞–∑–¥–µ–ª–∏–º –¥–∞–Ω–Ω—ã–µ –∏ –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–µ–º –∏—Ö —Å –ø–æ–º–æ—â—å—é –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª–µ–π Scikit-learn, —á—Ç–æ–±—ã –ø—Ä–µ–¥–æ—Ç–≤—Ä–∞—Ç–∏—Ç—å —É—Ç–µ—á–∫—É –¥–∞–Ω–Ω—ã—Ö –∑–∞ —Å—á–µ—Ç –ø—Ä–∞–≤–∏–ª—å–Ω–æ–π –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏:

In [35]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=TARGET), df[TARGET], 
                                                    test_size=.2, random_state=SEED, 
                                                    stratify=df[TARGET])
num_imputer = SimpleImputer(strategy='mean')
train_num_imputed = num_imputer.fit_transform(X_train[NUMERICAL])

scaler = MinMaxScaler()
train_num_scaled = scaler.fit_transform(train_num_imputed)

cat_imputer = SimpleImputer(strategy='constant', fill_value='missing')
train_cat_imputed = cat_imputer.fit_transform(X_train[CATEGORICAL])

encoder = OneHotEncoder(drop='first', handle_unknown='ignore', sparse=False)
train_cat_encoded = encoder.fit_transform(train_cat_imputed)

train_preprocessed = np.concatenate((train_num_scaled, train_cat_encoded), axis=1)

columns = np.append(NUMERICAL, encoder.get_feature_names_out(CATEGORICAL))
pd.DataFrame(train_preprocessed, columns=columns, index=X_train.index).head()



Unnamed: 0,pclass,age,sibsp,parch,fare,deck_B,deck_C,deck_D,deck_E,deck_F,deck_G,deck_missing,embark_town_Queenstown,embark_town_Southampton,embark_town_missing,sex_male
692,1.0,0.3693,0.0,0.0,0.1103,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
481,0.5,0.3693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
527,0.0,0.3693,0.0,0.0,0.4329,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
855,1.0,0.2209,0.0,0.1667,0.0182,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
801,0.5,0.3843,0.125,0.1667,0.0512,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [45]:
train_cat_encoded

array([[0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 1., 0., ..., 1., 0., 1.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 1., 0., 1.]])

–ü—Ä–µ–∫—Ä–∞—Å–Ω–æ, —Ç–µ–ø–µ—Ä—å –º—ã –º–æ–∂–µ–º –ø–æ–¥–æ–≥–Ω–∞—Ç—å –º–æ–¥–µ–ª—å:

In [16]:
model = LogisticRegression()
model.fit(train_preprocessed, y_train)

–ü–µ—Ä–µ–¥ –æ—Ü–µ–Ω–∫–æ–π –Ω–∞–º –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ —Ç–∞–∫–∏–º –∂–µ –æ–±—Ä–∞–∑–æ–º –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞—Ç—å —Ç–µ—Å—Ç–æ–≤—ã–π –Ω–∞–±–æ—Ä –¥–∞–Ω–Ω—ã—Ö:

In [17]:
test_num_imputed = num_imputer.transform(X_test[NUMERICAL])
test_num_scaled = scaler.transform(test_num_imputed)
test_cat_imputed = cat_imputer.transform(X_test[CATEGORICAL])
test_cat_encoded = encoder.transform(test_cat_imputed)
test_preprocessed = np.concatenate((test_num_scaled, test_cat_encoded), axis=1)

print(f"Train ROC-AUC: {calculate_roc_auc(model, train_preprocessed, y_train):.4f}")
print(f"Test ROC-AUC: {calculate_roc_auc(model, test_preprocessed, y_test):.4f}")

Train ROC-AUC: 0.8670
Test ROC-AUC: 0.8332


‚úîÔ∏è3. –≠–ª–µ–≥–∞–Ω—Ç–Ω—ã–π –ø–æ–¥—Ö–æ–¥ ‚Ññ1
–î–∞–≤–∞–π—Ç–µ —É–ø—Ä–æ—Å—Ç–∏–º –ø—Ä–µ–¥—ã–¥—É—â–∏–π –∫–æ–¥, –∏—Å–ø–æ–ª—å–∑—É—è Scikit-learn Pipeline–∏ ColumnTransformer. –ï—Å–ª–∏ –≤—ã —Å –Ω–∏–º–∏ –Ω–µ –∑–Ω–∞–∫–æ–º—ã, –≤ —ç—Ç–æ–º –ø–æ—Å—Ç–µ –æ–Ω–∏ –∫—Ä–∞—Ç–∫–æ –æ–ø–∏—Å–∞–Ω—ã.

In [18]:
numerical_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler())
])

categorical_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore', sparse=False))
])

preprocessors = ColumnTransformer(transformers=[
    ('num', numerical_pipe, NUMERICAL),
    ('cat', categorical_pipe, CATEGORICAL)
])

pipe = Pipeline([
    ('preprocessors', preprocessors),
    ('model', LogisticRegression())
])

pipe.fit(X_train, y_train)



–ö–æ–Ω–≤–µ–π–µ—Ä:
‚óºÔ∏è –†–∞–∑–±–∏–≤–∞–µ—Ç –≤—Ö–æ–¥–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ –Ω–∞ —á–∏—Å–ª–æ–≤—ã–µ –∏ –∫–∞—Ç–µ–≥–æ—Ä–∏–∞–ª—å–Ω—ã–µ –≥—Ä—É–ø–ø—ã
‚óºÔ∏è –ü—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ—Ç –æ–±–µ –≥—Ä—É–ø–ø—ã –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ
‚óºÔ∏è –û–±—ä–µ–¥–∏–Ω—è–µ—Ç –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ –∏–∑ –æ–±–µ–∏—Ö –≥—Ä—É–ø–ø
‚óºÔ∏è –ü–µ—Ä–µ–¥–∞–µ—Ç –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ –≤ –º–æ–¥–µ–ª—å

–ö–æ–≥–¥–∞ –Ω–µ–æ–±—Ä–∞–±–æ—Ç–∞–Ω–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ –ø–µ—Ä–µ–¥–∞—é—Ç—Å—è –≤ –æ–±—É—á–µ–Ω–Ω—ã–π –∫–æ–Ω–≤–µ–π–µ—Ä, –æ–Ω –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ—Ç –∏ –¥–µ–ª–∞–µ—Ç –ø—Ä–æ–≥–Ω–æ–∑. –≠—Ç–æ –æ–∑–Ω–∞—á–∞–µ—Ç, —á—Ç–æ –Ω–∞–º –±–æ–ª—å—à–µ –Ω–µ –Ω—É–∂–Ω–æ —Ö—Ä–∞–Ω–∏—Ç—å –ø—Ä–æ–º–µ–∂—É—Ç–æ—á–Ω—ã–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã –∫–∞–∫ –¥–ª—è –æ–±—É—á–∞—é—â–µ–≥–æ, —Ç–∞–∫ –∏ –¥–ª—è —Ç–µ—Å—Ç–æ–≤–æ–≥–æ –Ω–∞–±–æ—Ä–∞ –¥–∞–Ω–Ω—ã—Ö. –û—Ü–µ–Ω–∏—Ç—å –Ω–µ–≤–∏–¥–∏–º—ã–µ –¥–∞–Ω–Ω—ã–µ —Ç–∞–∫ –∂–µ –ø—Ä–æ—Å—Ç–æ, –∫–∞–∫ pipe.predict(). –≠—Ç–æ –æ—á–µ–Ω—å —ç–ª–µ–≥–∞–Ω—Ç–Ω–æ, –Ω–µ —Ç–∞–∫ –ª–∏? –¢–µ–ø–µ—Ä—å –æ—Ü–µ–Ω–∏–º –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏:

In [19]:
print(f"Train ROC-AUC: {calculate_roc_auc(pipe, X_train, y_train):.4f}")
print(f"Test ROC-AUC: {calculate_roc_auc(pipe, X_test, y_test):.4f}")

Train ROC-AUC: 0.8670
Test ROC-AUC: 0.8332


‚úîÔ∏è 4. –≠–ª–µ–≥–∞–Ω—Ç–Ω—ã–π –ø–æ–¥—Ö–æ–¥ ‚Ññ2
–ü—Ä–∏ —Ç–∞–∫–æ–º –ø–æ–¥—Ö–æ–¥–µ –º—ã —Å–æ–∑–¥–∞–¥–∏–º —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—ã–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª–∏ —Å –ø–æ–º–æ—â—å—é Scikit-learn. –ú—ã –Ω–∞–¥–µ–µ–º—Å—è, —á—Ç–æ –Ω–∞–±–ª—é–¥–µ–Ω–∏–µ –∑–∞ —Ç–µ–º, –∫–∞–∫ —Ç–µ –∂–µ —ç—Ç–∞–ø—ã –ø—Ä–µ–¥–≤–∞—Ä–∏—Ç–µ–ª—å–Ω–æ–π –æ–±—Ä–∞–±–æ—Ç–∫–∏, —Å –∫–æ—Ç–æ—Ä—ã–º–∏ –º—ã –æ–∑–Ω–∞–∫–æ–º–∏–ª–∏—Å—å, –ø–µ—Ä–µ–≤–æ–¥—è—Ç—Å—è –≤ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª–∏, –ø–æ–º–æ–∂–µ—Ç –≤–∞–º –ø–æ–Ω—è—Ç—å –æ—Å–Ω–æ–≤–Ω—É—é –∏–¥–µ—é –∏—Ö —Å–æ–∑–¥–∞–Ω–∏—è. –ï—Å–ª–∏ –≤–∞—Å –∏–Ω—Ç–µ—Ä–µ—Å—É—é—Ç –ø—Ä–∏–º–µ—Ä—ã –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å—Å–∫–∏—Ö –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª–µ–π, –ø–æ—Å–µ—Ç–∏—Ç–µ —ç—Ç–æ—Ç —Ä–µ–ø–æ–∑–∏—Ç–æ—Ä–∏–π GitHub .

In [20]:
class Imputer(BaseEstimator, TransformerMixin):
    def __init__(self, features, method='constant', value='missing'):
        self.features = features
        self.method = method
        self.value = value
    
    def fit(self, X, y=None):
        if self.method=='mean':
            self.value = X[self.features].mean()
        return self
    
    def transform(self, X):
        X_transformed = X.copy()
        X_transformed[self.features] = X[self.features].fillna(self.value)
        return X_transformed
    
class Scaler(BaseEstimator, TransformerMixin):
    def __init__(self, features):
        self.features = features
    
    def fit(self, X, y=None):
        self.min = X[self.features].min()
        self.range = X[self.features].max()-self.min
        return self
    
    def transform(self, X):
        X_transformed = X.copy()
        X_transformed[self.features] = (X[self.features]-self.min)/self.range
        return X_transformed
  
class Encoder(BaseEstimator, TransformerMixin):
    def __init__(self, features, drop='first'):
        self.features = features
        self.drop = drop
    
    def fit(self, X, y=None):
        self.encoder = OneHotEncoder(sparse=False, drop=self.drop)
        self.encoder.fit(X[self.features])
        return self
    
    def transform(self, X):
        X_transformed = pd.concat([X.drop(columns=self.features).reset_index(drop=True), 
                                   pd.DataFrame(self.encoder.transform(X[self.features]), 
                                                columns=self.encoder.get_feature_names_out(self.features))],
                                  axis=1)
        return X_transformed
        
pipe = Pipeline([
    ('num_imputer', Imputer(NUMERICAL, method='mean')),
    ('scaler', Scaler(NUMERICAL)),
    ('cat_imputer', Imputer(CATEGORICAL)),
    ('encoder', Encoder(CATEGORICAL)),
    ('model', LogisticRegression())
])

pipe.fit(X_train, y_train)   



–í –æ—Ç–ª–∏—á–∏–µ –æ—Ç –ø—Ä–µ–¥—ã–¥—É—â–µ–≥–æ, —à–∞–≥–∏ –≤—ã–ø–æ–ª–Ω—è—é—Ç—Å—è –ø–æ—Å–ª–µ–¥–æ–≤–∞—Ç–µ–ª—å–Ω–æ –æ–¥–∏–Ω –∑–∞ –¥—Ä—É–≥–∏–º, –∫–∞–∂–¥—ã–π —à–∞–≥ –ø–µ—Ä–µ–¥–∞–µ—Ç –≤—ã—Ö–æ–¥–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–ª–µ–¥—É—é—â–µ–º—É —à–∞–≥—É –≤ –∫–∞—á–µ—Å—Ç–≤–µ –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö. –ü—Ä–∏—à–ª–æ –≤—Ä–µ–º—è –æ—Ü–µ–Ω–∏—Ç—å –º–æ–¥–µ–ª—å:

In [21]:

print(f"Train ROC-AUC: {calculate_roc_auc(pipe, X_train, y_train):.4f}")
print(f"Test ROC-AUC: {calculate_roc_auc(pipe, X_test, y_test):.4f}")

Train ROC-AUC: 0.8670
Test ROC-AUC: 0.8332


–î–ª—è –¥–æ–±–∞–≤–ª–µ–Ω–∏—è –∫–ª–∞—Å—Å–∞, –∫–æ—Ç–æ—Ä—ã–π –±—É–¥–µ—Ç –ø–æ–¥–±–∏—Ä–∞—Ç—å –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–æ–π —Ä–µ–≥—Ä–µ—Å—Å–∏–∏, –≤—ã –º–æ–∂–µ—Ç–µ —Å–æ–∑–¥–∞—Ç—å –Ω–æ–≤—ã–π –∫–ª–∞—Å—Å, –Ω–∞–ø—Ä–∏–º–µ—Ä, LogisticRegressionTuner, –∫–æ—Ç–æ—Ä—ã–π –±—É–¥–µ—Ç –≤—ã–ø–æ–ª–Ω—è—Ç—å –ø–æ–∏—Å–∫ –æ–ø—Ç–∏–º–∞–ª—å–Ω—ã—Ö –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ –¥–ª—è –º–æ–¥–µ–ª–∏. –ó–∞—Ç–µ–º —ç—Ç–æ—Ç –∫–ª–∞—Å—Å –º–æ–∂–µ—Ç –±—ã—Ç—å –≤–∫–ª—é—á–µ–Ω –≤ –≤–∞—à—É –∫–æ–Ω–≤–µ–π–µ—Ä–Ω—É—é –æ–±—Ä–∞–±–æ—Ç–∫—É.

In [22]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

class LogisticRegressionTuner(BaseEstimator, TransformerMixin):
    def __init__(self, param_grid=None, cv=3):
        self.param_grid = param_grid
        self.cv = cv
    
    def fit(self, X, y=None):
        self.model = LogisticRegression()
        self.grid_search = GridSearchCV(self.model, self.param_grid, cv=self.cv)
        self.grid_search.fit(X, y)
        return self
    
    def transform(self, X):
        # LogisticRegressionTuner –Ω–µ –∏–∑–º–µ–Ω—è–µ—Ç –¥–∞–Ω–Ω—ã–µ, –ø–æ—ç—Ç–æ–º—É –ø—Ä–æ—Å—Ç–æ –≤–æ–∑–≤—Ä–∞—â–∞–µ–º X
        return X

# –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ —Å–µ—Ç–∫—É –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤, –∫–æ—Ç–æ—Ä—ã–µ –≤—ã —Ö–æ—Ç–∏—Ç–µ –æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞—Ç—å –¥–ª—è LogisticRegression
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l2']
}

# –î–æ–±–∞–≤—å—Ç–µ LogisticRegressionTuner –≤ –≤–∞—à –∫–æ–Ω–≤–µ–π–µ—Ä
pipe = Pipeline([
    ('num_imputer', Imputer(NUMERICAL, method='mean')),
    ('scaler', Scaler(NUMERICAL)),
    ('cat_imputer', Imputer(CATEGORICAL)),
    ('encoder', Encoder(CATEGORICAL)),
    ('logistic_regression_tuner', LogisticRegressionTuner(param_grid=param_grid, cv=3)),
    ('model', LogisticRegression())  # –î–æ–±–∞–≤—å—Ç–µ –æ–ø—Ç–∏–º–∏–∑–∏—Ä–æ–≤–∞–Ω–Ω—É—é –º–æ–¥–µ–ª—å
])

# –ó–∞–ø—É—Å—Ç–∏—Ç–µ –∫–æ–Ω–≤–µ–π–µ—Ä –Ω–∞ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
pipe.fit(X_train, y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
print(f"Train ROC-AUC: {calculate_roc_auc(pipe, X_train, y_train):.4f}")
print(f"Test ROC-AUC: {calculate_roc_auc(pipe, X_test, y_test):.4f}")

Train ROC-AUC: 0.8670
Test ROC-AUC: 0.8332


–î–ª—è —Ç—Ä–µ–Ω–∏—Ä–æ–≤–∫–∏ –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º TensorFlow, –≤—ã –º–æ–∂–µ—Ç–µ —Å–æ–∑–¥–∞—Ç—å —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É—é—â–∏–π –∫–ª–∞—Å—Å, –∫–æ—Ç–æ—Ä—ã–π –±—É–¥–µ—Ç –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–±—É—á–µ–Ω–∏–µ –∏ –ø—Ä–∏–º–µ–Ω–µ–Ω–∏–µ –≤–∞—à–µ–π –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏. –ó–∞—Ç–µ–º —ç—Ç–æ—Ç –∫–ª–∞—Å—Å –º–æ–∂–µ—Ç –±—ã—Ç—å –≤–∫–ª—é—á–µ–Ω –≤ –≤–∞—à –∫–æ–Ω–≤–µ–π–µ—Ä. –í–æ—Ç –ø—Ä–∏–º–µ—Ä –∫–æ–Ω–≤–µ–π–µ—Ä–∞ –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º TensorFlow:

In [24]:
import tensorflow as tf
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class NeuralNetworkTrainer(BaseEstimator, TransformerMixin):
    def __init__(self, epochs=10, batch_size=32, verbose=1):
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model = None
    
    def fit(self, X, y=None):
        input_shape = (X.shape[1],)  # –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ —Ñ–æ—Ä–º—É –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –Ω–∞ –æ—Å–Ω–æ–≤–µ X
        num_classes = len(np.unique(y))  # –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –∫–ª–∞—Å—Å–æ–≤ –≤ –∑–∞–¥–∞—á–µ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏–∏
        
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(2048, activation='relu', input_shape=input_shape),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])
        
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        
        model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose)
        self.model = model
        return self
    
    def transform(self, X):
        # NeuralNetworkTrainer –Ω–µ –∏–∑–º–µ–Ω—è–µ—Ç –¥–∞–Ω–Ω—ã–µ, –ø–æ—ç—Ç–æ–º—É –ø—Ä–æ—Å—Ç–æ –≤–æ–∑–≤—Ä–∞—â–∞–µ–º X
        return X

    def predict(self, X):
        # –ü—Ä–µ–¥—Å–∫–∞–∑–∞—Ç—å –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤ –¥–ª—è –Ω–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö X
        if self.model is None:
            raise ValueError("–ú–æ–¥–µ–ª—å –Ω–µ –æ–±—É—á–µ–Ω–∞")
        y_pred = self.model.predict(X)
        return np.argmax(y_pred, axis=1)
        
    def score(self, X, y=None):
        # –û—Ü–µ–Ω–∏—Ç–µ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ –Ω–∞ –¥–∞–Ω–Ω—ã—Ö X –∏ y
        if self.model is None:
            raise ValueError("–ú–æ–¥–µ–ª—å –Ω–µ –æ–±—É—á–µ–Ω–∞")
        y_pred = self.model.predict(X)
        accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
        return accuracy

# –î–æ–±–∞–≤—å—Ç–µ NeuralNetworkTrainer –≤ –≤–∞—à –∫–æ–Ω–≤–µ–π–µ—Ä
pipe = Pipeline([
    ('num_imputer', Imputer(NUMERICAL, method='mean')),
    ('scaler', Scaler(NUMERICAL)),
    ('cat_imputer', Imputer(CATEGORICAL)),
    ('encoder', Encoder(CATEGORICAL)),
    ('neural_network_trainer', NeuralNetworkTrainer()),
])

# –ó–∞–ø—É—Å—Ç–∏—Ç–µ –∫–æ–Ω–≤–µ–π–µ—Ä –Ω–∞ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
pipe.fit(X_train, y_train)




2023-09-11 21:57:54.483716: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:57:54.703782: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:57:54.705549: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:

# –°–¥–µ–ª–∞–π—Ç–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–µ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö
y_pred = pipe.predict(X_test)




In [26]:
from sklearn.metrics import accuracy_score

# –í—ã—á–∏—Å–ª–∏—Ç–µ –æ—Ü–µ–Ω–∫—É —Ç–æ—á–Ω–æ—Å—Ç–∏ (accuracy) –º–æ–¥–µ–ª–∏
accuracy = accuracy_score(y_test, y_pred)

# –í—ã–≤–µ–¥–∏—Ç–µ –æ—Ü–µ–Ω–∫—É –Ω–∞ —ç–∫—Ä–∞–Ω
print("Accuracy:", accuracy)


Accuracy: 0.7877094972067039


In [27]:
import tensorflow as tf
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

class NeuralNetworkTrainer(BaseEstimator, TransformerMixin):
    def __init__(self, epochs=10, batch_size=32, verbose=1):
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model = None
    
    def fit(self, X, y=None):
        input_shape = (X.shape[1],)  # –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ —Ñ–æ—Ä–º—É –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –Ω–∞ –æ—Å–Ω–æ–≤–µ X
        num_classes = len(np.unique(y))  # –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –∫–ª–∞—Å—Å–æ–≤ –≤ –∑–∞–¥–∞—á–µ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏–∏
        
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(2048, activation='relu', input_shape=input_shape),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])
        
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        
        model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose)
        self.model = model
        return self
    
    def transform(self, X):
        # NeuralNetworkTrainer –Ω–µ –∏–∑–º–µ–Ω—è–µ—Ç –¥–∞–Ω–Ω—ã–µ, –ø–æ—ç—Ç–æ–º—É –ø—Ä–æ—Å—Ç–æ –≤–æ–∑–≤—Ä–∞—â–∞–µ–º X
        return X

    def predict(self, X):
        # –ü—Ä–µ–¥—Å–∫–∞–∑–∞—Ç—å –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤ –¥–ª—è –Ω–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö X
        if self.model is None:
            raise ValueError("–ú–æ–¥–µ–ª—å –Ω–µ –æ–±—É—á–µ–Ω–∞")
        y_pred = self.model.predict(X)
        return np.argmax(y_pred, axis=1)
        
    def score(self, X, y=None):
        # –û—Ü–µ–Ω–∏—Ç–µ –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç–µ–ª—å–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏ –Ω–∞ –¥–∞–Ω–Ω—ã—Ö X –∏ y
        if self.model is None:
            raise ValueError("–ú–æ–¥–µ–ª—å –Ω–µ –æ–±—É—á–µ–Ω–∞")
        y_pred = self.model.predict(X)
        accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
        return accuracy

# –û—Å—Ç–∞–ª—å–Ω–æ–π –∫–æ–¥ –æ—Å—Ç–∞–µ—Ç—Å—è –±–µ–∑ –∏–∑–º–µ–Ω–µ–Ω–∏–π

# –°–æ–∑–¥–∞–π—Ç–µ —ç–∫–∑–µ–º–ø–ª—è—Ä –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏
neural_network = NeuralNetworkTrainer()

# –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ —Å–µ—Ç–∫—É –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ –¥–ª—è –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç–∏
param_grid = {
    'neural_network__epochs': [10, 20, 30],  # –†–∞–∑–Ω—ã–µ –∑–Ω–∞—á–µ–Ω–∏—è —á–∏—Å–ª–∞ —ç–ø–æ—Ö
    'neural_network__batch_size': [32, 64, 128],  # –†–∞–∑–Ω—ã–µ –∑–Ω–∞—á–µ–Ω–∏—è —Ä–∞–∑–º–µ—Ä–∞ –ø–∞–∫–µ—Ç–∞
    'neural_network__verbose': [0, 1],  # –†–∞–∑–Ω—ã–µ —É—Ä–æ–≤–Ω–∏ –≤—ã–≤–æ–¥–∞
}

# –°–æ–∑–¥–∞–π—Ç–µ –∫–æ–Ω–≤–µ–π–µ—Ä —Å –Ω–µ–π—Ä–æ–Ω–Ω–æ–π —Å–µ—Ç—å—é
pipeline = Pipeline([
    ('num_imputer', Imputer(NUMERICAL, method='mean')),
    ('scaler', Scaler(NUMERICAL)),
    ('cat_imputer', Imputer(CATEGORICAL)),
    ('encoder', Encoder(CATEGORICAL)),
    ('neural_network', neural_network),
])

# –°–æ–∑–¥–∞–π—Ç–µ GridSearchCV –æ–±—ä–µ–∫—Ç —Å –∫–æ–Ω–≤–µ–π–µ—Ä–æ–º –∏ —Å–µ—Ç–∫–æ–π –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤
grid_search = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1)

# –ó–∞–ø—É—Å—Ç–∏—Ç–µ –ø–æ–∏—Å–∫ –ø–æ —Å–µ—Ç–∫–µ –Ω–∞ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
grid_search.fit(X_train, y_train)

# –ü–æ–ª—É—á–∏—Ç–µ –Ω–∞–∏–ª—É—á—à–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã
best_params = grid_search.best_params_
print("–ù–∞–∏–ª—É—á—à–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã:", best_params)


2023-09-11 21:58:12.067288: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:58:12.076313: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:58:12.142327: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:58:12.143524: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-11 21:58:12.155534: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-11 21:58:12.156272: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instru

Epoch 1/10




Epoch 1/10




Epoch 1/10
Epoch 1/20
Epoch 1/20




Epoch 1/20
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 2/10
Epoch 6/10
Epoch 3/10
Epoch 7/10
Epoch 4/10
Epoch 8/10
Epoch 5/10
Epoch 9/10
 1/15 [=>............................] - ETA: 0s - loss: 0.4212 - accuracy: 0.8750Epoch 6/10
 1/15 [=>............................] - ETA: 0s - loss: 0.4736 - accuracy: 0.8125Epoch 4/10


Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

 1/15 [=>............................] - ETA: 0s - loss: 0.4597 - accuracy: 0.8125Epoch 7/10
 1/15 [=>............................] - ETA: 30s - loss: 0.6783 - accuracy: 0.5938Epoch 5/10
Epoch 3/20
Epoch 8/10
Epoch 4/20
Epoch 7/10
Epoch 9/10
Epoch 5/20
 1/15 [=>............................] - ETA: 0s - loss: 0.4259 - accuracy: 0.8438Epoch 3/20
Epoch 10/10

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 6/20
Epoch 4/20
Epoch 4/20
Epoch 7/20
Epoch 5/20
 1/15 [=>............................] - ETA: 0s - loss: 0.4675 - accuracy: 0.7812Epoch 10/10
Epoch 8/20
Epoch 6/20
Epoch 1/30
Epoch 9/20
 1/15 [=>............................] - ETA: 35s - loss: 0.6885 - accuracy: 0.5625Epoch 6/20
Epoch 7/20
Epoch 10/20
Epoch 7/20
Epoch 2/30
Epoch 8/20
Epoch 11/20
Epoch 3/30
 1/15 [=>............................] - ETA: 0s - loss: 0.4219 - accuracy: 0.8125Epoch 8/20
 1/15 [=>............................] - ETA: 0s - loss: 0.3157 - accuracy: 0.8750Epoch 4/30
Epoch 12/20
Epoch 10/20
Epoch 9/20
 1/15 [=>............................] - ETA: 0s - loss: 0.4411 - accuracy: 0.8125Epoch 13/20



Epoch 11/20
 1/15 [=>............................] - ETA: 0s - loss: 0.4407 - accuracy: 0.9062Epoch 10/20
Epoch 6/30
Epoch 14/20
 1/15 [=>............................] - ETA: 0s - loss: 0.5402 - accuracy: 0.7812Epoch 7/30
Epoch 11/20
Epoch 15/20
Epoch 13/20
Epoch 8/30
Epoch 16/20

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 9/30
Epoch 14/20
Epoch 17/20
 1/15 [=>............................] - ETA: 0s - loss: 0.5273 - accuracy: 0.7812Epoch 13/20
Epoch 15/20
 1/15 [=>............................] - ETA: 0s - loss: 0.5171 - accuracy: 0.7188Epoch 14/20
Epoch 18/20
Epoch 16/20
Epoch 15/20
Epoch 17/20
 1/15 [=>............................] - ETA: 0s - loss: 0.5974 - accuracy: 0.7188Epoch 19/20
Epoch 12/30



Epoch 20/20
 1/15 [=>............................] - ETA: 0s - loss: 0.3116 - accuracy: 0.9062Epoch 16/20
Epoch 18/20
Epoch 17/20
 1/15 [=>............................] - ETA: 0s - loss: 0.2660 - accuracy: 0.9375

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 14/30
Epoch 20/20
Epoch 18/20
Epoch 15/30
Epoch 2/30
Epoch 20/20
Epoch 16/30




Epoch 3/30
Epoch 17/30
Epoch 4/30
Epoch 18/30
Epoch 5/30
Epoch 6/30
Epoch 19/30
Epoch 7/30
Epoch 20/30
Epoch 8/30
Epoch 9/30
 1/15 [=>............................] - ETA: 0s - loss: 0.3409 - accuracy: 0.9062



Epoch 21/30
Epoch 10/30
Epoch 22/30
Epoch 11/30
 1/15 [=>............................] - ETA: 0s - loss: 0.4892 - accuracy: 0.7812



Epoch 12/30
Epoch 13/30
Epoch 24/30
 1/15 [=>............................] - ETA: 0s - loss: 0.3559 - accuracy: 0.8750

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 14/30
Epoch 25/30
Epoch 3/30
1/8 [==>...........................] - ETA: 6sEpoch 15/30
 1/15 [=>............................] - ETA: 0s - loss: 0.5217 - accuracy: 0.7500Epoch 1/10
Epoch 26/30
Epoch 16/30
 1/15 [=>............................] - ETA: 0s - loss: 0.3613 - accuracy: 0.8438Epoch 4/30
Epoch 27/30
Epoch 17/30




Epoch 28/30
Epoch 5/30
Epoch 18/30
Epoch 19/30
Epoch 6/30
Epoch 30/30
Epoch 20/30
Epoch 7/30
Epoch 21/30
Epoch 2/10
Epoch 8/30




Epoch 22/30
Epoch 3/10
Epoch 4/10
Epoch 9/30
Epoch 23/30
Epoch 5/10
Epoch 6/10
Epoch 24/30
Epoch 7/10
Epoch 10/30
Epoch 8/10
1/8 [==>...........................] - ETA: 1sEpoch 1/20 0.3651 - accuracy: 0.8490
Epoch 11/30
Epoch 9/10
Epoch 26/30
Epoch 10/10
Epoch 12/30
Epoch 13/30


Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 2/10
Epoch 14/30
Epoch 28/30
1/8 [==>...........................] - ETA: 0s - loss: 0.5111 - accuracy: 0.7812Epoch 15/30
Epoch 3/10
 1/15 [=>............................] - ETA: 0s - loss: 0.3681 - accuracy: 0.8750Epoch 1/20
Epoch 29/30
Epoch 4/10
Epoch 4/10
Epoch 16/30



Epoch 30/30
Epoch 5/10
1/8 [==>...........................] - ETA: 0s - loss: 0.3455 - accuracy: 0.8281Epoch 5/10
Epoch 17/30
Epoch 6/10
Epoch 18/30
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 19/30
Epoch 8/10



Epoch 9/10
Epoch 20/30
Epoch 9/10
 1/15 [=>............................] - ETA: 0s - loss: 0.2646 - accuracy: 0.9062Epoch 10/10
Epoch 10/10
Epoch 21/30
Epoch 22/30
 1/15 [=>............................] - ETA: 0s - loss: 0.3075 - accuracy: 0.8438



Epoch 23/30

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 24/30
Epoch 25/30
Epoch 1/30



Epoch 26/30
Epoch 1/30
Epoch 27/30
Epoch 2/20
Epoch 3/20
1/8 [==>...........................] - ETA: 0s - loss: 0.5397 - accuracy: 0.7031Epoch 3/20
Epoch 4/20
Epoch 29/30
Epoch 3/20
1/8 [==>...........................] - ETA: 0s - loss: 0.4490 - accuracy: 0.7812Epoch 5/20
Epoch 30/30
Epoch 6/20
1/8 [==>...........................] - ETA: 0s - loss: 0.4942 - accuracy: 0.7969Epoch 5/20
1/8 [==>...........................] - ETA: 0s - loss: 0.4651 - accuracy: 0.7969Epoch 4/20
Epoch 7/20
Epoch 6/20
Epoch 5/20

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 7/20
1/8 [==>...........................] - ETA: 0s - loss: 0.4542 - accuracy: 0.7812Epoch 8/20
Epoch 6/20
Epoch 8/20
1/8 [==>...........................] - ETA: 0s - loss: 0.3302 - accuracy: 0.8281Epoch 9/20
Epoch 7/20
Epoch 10/20
Epoch 9/20
Epoch 8/20
Epoch 11/20
Epoch 12/20
Epoch 10/20
1/8 [==>...........................] - ETA: 0s - loss: 0.4325 - accuracy: 0.8438Epoch 9/20
Epoch 13/20
Epoch 10/20
Epoch 14/20
Epoch 11/20
1/8 [==>...........................] - ETA: 0s - loss: 0.3745 - accuracy: 0.8594Epoch 1/10
Epoch 11/20
Epoch 15/20
1/8 [==>...........................] - ETA: 0s - loss: 0.3234 - accuracy: 0.8438



Epoch 12/20
Epoch 12/20
Epoch 16/20
Epoch 13/20
1/8 [==>...........................] - ETA: 0s - loss: 0.3925 - accuracy: 0.8125Epoch 13/20
Epoch 14/20
Epoch 14/20
Epoch 17/20
Epoch 15/20
Epoch 15/20
Epoch 18/20
Epoch 19/20
Epoch 3/30
Epoch 16/20
1/8 [==>...........................] - ETA: 13s - loss: 0.6933 - accuracy: 0.3750Epoch 20/20
Epoch 4/30
Epoch 2/30
Epoch 17/20
Epoch 5/30
Epoch 3/30
Epoch 18/20
1/8 [==>...........................] - ETA: 0s - loss: 0.3998 - accuracy: 0.7812Epoch 18/20
Epoch 6/30
Epoch 19/20
Epoch 7/30
Epoch 2/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3374 - accuracy: 0.8438Epoch 19/20
Epoch 5/30
Epoch 20/20
Epoch 8/30
Epoch 20/20
Epoch 3/30




Epoch 6/30
Epoch 4/30
Epoch 9/30
Epoch 7/30
1/8 [==>...........................] - ETA: 0s - loss: 0.5045 - accuracy: 0.7344Epoch 10/30
Epoch 8/30
Epoch 6/30
Epoch 11/30
Epoch 9/30
Epoch 7/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3591 - accuracy: 0.8281

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 12/30
1/8 [==>...........................] - ETA: 0s - loss: 0.4180 - accuracy: 0.7812Epoch 10/30
Epoch 8/30
1/8 [==>...........................] - ETA: 1sEpoch 13/30
Epoch 11/30
Epoch 9/30
Epoch 12/30
Epoch 10/30
Epoch 15/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3097 - accuracy: 0.8906Epoch 2/10
Epoch 3/10
Epoch 16/30
Epoch 4/10




Epoch 17/30
1/8 [==>...........................] - ETA: 0s - loss: 0.4585 - accuracy: 0.7812Epoch 5/10
1/8 [==>...........................] - ETA: 0s - loss: 0.2674 - accuracy: 0.9062Epoch 12/30
Epoch 18/30
Epoch 14/30
Epoch 13/30
Epoch 7/10




Epoch 8/10
Epoch 9/10
Epoch 14/30
Epoch 15/30
Epoch 10/10
Epoch 16/30
Epoch 2/10
Epoch 3/10
1/8 [==>...........................] - ETA: 1sEpoch 16/302464 - accuracy: 0.9062
Epoch 17/30
Epoch 4/10
Epoch 22/30
Epoch 1/20
Epoch 5/10
Epoch 23/30
1/8 [==>...........................] - ETA: 0s - loss: 0.4159 - accuracy: 0.8281Epoch 6/10
Epoch 24/30
1/8 [==>...........................] - ETA: 1sEpoch 18/30



Epoch 7/10
Epoch 25/30
Epoch 19/30
Epoch 8/10
Epoch 26/30
1/8 [==>...........................] - ETA: 0s - loss: 0.5166 - accuracy: 0.7656Epoch 9/10
Epoch 2/10
Epoch 20/30
Epoch 3/10
Epoch 10/10

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 4/10
Epoch 21/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3768 - accuracy: 0.8594Epoch 20/30
Epoch 5/10
Epoch 21/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3017 - accuracy: 0.9062Epoch 22/30
Epoch 6/10
1/8 [==>...........................] - ETA: 0s - loss: 0.4085 - accuracy: 0.8438Epoch 7/10
Epoch 22/30
Epoch 23/30
Epoch 23/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3248 - accuracy: 0.8906Epoch 30/30
Epoch 8/10
Epoch 24/30
Epoch 24/30
Epoch 9/10


Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 25/30
Epoch 25/30
1/8 [==>...........................] - ETA: 0s - loss: 0.3216 - accuracy: 0.8906Epoch 1/20
Epoch 26/30
Epoch 26/30

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 27/30
Epoch 27/30
Epoch 28/30
Epoch 28/30



Epoch 29/30
Epoch 29/30
Epoch 30/30
Epoch 30/30
Epoch 1/30
Epoch 1/30


Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 2/20
Epoch 6/20
Epoch 3/20
Epoch 7/20
Epoch 4/20
Epoch 8/20
Epoch 5/20
Epoch 9/20
Epoch 6/20
Epoch 10/20
Epoch 7/20
Epoch 11/20
Epoch 12/20
Epoch 9/20
Epoch 13/20
Epoch 10/20
Epoch 11/20
Epoch 2/20
Epoch 15/20
Epoch 3/20
Epoch 16/20
Epoch 4/20
Epoch 17/20
Epoch 5/20
Epoch 15/20

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 18/20
Epoch 6/20
Epoch 19/20
Epoch 17/20
Epoch 18/20
Epoch 20/20
Epoch 2/30
Epoch 8/20
Epoch 19/20
Epoch 3/30
Epoch 10/20
Epoch 2/30
Epoch 20/20
Epoch 11/20
Epoch 5/30
Epoch 3/30
Epoch 12/20
Epoch 6/30
Epoch 4/30
Epoch 13/20
Epoch 5/30
Epoch 2/30
Epoch 7/30
Epoch 14/20
Epoch 6/30
Epoch 3/30
Epoch 8/30
Epoch 15/20
Epoch 7/30
Epoch 4/30
Epoch 17/20
Epoch 5/30
Epoch 10/30
Epoch 18/20
Epoch 6/30
Epoch 11/30
1/8 [==>...........................] - ETA: 1sEpoch 7/30
Epoch 19/20
Epoch 10/30
Epoch 8/30
Epoch 11/30
Epoch 20/20
1/8 [==>...........................] - ETA: 1sEpoch 9/30
Epoch 12/30
Epoch 14/30
Epoch 13/30
Epoch 10/30
Epoch 15/30
Epoch 14/30
Epoch 16/30
Epoch 12/30
Epoch 17/30
Epoch 16/30
Epoch 13/30
Epoch 18/30
Epoch 17/30
Epoch 14/30
Epoch 18/30
Epoch 19/30
Epoch 15/30
Epoch 20/30
Epoch 19/30
Epoch 20/30
Epoch 17/30
Epoch 21/30
Epoch 21/30
Epoch 22/30
Epoch 22/30
Epoch 19/30
Epoch 23/30
Epoch 24/30
Epoch 24/30
Epoch 21/30
Epoch 25/30
Epoch 25/30
Epoch 22/30
Epoch 26/30
Epoch 

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 27/30
Epoch 24/30
Epoch 27/30
Epoch 25/30
Epoch 28/30
Epoch 28/30
Epoch 26/30
Epoch 29/30
Epoch 29/30
Epoch 27/30
Epoch 30/30
Epoch 28/30

Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

Epoch 29/30
Epoch 30/30


Traceback (most recent call last):
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/pipeline.py", line 749, in score
    Xt = transform.transform(Xt)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/tmp/ipykernel_9164/2555258950.py", line 43, in transform
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/utils/_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/gea/.local/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py", line 1016, in transform
    X_int, X_mask = self._transform(
  File "

–ù–∞–∏–ª—É—á—à–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä—ã: {'neural_network__batch_size': 32, 'neural_network__epochs': 10, 'neural_network__verbose': 0}


In [28]:
# –°–¥–µ–ª–∞–π—Ç–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–µ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö
y_pred = pipe.predict(X_test)



In [29]:
from sklearn.metrics import accuracy_score

# –í—ã—á–∏—Å–ª–∏—Ç–µ –æ—Ü–µ–Ω–∫—É —Ç–æ—á–Ω–æ—Å—Ç–∏ (accuracy) –º–æ–¥–µ–ª–∏
accuracy = accuracy_score(y_test, y_pred)

# –í—ã–≤–µ–¥–∏—Ç–µ –æ—Ü–µ–Ω–∫—É –Ω–∞ —ç–∫—Ä–∞–Ω
print("Accuracy:", accuracy)

Accuracy: 0.7877094972067039
