# Disaster tweets DL model

In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn import base, feature_extraction, ensemble, model_selection, pipeline, compose, preprocessing, metrics
from sklearn.experimental import enable_halving_search_cv
import tensorflow as tf
from embedding_transformer import Doc2VecTransformer
from scikeras.wrappers import KerasClassifier
import optuna
import pprint

SCRIPT_NAME='DL-06'

2024-04-03 08:15:15.069774: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-03 08:15:15.090850: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df_train = pd.read_csv('./train_enriched.csv', index_col='id')
df_train.fillna({'keyword': '', 'location': '', 'country': '', 'state': '', 'city': '', 'url_domains': '', 'clean_text': ''}, inplace=True)
df_train.head()

Unnamed: 0_level_0,keyword,positive_factor,location,country,state,city,missing_location,text,clean_text,text_content,...,punct_factor,ann_count,urls_count,tokens_count,stop_words_factor,clean_tokens_factor,url_domains,url_redirects_count,hashtags_sentiment,target
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,0.5,,,,,1,Our Deeds are the Reason of this #earthquake M...,deed reason earthquake may allah forgive u,Our Deeds are the Reason of this #earthquake M...,...,0.017544,0,0,13,0.384615,0.615385,,0,1.0,1
1,,0.5,,,,,1,Forest fire near La Ronge Sask. Canada,forest fire near la ronge sask canada,Forest fire near La Ronge Sask. Canada,...,0.03125,0,0,7,0.0,1.0,,0,0.0,1
2,,0.5,,,,,1,All residents asked to 'shelter in place' are ...,resident asked shelter place notified officer ...,All residents asked to 'shelter in place' are ...,...,0.026786,0,0,22,0.409091,0.590909,,0,0.0,1
3,,0.5,,,,,1,"13,000 people receive #wildfires evacuation or...",people receive wildfire evacuation order calif...,"13,000 people receive #wildfires evacuation or...",...,0.035088,0,0,9,0.111111,0.888889,,0,1.0,1
4,,0.5,,,,,1,Just got sent this photo from Ruby #Alaska as ...,got sent photo ruby alaska smoke wildfire pour...,Just got sent this photo from Ruby #Alaska as ...,...,0.027778,0,0,17,0.352941,0.647059,,0,0.714286,1


In [3]:
text_embedding = None
with open('./train-text-embeddings.pkl', 'rb') as fin:
    text_embedding = pickle.load(fin)

In [4]:
len(text_embedding), len(text_embedding[0])

(7613, 384)

In [5]:
class ConditionalEmbeddingTransformer(base.BaseEstimator, base.TransformerMixin):
    def __init__(self, data):
        # Store the embeddings and mode
        self.data = data

    def fit(self, X, y=None):
        # No fitting necessary, return self
        return self

    def transform(self, X, y=None):
        return np.concatenate([X, self.data], axis=1)


In [6]:
categorical_features = [
    'country',
    'state',
]
numerical_features = [
    'text_length', 
    # 'ann_count',
    # 'url_redirects_count',
    # 'stop_words_factor',
    'positive_factor',
    'hashtags_sentiment'
]

# domains_vec = feature_extraction.text.TfidfVectorizer(max_features=100)
# domains_vec = feature_extraction.text.CountVectorizer(max_features=100)

column_transformer = compose.ColumnTransformer(transformers=[
    # ('domains_vec', domains_vec, 'url_domains'),
    ('one_hot', preprocessing.OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features),
    ('numerical', preprocessing.StandardScaler(), numerical_features)
], remainder='drop')

embedding_transformer = ConditionalEmbeddingTransformer(text_embedding)

transformer = pipeline.Pipeline([
    ('columns', column_transformer),
    ('text_embedding', embedding_transformer)
])

transformer.fit(df_train)
X_train = transformer.transform(df_train)
print('X_train shape', X_train.shape)

Y_train = df_train['target']

print(f'X_train type={type(X_train)}, shape={X_train.shape}')
print(f'Y_train shape={Y_train.shape}')


X_train shape (7613, 554)
X_train type=<class 'numpy.ndarray'>, shape=(7613, 554)
Y_train shape=(7613,)


In [16]:
INPUT_SIZE = X_train.shape[1]
OUTPUT_SIZE = 1
NN_SHAPE = [INPUT_SIZE, 16, 16, 1]
BATCH_SIZE= X_train.shape[0]
MAX_EPOCHS = 20


def sigmoid(x):
    return tf.cast(tf.greater(tf.nn.sigmoid(x), .5), tf.int32)


def build_model(layer_dims, use_dropout=False, dropout_rate_1=0.3, dropout_rate_2=0.1,
                learning_rate=1e-3, 
                use_emma=False, emma_momentum=0.99, 
                use_regularizer=False, regularizer=0.01,
                initializer='glorot_normal',
                activation='relu'
                ):
    n_layers = len(layer_dims)
    layers = []

    for l in range(1, n_layers-1):
        layer_kws = {}

        if use_regularizer:
            layer_kws['kernel_regularizer'] = tf.keras.regularizers.l2(regularizer)
        if initializer:
            layer_kws['kernel_initializer'] = initializer
        
        hidden_layer= tf.keras.layers.Dense(layer_dims[l], input_shape=(layer_dims[l-1],), activation=activation, **layer_kws)
        
        layers.append(hidden_layer)
        
        if use_dropout:
            if l==1:
                rate = dropout_rate_1
            elif l==2 and n_layers > 3:
                rate = dropout_rate_2
            else:
                rate = 0.0
            if rate > .0:
                layers.append(tf.keras.layers.Dropout(rate=rate))
    layers.append(tf.keras.layers.Dense(layer_dims[n_layers-1], activation='linear'))
    model = tf.keras.Sequential(layers)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, use_ema=use_emma, ema_momentum=emma_momentum),                 
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), metrics='accuracy')
    return model


In [17]:
def objective(trial):
    use_dropout = False # trial.suggest_categorical('use_dropout', [True, False])
    dropout_rate_1 = .0 # trial.suggest_float('dropout_rate_1', 0.1, 0.4) if use_dropout else .0
    dropout_rate_2 = .0 # trial.suggest_float('dropout_rate_2', 0.0, 0.2) if use_dropout else .0
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical('batch_size', [BATCH_SIZE, BATCH_SIZE//2 + 1, BATCH_SIZE//4 + 1])
    use_emma = trial.suggest_categorical('use_emma', [True, False])
    emma_momentum = trial.suggest_float('emma_momentum', 0.9, 0.9999, log=True) if use_emma else 0.999
    regularizer = trial.suggest_float('regularizer', 1e-5, 1e-2, log=True)
    initializer = trial.suggest_categorical('initializer', ['glorot_normal', 'he_normal'])
    activation = trial.suggest_categorical('activation', ['relu', 'gelu', 'leaky_relu'])

    k = 3  # Number of validations
    shuffle_split = model_selection.StratifiedShuffleSplit(n_splits=k, test_size=0.2)
    cvscores = []

    model = build_model(layer_dims=NN_SHAPE, 
                        use_dropout=use_dropout, dropout_rate_1=dropout_rate_1, dropout_rate_2=dropout_rate_2,
                        initializer=initializer,
                        regularizer=regularizer, 
                        activation=activation,
                        learning_rate=learning_rate, use_emma=use_emma, emma_momentum=emma_momentum)
    
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

    for index, datasets in enumerate(shuffle_split.split(X_train, Y_train)):
        train, test = datasets
        X_train_set = X_train[train]
        Y_train_set = Y_train[train]
        X_test_set = X_train[test]
        Y_test_set = Y_train[test]
        # print(f'---- step {index+1} of {k}')
        # print(f'train size: {len(X_train_set)}, test size: {len(X_test_set)}')
        
        model.fit(X_train_set, Y_train_set, batch_size=batch_size, epochs=MAX_EPOCHS, 
                validation_data=(X_test_set, Y_test_set),
                callbacks=[early_stopping],
                verbose=0)

        Y_predict = sigmoid(model.predict(X_test_set))

        f1_score = metrics.f1_score(Y_test_set, Y_predict)
        # print(f"Validation F1: {f1_score}")

        cvscores.append(f1_score)

    score = np.mean(cvscores)
    print(f"Mean cross-validation F1 score: {score}")
    # print(f"Standard deviation of cross-validation score: {tf.math.reduce_std(cvscores)}")
    # print(model.summary())
    
    return score

In [18]:
study_name=SCRIPT_NAME
storage=f"sqlite:///{SCRIPT_NAME}.optuna.db"

# recreate study for new NN architecture
try:
    optuna.delete_study(study_name=study_name, storage=storage)
except:
    pass


In [19]:
study = optuna.create_study(study_name=study_name, storage=storage,
                            direction='maximize', 
                            sampler=optuna.samplers.TPESampler(seed=42, consider_prior=True),
                            load_if_exists=True)
study.optimize(objective, n_trials=30, show_progress_bar=True)

# Print optimal hyperparameters and the corresponding score

trial = study.best_trial
print(f'-> Best score: {trial.value}')
print(f'-> Optimal hyperparameters: ')
pprint.pprint(trial.params)


[I 2024-04-03 09:55:02,288] A new study created in RDB with name: DL-06
  0%|          | 0/30 [00:00<?, ?it/s]



Best trial: 0. Best value: 0.504849:   3%|▎         | 1/30 [00:02<01:10,  2.43s/it]

Mean cross-validation F1 score: 0.5048487052607555
[I 2024-04-03 09:55:04,719] Trial 0 finished with value: 0.5048487052607555 and parameters: {'learning_rate': 0.00031489116479568613, 'batch_size': 7613, 'use_emma': True, 'emma_momentum': 0.9055193748996595, 'regularizer': 0.003967605077052989, 'initializer': 'he_normal', 'activation': 'gelu'}. Best is trial 0 with value: 0.5048487052607555.


Best trial: 1. Best value: 0.522537:   7%|▋         | 2/30 [00:04<01:06,  2.36s/it]

Mean cross-validation F1 score: 0.5225370966403865
[I 2024-04-03 09:55:07,023] Trial 1 finished with value: 0.5225370966403865 and parameters: {'learning_rate': 7.068974950624602e-05, 'batch_size': 1904, 'use_emma': True, 'emma_momentum': 0.9280166656874778, 'regularizer': 0.000684792009557478, 'initializer': 'he_normal', 'activation': 'leaky_relu'}. Best is trial 1 with value: 0.5225370966403865.


Best trial: 1. Best value: 0.522537:  10%|█         | 3/30 [00:08<01:15,  2.80s/it]

Mean cross-validation F1 score: 0.3409389372679943
[I 2024-04-03 09:55:10,355] Trial 2 finished with value: 0.3409389372679943 and parameters: {'learning_rate': 6.290644294586152e-05, 'batch_size': 3807, 'use_emma': True, 'emma_momentum': 0.9061837746233719, 'regularizer': 0.007025166339242158, 'initializer': 'glorot_normal', 'activation': 'leaky_relu'}. Best is trial 1 with value: 0.5225370966403865.


Best trial: 1. Best value: 0.522537:  13%|█▎        | 4/30 [00:11<01:20,  3.11s/it]

Mean cross-validation F1 score: 0.2911302238063133
[I 2024-04-03 09:55:13,926] Trial 3 finished with value: 0.2911302238063133 and parameters: {'learning_rate': 0.00057624872164786, 'batch_size': 3807, 'use_emma': True, 'emma_momentum': 0.9650039526079448, 'regularizer': 8.612579192594876e-05, 'initializer': 'he_normal', 'activation': 'gelu'}. Best is trial 1 with value: 0.5225370966403865.


Best trial: 4. Best value: 0.821701:  17%|█▋        | 5/30 [00:13<01:08,  2.73s/it]

Mean cross-validation F1 score: 0.8217014704948479
[I 2024-04-03 09:55:15,977] Trial 4 finished with value: 0.8217014704948479 and parameters: {'learning_rate': 0.057279044707996205, 'batch_size': 1904, 'use_emma': False, 'regularizer': 1.3667272915456215e-05, 'initializer': 'he_normal', 'activation': 'gelu'}. Best is trial 4 with value: 0.8217014704948479.


Best trial: 4. Best value: 0.821701:  20%|██        | 6/30 [00:16<01:09,  2.89s/it]

Mean cross-validation F1 score: 0.7626776399737064
[I 2024-04-03 09:55:19,190] Trial 5 finished with value: 0.7626776399737064 and parameters: {'learning_rate': 0.000132965214572995, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.0020736445177905022, 'initializer': 'glorot_normal', 'activation': 'relu'}. Best is trial 4 with value: 0.8217014704948479.


Best trial: 4. Best value: 0.821701:  23%|██▎       | 7/30 [00:19<01:01,  2.66s/it]

Mean cross-validation F1 score: 0.8155602865598944
[I 2024-04-03 09:55:21,386] Trial 6 finished with value: 0.8155602865598944 and parameters: {'learning_rate': 0.01216413935141706, 'batch_size': 3807, 'use_emma': True, 'emma_momentum': 0.9318997629286275, 'regularizer': 1.551225912648474e-05, 'initializer': 'he_normal', 'activation': 'leaky_relu'}. Best is trial 4 with value: 0.8217014704948479.


Best trial: 4. Best value: 0.821701:  27%|██▋       | 8/30 [00:22<01:00,  2.75s/it]

Mean cross-validation F1 score: 0.7890053956559951
[I 2024-04-03 09:55:24,323] Trial 7 finished with value: 0.7890053956559951 and parameters: {'learning_rate': 0.0007742116473996246, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.00030296104428212476, 'initializer': 'glorot_normal', 'activation': 'gelu'}. Best is trial 4 with value: 0.8217014704948479.


Best trial: 8. Best value: 0.828981:  30%|███       | 9/30 [00:24<00:52,  2.52s/it]

Mean cross-validation F1 score: 0.8289807949853394
[I 2024-04-03 09:55:26,330] Trial 8 finished with value: 0.8289807949853394 and parameters: {'learning_rate': 0.003512704726270845, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.0018477934173519257, 'initializer': 'glorot_normal', 'activation': 'leaky_relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  33%|███▎      | 10/30 [00:25<00:44,  2.25s/it]

Mean cross-validation F1 score: 0.8276364389047924
[I 2024-04-03 09:55:27,964] Trial 9 finished with value: 0.8276364389047924 and parameters: {'learning_rate': 0.01707975034295823, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.0004149795789891589, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  37%|███▋      | 11/30 [00:28<00:45,  2.38s/it]

Mean cross-validation F1 score: 0.8127878789478066
[I 2024-04-03 09:55:30,662] Trial 10 finished with value: 0.8127878789478066 and parameters: {'learning_rate': 0.004170922025076834, 'batch_size': 7613, 'use_emma': False, 'regularizer': 0.0014560113137373743, 'initializer': 'glorot_normal', 'activation': 'leaky_relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  40%|████      | 12/30 [00:30<00:40,  2.28s/it]

Mean cross-validation F1 score: 0.8191048897783668
[I 2024-04-03 09:55:32,696] Trial 11 finished with value: 0.8191048897783668 and parameters: {'learning_rate': 0.004389114309508197, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.00020077076699713252, 'initializer': 'glorot_normal', 'activation': 'relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  43%|████▎     | 13/30 [00:32<00:35,  2.09s/it]

Mean cross-validation F1 score: 0.8249523395308133
[I 2024-04-03 09:55:34,363] Trial 12 finished with value: 0.8249523395308133 and parameters: {'learning_rate': 0.06406014280158744, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.0008305828495032285, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  47%|████▋     | 14/30 [00:34<00:32,  2.05s/it]

Mean cross-validation F1 score: 0.823568350276077
[I 2024-04-03 09:55:36,317] Trial 13 finished with value: 0.823568350276077 and parameters: {'learning_rate': 0.009741500480830036, 'batch_size': 3807, 'use_emma': False, 'regularizer': 9.232121954654636e-05, 'initializer': 'glorot_normal', 'activation': 'relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  50%|█████     | 15/30 [00:36<00:34,  2.28s/it]

Mean cross-validation F1 score: 0.6030173047053727
[I 2024-04-03 09:55:39,132] Trial 14 finished with value: 0.6030173047053727 and parameters: {'learning_rate': 1.9409940328987788e-05, 'batch_size': 7613, 'use_emma': False, 'regularizer': 0.009728583151931052, 'initializer': 'he_normal', 'activation': 'leaky_relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 8. Best value: 0.828981:  53%|█████▎    | 16/30 [00:39<00:33,  2.36s/it]

Mean cross-validation F1 score: 0.8272082454758888
[I 2024-04-03 09:55:41,676] Trial 15 finished with value: 0.8272082454758888 and parameters: {'learning_rate': 0.0018091728041976268, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.0007531545048628777, 'initializer': 'glorot_normal', 'activation': 'relu'}. Best is trial 8 with value: 0.8289807949853394.


Best trial: 16. Best value: 0.832266:  57%|█████▋    | 17/30 [00:41<00:28,  2.18s/it]

Mean cross-validation F1 score: 0.8322655444475965
[I 2024-04-03 09:55:43,438] Trial 16 finished with value: 0.8322655444475965 and parameters: {'learning_rate': 0.021387579426513884, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.00012084427030065391, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 16 with value: 0.8322655444475965.


Best trial: 16. Best value: 0.832266:  60%|██████    | 18/30 [00:43<00:25,  2.10s/it]

Mean cross-validation F1 score: 0.8267970809438746
[I 2024-04-03 09:55:45,348] Trial 17 finished with value: 0.8267970809438746 and parameters: {'learning_rate': 0.027134422433011514, 'batch_size': 1904, 'use_emma': False, 'regularizer': 5.234640505529974e-05, 'initializer': 'glorot_normal', 'activation': 'leaky_relu'}. Best is trial 16 with value: 0.8322655444475965.


Best trial: 16. Best value: 0.832266:  63%|██████▎   | 19/30 [00:45<00:24,  2.24s/it]

Mean cross-validation F1 score: 0.817919718084859
[I 2024-04-03 09:55:47,920] Trial 18 finished with value: 0.817919718084859 and parameters: {'learning_rate': 0.002291327504585287, 'batch_size': 3807, 'use_emma': False, 'regularizer': 3.0279191466484946e-05, 'initializer': 'he_normal', 'activation': 'leaky_relu'}. Best is trial 16 with value: 0.8322655444475965.


Best trial: 16. Best value: 0.832266:  67%|██████▋   | 20/30 [00:47<00:22,  2.25s/it]

Mean cross-validation F1 score: 0.8207254042833054
[I 2024-04-03 09:55:50,193] Trial 19 finished with value: 0.8207254042833054 and parameters: {'learning_rate': 0.005881981994886059, 'batch_size': 7613, 'use_emma': False, 'regularizer': 0.0001693603638300872, 'initializer': 'glorot_normal', 'activation': 'relu'}. Best is trial 16 with value: 0.8322655444475965.


Best trial: 16. Best value: 0.832266:  70%|███████   | 21/30 [00:49<00:18,  2.09s/it]

Mean cross-validation F1 score: 0.8178225127241724
[I 2024-04-03 09:55:51,905] Trial 20 finished with value: 0.8178225127241724 and parameters: {'learning_rate': 0.09223063173596477, 'batch_size': 1904, 'use_emma': False, 'regularizer': 0.002617189581022643, 'initializer': 'he_normal', 'activation': 'leaky_relu'}. Best is trial 16 with value: 0.8322655444475965.


Best trial: 21. Best value: 0.843226:  73%|███████▎  | 22/30 [00:51<00:15,  2.00s/it]

Mean cross-validation F1 score: 0.8432260480392424
[I 2024-04-03 09:55:53,685] Trial 21 finished with value: 0.8432260480392424 and parameters: {'learning_rate': 0.02255828003147729, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.0002229157076000158, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  77%|███████▋  | 23/30 [00:53<00:13,  1.88s/it]

Mean cross-validation F1 score: 0.8086016123374099
[I 2024-04-03 09:55:55,292] Trial 22 finished with value: 0.8086016123374099 and parameters: {'learning_rate': 0.03139981694462374, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.00011752003418812835, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  80%|████████  | 24/30 [00:54<00:11,  1.87s/it]

Mean cross-validation F1 score: 0.8241618256236475
[I 2024-04-03 09:55:57,146] Trial 23 finished with value: 0.8241618256236475 and parameters: {'learning_rate': 0.027360016279819613, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.00034691019994193396, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  83%|████████▎ | 25/30 [00:57<00:10,  2.08s/it]

Mean cross-validation F1 score: 0.8345543074791278
[I 2024-04-03 09:55:59,715] Trial 24 finished with value: 0.8345543074791278 and parameters: {'learning_rate': 0.002031251632248019, 'batch_size': 3807, 'use_emma': False, 'regularizer': 4.0977830252199155e-05, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  87%|████████▋ | 26/30 [01:00<00:08,  2.24s/it]

Mean cross-validation F1 score: 0.8191924103388154
[I 2024-04-03 09:56:02,330] Trial 25 finished with value: 0.8191924103388154 and parameters: {'learning_rate': 0.0016449692426504204, 'batch_size': 3807, 'use_emma': False, 'regularizer': 3.847351550619711e-05, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  90%|█████████ | 27/30 [01:02<00:06,  2.16s/it]

Mean cross-validation F1 score: 0.8245428679067773
[I 2024-04-03 09:56:04,299] Trial 26 finished with value: 0.8245428679067773 and parameters: {'learning_rate': 0.008660924771653838, 'batch_size': 3807, 'use_emma': False, 'regularizer': 2.1850156068446692e-05, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  93%|█████████▎| 28/30 [01:05<00:04,  2.41s/it]

Mean cross-validation F1 score: 0.2651565500172754
[I 2024-04-03 09:56:07,296] Trial 27 finished with value: 0.2651565500172754 and parameters: {'learning_rate': 0.0003228298265747048, 'batch_size': 3807, 'use_emma': True, 'emma_momentum': 0.9967753110910683, 'regularizer': 6.102872513033479e-05, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226:  97%|█████████▋| 29/30 [01:06<00:02,  2.23s/it]

Mean cross-validation F1 score: 0.8280755644201093
[I 2024-04-03 09:56:09,117] Trial 28 finished with value: 0.8280755644201093 and parameters: {'learning_rate': 0.017148304779352887, 'batch_size': 3807, 'use_emma': False, 'regularizer': 0.00017528114829507303, 'initializer': 'he_normal', 'activation': 'relu'}. Best is trial 21 with value: 0.8432260480392424.


Best trial: 21. Best value: 0.843226: 100%|██████████| 30/30 [01:09<00:00,  2.33s/it]

Mean cross-validation F1 score: 0.37433696056639737
[I 2024-04-03 09:56:12,226] Trial 29 finished with value: 0.37433696056639737 and parameters: {'learning_rate': 0.0010665592641105094, 'batch_size': 7613, 'use_emma': True, 'emma_momentum': 0.9980880227949029, 'regularizer': 5.191187370085861e-05, 'initializer': 'he_normal', 'activation': 'gelu'}. Best is trial 21 with value: 0.8432260480392424.
-> Best score: 0.8432260480392424
-> Optimal hyperparameters: 
{'activation': 'relu',
 'batch_size': 3807,
 'initializer': 'he_normal',
 'learning_rate': 0.02255828003147729,
 'regularizer': 0.0002229157076000158,
 'use_emma': False}





#### Optimal model parameters

In [15]:
print(f'-> Best score: {trial.value}')
pprint.pprint(trial.params)

-> Best score: 0.8618254341828041
{'emma_momentum': 0.9585386269812564,
 'learning_rate': 0.00031489116479568613,
 'regularizer': 2.9380279387035334e-05,
 'use_emma': True}


#### Train model with optimal parameters

In [16]:
def train_best_model(best_params):
    batch_size = best_params.pop('batch_size', BATCH_SIZE)
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)
    best_model = build_model(layer_dims=NN_SHAPE, **best_params)
    best_model.fit(X_train, Y_train, batch_size=batch_size, epochs=MAX_EPOCHS, validation_split=0.2,
                callbacks=[early_stopping], verbose=3)
    Y_predict = sigmoid(best_model(X_train))
    f1_score = metrics.f1_score(Y_train, Y_predict)
    print(f'Best model F1={f1_score:.3f}')
    return best_model

best_model = train_best_model(trial.params)

Epoch 1/20




Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Best model F1=0.878


In [17]:
best_model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_40 (Dense)            (None, 2048)              1136640   
                                                                 
 dense_41 (Dense)            (None, 1024)              2098176   
                                                                 
 dense_42 (Dense)            (None, 64)                65600     
                                                                 
 dense_43 (Dense)            (None, 1)                 65        
                                                                 
Total params: 3,300,481
Trainable params: 3,300,481
Non-trainable params: 0
_________________________________________________________________


#### Generate output

In [18]:
best_model.save(SCRIPT_NAME)



INFO:tensorflow:Assets written to: DL-06/assets


INFO:tensorflow:Assets written to: DL-06/assets


In [19]:
df_test = pd.read_csv('./test_enriched.csv', index_col='id')
df_test.fillna({'keyword': '', 'location': '', 'country': '', 'state': '', 'city': '', 'url_domains': ''}, inplace=True)
df_test.shape

(3263, 22)

In [20]:
test_embedding = None
with open('./test-text-embeddings.pkl', 'rb') as fin:
    test_embedding = pickle.load(fin)
len(test_embedding), len(test_embedding[0])

(3263, 384)

In [21]:
embedding_transformer.data = test_embedding
X_test = transformer.transform(df_test)
print('X_test shape', X_test.shape)

Y_test_predict = sigmoid(best_model(X_test))

df_example = pd.read_csv('./sample_submission.csv')
df_example['target'] = Y_test_predict

df_example.to_csv(f'./{SCRIPT_NAME}-submission.csv', index=False)

X_test shape (3263, 554)
