In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the already-cleaned dataset from the notebook's working directory and
# preview the first rows as the cell output.
CLEANED_CSV_PATH = './DfCleaned.csv'
df = pd.read_csv(CLEANED_CSV_PATH)
df.head()

In [None]:
import os
# Quiet TensorFlow's native (C++) logging: level '2' filters out INFO and
# WARNING lines. This cell runs before the cell that imports tensorflow so the
# setting is already in the environment when the library initialises.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization

In [None]:
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
# TF-Hub handles for the uncased English BERT pair: the text preprocessing
# model and the matching L-12 / H-768 / A-12 encoder.
BERT_PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Both layers are created once here and reused by every later cell.
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_HANDLE)
bert_encoder = hub.KerasLayer(BERT_ENCODER_HANDLE)

In [None]:
def get_sentence_embeding(sentences):
    """Return the BERT pooled output for a batch of sentences.

    The input is passed through the module-level ``bert_preprocess`` layer and
    the result is fed to the module-level ``bert_encoder`` layer.

    Args:
        sentences: Batch of raw strings accepted by ``bert_preprocess``.

    Returns:
        The ``'pooled_output'`` entry of the encoder's output dictionary
        (presumably one 768-wide vector per sentence, given the H-768 encoder
        handle -- TODO confirm).
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

### BERT evaluation (k-fold cross-validation)

In [None]:
# Model input is the 'ngram3' text column and the target is the 'class' column.
# Despite the "train" names these cover the whole DataFrame; no rows are held
# out at this point.
x_train, y_train = df['ngram3'], df['class']

In [None]:
from sklearn.model_selection import KFold

In [None]:
# Cross-validation setup: 10 shuffled folds.
# With shuffle=True and no random_state the fold membership changes on every
# run, so the cross-validated accuracy cannot be reproduced. Fixing the seed
# pins the splits. It does not seed model weight initialisation, so training
# itself can still vary between runs.
num_folds = 10
RANDOM_SEED = 42
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=RANDOM_SEED)

In [None]:

# Settings for the cross-validation run, named so they can be tuned in one place.
NUM_CLASSES = 3       # number of units in the softmax output layer
EPOCHS_PER_FOLD = 10  # Adjust the number of epochs as needed


def build_bert_classifier(num_classes=NUM_CLASSES):
    """Build and compile a fresh classifier on top of the shared BERT layers.

    Architecture: raw string -> ``bert_preprocess`` -> ``bert_encoder`` ->
    ``pooled_output`` -> Dropout(0.1) -> Dense(32, relu) -> Dense(64, relu) ->
    Dense(num_classes, softmax).

    ``bert_preprocess`` and ``bert_encoder`` are the module-level hub layers,
    so the same two objects are reused by every model; only the dropout and
    dense layers are new on each call.
    NOTE(review): hub.KerasLayer is non-trainable by default, so presumably no
    encoder weights carry over between folds -- confirm.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Model`` taking a batch of strings, compiled with the
        'adam' optimizer, 'sparse_categorical_crossentropy' loss and the
        'accuracy' metric.
    """
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    encoder_outputs = bert_encoder(bert_preprocess(text_input))

    hidden = tf.keras.layers.Dropout(0.1, name="dropout")(encoder_outputs['pooled_output'])
    hidden = tf.keras.layers.Dense(32, activation='relu', name="first")(hidden)
    hidden = tf.keras.layers.Dense(64, activation='relu', name="second")(hidden)
    probabilities = tf.keras.layers.Dense(num_classes, activation='softmax', name="output")(hidden)

    classifier = tf.keras.Model(inputs=[text_input], outputs=[probabilities])
    classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return classifier


# Validation accuracy of each fold, in fold order.
results = []

# Iterate over the folds
for train_index, test_index in kfold.split(x_train):
    # KFold.split yields positional row numbers. `series[indices]` is a
    # label-based lookup that only coincides with positions while the index is
    # exactly 0..n-1, so select rows by position with .iloc instead.
    x_train_fold, x_val_fold = x_train.iloc[train_index], x_train.iloc[test_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[test_index]

    # A new model per fold, so the dense head never sees this fold's validation rows.
    model = build_bert_classifier()

    # Train the model on the current fold
    model.fit(x_train_fold, y_train_fold, epochs=EPOCHS_PER_FOLD)

    # Evaluate the model on the validation data for this fold;
    # evaluate() returns [loss, accuracy] for the metrics compiled above.
    _, accuracy = model.evaluate(x_val_fold, y_val_fold)
    results.append(accuracy)


In [None]:
# `model` is the variable assigned inside the cross-validation loop, so after
# the loop it refers to the model built for the last fold. Every fold builds
# the same architecture, so this summary describes all of them.
model.summary()

In [None]:

# Collapse the per-fold validation accuracies into one cross-validation score.
fold_accuracies = np.asarray(results)
mean_accuracy = fold_accuracies.mean()
print("Mean cross-validated accuracy:", mean_accuracy)