In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Dropout, Flatten
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.src.utils import pad_sequences
from sklearn.model_selection import train_test_split
from custom_preprocessor import preprocess
from fs.osfs import OSFS

# Prediction

In [16]:

# Character-level tokenizer; predict() below uses this module-level instance
# to turn the preprocessed text into integer sequences.
tokenizer = Tokenizer(char_level=True)
# NOTE(review): fit_on_texts() is called without a corpus. Keras documents
# `texts` as a required argument, so on a fresh kernel this presumably raises
# TypeError; and a tokenizer with an empty vocabulary would encode every input
# as an empty sequence. TODO: pass the same texts the model was trained on
# (or load the tokenizer that was fitted at training time) - confirm source.
tokenizer.fit_on_texts()
def get_columns(directory):
    """Return the names of the entries directly inside ``./<directory>``.

    The listing is printed as well as returned. The directory is listed
    exactly once so the printed and the returned list are guaranteed to be
    the same object (the previous version scanned the directory twice).

    Args:
        directory: Path of the directory, relative to the current working
            directory.

    Returns:
        list of entry names, in the order the filesystem reports them
        (not sorted).
    """
    with OSFS(f'./{directory}') as data_fs:
        entries = data_fs.listdir('.')
    print(entries)
    return entries

def predict(model_path, text, max_length, directory):
    """Classify ``text`` with a saved Keras model and report class percentages.

    The text is passed through ``preprocess``, encoded with the module-level
    ``tokenizer``, padded/truncated to ``max_length`` and fed to the model
    loaded from ``model_path``. Class names are the sorted, de-duplicated
    entries of ``./<directory>`` (via ``get_columns`` and ``np.unique``); the
    i-th name is paired with the i-th model output.

    NOTE(review): this pairing assumes the model's output order equals that
    sorted label order - confirm against the training code.

    Args:
        model_path: Path of the saved Keras model to load.
        text: Raw input text to classify.
        max_length: Sequence length passed to ``pad_sequences``.
        directory: Directory whose entry names are used as class labels.

    Returns:
        dict mapping each class label to its score in percent, rounded to one
        decimal place. The same values are also printed, one per line.

    Raises:
        ValueError: If the number of labels found in ``directory`` differs
            from the number of model outputs, since the label/score pairing
            would then be meaningless.
    """
    import warnings

    print(f"Text before preprocessing: {text}")
    cleaned_text = preprocess(text)
    print(f"Text after preprocessing: {cleaned_text}")

    token_ids = tokenizer.texts_to_sequences([cleaned_text])
    if not token_ids[0]:
        # Nothing in the text mapped to a vocabulary id, so after padding the
        # model only sees zeros and its output does not depend on the text.
        warnings.warn(
            "Tokenizer produced an empty sequence; the model input is pure "
            "padding. Check that the tokenizer was fitted on the training "
            "corpus.",
            RuntimeWarning,
            stacklevel=2,
        )
    padded_ids = pad_sequences(token_ids, maxlen=max_length)

    model = load_model(model_path)
    # One input row was submitted, so row 0 holds its class scores.
    probabilities = model.predict(padded_ids)[0]

    class_labels = np.unique(get_columns(directory))
    if len(class_labels) != len(probabilities):
        # A stray entry (e.g. a hidden checkpoint folder) would otherwise
        # silently shift every label onto the wrong score.
        raise ValueError(
            f"Found {len(class_labels)} labels in '{directory}' but the model "
            f"produced {len(probabilities)} outputs."
        )

    # Plain str/float keep the returned dict clean to display and serialize.
    result = {
        str(label): round(float(probability) * 100, 1)
        for label, probability in zip(class_labels, probabilities)
    }

    # Print keys and values of the result dictionary
    for key, value in result.items():
        print(f"{key}: {value}%")

    return result

# Run the language-family model on one sample sentence; the encoded input is
# padded/truncated to length 40 and labels are read from ./data_families.
text_prediction = "Hvem er du og hvad laver du her"
predict(
    model_path='./models/model_lang_families.keras',
    text=text_prediction,
    max_length=40,
    directory='data_families',
)

Text before preprocessing: Hvem er du og hvad laver du her
Text after preprocessing: hvem er du og hvad laver du her hvem er du og
[[]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
['Hellenic', 'Romance', 'Germanic', 'Slavic', 'Uralic']
Germanic: 11.8%
Hellenic: 9.9%
Romance: 34.8%
Slavic: 25.6%
Uralic: 17.9%
