In [73]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [74]:
# Loading my dataset 
ds = pd.read_csv('/Users/damacm1143/Downloads/SA_official_languages.csv')

In [75]:
ds

Unnamed: 0,lang_id,text
0,xho,umgaqo-siseko wenza amalungiselelo kumaziko ax...
1,xho,i-dha iya kuba nobulumko bokubeka umsebenzi na...
2,eng,the province of kwazulu-natal department of tr...
3,nso,o netefatša gore o ba file dilo ka moka tše le...
4,ven,khomishini ya ndinganyiso ya mbeu yo ewa maana...
...,...,...
32995,tsn,popo ya dipolateforomo tse ke go tlisa boetele...
32996,sot,modise mosadi na o ntse o sa utlwe hore thaban...
32997,eng,closing date for the submission of completed t...
32998,xho,nawuphina umntu ofunyenwe enetyala phantsi kwa...


In [76]:
ds['lang_id'].value_counts()

lang_id
xho    3000
eng    3000
nso    3000
ven    3000
tsn    3000
nbl    3000
zul    3000
ssw    3000
tso    3000
sot    3000
afr    3000
Name: count, dtype: int64

In [77]:
# Distribution of characters for each language
char_counts = ds['text'].apply(lambda x: pd.Series(list(x))).stack().value_counts()
print(char_counts)

# Character frequency for each language
char_freq_per_lang = ds.groupby('lang_id')['text'].apply(lambda x: ''.join(x)).apply(lambda x: pd.Series(list(x)).value_counts())
print(char_freq_per_lang)

     1192280
a     907551
e     763846
i     525770
o     517527
      ...   
˜          1
¢          1
à          1
          1
±          1
Name: count, Length: 94, dtype: int64
                          e        i        n         a        r        s  \
lang_id                                                                     
afr      114856.0  107523.0  55324.0  52452.0   49657.0  42102.0  39295.0   
eng      117700.0   76046.0  49640.0  47932.0   49299.0  41457.0  39123.0   
nbl       74238.0   66240.0  54086.0  57271.0   89402.0   4366.0  21148.0   
nso      132758.0   74385.0  26054.0  22457.0   92480.0  11509.0  13929.0   
sot      134202.0   80717.0  23177.0  33305.0   96002.0   9216.0  36022.0   
ssw       75957.0   76149.0  57159.0  55349.0   77857.0   1177.0  30641.0   
tsn      131791.0   70927.0  31063.0  31495.0   91651.0  13609.0  33526.0   
tso      125327.0   43596.0  64226.0  47274.0   98455.0  14132.0  22707.0   
ven      129784.0   35922.0  48688.0  39765.0  10

In [78]:
from sklearn.feature_extraction.text import CountVectorizer

# Function to get top N-grams
def get_top_ngrams(corpus, n=2, top_k=10):
    vec = CountVectorizer(ngram_range=(n, n)).fit(corpus)
    bag_of_words = vec.transform(corpus)
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    return words_freq[:top_k]

# Get top bigrams for each language
for lang in ds['lang_id'].unique():
    texts = ds[ds['lang_id'] == lang]['text']
    print(f"Top bigrams in {lang}:")
    print(get_top_ngrams(texts, n=2))

Top bigrams in xho:
[('emzantsi afrika', 76), ('ukuba ngaba', 59), ('phambi kokuba', 53), ('ngaphandle kokuba', 49), ('nayiphi na', 48), ('bomzantsi afrika', 46), ('emva kokuba', 45), ('okanye ke', 39), ('nawuphi na', 38), ('ukuqinisekisa ukuba', 36)]
Top bigrams in eng:
[('of the', 1901), ('in the', 760), ('to the', 671), ('by the', 420), ('for the', 398), ('and the', 379), ('terms of', 299), ('with the', 292), ('on the', 288), ('in terms', 288)]
Top bigrams in nso:
[('le go', 761), ('ka go', 675), ('ya go', 577), ('swanetše go', 517), ('go ya', 429), ('tšeo di', 411), ('ya ka', 406), ('tše di', 395), ('tša go', 340), ('na le', 328)]
Top bigrams in ven:
[('vha na', 435), ('vha tshi', 341), ('afrika tshipembe', 320), ('mulayo wa', 282), ('kha vha', 280), ('ine ya', 269), ('tshi khou', 263), ('hu tshi', 244), ('ya nga', 240), ('vha nga', 239)]
Top bigrams in tsn:
[('tse di', 1182), ('le go', 749), ('tshwanetse go', 549), ('go ya', 497), ('ya go', 442), ('ba ba', 357), ('se se', 355), ('

In [79]:
# # Separating sentences and labels
# X = ds['text'].values
# y = ds['lang_id'].values

In [80]:
# Tokenizing text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X= tokenizer.word_index

In [81]:
X

{'ya': 1,
 'a': 2,
 'le': 3,
 'ka': 4,
 'go': 5,
 'e': 6,
 'na': 7,
 'u': 8,
 'die': 9,
 'wa': 10,
 'the': 11,
 'ba': 12,
 'o': 13,
 'ho': 14,
 'ku': 15,
 'of': 16,
 'nga': 17,
 'ke': 18,
 'in': 19,
 'di': 20,
 'sa': 21,
 'ha': 22,
 'tsa': 23,
 'se': 24,
 'van': 25,
 'vha': 26,
 'mo': 27,
 'to': 28,
 'hi': 29,
 'and': 30,
 'la': 31,
 'is': 32,
 'en': 33,
 'tse': 34,
 'n': 35,
 'i': 36,
 'kha': 37,
 'ga': 38,
 'tša': 39,
 'eka': 40,
 'gore': 41,
 'yo': 42,
 'afrika': 43,
 'tla': 44,
 'kanye': 45,
 'va': 46,
 'te': 47,
 'xa': 48,
 'ta': 49,
 're': 50,
 'ukuba': 51,
 'wat': 52,
 'bona': 53,
 'noma': 54,
 'word': 55,
 'as': 56,
 'mme': 57,
 'be': 58,
 'kana': 59,
 'om': 60,
 'ye': 61,
 'or': 62,
 'hore': 63,
 'dza': 64,
 'by': 65,
 'tshi': 66,
 'nie': 67,
 'fa': 68,
 'for': 69,
 'swi': 70,
 'uma': 71,
 'ri': 72,
 'uri': 73,
 'bo': 74,
 'molao': 75,
 'zwa': 76,
 'kapa': 77,
 'kumbe': 78,
 'kutsi': 79,
 'ge': 80,
 'vir': 81,
 'okanye': 82,
 'goba': 83,
 'yi': 84,
 'that': 85,
 'kwa': 86,
 'l

In [82]:
# Pad sequences
max_len = max([len(seq) for seq in X_seq])
X_pad = pad_sequences(X_seq, maxlen=max_len, padding='post')

In [83]:
# label_Encode
label_encoder = LabelEncoder()
y_enc = label_encoder.fit_transform(y)

In [84]:
import numpy as np
from sklearn.model_selection import train_test_split

# Sample data
X_pad = np.random.rand(1000, 100)  # 1000 samples, 100 features each
y_enc = np.random.randint(0, 10, size=(1000,))  # 1000 samples, 10 possible classes

# Check consistency
print("X_pad shape:", X_pad.shape)
print("y_enc length:", len(y_enc))

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_enc, test_size=0.2, random_state=42)

X_pad shape: (1000, 100)
y_enc length: 1000


In [86]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential

# Determine the vocabulary size and number of classes
vocab_size = len(tokenizer.word_index) + 1
num_classes = len(label_tokenizer.word_index)

# Define the model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=40, validation_split=0.2)


Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node ArgMax defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/opt/anaconda3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/opt/anaconda3/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/var/folders/dt/lksn0l5j6md0rhyypnfyzwbr0000gn/T/ipykernel_10844/772721027.py", line 23, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 77, in train_step

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/trainer.py", line 444, in compute_metrics

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 330, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 17, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/metrics/reduction_metrics.py", line 204, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/metrics/accuracy_metrics.py", line 240, in sparse_categorical_accuracy

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/ops/numpy.py", line 869, in argmax

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/numpy.py", line 772, in argmax

Reduction axis -1 is empty in shape [40,0]
	 [[{{node ArgMax}}]] [Op:__inference_one_step_on_iterator_774614]

In [85]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential

# Determine the vocabulary size and number of classes
vocab_size = len(tokenizer.word_index) + 1
num_classes = len(label_tokenizer.word_index)

# Define the model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=40, validation_split=0.2)

Epoch 1/50




InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/opt/anaconda3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/opt/anaconda3/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/var/folders/dt/lksn0l5j6md0rhyypnfyzwbr0000gn/T/ipykernel_10844/3044964441.py", line 23, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 54, in train_step

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/trainer.py", line 357, in _compute_loss

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/trainer.py", line 325, in compute_loss

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 609, in __call__

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 645, in call

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/losses/loss.py", line 43, in __call__

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/losses/losses.py", line 27, in call

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/losses/losses.py", line 1853, in sparse_categorical_crossentropy

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/ops/nn.py", line 1567, in sparse_categorical_crossentropy

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/nn.py", line 645, in sparse_categorical_crossentropy

Must have at least one class, but got logits shape [40,0]
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_one_step_on_iterator_766851]

In [None]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential

# Define model
model = Sequential()
model.add(Embedding(input_dim=len(X) + 1, output_dim=128, input_length=maxlen))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(len(label_tokenizer.word_index), activation='softmax'))

# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=40, validation_split=0.2)


Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node ArgMax defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/opt/anaconda3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/opt/anaconda3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/opt/anaconda3/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/anaconda3/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes

  File "/opt/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/var/folders/dt/lksn0l5j6md0rhyypnfyzwbr0000gn/T/ipykernel_10844/1540384463.py", line 19, in <module>

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 77, in train_step

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/trainer.py", line 444, in compute_metrics

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 330, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/trainers/compile_utils.py", line 17, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/metrics/reduction_metrics.py", line 204, in update_state

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/metrics/accuracy_metrics.py", line 240, in sparse_categorical_accuracy

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/ops/numpy.py", line 869, in argmax

  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/numpy.py", line 772, in argmax

Reduction axis -1 is empty in shape [40,0]
	 [[{{node ArgMax}}]] [Op:__inference_one_step_on_iterator_751325]

In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.1103 - loss: 12.7256
Loss: 12.760379791259766
Accuracy: 0.11666666716337204


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Example tokenizers (replace with your actual tokenizers)
text_tokenizer = Tokenizer()  # You should have fitted this tokenizer on your training data
label_tokenizer = Tokenizer()  # You should have fitted this tokenizer on your labels


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# Define file paths
model_path = 'language_detection_model.h5'
label_tokenizer_path = 'label_tokenizer.pkl'
tokenizer_path = 'text_tokenizer.pkl'

# Load the model and tokenizers
try:
    model = tf.keras.models.load_model(model_path)
    with open(label_tokenizer_path, 'rb') as handle:
        label_tokenizer = pickle.load(handle)
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")

# Define the prediction function
def predict_language(sentence, maxlen):
    sequences = tokenizer.texts_to_sequences([sentence])
    padded_sequences = pad_sequences(sequences, maxlen=maxlen)
    
    # Predict the language
    prediction = model.predict(padded_sequences)
    predicted_label_index = np.argmax(prediction, axis=1)[0]
    predicted_label = label_tokenizer.index_word.get(predicted_label_index + 1, 'Unknown')

    return predicted_label

# Example usage
maxlen = 100  # Ensure this is consistent with the padding length used during training
sentence = "re go bona kamoo o hudugago ka gona"
print(predict_language(sentence, maxlen))




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
tsn


In [None]:
from keras.models import load_model, Sequential
import pickle

# Assuming model is a Keras model
# Convert model to .h5 format
model.save('kgoza.h5')


