In [6]:
import numpy as np
from numba import cuda

# Check if a GPU is available
if cuda.is_available():
    print("GPU is available.")
    
    # Get the name of the GPU
    gpu_name = cuda.get_current_device().name
    print(f"Using GPU: {gpu_name}")
    
else:
    print("GPU is not available. Using CPU.")

# Your CUDA code here, if you want to run specific CUDA operations



GPU is available.
Using GPU: b'NVIDIA GeForce GTX 1650 Ti'


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Input
from numba import jit, cuda

In [8]:
df = pd.read_csv('data/preprocessed_data.csv', encoding='utf-8')
df = df.head(1000)

In [9]:
hindi_sentences = df['Hindi']
telugu_sentences = df['Telugu']
hindi_train, hindi_val, telugu_train, telugu_val = train_test_split(hindi_sentences, telugu_sentences, test_size=0.2, random_state=42)

In [10]:
hindi_tokenizer = Tokenizer(oov_token="<OOV>")
hindi_tokenizer.fit_on_texts(hindi_sentences)
hindi_vocab_size = len(hindi_tokenizer.word_index) + 1

In [11]:
telugu_tokenizer = Tokenizer(oov_token="<OOV>")
telugu_tokenizer.fit_on_texts(telugu_sentences)
telugu_vocab_size = len(telugu_tokenizer.word_index) + 1

In [12]:
max_sequence_length = max(len(seq) for seq in hindi_train)
hindi_train_sequences = pad_sequences(hindi_tokenizer.texts_to_sequences(hindi_train), maxlen=max_sequence_length, padding="post")
telugu_train_sequences = pad_sequences(telugu_tokenizer.texts_to_sequences(telugu_train), maxlen=max_sequence_length, padding="post")


In [13]:
def gpu_evaluate_model(model, hindi_sequences, telugu_sequences, target_test_data):
    evaluation = model.evaluate([hindi_sequences, telugu_sequences], target_test_data)
    return evaluation

In [14]:
def create_gpu_model(hindi_vocab_size, telugu_vocab_size, max_sequence_length):
    encoder_inputs = Input(shape=(max_sequence_length,))
    encoder_embedding = Embedding(hindi_vocab_size, 256, input_length=max_sequence_length)(encoder_inputs)
    encoder_lstm = LSTM(256, return_state=True)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
    encoder_states = [state_h, state_c]

    decoder_inputs = Input(shape=(max_sequence_length,))
    decoder_embedding = Embedding(telugu_vocab_size, 256, input_length=max_sequence_length)(decoder_inputs)
    decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
    decoder_dense = Dense(telugu_vocab_size, activation="softmax")
    output = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], output)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model

In [15]:
gpu_model = create_gpu_model(hindi_vocab_size, telugu_vocab_size, max_sequence_length)
epochs = 5

In [17]:
import tensorflow as tf
target_train_data = tf.keras.utils.to_categorical(telugu_train_sequences, num_classes=telugu_vocab_size, dtype='float32')

In [18]:
gpu_model.fit([hindi_train_sequences, telugu_train_sequences], target_train_data, epochs=epochs, batch_size=64, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x2298e2111d0>

In [19]:
hindi_test_sequences = pad_sequences(hindi_tokenizer.texts_to_sequences(hindi_val), maxlen=max_sequence_length, padding="post")
telugu_test_sequences = pad_sequences(telugu_tokenizer.texts_to_sequences(telugu_val), maxlen=max_sequence_length, padding="post")

target_test_data = tf.keras.utils.to_categorical(telugu_test_sequences, num_classes=telugu_vocab_size, dtype='float32')

evaluation = gpu_evaluate_model(gpu_model, hindi_test_sequences, telugu_test_sequences, target_test_data)

print("Test Loss:", evaluation[0])
print("Test Accuracy:", evaluation[1])

Test Loss: 0.25048258900642395
Test Accuracy: 0.9730697870254517


In [20]:
import pickle

gpu_model.save('models/gpu_translation_model.keras')

with open('tokenizers/gpu_hindi_tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(hindi_tokenizer, tokenizer_file)

with open('tokenizers/gpu_telugu_tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(telugu_tokenizer, tokenizer_file)