<a href="https://colab.research.google.com/github/adithyanmk/javascript-code-assist-ai/blob/main/Untitled19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import Callback

# Function to get the last stopped epoch from checkpoint filenames
def get_last_epoch_from_checkpoints(checkpoint_dir):
    """Return the highest epoch number found among the checkpoint filenames.

    Only filenames containing ``'model_checkpoint'`` are considered. The epoch
    is read from the third underscore-separated field, up to the first dot
    (e.g. ``model_checkpoint_300.h5`` -> ``300``).

    Args:
        checkpoint_dir: Path of the directory to scan.

    Returns:
        The largest epoch number found, or 0 when the directory does not
        exist, is empty, or contains no filename with a parsable epoch.
    """
    # A missing directory simply means nothing has been saved yet.
    if not os.path.isdir(checkpoint_dir):
        return 0

    epoch_numbers = []
    for filename in os.listdir(checkpoint_dir):
        if 'model_checkpoint' not in filename:
            continue
        try:
            epoch_numbers.append(int(filename.split('_')[2].split('.')[0]))
        except (IndexError, ValueError):
            # The name mentions 'model_checkpoint' but carries no numeric
            # epoch field; ignore it instead of aborting the scan.
            continue

    # default=0 covers directories that hold only unrelated files.
    return max(epoch_numbers, default=0)

# Function to load the latest model checkpoints
def load_latest_generator_checkpoint(model, checkpoint_dir):
    """Load the newest checkpoint's weights into ``model``, if one exists.

    The newest epoch is obtained from ``get_last_epoch_from_checkpoints``; the
    weights file is expected at ``model_checkpoint_<epoch, 3 digits>.h5``
    inside ``checkpoint_dir``.

    Args:
        model: Object exposing ``load_weights(path)``.
        checkpoint_dir: Directory holding the checkpoint files.

    Returns:
        A ``(model, last_epoch)`` tuple; ``last_epoch`` is 0 when no
        checkpoint was found and the model is returned untouched.
    """
    resume_epoch = get_last_epoch_from_checkpoints(checkpoint_dir)

    # Nothing to restore: hand the model back as-is.
    if resume_epoch <= 0:
        return model, resume_epoch

    weights_path = os.path.join(checkpoint_dir, f"model_checkpoint_{resume_epoch:03d}.h5")
    model.load_weights(weights_path)
    print(f"Resuming training from epoch {resume_epoch}.")
    return model, resume_epoch

# Read text data from a file.
# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
with open('javascript_dataset.txt', 'r', encoding='utf-8') as file:
    texts = file.readlines()

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
# +1 reserves a slot for index 0, the value pad_sequences pads with below
total_words = len(tokenizer.word_index) + 1

# Create input sequences and labels: every prefix (length >= 2) of each tokenized
# line becomes one training example.
input_sequences = []
for token_list in tokenizer.texts_to_sequences(texts):
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

# Fail with an actionable message instead of the opaque "max() arg is an empty sequence".
if not input_sequences:
    raise ValueError(
        "javascript_dataset.txt produced no training sequences; "
        "it needs at least one line with two or more tokens"
    )

max_sequence_length = max(len(x) for x in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# All tokens but the last form the model input; the last token is the label.
X = input_sequences[:, :-1]
y = input_sequences[:, -1]

# Assemble the network layer by layer: embedding -> LSTM -> softmax over the vocabulary
model = keras.Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_length-1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

# Labels are integer word indices, hence the sparse variant of cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Make sure the checkpoint directory exists before anything reads or writes it
checkpoint_folder = 'checkpoints'
os.makedirs(checkpoint_folder, exist_ok=True)

# Define a CustomModelCheckpoint callback to save checkpoints with epoch names
class CustomModelCheckpoint(Callback):
    """Callback that saves the model every ``save_freq`` epochs.

    Args:
        filepath: Format-string template for the output path. It is formatted
            with ``epoch`` (1-based epoch number) plus every key present in
            the ``logs`` dict passed to ``on_epoch_end``.
        save_freq: Save whenever ``(epoch + 1)`` is a multiple of this value.
    """

    def __init__(self, filepath, save_freq):
        super().__init__()
        self.filepath = filepath
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        """Save the model when the 1-based epoch number is a multiple of ``save_freq``.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Optional dict of values made available to the filename template.
        """
        if (epoch + 1) % self.save_freq == 0:
            # `logs` defaults to None, which cannot be unpacked with **;
            # treat it as an empty dict. Setting 'epoch' last also avoids a
            # duplicate-keyword error should logs ever contain that key.
            template_fields = dict(logs or {})
            template_fields['epoch'] = epoch + 1
            filepath = self.filepath.format(**template_fields)
            self.model.save(filepath)

# Restore weights from the newest checkpoint, if any, and get the epoch to resume from
model, last_epoch = load_latest_generator_checkpoint(model, checkpoint_folder)

# Instantiate the checkpoint callback; files are named by their 3-digit epoch number
checkpoint_filepath = os.path.join(checkpoint_folder, "model_checkpoint_{epoch:03d}.h5")
custom_checkpoint_callback = CustomModelCheckpoint(
    checkpoint_filepath,
    save_freq=100,  # one checkpoint per 100 epochs
)

# Train up to epoch 1000, starting at the resumed epoch rather than at zero
model.fit(
    X,
    y,
    initial_epoch=last_epoch,
    epochs=1000,
    callbacks=[custom_checkpoint_callback],
    verbose=1,
)

# Generate text from a seed text
seed_text = "Hello,"
next_words = 5

# Invert the vocabulary once so each predicted index is a dict lookup
# instead of a linear scan over every word for every generated token.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

for _ in range(next_words):
    # Encode the text generated so far and left-pad it to the model's input length
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length-1, padding='pre')
    predicted_probs = model.predict(token_list, verbose=0)[0]
    # Greedy decoding: take the most probable next word
    predicted_index = int(np.argmax(predicted_probs))
    # An index with no vocabulary entry yields an empty word, as before
    output_word = index_to_word.get(predicted_index, "")
    seed_text += " " + output_word

print(seed_text)

In [25]:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Read text data from a file.
# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
with open('javascript_dataset.txt', 'r', encoding='utf-8') as file:
    texts = file.readlines()

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
# +1 reserves a slot for index 0, the value pad_sequences pads with below
total_words = len(tokenizer.word_index) + 1

# Create input sequences and labels: every prefix (length >= 2) of each tokenized
# line becomes one example.
input_sequences = []
for token_list in tokenizer.texts_to_sequences(texts):
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

# Fail with an actionable message instead of the opaque "max() arg is an empty sequence".
if not input_sequences:
    raise ValueError(
        "javascript_dataset.txt produced no training sequences; "
        "it needs at least one line with two or more tokens"
    )

max_sequence_length = max(len(x) for x in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# All tokens but the last form the model input; the last token is the label.
X = input_sequences[:, :-1]
y = input_sequences[:, -1]

# Recreate the same layer stack as the training cell: embedding -> LSTM -> softmax
model = keras.Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_length-1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

# Labels are integer word indices, hence the sparse variant of cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Restore weights from the hard-coded checkpoint file
checkpoint_filepath = 'checkpoints/model_checkpoint_300.h5'
model.load_weights(checkpoint_filepath)

# Generate text from a seed text
seed_text = "make a variable"
next_words = 10

# Invert the vocabulary once so each predicted index is a dict lookup
# instead of a linear scan over every word for every generated token.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

for _ in range(next_words):
    # Encode the text generated so far and left-pad it to the model's input length
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length-1, padding='pre')
    predicted_probs = model.predict(token_list, verbose=0)[0]
    # Greedy decoding: take the most probable next word
    predicted_index = int(np.argmax(predicted_probs))
    # An index with no vocabulary entry yields an empty word, as before
    output_word = index_to_word.get(predicted_index, "")
    seed_text += " " + output_word

print(seed_text)

make a variable declared without a value will have the value undefined after


In [31]:
# NOTE(review): the recorded output shows this failing with "fatal: not a git repository";
# the working directory must be inside a git repository before a remote can be added.
!git remote add origin https://github.com/adithyanmk/javascript-code-assist-ai.git

fatal: not a git repository (or any of the parent directories): .git
