# 🚀 LSTM Christian Text Generator - Full Training (40 Epochs)

**Setup:**
1. Runtime → Change runtime type → **T4 GPU**
2. Run all cells
3. Wait for training to finish (~2-3 hours)

In [None]:
# 1️⃣ Check GPU
# Confirm the runtime exposes a GPU before starting the long training run.
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
# Prints the list of GPU devices TensorFlow can see (empty list = none found).
print("GPU Available:", tf.config.list_physical_devices('GPU'))
# IPython shell escape (notebook-only syntax): runs the nvidia-smi command.
!nvidia-smi

In [None]:
# 2️⃣ Mount Google Drive
# Model artifacts written by later cells (vocabulary, checkpoints, exported
# models) are all placed under output_dir on the mounted Drive.
from google.colab import drive
drive.mount('/content/drive')

import os
# Later cells build every output path from this directory.
output_dir = '/content/drive/MyDrive/everyday-christian-model'
# exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)
print(f"Models will save to: {output_dir}")

In [None]:
# 3️⃣ Download training data from YOUR GitHub
!wget -q https://raw.githubusercontent.com/elev8tion/edc/main/assets/training_data/lstm_training_data.txt -O /content/training_data.txt
!ls -lh /content/training_data.txt
print("✅ Training data downloaded from GitHub")

In [None]:
# 4️⃣ Configuration
# Hyperparameters shared by the data pipeline, the model builder and training.
import numpy as np
import time

# FULL CAPACITY - NO COMPROMISES
# Characters per training input; the corpus is cut into SEQ_LENGTH + 1 chunks.
SEQ_LENGTH = 100
# Sequences per training batch.
BATCH_SIZE = 256
# Size of the shuffle buffer, counted in sequences.
BUFFER_SIZE = 10000
# Width of the character embedding vectors.
EMBEDDING_DIM = 256
# Units in the first LSTM layer; the second LSTM layer uses half of this.
RNN_UNITS = 1024
# Maximum number of passes over the dataset requested from fit().
EPOCHS = 40  # FULL 40 EPOCHS
# Intended optimizer learning rate.
LEARNING_RATE = 0.001

# Echo the main settings so they appear in the notebook output.
print("📋 FULL Training Configuration:")
print(f"  Epochs: {EPOCHS}")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  RNN Units: {RNN_UNITS}")
print(f"  Embedding Dim: {EMBEDDING_DIM}")

In [None]:
# 5️⃣ Prepare data
def prepare_data():
    """Load the corpus and build the character-level training pipeline.

    Reads ``/content/training_data.txt``, derives a character vocabulary
    (three special tokens followed by the sorted distinct characters of the
    text), writes that vocabulary to ``{output_dir}/char_vocab.txt`` one
    entry per line, and converts the text into shuffled batches of
    (input, target) index sequences where the target is the input shifted
    by one character.

    Returns:
        A 4-tuple ``(dataset, vocab_size, char2idx, idx2char)``: the batched
        ``tf.data.Dataset``, the vocabulary length, and the two lookup
        dictionaries (character -> index and index -> character).

    Raises:
        ValueError: If the text is too short to form a single full batch.
    """
    print("📖 Loading training data...")
    with open('/content/training_data.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    print(f"✅ Loaded {len(text):,} characters")

    # Both batch() calls below use drop_remainder=True, so a corpus shorter
    # than one full batch would silently produce an empty dataset.
    min_chars = (SEQ_LENGTH + 1) * BATCH_SIZE
    if len(text) < min_chars:
        raise ValueError(
            f"Training text has {len(text):,} characters; at least "
            f"{min_chars:,} are needed to form one batch"
        )

    # Create vocabulary: special tokens occupy indices 0-2, real characters follow.
    vocab = sorted(set(text))
    vocab = ['<PAD>', '<START>', '<END>'] + vocab

    char2idx = {char: idx for idx, char in enumerate(vocab)}
    idx2char = {idx: char for idx, char in enumerate(vocab)}

    print(f"📝 Vocabulary size: {len(vocab)}")

    # Save vocabulary, one entry per line (line number == index).
    # NOTE(review): if the corpus contains a newline character, that entry is
    # written as a bare line break and therefore occupies two lines in the
    # file; any line-based reader must account for this.
    vocab_path = f'{output_dir}/char_vocab.txt'
    with open(vocab_path, 'w', encoding='utf-8') as f:
        for char in vocab:
            f.write(f"{char}\n")

    # Create sequences: chunks of SEQ_LENGTH + 1 so input and target can overlap.
    text_as_int = np.array([char2idx.get(c, 0) for c in text])
    char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
    sequences = char_dataset.batch(SEQ_LENGTH + 1, drop_remainder=True)

    def split_input_target(chunk):
        """Split one chunk into (all but last char, all but first char)."""
        input_text = chunk[:-1]
        target_text = chunk[1:]
        return input_text, target_text

    # cache() must come BEFORE shuffle(): caching after the shuffle stores the
    # first epoch's order, so every later epoch would replay identical batches.
    dataset = (
        sequences
        .map(split_input_target)
        .cache()
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.AUTOTUNE)
    )

    return dataset, len(vocab), char2idx, idx2char

# Run the pipeline once at cell scope; later cells read dataset, vocab_size,
# char2idx and idx2char directly.
dataset, vocab_size, char2idx, idx2char = prepare_data()

In [None]:
# 6️⃣ Build PROPER model (NO batch_input_shape error!)
def build_model(vocab_size, embedding_dim, rnn_units):
    """Assemble the stacked character LSTM as a Keras ``Sequential`` model.

    The stack is: embedding -> LSTM(rnn_units) -> dropout ->
    LSTM(rnn_units // 2) -> dropout -> dense projection to ``vocab_size``.
    The dense layer has no activation, so the model outputs raw scores for
    every position of the input sequence.

    Args:
        vocab_size: Number of distinct token indices (embedding rows and
            output width).
        embedding_dim: Width of the embedding vectors.
        rnn_units: Units in the first LSTM layer; the second gets half.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. No input shape is
        declared (no ``batch_input_shape`` argument), so the model is built
        when ``build()`` is called or it first sees data.
    """
    keras_layers = tf.keras.layers

    def recurrent_layer(units):
        # Both LSTM layers share every setting except their width.
        # NOTE(review): a non-zero recurrent_dropout may prevent Keras from
        # using its fused GPU LSTM kernel - confirm against the Keras LSTM
        # documentation if training speed matters.
        return keras_layers.LSTM(
            units,
            return_sequences=True,
            recurrent_initializer='glorot_uniform',
            recurrent_dropout=0.1,
        )

    stack = [
        keras_layers.Embedding(vocab_size, embedding_dim),
        recurrent_layer(rnn_units),
        keras_layers.Dropout(0.3),
        recurrent_layer(rnn_units // 2),
        keras_layers.Dropout(0.3),
        keras_layers.Dense(
            vocab_size,
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
        ),
    ]
    return tf.keras.Sequential(stack)

print("🏗️ Building LSTM model...")
model = build_model(vocab_size, EMBEDDING_DIM, RNN_UNITS)
# Create the weights up front (any batch size, any sequence length) so the
# summary below can report real layer shapes and parameter counts.
model.build(tf.TensorShape([None, None]))

# Compile
# The model's final Dense layer has no activation, hence from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Pass LEARNING_RATE explicitly so the configuration cell actually controls
# the optimizer instead of relying on the 'adam' string's built-in default.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would emit a stray "None" line).
model.summary()

In [None]:
# 7️⃣ TRAIN - FULL 40 EPOCHS
print("🎓 Starting FULL training...")
print("⏱️ Estimated time: 2-3 hours")
print("☕ Colab will keep running\n")

# Callbacks
# The doubled braces survive the f-string as "{epoch:02d}", which
# ModelCheckpoint fills in with the epoch number on each save.
# The ".weights.h5" suffix is required by current Keras for weights-only
# checkpoints; older tf.keras accepts it as an HDF5 weights file as well.
checkpoint_path = f"{output_dir}/checkpoint-{{epoch:02d}}.weights.h5"
callbacks = [
    # Persist the weights to Drive after every epoch.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=True,
        save_freq='epoch',
        verbose=1
    ),
    # Stop when the training loss has not improved by at least 0.001 for
    # 5 epochs, and roll back to the best weights seen. Training loss is
    # monitored because fit() below is given no validation data.
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=5,
        min_delta=0.001,
        restore_best_weights=True,
        verbose=1
    ),
    # Halve the learning rate after 3 epochs without improvement, never
    # going below 1e-5.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=0.5,
        patience=3,
        min_lr=1e-5,
        verbose=1
    )
]

# TRAIN!
start_time = time.time()
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)

# Elapsed wall-clock time converted from seconds to hours.
training_time = (time.time() - start_time) / 3600
print(f"\n✅ Training complete in {training_time:.2f} hours!")
# These are the metrics of the last epoch that ran.
print(f"Final Loss: {history.history['loss'][-1]:.4f}")
print(f"Final Accuracy: {history.history['accuracy'][-1]:.4f}")

In [None]:
# 8️⃣ Convert to TFLite for iOS
print("🔄 Converting to TFLite...")

# Save weights
# Current Keras only accepts weight files whose name ends in ".weights.h5";
# older tf.keras versions accept the same name as an HDF5 weights file.
weights_path = f'{output_dir}/final.weights.h5'
model.save_weights(weights_path)

# Rebuild for inference (batch size 1)
# Same architecture, built for one sequence of any length, then loaded with
# the trained weights saved just above.
inference_model = build_model(vocab_size, EMBEDDING_DIM, RNN_UNITS)
inference_model.build(tf.TensorShape([1, None]))
inference_model.load_weights(weights_path)

# Save Keras model
keras_path = f'{output_dir}/text_generator.h5'
inference_model.save(keras_path)
print("✅ Saved Keras model")

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# iOS compatible
# NOTE(review): only built-in TFLite ops are allowed here; confirm that
# convert() succeeds for this LSTM graph with a variable sequence length.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS
]
tflite_model = converter.convert()

# Save TFLite
tflite_path = f'{output_dir}/text_generator.tflite'
with open(tflite_path, 'wb') as f:
    f.write(tflite_model)

print("✅ Saved TFLite model")
# Size of the serialized model in bytes, converted to MiB.
print(f"📊 Model size: {len(tflite_model) / 1024 / 1024:.2f} MB")

In [None]:
# 9️⃣ Test generation
def generate_text(model, start_string, char2idx, idx2char, temperature=0.3,
                  num_generate=200, max_context=100):
    """Sample a continuation of ``start_string`` one character at a time.

    The model keeps no state between calls, so every step is fed the most
    recent ``max_context`` characters (prompt plus everything generated so
    far). Feeding only the last sampled character would discard the prompt
    and all earlier context after the first step.

    Args:
        model: Callable taking an integer tensor of shape (1, length) and
            returning per-position scores of shape (1, length, vocab).
        start_string: Non-empty prompt text.
        char2idx: Mapping from character to index; unknown characters map
            to index 0.
        idx2char: Mapping from index back to character; unknown indices
            contribute an empty string.
        temperature: Positive divisor applied to the scores before
            sampling; lower values make the output more predictable.
        num_generate: Number of characters to sample.
        max_context: Maximum number of trailing characters fed to the model
            at each step.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty, ``temperature`` is not
            positive, or ``max_context`` is less than 1.
    """
    if not start_string:
        raise ValueError("start_string must not be empty")
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    if max_context < 1:
        raise ValueError("max_context must be at least 1")

    # Running list of indices: the prompt first, sampled ids appended later.
    context_ids = [char2idx.get(s, 0) for s in start_string]
    text_generated = []

    for _ in range(num_generate):
        # Shape (1, window): a batch holding the trailing context window.
        input_eval = tf.expand_dims(context_ids[-max_context:], 0)
        predictions = model(input_eval, training=False)
        # Only the scores at the final position predict the next character.
        next_scores = predictions[:, -1, :] / temperature
        sampled = tf.random.categorical(next_scores, num_samples=1)
        predicted_id = int(sampled[0, 0].numpy())

        context_ids.append(predicted_id)
        text_generated.append(idx2char.get(predicted_id, ''))

    return start_string + ''.join(text_generated)

# Smoke-test the rebuilt inference model on a few short seed phrases.
print("🧪 Testing generation...\n")
test_prompts = ["God ", "I feel ", "When I pray "]

# Lazy generator: each sample is produced right before it is printed, so
# generation and output stay interleaved prompt by prompt.
samples = (
    (seed, generate_text(inference_model, seed, char2idx, idx2char))
    for seed in test_prompts
)
for prompt, generated in samples:
    # Only the first 150 characters of each result are shown.
    print(f"'{prompt}' -> {generated[:150]}...\n")

In [None]:
# 🎉 DOWNLOAD
# Report where the artifacts live on Drive, then push them to the browser.
for line in (
    "✨ COMPLETE!\n",
    "📦 Models saved to Google Drive:",
    f"   {tflite_path}",
    f"   {output_dir}/char_vocab.txt\n",
):
    print(line)

from google.colab import files
print("🔽 Auto-downloading to your computer...")
# Same two files, in the same order: the TFLite model, then the vocabulary.
for artifact in (tflite_path, f'{output_dir}/char_vocab.txt'):
    files.download(artifact)

# Target locations inside the Flutter project for the downloaded files.
for line in (
    "\n📱 Copy to Flutter app:",
    "   assets/models/text_generator.tflite",
    "   assets/models/char_vocab.txt",
):
    print(line)