In [None]:
# --- Imports and Setup ---
# Run this cell first.

import sys
import os
import torch
import yaml
import logging # Import logging module

# Add the project root to the Python path so we can import nano_llm
# This assumes you launched jupyter lab from the project root directory
project_root = os.path.abspath(os.path.join(os.getcwd()))
if project_root not in sys.path:
    sys.path.append(project_root)
    print(f"Added project root to sys.path: {project_root}")
else:
    print("Project root already in sys.path.")


# Setup basic logging within the notebook
# Prevent adding multiple handlers if cell is run multiple times
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        stream=sys.stdout # Log to console (notebook output)
    )
    logging.info("Logging configured within notebook.")
else:
    logging.info("Logging already configured.")


# Now you can import your project modules
try:
    from nano_llm.models import SimpleTransformerDecoder
    from nano_llm.data_processing import CharTokenizer # Or your new SubwordTokenizer
    from nano_llm.utils import load_checkpoint # Assuming load_checkpoint is in utils.py
    import torch.nn.functional as F # For softmax etc if needed manually later
    print("Successfully imported project modules.")
except ImportError as e:
    print(f"Error importing project modules: {e}")
    print("Please ensure you launched jupyter lab from the project root directory")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Sys path: {sys.path}")
    # You might need to adjust the sys.path.append line above if your structure is different

Added project root to sys.path: C:\Users\viksh\nanollm\src\scripts
2025-05-28 15:35:06,405 - INFO - Logging configured within notebook.
Error importing project modules: No module named 'nano_llm'
Please ensure you launched jupyter lab from the project root directory
Current working directory: C:\Users\viksh\nanollm\src\scripts
Sys path: ['C:\\Users\\viksh\\AppData\\Local\\Programs\\Python\\Python39\\python39.zip', 'C:\\Users\\viksh\\AppData\\Local\\Programs\\Python\\Python39\\DLLs', 'C:\\Users\\viksh\\AppData\\Local\\Programs\\Python\\Python39\\lib', 'C:\\Users\\viksh\\AppData\\Local\\Programs\\Python\\Python39', 'C:\\Users\\viksh\\nanollm\\.venv', '', 'C:\\Users\\viksh\\nanollm\\.venv\\lib\\site-packages', 'C:\\Users\\viksh\\nanollm\\.venv\\lib\\site-packages\\win32', 'C:\\Users\\viksh\\nanollm\\.venv\\lib\\site-packages\\win32\\lib', 'C:\\Users\\viksh\\nanollm\\.venv\\lib\\site-packages\\Pythonwin', 'C:\\Users\\viksh\\nanollm\\src\\scripts']


In [4]:
# Load Configurations
#Load the necessary YAML configuration files for the model, data, and inference settings.
#Ensure these file paths are correct relative to where you are running the notebook from (which should be the project root if you followed the setup instructions).

In [5]:
# --- Load Configurations ---
# Run this cell.

try:
    with open('config/model_config.yaml', 'r') as f:
        model_config = yaml.safe_load(f)
        logging.info("Loaded model_config.yaml")

    with open('config/data_config.yaml', 'r') as f:
        data_config = yaml.safe_load(f)
        logging.info("Loaded data_config.yaml")

    with open('config/inference_config.yaml', 'r') as f:
        inference_config = yaml.safe_load(f)
        logging.info("Loaded inference_config.yaml")

except FileNotFoundError as e:
    logging.error(f"Configuration file not found: {e}")
    # You might want to stop here if configs are essential
except yaml.YAMLError as e:
    logging.error(f"Error parsing configuration file: {e}")
    # You might want to stop here

print("Configuration loading complete.")

2025-05-28 15:35:56,269 - ERROR - Configuration file not found: [Errno 2] No such file or directory: 'config/model_config.yaml'
Configuration loading complete.


In [6]:
# Determine Device
#Set the device (CPU or GPU) to use for the model.

In [8]:
# --- Determine Device ---
# Run this cell.

# Prioritize CUDA if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Optionally, you can try to get device from inference_config, fallback if not available
# if 'device' in inference_config and inference_config['device'].lower() != 'auto':
#     requested_device = inference_config['device'].lower()
#     if requested_device == 'cuda' and not torch.cuda.is_available():
#          logging.warning("CUDA device requested but not available. Falling back to CPU.")
#          device = torch.device('cpu')
#     elif requested_device in ['cuda', 'cpu']:
#          device = torch.device(requested_device)
#     else:
#          logging.warning(f"Unknown device '{requested_device}' specified in inference_config. Using auto-detected.")

logging.info(f"Using device: {device}")

2025-05-28 15:36:32,613 - INFO - Using device: cpu


In [9]:
# Load Tokenizer
#Load the vocabulary and initialize the tokenizer. This is needed to encode your prompt and decode the generated output.
#Make sure the `vocab.json` file exists in your processed data directory after running preprocessing.

In [12]:
# --- Load Tokenizer ---
# Run this cell.
processed_dir_relative = data_config.get('processed_data_dir', 'data/processed')
# Construct the absolute path to the processed data directory from the project root
processed_dir_abs = os.path.join(project_root, processed_dir_relative)
vocab_path = os.path.join(processed_dir_abs, 'vocab.json')

try:
    # Assuming CharTokenizer has a class method load_vocab
    tokenizer = CharTokenizer.load_vocab(vocab_path) # <-- Use your actual Tokenizer class/method
    logging.info(f"Tokenizer loaded from {vocab_path}. Vocab size: {tokenizer.vocab_size}")
except FileNotFoundError:
    logging.error(f"Vocabulary file not found at {vocab_path}. Ensure preprocess_data.py has been run.")
    # You cannot proceed without the tokenizer
    # You might want to manually stop execution here or handle it.
    tokenizer = None # Set tokenizer to None if load fails
except Exception as e:
    logging.error(f"Error loading tokenizer vocabulary from {vocab_path}: {e}")
    tokenizer = None # Set tokenizer to None if load fails


if tokenizer is not None:
    # You can test encoding/decoding here if the tokenizer loaded successfully
    test_text = "hello world"
    test_ids = tokenizer.encode(test_text)
    print(f"Test encode '{test_text}': {test_ids}")
    print(f"Test decode {test_ids}: '{tokenizer.decode(test_ids)}'")

AttributeError: 'str' object has no attribute 'get'

In [13]:
# Run Inference (Text Generation)
#Use the loaded model to generate text based on a starting prompt.
#Modify the `prompt_text`, `max_new_tokens`, `temperature`, and `top_k` parameters below to experiment with generation.

In [None]:
# --- Run Inference ---
# Run this cell.

if model is None:
    print("Cannot run inference because the model was not loaded successfully.")
else:
    # Define your prompt and generation parameters
    # Get defaults from inference_config
    prompt_text = "The quick brown fox" # <-- CHANGE THIS PROMPT
    max_new_tokens = inference_config.get('max_new_tokens', 100)
    temperature = inference_config.get('temperature', 0.8)
    top_k = inference_config.get('top_k') # None by default

    logging.info(f"Prompt: '{prompt_text}'")
    logging.info(f"Generation parameters: max_new_tokens={max_new_tokens}, temperature={temperature}, top_k={top_k}")


    # Encode the prompt
    prompt_token_ids = tokenizer.encode(prompt_text)

    if not prompt_token_ids:
        logging.warning("Prompt resulted in empty token list. Cannot generate.")
    else:
        # Convert to tensor and move to device
        prompt_tensor = torch.tensor([prompt_token_ids], dtype=torch.long, device=device) # Shape (1, prompt_length)
        logging.info(f"Prompt tensor shape: {prompt_tensor.shape}")

        # Disable gradient calculations during generation
        with torch.no_grad():
            try:
                # Generate text using the model's generate method
                generated_tokens = model.generate(
                    prompt_tensor,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_k=top_k
                )

                # Decode the generated tokens
                # generated_tokens shape: (batch_size, prompt_length + max_new_tokens)
                # Since we use batch_size=1, get the single sequence [0] and convert to list
                generated_ids_list = generated_tokens[0].tolist()

                # Decode the list of token IDs back into a string
                generated_text = tokenizer.decode(generated_ids_list)

                print("\n" + "=" * 40)
                print("Generated Text:")
                print(generated_text)
                print("=" * 40 + "\n")

            except Exception as e:
                logging.error(f"Error during text generation: {e}")
                # Optional: Decode and print partial generation up to the error point
                # print("Partial generated sequence (before error):", tokenizer.decode(generated_tokens[0].tolist()))