# MICRO-ASXR MX2LM MICRONAUT AI - Colab Training

Train ASX language models on Google Colab with GPU support.

## Features
- GPU-accelerated training
- N-gram and Transformer models
- Export models for deployment
- Integration with ASXR Runtime Server

## Setup

Install the dependencies, confirm that a GPU is available, and create the working directories. Cloning the repository is optional.

In [None]:
# Install the training dependencies; "!" runs pip in a shell and -q keeps its output quiet.
!pip install torch transformers datasets accelerate tqdm sentencepiece -q
# NOTE: this message is printed unconditionally; pip's exit status is not checked.
print("✓ Dependencies installed")

In [None]:
import torch

# Report the PyTorch build and CUDA visibility, one item per output line.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

# Device details are only queried when a CUDA device is actually present.
if torch.cuda.is_available():
    print(
        f"GPU: {torch.cuda.get_device_name(0)}",
        f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB",
        sep="\n",
    )

In [None]:
# Clone repository (optional - if working with remote repo)
# !git clone https://github.com/cannaseedus-bot/MICRO-ASXR.git
# %cd MICRO-ASXR

# Or upload your files
from google.colab import files
import os

# Create the working directories with the standard library instead of a shell
# escape: a failure now raises instead of being followed by a misleading
# success message, and exist_ok=True keeps the cell safe to re-run.
for directory in ("brain", "models", "datasets"):
    os.makedirs(directory, exist_ok=True)
print("✓ Directories created")

## Upload Training Scripts

Write the ASX training script to `asx_train.py` with the cell below, or upload your own copy from your repository.

In [None]:
%%writefile asx_train.py
#!/usr/bin/env python3
"""
ASX Training Script for Colab
"""

import json
import sys
import os
import argparse

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer
from tqdm import tqdm

class ASXDataset(Dataset):
    """Map-style dataset that tokenizes one text example per item.

    Args:
        texts: Sequence of raw text strings, one training example each.
        tokenizer: Callable invoked as ``tokenizer(text, max_length=...,
            padding='max_length', truncation=True, return_tensors='pt')``;
            its result is indexed by ``'input_ids'`` and ``'attention_mask'``.
        max_length: Length passed to the tokenizer for padding/truncation.
    """

    def __init__(self, texts, tokenizer, max_length=512):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of text examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return its ids and attention mask.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` tensors with the
            leading batch dimension removed.
        """
        encoding = self.tokenizer(
            self.texts[idx],
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # The tokenizer is expected to return a leading batch dimension of 1.
        # squeeze(0) removes only that dimension; a bare squeeze() would also
        # collapse the sequence dimension when max_length == 1 and break
        # batching in the DataLoader.
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0)
        }

def train_transformer(dataset_path, output_dir='models/asx-gpt2', epochs=3, batch_size=4):
    """Train a small GPT-2 style language model from scratch on a text file.

    Each non-blank line of ``dataset_path`` is one training example. The model
    is a randomly initialised ``GPT2LMHeadModel`` (4 layers, 4 heads, 256-dim
    embeddings, 512 positions) that uses the pretrained ``gpt2`` tokenizer.

    Args:
        dataset_path: Path to a UTF-8 text file, one example per line.
        output_dir: Directory the model and tokenizer are saved to.
        epochs: Number of passes over the dataset.
        batch_size: Number of examples per optimisation step.

    Raises:
        ValueError: If the dataset file contains no non-blank lines.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    with open(dataset_path, 'r', encoding='utf-8') as f:
        texts = [line.strip() for line in f if line.strip()]

    # Fail early with a clear message instead of erroring later inside the
    # DataLoader or when averaging the loss over zero batches.
    if not texts:
        raise ValueError(f"No training examples found in {dataset_path}")

    print(f"Loaded {len(texts)} examples")

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    # GPT-2 ships without a padding token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

    config = GPT2Config(
        vocab_size=tokenizer.vocab_size,
        n_positions=512,
        n_embd=256,
        n_layer=4,
        n_head=4
    )

    model = GPT2LMHeadModel(config)
    model.to(device)

    dataset = ASXDataset(texts, tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Exclude padding from the loss. Every example is padded to the
            # full context length, so without this the model mostly learns to
            # predict the pad/EOS token. -100 is the label value the model's
            # loss ignores.
            labels = input_ids.masked_fill(attention_mask == 0, -100)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            epoch_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            progress_bar.set_postfix({'loss': loss.item()})

        print(f"Epoch {epoch+1} - Avg Loss: {epoch_loss / len(dataloader):.4f}")

    os.makedirs(output_dir, exist_ok=True)
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"✓ Model saved to {output_dir}")

if __name__ == '__main__':
    # Command-line entry point: one positional dataset path plus optional
    # overrides for the output directory, epoch count and batch size.
    cli = argparse.ArgumentParser()
    cli.add_argument('dataset', type=str)
    cli.add_argument('--output', type=str, default='models/asx-gpt2')
    cli.add_argument('--epochs', type=int, default=3)
    cli.add_argument('--batch-size', type=int, default=4)
    opts = cli.parse_args()

    train_transformer(
        dataset_path=opts.dataset,
        output_dir=opts.output,
        epochs=opts.epochs,
        batch_size=opts.batch_size,
    )

## Prepare Dataset

Create or upload your training dataset.

In [None]:
# Option 1: Create sample dataset
import os

# Ten short sentences, one training example per line.
sample_data = """
The quick brown fox jumps over the lazy dog
Machine learning is transforming artificial intelligence
Natural language processing enables computers to understand text
Deep learning models learn from large amounts of data
Neural networks are inspired by the human brain
Training AI models requires significant computational resources
GPUs accelerate machine learning training processes
Transfer learning allows models to leverage pre-trained knowledge
Fine-tuning adapts models to specific tasks and domains
Language models predict the next word in a sequence
""".strip()

# Make sure the target directory exists so this cell also works on its own.
os.makedirs('datasets', exist_ok=True)

# Write explicitly as UTF-8: asx_train.py reads the dataset with encoding='utf-8'.
with open('datasets/sample.txt', 'w', encoding='utf-8') as f:
    f.write(sample_data)

print("✓ Sample dataset created")
print(f"Lines: {len(sample_data.splitlines())}")

In [None]:
# Option 2: Upload your own dataset
# uploaded = files.upload()
# for filename in uploaded.keys():
#     !mv {filename} datasets/

## Train N-gram Model

Train a simple n-gram model (CPU-friendly, no GPU needed).

In [None]:
import json
from collections import defaultdict

def train_ngram(dataset_path, output_dir='brain'):
    """Count bigram and trigram continuations in a text file and save them as JSON.

    Each line is lower-cased and split on whitespace. Two tables are written:
    ``bigrams.json`` maps a token to ``{next_token: count}`` and
    ``trigrams.json`` maps ``"token1 token2"`` to ``{next_token: count}``.
    N-grams never span line boundaries.

    Args:
        dataset_path: Path to a UTF-8 text file, one example per line.
        output_dir: Directory the two JSON files are written to; created if
            it does not exist.
    """
    # Local import so the function does not depend on an earlier notebook
    # cell having imported os.
    import os

    bigrams = defaultdict(lambda: defaultdict(int))
    trigrams = defaultdict(lambda: defaultdict(int))

    # Read as UTF-8 explicitly so results do not depend on the platform's
    # default encoding; this matches how asx_train.py reads the same dataset.
    with open(dataset_path, 'r', encoding='utf-8') as f:
        for line in f:
            tokens = line.lower().split()

            for first, second in zip(tokens, tokens[1:]):
                bigrams[first][second] += 1

            for first, second, third in zip(tokens, tokens[1:], tokens[2:]):
                trigrams[f"{first} {second}"][third] += 1

    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, 'bigrams.json'), 'w', encoding='utf-8') as f:
        json.dump(dict(bigrams), f, indent=2)

    with open(os.path.join(output_dir, 'trigrams.json'), 'w', encoding='utf-8') as f:
        json.dump(dict(trigrams), f, indent=2)

    print("✓ N-gram model trained")
    print(f"  Bigrams: {len(bigrams)} keys")
    print(f"  Trigrams: {len(trigrams)} keys")

train_ngram('datasets/sample.txt')

## Train Transformer Model (GPU)

Train a small GPT-2 style transformer model using GPU.

In [None]:
# Run the training script in a subprocess on the sample dataset:
# 5 epochs, batch size 2, saving the result to models/asx-gpt2.
!python asx_train.py datasets/sample.txt --output models/asx-gpt2 --epochs 5 --batch-size 2

## Test Inference

Test the trained models with predictions.

In [None]:
# Test N-gram
def predict_ngram(text, max_results=5, model_dir='brain'):
    """Suggest next words for ``text`` from the saved n-gram tables.

    The last two tokens of the lower-cased text are looked up in the trigram
    table first. If that yields nothing, the last token is looked up in the
    bigram table.

    Args:
        text: Input text; split on whitespace after lower-casing.
        max_results: Maximum number of predictions to return.
        model_dir: Directory containing ``bigrams.json`` and ``trigrams.json``.

    Returns:
        List of ``(word, count)`` tuples ordered by descending count; empty
        when no table has an entry for the input.
    """
    # Local imports so the function does not depend on earlier notebook cells.
    import json
    import os

    def load_table(filename):
        # Read one JSON table from the model directory as UTF-8.
        with open(os.path.join(model_dir, filename), 'r', encoding='utf-8') as f:
            return json.load(f)

    def top(candidates):
        # Highest-count continuations first, truncated to max_results.
        return sorted(candidates.items(), key=lambda item: item[1], reverse=True)[:max_results]

    bigrams = load_table('bigrams.json')
    trigrams = load_table('trigrams.json')

    tokens = text.lower().split()
    predictions = []

    if len(tokens) >= 2:
        predictions = top(trigrams.get(f"{tokens[-2]} {tokens[-1]}", {}))

    # Back off to the bigram table when the trigram lookup produced nothing.
    if not predictions and tokens:
        predictions = top(bigrams.get(tokens[-1], {}))

    return predictions

test_text = "machine learning"
print(f"Input: '{test_text}'")
print(f"Predictions: {predict_ngram(test_text)}")

In [None]:
# Test Transformer
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Reload the tokenizer and weights saved by the training run, switch the model
# to evaluation mode and move it to the GPU when one is available.
tokenizer = GPT2Tokenizer.from_pretrained('models/asx-gpt2')
model = (
    GPT2LMHeadModel.from_pretrained('models/asx-gpt2')
    .eval()
    .to('cuda' if torch.cuda.is_available() else 'cpu')
)

def generate_text(prompt, max_length=30):
    """Sample a continuation of ``prompt`` from the loaded transformer model.

    Uses the ``tokenizer`` and ``model`` objects defined at cell level.

    Args:
        prompt: Text to condition the generation on.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded text of the single generated sequence, with special
        tokens removed.
    """
    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which is passed to generate() explicitly below.
    encoded = tokenizer(prompt, return_tensors='pt').to(model.device)
    with torch.no_grad():
        output = model.generate(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.8,
            do_sample=True,
            # Set the padding token explicitly (training reused EOS as the
            # pad token) instead of relying on generate()'s fallback.
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

prompt = "Machine learning"
print(f"Prompt: {prompt}")
print(f"Generated: {generate_text(prompt)}")

## Export Models

Download trained models for deployment.

In [None]:
# Zip brain directory
# -r recurses into the directory; the message below is printed regardless of
# zip's exit status.
!zip -r brain.zip brain/
print("✓ Brain zipped")

# Zip transformer model
# Paths inside the archive keep the models/asx-gpt2/ prefix.
!zip -r asx-gpt2.zip models/asx-gpt2/
print("✓ Transformer model zipped")

# Download
# `files` is the google.colab helper imported in the setup cell.
files.download('brain.zip')
files.download('asx-gpt2.zip')

## Deploy to ASXR Server

Instructions for deploying models to your ASXR runtime server.

```bash
# On your server:

# 1. Extract brain models
unzip brain.zip -d /path/to/MICRO-ASXR/

# 2. Extract transformer models
unzip asx-gpt2.zip -d /path/to/MICRO-ASXR/

# 3. Start ASXR server
npx @asxr/runtime-server start

# 4. Test API
curl -X POST http://localhost:3000/api/predict \
  -H "Content-Type: application/json" \
  -d '{"text": "machine learning"}'
```