<a href="https://colab.research.google.com/github/drzo/ArchiveBox/blob/main/Tiny_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

# Define a tiny transformer-like model with around 1000 parameters
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5):
        super(TinyTransformer, self).__init__()
        # Embedding layer
        self.embeddings = nn.Embedding(vocab_size, hidden_size)

        # Transformer encoder layer
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_attention_heads, dim_feedforward=intermediate_size)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_hidden_layers)

        # Output layer (simple linear classifier for vocab prediction)
        self.fc_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids):
        # Embed the input tokens
        embeddings = self.embeddings(input_ids)

        # Pass through the transformer encoder
        encoder_output = self.encoder(embeddings)

        # Generate predictions
        logits = self.fc_out(encoder_output)
        return logits

# Initialize the tiny transformer model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)

# Check the number of parameters
param_count = sum(p.numel() for p in tiny_model.parameters())
param_count  # Show the number of parameters






310

In [2]:
import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig

# Define a tiny model config for ~1000 parameters using a small transformer
class TinyConfig(PretrainedConfig):
    def __init__(self, vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5, max_position_embeddings=10, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.intermediate_size = intermediate_size
        self.max_position_embeddings = max_position_embeddings

class TinyModel(PreTrainedModel):
    config_class = TinyConfig

    def __init__(self, config):
        super().__init__(config)
        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(d_model=config.hidden_size, nhead=config.num_attention_heads, dim_feedforward=config.intermediate_size)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_hidden_layers)
        self.fc_out = nn.Linear(config.hidden_size, config.vocab_size)

    def forward(self, input_ids):
        embeddings = self.embeddings(input_ids)
        encoder_output = self.encoder(embeddings)
        logits = self.fc_out(encoder_output)
        return logits

# Create a tiny model config with only ~1000 parameters
config = TinyConfig(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)
tiny_model = TinyModel(config)

# Check the number of parameters
param_count = sum(p.numel() for p in tiny_model.parameters())
param_count




310

In [5]:
import torch
import torch.nn as nn

# Define a tiny transformer-like model with ~1000 parameters
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5):
        super(TinyTransformer, self).__init__()
        # Embedding layer
        self.embeddings = nn.Embedding(vocab_size, hidden_size)

        # Transformer encoder layer with batch_first=True for better performance
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_attention_heads, dim_feedforward=intermediate_size, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_hidden_layers)

        # Output layer (simple linear classifier for vocab prediction)
        self.fc_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids):
        # Embed the input tokens
        embeddings = self.embeddings(input_ids)

        # Pass through the transformer encoder
        encoder_output = self.encoder(embeddings)

        # Generate predictions
        logits = self.fc_out(encoder_output)
        return logits

# Initialize the tiny transformer model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)

# Check the number of parameters
param_count = sum(p.numel() for p in tiny_model.parameters())
print(f"Tiny model parameter count: {param_count}")

# Test with a random input
input_ids = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
logits = tiny_model(input_ids)

print(f"Logits output: {logits}")

# Save the model to a file
torch.save(tiny_model.state_dict(), "tiny_transformer.pth")

# Download the saved model file
import shutil
shutil.move("tiny_transformer.pth", "/mnt/data/tiny_transformer.pth")


Tiny model parameter count: 310
Logits output: tensor([[[ 0.5601, -0.2599, -1.2039, -1.3598, -0.2848,  0.7035, -0.2403,
          -0.0189,  0.0961, -0.1059],
         [ 0.4575, -1.0482,  0.2084,  0.3093, -0.3500, -1.0421,  0.5726,
           0.8388,  1.3241,  0.1062],
         [ 0.5679,  0.1536, -0.6225, -0.7665,  0.4661,  0.3574, -0.0095,
          -0.1015, -1.0091,  0.4691],
         [ 0.3031, -1.0941,  0.7482,  0.5749, -0.1606, -1.7083,  0.9077,
           1.0214,  0.7131,  0.3250],
         [ 1.6425, -0.9661, -0.0105, -1.3926,  0.4635,  0.0649, -0.3751,
           0.7639, -1.0450,  0.3279]],

        [[-1.1167,  0.8018, -0.8326,  1.0902, -0.2196, -0.3823,  1.0223,
          -0.5940,  1.0485,  0.3720],
         [-0.4901,  0.0317, -0.1806, -0.1316, -0.0762, -0.9531,  0.8817,
           0.0846, -0.6760,  0.3716],
         [ 0.3822, -0.9963,  0.0968,  0.2390, -0.4120, -0.9724,  0.5654,
           0.7755,  1.3448,  0.0628],
         [ 1.7766, -1.6772,  0.8605, -0.6716,  0.3457, -0.9742,

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/tiny_transformer.pth'

In [6]:
# Download the saved model file
import shutil
!mkdir /mnt/data # Create the directory
shutil.move("tiny_transformer.pth", "/mnt/data/tiny_transformer.pth")

'/mnt/data/tiny_transformer.pth'

In [7]:
# Export the model to ONNX format
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Download the ONNX model
shutil.move("tiny_transformer.onnx", "/mnt/data/tiny_transformer.onnx")


OnnxExporterError: Module onnx is not installed!

In [8]:
!pip install onnx # Install the missing onnx module
# Export the model to ONNX format
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Download the ONNX model
shutil.move("tiny_transformer.onnx", "/mnt/data/tiny_transformer.onnx")

Collecting onnx
  Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.16.2


'/mnt/data/tiny_transformer.onnx'

In [9]:
import onnx
from onnx import numpy_helper
import toml

# Load the ONNX model
model = onnx.load('tiny_transformer.onnx')
graph = model.graph

# Extract initializers (weights)
initializers = {initializer.name: numpy_helper.to_array(initializer).tolist() for initializer in graph.initializer}

# Example GGUF-like TOML structure
gguf_data = {
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": initializers.get('embedding_weights', [])
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": initializers.get('attention_weights', [])
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": initializers.get('output_weights', [])
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_output": {"from": "attention", "to": "output"}
    }
}

# Save the GGUF schema to a TOML file (simulating GGUF)
with open("tiny_model_gguf_test.toml", "w") as f:
    toml.dump(gguf_data, f)

# Provide a download link
import shutil
shutil.move("tiny_model_gguf_test.toml", "/mnt/data/tiny_model_gguf_test.toml")


FileNotFoundError: [Errno 2] No such file or directory: 'tiny_transformer.onnx'

In [10]:
import onnx
from onnx import numpy_helper
import toml
import shutil

# Export the model to ONNX format (Ensure this step is done before loading)
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
# Assuming tiny_model is defined and available in the current scope
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Load the ONNX model
model = onnx.load('tiny_transformer.onnx')
graph = model.graph

# ... rest of your code ...

# Move the toml file after it has been generated
shutil.move("tiny_model_gguf_test.toml", "/mnt/data/tiny_model_gguf_test.toml")

FileNotFoundError: [Errno 2] No such file or directory: 'tiny_model_gguf_test.toml'

In [11]:
import onnx
from onnx import numpy_helper
import toml
import shutil

# Export the model to ONNX format (Ensure this step is done before loading)
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
# Assuming tiny_model is defined and available in the current scope
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Load the ONNX model
model = onnx.load('tiny_transformer.onnx')
graph = model.graph

# ... rest of your code ...

# Save the GGUF schema to a TOML file (simulating GGUF)
with open("tiny_model_gguf_test.toml", "w") as f:
    toml.dump(gguf_data, f)

# Move the toml file after it has been generated
shutil.move("tiny_model_gguf_test.toml", "/mnt/data/tiny_model_gguf_test.toml")

NameError: name 'gguf_data' is not defined

In [None]:
import onnx
from onnx import numpy_helper
import toml
import shutil

# Export the model to ONNX format (Ensure this step is done before loading)
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
# Assuming tiny_model is defined and available in the current scope
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Load the ONNX model
model = onnx.load('tiny_transformer.onnx')
graph = model.graph

# Extract initializers (weights)
initializers = {initializer.name: numpy_helper.to_array(initializer).tolist() for initializer in graph.initializer}

# Example GGUF-like TOML structure
gguf_data = { # This code was missing
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": initializers.get('embedding_weights', [])
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": initializers.get('attention_weights', [])
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": initializers.get('output_weights', [])
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "

In [12]:
import onnx
from onnx import numpy_helper
import toml
import shutil

# Export the model to ONNX format (Ensure this step is done before loading)
dummy_input = torch.randint(0, 10, (2, 5))  # Example input (batch_size=2, seq_len=5)
# Assuming tiny_model is defined and available in the current scope
torch.onnx.export(tiny_model, dummy_input, "tiny_transformer.onnx", input_names=['input'], output_names=['output'])

# Load the ONNX model
model = onnx.load('tiny_transformer.onnx')
graph = model.graph

# Extract initializers (weights)
initializers = {initializer.name: numpy_helper.to_array(initializer).tolist() for initializer in graph.initializer}

# Example GGUF-like TOML structure
gguf_data = { # This code was missing
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": initializers.get('embedding_weights', [])
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": initializers.get('attention_weights', [])
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": initializers.get('output_weights', [])
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_output": {"from": "attention", "to": "output"}
    }
}

# Save the GGUF schema to a TOML file (simulating GGUF)
with open("tiny_model_gguf_test.toml", "w") as f:
    toml.dump(gguf_data, f)

# Move the toml file after it has been generated
shutil.move("tiny_model_gguf_test.toml", "/mnt/data/tiny_model_gguf_test.toml")

'/mnt/data/tiny_model_gguf_test.toml'

In [13]:
import torch
import toml

# Load the saved PyTorch model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)
tiny_model.load_state_dict(torch.load('tiny_transformer.pth'))

# Extract the weights
embedding_weights = tiny_model.embeddings.weight.detach().numpy().tolist()
attention_weights = tiny_model.encoder.layers[0].self_attn.in_proj_weight.detach().numpy().tolist()  # Query, Key, Value projection weights
output_weights = tiny_model.fc_out.weight.detach().numpy().tolist()

# Update the GGUF-like structure with the extracted weights
gguf_data = {
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": embedding_weights
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": attention_weights
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": output_weights
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_output": {"from": "attention", "to": "output"}
    }
}

# Save the updated GGUF-like TOML structure with weights
with open("tiny_model_gguf_with_weights.toml", "w") as f:
    toml.dump(gguf_data, f)

# Move the updated TOML file for download
import shutil
shutil.move("tiny_model_gguf_with_weights.toml", "/mnt/data/tiny_model_gguf_with_weights.toml")


  tiny_model.load_state_dict(torch.load('tiny_transformer.pth'))


FileNotFoundError: [Errno 2] No such file or directory: 'tiny_transformer.pth'

In [15]:
import torch
import toml

# Load the saved PyTorch model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)
# Check if the file exists. If not, provide instructions or handle the exception
try:
    tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))
except FileNotFoundError:
    print("Error: 'tiny_transformer.pth' not found. Make sure the file exists in the correct directory or provide the correct path.")
    # You can add more comprehensive error handling or instructions here.

# Extract the weights
embedding_weights = tiny_model.embeddings.weight.detach().numpy().tolist()
attention_weights = tiny_model.encoder.layers[0].self_attn.in_proj_weight.detach().numpy().tolist()  # Query, Key, Value projection weights
output_weights = tiny_model.fc_out.weight.detach().numpy().tolist()

# Update the GGUF-like structure with the extracted weights
gguf_data = {
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": embedding_weights
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": attention_weights
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": output_weights
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_output": {"from": "attention", "to": "output"}
    }
}

# Save the updated GGUF-like TOML structure with weights
with open("tiny_model_gguf_with_weights.toml", "w") as f:
    toml.dump(gguf_data, f)

# Move the updated TOML file for download
import shutil
shutil.move("tiny_model_gguf_with_weights.toml", "/mnt/data/tiny_model_gguf_with_weights.toml")

  tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))


'/mnt/data/tiny_model_gguf_with_weights.toml'

In [16]:
import json

# GGUF-like JSON structure
gguf_data = {
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": embedding_weights
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": attention_weights
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": output_weights
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_output": {"from": "attention", "to": "output"}
    }
}

# Save the JSON file to a GGUF-like format for testing
with open("/mnt/data/tiny_model_gguf.json", "w") as f:
    json.dump(gguf_data, f)

# Output the confirmation of file creation
"GGUF-like JSON model saved."


'GGUF-like JSON model saved.'

In [17]:
import json
import struct

# Define the metadata for the tiny model
tiny_model_metadata = {
    'Model_Architecture': 'TinyTransformer',
    'Context_Length': 5,  # Sequence length is 5
    'Embedding_Length': 5,  # Embedding dimension is 5
    'Block_Count': 1,  # One transformer block
    'Feed_Forward_Layer_Size': 5,  # Feed-forward layer size
    'RoPE_Dimension_Count': 5,  # RoPE dimension count
    'Attention_Head_Count': 1,  # One attention head
    'Layer_Norm_Epsilon': 1e-5,  # Epsilon for layer norm
    'RoPE_Frequency_Base': 10000,  # RoPE frequency base
    'File_Type': 2  # Custom file type
}

# Step 1: Vocabulary section (token-to-ID mapping)
vocab = {
    "vocab": {str(i): f"token_{i}" for i in range(10)}
}

# Step 2: Extract model weights from the tiny model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)
tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))

embedding_weights = tiny_model.embeddings.weight.detach().numpy().tolist()
attention_weights = tiny_model.encoder.layers[0].self_attn.in_proj_weight.detach().numpy().tolist()
feedforward_weights = tiny_model.encoder.layers[0].linear1.weight.detach().numpy().tolist()
output_weights = tiny_model.fc_out.weight.detach().numpy().tolist()

# Step 3: Serialize weights into a binary format (mimicking GGUF large model storage)
with open("/mnt/data/tiny_model_weights.bin", "wb") as f:
    # Write embedding weights
    for weight in embedding_weights:
        f.write(struct.pack('f'*len(weight), *weight))
    # Write attention weights
    for weight in attention_weights:
        f.write(struct.pack('f'*len(weight), *weight))
    # Write feed-forward weights
    for weight in feedforward_weights:
        f.write(struct.pack('f'*len(weight), *weight))
    # Write output layer weights
    for weight in output_weights:
        f.write(struct.pack('f'*len(weight), *weight))

# Step 4: Define GGUF-like structure including references to the binary file
gguf_data = {
    "metadata": tiny_model_metadata,
    "vocab": vocab,
    "nodes": {
        "embedding": {
            "type": "embedding",
            "input_dim": 10,
            "output_dim": 5,
            "weights": "binary: tiny_model_weights.bin"
        },
        "attention": {
            "type": "self_attention",
            "input_dim": 5,
            "num_heads": 1,
            "weights": "binary: tiny_model_weights.bin"
        },
        "feedforward": {
            "type": "feedforward",
            "input_dim": 5,
            "output_dim": 5,
            "weights": "binary: tiny_model_weights.bin"
        },
        "output": {
            "type": "linear",
            "input_dim": 5,
            "output_dim": 10,
            "weights": "binary: tiny_model_weights.bin"
        }
    },
    "edges": {
        "embedding_to_attention": {"from": "embedding", "to": "attention"},
        "attention_to_feedforward": {"from": "attention", "to": "feedforward"},
        "feedforward_to_output": {"from": "feedforward", "to": "output"}
    }
}

# Step 5: Save the GGUF-like JSON structure
with open("/mnt/data/tiny_model_gguf_full.json", "w") as f:
    json.dump(gguf_data, f)


  tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))


In [18]:
import struct

# Step 1: Prepare Metadata, Vocabulary, and Weights
metadata = {
    'Model_Architecture': 'TinyTransformer',
    'Context_Length': 5,  # Sequence length is 5
    'Embedding_Length': 5,  # Embedding dimension is 5
    'Block_Count': 1,  # One transformer block
    'Feed_Forward_Layer_Size': 5,  # Feed-forward layer size
    'RoPE_Dimension_Count': 5,  # RoPE dimension count
    'Attention_Head_Count': 1,  # One attention head
    'Layer_Norm_Epsilon': 1e-5,  # Epsilon for layer norm
    'RoPE_Frequency_Base': 10000,  # RoPE frequency base
    'File_Type': 2  # Custom file type
}

vocab = {str(i): f"token_{i}" for i in range(10)}

# Load the model weights
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)
tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))

embedding_weights = tiny_model.embeddings.weight.detach().numpy().tolist()
attention_weights = tiny_model.encoder.layers[0].self_attn.in_proj_weight.detach().numpy().tolist()
feedforward_weights = tiny_model.encoder.layers[0].linear1.weight.detach().numpy().tolist()
output_weights = tiny_model.fc_out.weight.detach().numpy().tolist()

# Step 2: Serialize Everything into a Single GGUF File
with open("/mnt/data/tiny_model.gguf", "wb") as f:
    # Write Metadata (Header)
    for key, value in metadata.items():
        f.write(struct.pack('100s', key.encode()))  # Key
        if isinstance(value, float):
            f.write(struct.pack('f', value))  # Value as float
        elif isinstance(value, int):
            f.write(struct.pack('i', value))  # Value as int
        else:
            f.write(struct.pack('100s', str(value).encode()))  # Value as string

    # Write Vocabulary
    f.write(struct.pack('100s', "Vocabulary".encode()))
    for token_id, token in vocab.items():
        f.write(struct.pack('i', int(token_id)))  # Token ID
        f.write(struct.pack('100s', token.encode()))  # Token

    # Write Embedding Weights
    f.write(struct.pack('100s', "Embedding_Weights".encode()))
    for weight in embedding_weights:
        f.write(struct.pack('f'*len(weight), *weight))

    # Write Attention Weights
    f.write(struct.pack('100s', "Attention_Weights".encode()))
    for weight in attention_weights:
        f.write(struct.pack('f'*len(weight), *weight))

    # Write Feedforward Weights
    f.write(struct.pack('100s', "Feedforward_Weights".encode()))
    for weight in feedforward_weights:
        f.write(struct.pack('f'*len(weight), *weight))

    # Write Output Layer Weights
    f.write(struct.pack('100s', "Output_Weights".encode()))
    for weight in output_weights:
        f.write(struct.pack('f'*len(weight), *weight))


  tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))


In [19]:
# Let's attempt to parse and inspect the content of the GGUF-like file
gguf_file_path = '/mnt/data/tiny_model.gguf'

# Reading and displaying the binary content from the GGUF-like file
with open(gguf_file_path, 'rb') as f:
    gguf_content = f.read()

# Display the first 1024 bytes of the GGUF file for inspection
gguf_content[:1024]  # Show the first portion of the binary file for inspection



b"Model_Architecture\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00TinyTransformer\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Context_Length\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0

In [20]:
import struct
import json

def parse_gguf(gguf_file_path):
    with open(gguf_file_path, 'rb') as f:
        # Step 1: Initialize the dictionary to store parsed GGUF content
        gguf_data = {}

        while True:
            # Step 2: Read the next key (assuming keys are 100-byte strings in this example)
            key_bytes = f.read(100)
            if not key_bytes:
                break  # End of file
            key = key_bytes.decode('utf-8').strip('\x00')

            # Step 3: Based on the key, read the corresponding value
            if key == "Model_Architecture":
                value_bytes = f.read(100)
                value = value_bytes.decode('utf-8').strip('\x00')
            elif key in ["Context_Length", "Embedding_Length", "Block_Count", "Feed_Forward_Layer_Size", "RoPE_Dimension_Count", "Attention_Head_Count"]:
                value_bytes = f.read(4)
                value = struct.unpack('i', value_bytes)[0]  # Read as an integer
            elif key == "Layer_Norm_Epsilon":
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]  # Read as a float
            elif key == "RoPE_Frequency_Base":
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]  # Read as a float
            else:
                # For unknown keys, skip to next key (e.g., handling weights)
                continue

            # Step 4: Add the key-value pair to the dictionary
            gguf_data[key] = value

        return gguf_data

# Step 5: Parse the GGUF file and convert to JSON
gguf_file_path = '/mnt/data/tiny_model.gguf'
gguf_data = parse_gguf(gguf_file_path)

# Step 6: Serialize to JSON for readability
gguf_json = json.dumps(gguf_data, indent=4)
print(gguf_json)

# Optional: Save the JSON to a file
with open("/mnt/data/tiny_model_gguf_parsed.json", "w") as json_file:
    json_file.write(gguf_json)

# Confirm successful operation
"GGUF successfully parsed and saved as JSON."


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd0 in position 44: invalid continuation byte

In [21]:
import struct
import json

def parse_gguf(gguf_file_path):
    with open(gguf_file_path, 'rb') as f:
        # Step 1: Initialize the dictionary to store parsed GGUF content
        gguf_data = {}

        while True:
            # Step 2: Read the next key (assuming keys are 100-byte strings in this example)
            key_bytes = f.read(100)
            if not key_bytes:
                break  # End of file

            # Decode key_bytes only if it represents a string
            try:
                key = key_bytes.decode('utf-8').strip('\x00')
            except UnicodeDecodeError:
                # Handle cases where key_bytes is not a string (e.g., binary data)
                key = key_bytes

            # Step 3: Based on the key, read the corresponding value
            if key == "Model_Architecture":
                value_bytes = f.read(100)
                value = value_bytes.decode('utf-8').strip('\x00')
            elif key in ["Context_Length", "Embedding_Length", "Block_Count", "Feed_Forward_Layer_Size", "RoPE_Dimension_Count", "Attention_Head_Count"]:
                value_bytes = f.read(4)
                value = struct.unpack('i', value_bytes)[0]  # Read as an integer
            elif key == "Layer_Norm_Epsilon":
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]  # Read as a float
            elif key == "RoPE_Frequency_Base":
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]  # Read as a float
            else:
                # For unknown keys, skip to next key (e.g., handling weights)
                continue

            # Step 4: Add the key-value pair to the dictionary
            gguf_data[key] = value

        return gguf_data

# Step 5: Parse the GGUF file and convert to JSON
gguf_file_path = '/mnt/data/tiny_model.gguf'
gguf_data = parse_gguf(gguf_file_path)

# Step 6: Serialize to JSON for readability
gguf_json = json.dumps(gguf_data, indent=4)
print(gguf_json)

# Optional: Save the JSON to a file
with open("/mnt/data/tiny_model_gguf_parsed.json", "w") as json_file:
    json_file.write(gguf_json)

# Confirm successful operation
"GGUF successfully parsed and saved as JSON."

{
    "Model_Architecture": "TinyTransformer",
    "Context_Length": 5,
    "Embedding_Length": 5,
    "Block_Count": 1,
    "Feed_Forward_Layer_Size": 5,
    "RoPE_Dimension_Count": 5,
    "Attention_Head_Count": 1,
    "Layer_Norm_Epsilon": 9.999999747378752e-06,
    "RoPE_Frequency_Base": 1.401298464324817e-41
}


'GGUF successfully parsed and saved as JSON.'

In [22]:
import struct
import json

def parse_gguf(gguf_file_path):
    with open(gguf_file_path, 'rb') as f:
        # Initialize the dictionary to store parsed GGUF content
        gguf_data = {
            "metadata": {},
            "vocabulary": {},
            "weights": {
                "embedding": [],
                "attention": [],
                "feedforward": [],
                "output": []
            }
        }

        while True:
            # Read the next key (assuming keys are 100-byte strings in this example)
            key_bytes = f.read(100)
            if not key_bytes:
                break  # End of file
            key = key_bytes.decode('utf-8').strip('\x00')

            # Parse based on the key
            if key == "Model_Architecture":
                value_bytes = f.read(100)
                value = value_bytes.decode('utf-8').strip('\x00')
                gguf_data["metadata"][key] = value

            elif key in ["Context_Length", "Embedding_Length", "Block_Count", "Feed_Forward_Layer_Size", "RoPE_Dimension_Count", "Attention_Head_Count"]:
                value_bytes = f.read(4)
                value = struct.unpack('i', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key in ["Layer_Norm_Epsilon", "RoPE_Frequency_Base"]:
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key == "Vocabulary":
                # Parse vocabulary (assuming token ID and token pairs)
                vocab_size_bytes = f.read(4)
                vocab_size = struct.unpack('i', vocab_size_bytes)[0]
                for _ in range(vocab_size):
                    token_id_bytes = f.read(4)
                    token_id = struct.unpack('i', token_id_bytes)[0]
                    token_bytes = f.read(100)
                    token = token_bytes.decode('utf-8').strip('\x00')
                    gguf_data["vocabulary"][token_id] = token

            elif key == "Embedding_Weights":
                # Parse embedding weights (assuming 5x5 matrix for embedding)
                for _ in range(5):
                    weight_bytes = f.read(4 * 5)  # 5 floats for each embedding row
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["embedding"].append(weights)

            elif key == "Attention_Weights":
                # Parse attention weights
                for _ in range(5):  # Assuming attention weights are also 5x5 for simplicity
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["attention"].append(weights)

            elif key == "Feedforward_Weights":
                # Parse feed-forward layer weights
                for _ in range(5):  # Assuming 5x5 size for the feedforward layer
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["feedforward"].append(weights)

            elif key == "Output_Weights":
                # Parse output layer weights (5x10)
                for _ in range(10):  # Output layer is 5x10
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["output"].append(weights)

        return gguf_data

# Parse the GGUF file and convert it to JSON
gguf_file_path = '/mnt/data/tiny_model.gguf'
gguf_data = parse_gguf(gguf_file_path)

# Convert to JSON for readability
gguf_json = json.dumps(gguf_data, indent=4)
print(gguf_json)

# Save the JSON to a file
with open("/mnt/data/tiny_model_gguf_full_parsed.json", "w") as json_file:
    json_file.write(gguf_json)

# Confirm the successful parsing and saving of the file
"GGUF fully parsed and saved as JSON."


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xeb in position 1: invalid continuation byte

In [24]:
import struct
import json

def parse_gguf(gguf_file_path):
    with open(gguf_file_path, 'rb') as f:
        # Initialize the dictionary to store parsed GGUF content
        gguf_data = {
            "metadata": {},
            "vocabulary": {},
            "weights": {
                "embedding": [],
                "attention": [],
                "feedforward": [],
                "output": []
            }
        }

        while True:
            # Read the next key (assuming keys are 100-byte strings in this example)
            key_bytes = f.read(100)
            if not key_bytes:
                break  # End of file

            # Attempt to decode key_bytes as UTF-8, otherwise handle as bytes
            try:
                key = key_bytes.decode('utf-8').strip('\x00')
            except UnicodeDecodeError:
                key = key_bytes

            # Parse based on the key
            if key == "Model_Architecture":
                value_bytes = f.read(100)
                value = value_bytes.decode('utf-8').strip('\x00')
                gguf_data["metadata"][key] = value

            elif key in ["Context_Length", "Embedding_Length", "Block_Count", "Feed_Forward_Layer_Size", "RoPE_Dimension_Count", "Attention_Head_Count"]:
                value_bytes = f.read(4)
                value = struct.unpack('i', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key in ["Layer_Norm_Epsilon", "RoPE_Frequency_Base"]:
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key == "Vocabulary":
                # Parse vocabulary (assuming token ID and token pairs)
                vocab_size_bytes = f.read(4)
                vocab_size = struct.unpack('i', vocab_size_bytes)[0]
                for _ in range(vocab_size):
                    token_id_bytes = f.read(4)
                    token_id = struct.unpack('i', token_id_bytes)[0]
                    token_bytes = f.read(100)
                    token = token_bytes.decode('utf-8').strip('\x00')
                    gguf_data["vocabulary"][token_id] = token

            elif key == "Embedding_Weights":
                # Parse embedding weights (assuming 5x5 matrix for embedding)
                for _ in range(5):
                    weight_bytes = f.read(4 * 5)  # 5 floats for each embedding row
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["embedding"].append(weights)

            elif key == "Attention_Weights":
                # Parse attention weights
                for _ in range(5):  # Assuming attention weights are also 5x5 for simplicity
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["attention"].append(weights)

            elif key == "Feedforward_Weights":
                # Parse feed-forward layer weights
                for _ in range(5):  # Assuming 5x5 size for the feedforward layer
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
gguf_data["weights"]["feedforward"].append(weights)

            elif key == "Output_":

IndentationError: unexpected indent (<ipython-input-24-813c221a4c49>, line 78)

In [26]:
import struct
import json

def parse_gguf(gguf_file_path):
    with open(gguf_file_path, 'rb') as f:
        # Initialize the dictionary to store parsed GGUF content
        gguf_data = {
            "metadata": {},
            "vocabulary": {},
            "weights": {
                "embedding": [],
                "attention": [],
                "feedforward": [],
                "output": []
            }
        }

        while True:
            # Read the next key (assuming keys are 100-byte strings in this example)
            key_bytes = f.read(100)
            if not key_bytes:
                break  # End of file

            # Attempt to decode key_bytes as UTF-8, otherwise handle as bytes
            try:
                key = key_bytes.decode('utf-8').strip('\x00')
            except UnicodeDecodeError:
                key = key_bytes

            # Parse based on the key
            if key == "Model_Architecture":
                value_bytes = f.read(100)
                value = value_bytes.decode('utf-8').strip('\x00')
                gguf_data["metadata"][key] = value

            elif key in ["Context_Length", "Embedding_Length", "Block_Count", "Feed_Forward_Layer_Size", "RoPE_Dimension_Count", "Attention_Head_Count"]:
                value_bytes = f.read(4)
                value = struct.unpack('i', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key in ["Layer_Norm_Epsilon", "RoPE_Frequency_Base"]:
                value_bytes = f.read(4)
                value = struct.unpack('f', value_bytes)[0]
                gguf_data["metadata"][key] = value

            elif key == "Vocabulary":
                # Parse vocabulary (assuming token ID and token pairs)
                vocab_size_bytes = f.read(4)
                vocab_size = struct.unpack('i', vocab_size_bytes)[0]
                for _ in range(vocab_size):
                    token_id_bytes = f.read(4)
                    token_id = struct.unpack('i', token_id_bytes)[0]
                    token_bytes = f.read(100)
                    token = token_bytes.decode('utf-8').strip('\x00')
                    gguf_data["vocabulary"][token_id] = token

            elif key == "Embedding_Weights":
                # Parse embedding weights (assuming 5x5 matrix for embedding)
                for _ in range(5):
                    weight_bytes = f.read(4 * 5)  # 5 floats for each embedding row
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["embedding"].append(weights)

            elif key == "Attention_Weights":
                # Parse attention weights
                for _ in range(5):  # Assuming attention weights are also 5x5 for simplicity
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["attention"].append(weights)

            elif key == "Feedforward_Weights":
                # Parse feed-forward layer weights
                for _ in range(5):  # Assuming 5x5 size for the feedforward layer
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["feedforward"].append(weights)

            elif key == "Output_Weights": # Fixed indentation
                # Parse output layer weights (5x10)
                for _ in range(10):  # Output layer is 5x10
                    weight_bytes = f.read(4 * 5)
                    weights = struct.unpack('f' * 5, weight_bytes)
                    gguf_data["weights"]["output"].append(weights)

        return gguf_data

# Parse the GGUF file and convert it to JSON
gguf_file_path = '/mnt/data/tiny_model.gguf'
gguf_data = parse_gguf(gguf_file_path)

# Convert to JSON for readability
gguf_json = json.dumps(gguf_data, indent=4)
print(gguf_json)

# Save the JSON to a file
with open("/mnt/data/tiny_model_gguf_full_parsed.json", "w") as json_file:
    json_file.write(gguf_json)

# Confirm the successful parsing and saving of the file
"GGUF fully parsed and saved as JSON."


{
    "metadata": {
        "Model_Architecture": "TinyTransformer",
        "Context_Length": 5,
        "Embedding_Length": 5,
        "Block_Count": 1,
        "Feed_Forward_Layer_Size": 5,
        "RoPE_Dimension_Count": 5,
        "Attention_Head_Count": 1,
        "Layer_Norm_Epsilon": 9.999999747378752e-06,
        "RoPE_Frequency_Base": 1.401298464324817e-41
    },
    "vocabulary": {},
    "weights": {
        "embedding": [
            [
                0.0,
                0.0,
                0.0,
                0.0,
                0.0
            ],
            [
                0.0,
                0.0,
                0.0,
                0.0,
                0.0
            ],
            [
                0.0,
                0.9347047805786133,
                0.07833842188119888,
                -0.1573018878698349,
                -0.37725022435188293
            ],
            [
                -1.4977898597717285,
                -1.1747392416000366,
           

'GGUF fully parsed and saved as JSON.'

In [27]:
import torch

# Assuming we have the tiny transformer model class defined earlier
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5):
        super(TinyTransformer, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, hidden_size)

        # Transformer encoder layer with batch_first=True for better performance
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_attention_heads, dim_feedforward=intermediate_size, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_hidden_layers)

        # Output layer (simple linear classifier for vocab prediction)
        self.fc_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids):
        # Embed the input tokens
        embeddings = self.embeddings(input_ids)

        # Pass through the transformer encoder
        encoder_output = self.encoder(embeddings)

        # Generate predictions
        logits = self.fc_out(encoder_output)
        return logits

# Instantiate the tiny model
tiny_model = TinyTransformer(vocab_size=10, hidden_size=5, num_attention_heads=1, num_hidden_layers=1, intermediate_size=5)

# Load the weights into the model (assuming you have the state dict from your previously saved model)
tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))

# Simulate input tokens for a query
# Token IDs from 0 to 9 (simulating a query using the small vocabulary)
input_tokens = torch.tensor([[0, 1, 2, 3, 4]])

# Pass the input tokens through the model
output_logits = tiny_model(input_tokens)

# Convert output logits to probabilities (softmax)
output_probs = torch.softmax(output_logits, dim=-1)

# Get the predicted tokens
predicted_tokens = torch.argmax(output_probs, dim=-1)

# Convert predicted token IDs back to strings (for example, map 0 -> "token_0")
vocab = {i: f"token_{i}" for i in range(10)}
predicted_token_strings = [[vocab[token.item()] for token in sequence] for sequence in predicted_tokens]

predicted_token_strings


  tiny_model.load_state_dict(torch.load('/mnt/data/tiny_transformer.pth'))


[['token_0', 'token_6', 'token_8', 'token_0', 'token_8']]

In [28]:
# Simulate the input for the word "hi"
# Mapping "h" -> token_3, "i" -> token_1
input_tokens_hi = torch.tensor([[3, 1]])

# Pass the input tokens through the model
output_logits_hi = tiny_model(input_tokens_hi)

# Convert output logits to probabilities (softmax)
output_probs_hi = torch.softmax(output_logits_hi, dim=-1)

# Get the predicted tokens
predicted_tokens_hi = torch.argmax(output_probs_hi, dim=-1)

# Convert predicted token IDs back to strings
predicted_token_strings_hi = [[vocab[token.item()] for token in sequence] for sequence in predicted_tokens_hi]

predicted_token_strings_hi  # Display the predicted token sequence for "hi"


[['token_0', 'token_6']]