In [1]:
# NOTE: This example no longer works as of v6; use v5 or earlier to run it.
from IPython.display import FileLink, display
import torch.nn as torch_nn
from torch.utils.data import DataLoader
import numpy as np
import os, subprocess, time, json, torch

def download(download_file_name):
    """Display a download link for ``<download_file_name>.uai``.

    Side effect: the process working directory is switched to
    ``/kaggle/working/`` before the link is created, so the link path is
    relative to that directory.

    Args:
        download_file_name: File name without the ``.uai`` extension.
    """
    os.chdir("/kaggle/working/")
    link = FileLink(f"{download_file_name}.uai")
    display(link)

class carbono:
    """Small feed-forward network builder/trainer modelled on carbono.js.

    Typical use: declare the architecture with :meth:`layer`, fit it with
    :meth:`train`, query it with :meth:`predict`, and export it with
    :meth:`save` (JSON ``.uai`` file) or :meth:`save_pytorch` (``state_dict``).
    """

    # Activation names that map to a PyTorch module. Any other name
    # (including 'softmax') adds no module to the network.
    _ACTIVATIONS = {
        'tanh': torch_nn.Tanh,
        'relu': torch_nn.ReLU,
        'sigmoid': torch_nn.Sigmoid,
    }

    def __init__(self, debug=True):
        """Create an empty network.

        Args:
            debug: When true, print layer additions and training progress.
        """
        # Use the GPU when PyTorch can see one, otherwise the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.layers = []    # layer specs: {'input_size', 'output_size', 'activation'}
        self.model = None   # torch_nn.Sequential, (re)built by _build_model()
        self.debug = debug
        self.labels = None  # class labels; set by train() when outputs are strings

    def save_pytorch(self, filename='model'):
        """Save the model weights (``state_dict``) to ``<filename>.pt``.

        Raises:
            ValueError: If no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before saving weights")
        torch.save(self.model.state_dict(), filename + '.pt')

    def load_pytorch(self, filename='model.pt'):
        """Load weights previously written by :meth:`save_pytorch`.

        Args:
            filename: Full path of the weights file, including the extension.

        Raises:
            ValueError: If no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before loading weights")
        # map_location lets weights saved on a GPU be restored on a CPU-only host.
        state = torch.load(filename, map_location=self.device)
        self.model.load_state_dict(state)

    def layer(self, input_size, output_size, activation='tanh'):
        """Add a layer to the network, similar to carbono.js.

        Args:
            input_size: Number of input features of the layer.
            output_size: Number of output features of the layer.
            activation: Activation name recorded for the layer.

        Raises:
            ValueError: If ``input_size`` differs from the previous layer's
                ``output_size``. The rejected layer is not recorded.
        """
        # Validate BEFORE recording the layer so a rejected call leaves the
        # architecture untouched.
        if self.layers:
            previous_output = self.layers[-1]['output_size']
            if previous_output != input_size:
                raise ValueError(f"Layer input size {input_size} doesn't match previous layer output size {previous_output}")

        self.layers.append({
            'input_size': input_size,
            'output_size': output_size,
            'activation': activation
        })

        # Build/rebuild the model whenever a layer is added.
        self._build_model()

        if self.debug:
            print(f"Added layer: {input_size} → {output_size} with {activation} activation")

    def _build_model(self):
        """Build the ``torch_nn.Sequential`` model from ``self.layers``."""
        if not self.layers:
            return

        modules = []
        last_index = len(self.layers) - 1
        for index, spec in enumerate(self.layers):
            modules.append(torch_nn.Linear(spec['input_size'], spec['output_size']))

            # The output layer is left linear: CrossEntropyLoss expects raw
            # scores and applies log-softmax itself. Activation names that
            # are not in _ACTIVATIONS add no module.
            activation_cls = self._ACTIVATIONS.get(spec['activation'])
            if index < last_index and activation_cls is not None:
                modules.append(activation_cls())

        self.model = torch_nn.Sequential(*modules).to(self.device)

    def train(self, train_set, options=None):
        """Train the network and return a summary dict.

        Args:
            train_set: Sequence of ``{'input': [...], 'output': ...}`` items.
                String outputs are treated as class labels; integer outputs
                are treated as class indices under cross-entropy; anything
                else is converted to a float32 tensor.
            options: Optional dict with keys ``epochs`` (200),
                ``learningRate`` (0.212), ``printEveryEpochs`` (10),
                ``earlyStopThreshold`` (1e-6), ``optimizer`` ('adam', any
                other value selects SGD), ``lossFunction`` ('cross-entropy'
                or 'mse') and ``batchSize`` (32).

        Returns:
            Dict with the final average loss, parameter count, training time
            in milliseconds, epochs run, learning rate and the layer specs.
            The same dict is stored on ``self.model.details``.

        Raises:
            ValueError: If the model has no layers, ``train_set`` is empty,
                ``epochs`` is below 1, or ``lossFunction`` is not supported.
        """
        if options is None:
            options = {}
        if self.model is None:
            raise ValueError("Model architecture must be defined before training")
        if len(train_set) == 0:
            raise ValueError("train_set must contain at least one example")

        # Default options similar to carbono.js
        epochs = options.get('epochs', 200)
        learning_rate = options.get('learningRate', 0.212)
        print_every_epochs = options.get('printEveryEpochs', 10)
        early_stop_threshold = options.get('earlyStopThreshold', 1e-6)
        optimizer_type = options.get('optimizer', 'adam')
        loss_function = options.get('lossFunction', 'cross-entropy')
        batch_size = options.get('batchSize', 32)

        if epochs < 1:
            raise ValueError(f"epochs must be at least 1, got {epochs}")

        if loss_function == 'mse':
            criterion = torch_nn.MSELoss()
        elif loss_function == 'cross-entropy':
            criterion = torch_nn.CrossEntropyLoss()
        else:
            raise ValueError(
                f"Unsupported lossFunction {loss_function!r}; expected 'mse' or 'cross-entropy'"
            )

        # Convert data to PyTorch tensors.
        x_data = torch.tensor(
            [item['input'] for item in train_set], dtype=torch.float32
        ).to(self.device)

        first_output = train_set[0]['output']
        if isinstance(first_output, str):
            # dict.fromkeys keeps first-seen order, so the label -> index
            # mapping is the same on every run (a set would not guarantee it).
            self.labels = list(dict.fromkeys(item['output'] for item in train_set))
            label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
            y_data = torch.tensor(
                [label_to_idx[item['output']] for item in train_set], dtype=torch.long
            ).to(self.device)
        else:
            # Scalar integer targets under cross-entropy are class indices and
            # must be int64; everything else stays float32.
            integer_classes = (
                loss_function == 'cross-entropy'
                and isinstance(first_output, int)
                and not isinstance(first_output, bool)
            )
            target_dtype = torch.long if integer_classes else torch.float32
            y_data = torch.tensor(
                [item['output'] for item in train_set], dtype=target_dtype
            ).to(self.device)

        dataset = torch.utils.data.TensorDataset(x_data, y_data)
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        if optimizer_type == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        else:
            optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

        start_time = time.time()

        for epoch in range(epochs):
            total_loss = 0
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

            # Mean of the per-batch losses for this epoch.
            avg_loss = total_loss / len(train_loader)

            # A falsy printEveryEpochs (0/None) disables progress output
            # instead of dividing by zero.
            if self.debug and print_every_epochs and (epoch + 1) % print_every_epochs == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.6f}')

            if avg_loss < early_stop_threshold:
                if self.debug:
                    print(f'Early stopping at epoch {epoch+1} with loss: {avg_loss:.6f}')
                break

        training_time = (time.time() - start_time) * 1000  # milliseconds

        total_params = sum(p.numel() for p in self.model.parameters())
        self.model.details = {
            'loss': avg_loss,
            'parameters': total_params,
            'training': {
                'time': training_time,
                'epochs': epoch + 1,
                'learningRate': learning_rate
            },
            'layers': self.layers
        }
        return self.model.details

    def predict(self, input_data, tags=True):
        """Make predictions similar to carbono.js.

        Only the first row of the network output is returned, so pass a
        single sample (a 1-D input is treated as a batch of one).

        When the final layer was declared with 'softmax', softmax is applied
        here (the model itself emits raw scores), so the returned values are
        probabilities that sum to 1.

        Args:
            input_data: Input features for one sample.
            tags: When true and labels are known, return labelled entries.

        Returns:
            A list of ``{'label', 'probability'}`` dicts when ``tags`` is true
            and labels are known, otherwise a plain list of output values.

        Raises:
            ValueError: If no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before predicting")

        with torch.no_grad():
            input_tensor = torch.tensor(input_data, dtype=torch.float32).to(self.device)
            if input_tensor.dim() == 1:
                input_tensor = input_tensor.unsqueeze(0)

            output = self.model(input_tensor)
            if self.layers[-1]['activation'] == 'softmax':
                output = torch.softmax(output, dim=-1)
            first_row = output[0].cpu().tolist()

        if self.labels and tags:
            # Return labelled probabilities like carbono.js
            return [
                {
                    'label': self.labels[i],
                    'probability': float(prob)
                }
                for i, prob in enumerate(first_row)
            ]

        return first_row

    def save(self, filename='model'):
        """Export the model in carbono.js format to ``<filename>.uai`` (JSON).

        The file holds the weights and biases of every linear layer, the
        layer specs, the labels and the last training summary (``None`` if
        the model has not been trained since it was last rebuilt).

        Raises:
            ValueError: If no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before saving")

        carbono_model = {
            'weights': [],
            'biases': [],
            'layers': self.layers,
            'labels': self.labels,
            'details': getattr(self.model, 'details', None)
        }

        for module in self.model:
            if isinstance(module, torch_nn.Linear):
                carbono_model['weights'].append(module.weight.detach().cpu().tolist())
                carbono_model['biases'].append(module.bias.detach().cpu().tolist())

        with open(filename + '.uai', 'w') as f:
            json.dump(carbono_model, f)

In [None]:
# Toy classification task: 10 one-hot inputs, one emoji class per input position
emoji_labels = [
    '😀',  # Smiling Face
    '😊',  # Smiling Face with Smiling Eyes
    '😂',  # Face with Tears of Joy
    '😍',  # Smiling Face with Heart-Eyes
    '😎',  # Smiling Face with Sunglasses
    '😢',  # Crying Face
    '😡',  # Pouting Face
    '😴',  # Sleeping Face
    '🤔',  # Thinking Face
    '🤢',  # Nauseated Face
]
train_set = [
    {
        'input': [int(col == row) for col in range(len(emoji_labels))],
        'output': emoji,
    }
    for row, emoji in enumerate(emoji_labels)
]

# Create the model
nn = carbono(debug=True)

# Architecture: 10 inputs -> 128 -> 512 -> 128 -> 10 outputs (one per emoji class)
layer_specs = [
    (10, 128, 'sigmoid'),
    (128, 512, 'relu'),
    (512, 128, 'relu'),
    (128, 10, 'softmax'),
]
for in_features, out_features, activation in layer_specs:
    nn.layer(in_features, out_features, activation)

# Train the model with adjusted parameters
train_options = {
    'epochs': 1000,
    'learningRate': 0.0002,
    'printEveryEpochs': 100,
    'optimizer': 'adam',
    'lossFunction': 'cross-entropy'
}
training_summary = nn.train(train_set, train_options)

# Export the model to model.uai and show a download link
model_name = "model"
nn.save(model_name)
download(model_name)
# Recorded timings from earlier runs:
# gpu time: 4317.9385 ~4 seconds
# cpu time: 144691.6623 ~2.5 minutes

In [2]:
# Vocabulary: token -> id, in id order (0 = padding, 1 = end of string)
vocab = {
    token: token_id
    for token_id, token in enumerate([
        '<PAD>',
        '<EOS>',
        'hello',
        'world',
        'how',
        'are',
        'you',
        'today',
        'nice',
        'weather',
    ])
}
# Reverse vocabulary for id -> token conversion
reverse_vocab = dict(zip(vocab.values(), vocab.keys()))

# Next-token targets, one per one-hot input position (position i is hot in example i).
# NOTE(review): the targets use 'EOS' while vocab spells the token '<EOS>' — confirm which is intended.
next_tokens = [
    'world', 'EOS',             # "hello world"
    'are', 'you', 'EOS',        # "how are you"
    'weather', 'today', 'EOS',  # "nice weather today"
]
train_set = [
    {'input': [int(col == row) for col in range(10)], 'output': target}
    for row, target in enumerate(next_tokens)
]

# Create the model: 10 inputs -> 128 hidden -> 6 outputs
nn = carbono(debug=True)
nn.layer(10, 128, 'tanh')
nn.layer(128, 6, 'softmax')  # Output layer

# Train the model with adjusted parameters
train_options = {
    'epochs': 40,
    'learningRate': 0.001,
    'printEveryEpochs': 10,
    'optimizer': 'adam',
    'lossFunction': 'cross-entropy',
    'earlyStopThreshold': 1e-9
}
training_summary = nn.train(train_set, train_options)

# Export the model and show a download link
model_name = "labels_test1"
nn.save(model_name)
download(model_name)

Added layer: 10 → 128 with tanh activation
Added layer: 128 → 6 with softmax activation
Epoch [10/40], Loss: 1.529232
Epoch [20/40], Loss: 1.325388
Epoch [30/40], Loss: 1.138497
Epoch [40/40], Loss: 0.964135


In [None]:
# WARNING: deletes every non-hidden file in the current working directory.
# NOTE(review): presumably meant to clear previously exported model files — confirm the cwd before running.
rm ./*

In [None]:
# Sample text with longer sequences
text = """
the quick brown fox jumps over the lazy dog
she sells seashells by the seashore
how much wood would a woodchuck chuck if a woodchuck could chuck wood
to be or not to be that is the question
all that glitters is not gold
a journey of a thousand miles begins with a single step
"""

# Preprocess the text: one sentence per non-blank line, split on whitespace
sentences = [line.split() for line in text.splitlines() if line.strip()]

# Build the vocabulary: special tokens first, then the words in sorted order
vocab = {'<PAD>': 0, '<EOS>': 1}
word_set = {word for sentence in sentences for word in sentence}
# start=2 because ids 0 and 1 are taken by PAD and EOS
vocab.update(
    (word, token_id) for token_id, word in enumerate(sorted(word_set), start=2)
)

# Assign each token a value strictly between 0 and 1.
# The loop variable is named token_id (not id) so the builtin id() is not
# shadowed for the rest of the notebook.
num_tokens = len(vocab)
token_values = {
    token: (token_id + 1) / (num_tokens + 1) for token, token_id in vocab.items()
}

# Append '<EOS>' to each sentence
sentences = [sentence + ['<EOS>'] for sentence in sentences]

# The context window is the longest sentence, counting its <EOS>
context_window = max(len(sentence) for sentence in sentences)

# Generate training examples: every proper prefix of a sentence predicts the
# token that follows it; the prefix is right-padded to the context window.
train_set = []
for sentence in sentences:
    for prefix_len in range(1, len(sentence)):
        input_seq = sentence[:prefix_len] + ['<PAD>'] * (context_window - prefix_len)
        train_set.append({
            'input': [token_values[token] for token in input_seq],
            'output': sentence[prefix_len],
        })

# Print vocabulary
print("Vocabulary:")
for token, token_id in vocab.items():
    print(f"{token}: {token_id} (value: {token_values[token]:.3f})")

print("\nSample of training set:")
# Print first 10 examples
for example in train_set[:10]:
    print(f"\nInput sequence: {example['input']}")
    print(f"Target token: {example['output']}")

print(f"\nTotal number of training examples: {len(train_set)}")
print(f"Context window size: {context_window}")
print(f"Vocabulary size: {len(vocab)}")

In [None]:
import numpy as np
from collections import defaultdict

class TextTokenizer:
    """Whitespace tokenizer that maps words to integer ids.

    Ids 0 and 1 are reserved for ``<PAD>`` and ``<UNK>``; new words receive
    consecutive ids in the order :meth:`fit` first sees them.
    """

    def __init__(self, max_length=50):
        """Start with only the two special tokens.

        Args:
            max_length: Fixed length of every sequence returned by encode().
        """
        self.max_length = max_length
        self.vocab = {"<PAD>": 0, "<UNK>": 1}
        self.reverse_vocab = {index: token for token, index in self.vocab.items()}
        self.vocab_size = len(self.vocab)

    def fit(self, texts):
        """Add every unseen whitespace-separated token in ``texts`` to the vocabulary."""
        for text in texts:
            for token in text.split():
                if token in self.vocab:
                    continue
                new_id = self.vocab_size
                self.vocab[token] = new_id
                self.reverse_vocab[new_id] = token
                self.vocab_size = new_id + 1

    def encode(self, text):
        """Return exactly ``max_length`` token ids for ``text``.

        Unknown tokens map to the ``<UNK>`` id; short inputs are right-padded
        with the ``<PAD>`` id and long inputs are truncated.
        """
        unknown_id = self.vocab["<UNK>"]
        pad_id = self.vocab["<PAD>"]
        ids = [self.vocab.get(token, unknown_id) for token in text.split()]
        ids = ids[:self.max_length]
        ids.extend([pad_id] * (self.max_length - len(ids)))
        return ids

    def normalize_tokens(self, token_ids):
        """Scale token ids by dividing each one by the current vocabulary size."""
        size = self.vocab_size
        return [token_id / size for token_id in token_ids]

# Example corpus: one short sentence per line
texts = """\
the cat sat on the mat
dogs are friendly pets
birds fly in the sky
""".splitlines()

# Build next-token training examples from raw texts
def create_training_set(texts, max_length=50):
    """Fit a TextTokenizer on ``texts`` and build next-token examples.

    For every text and every position ``p`` from 1 to ``len(encoded) - 1``,
    one example is produced: the input is the first ``p`` normalized token
    values right-padded with zeros to ``max_length``, and the output is the
    raw token id at position ``p``.

    Because encode() pads each sequence to ``max_length``, positions past the
    end of a sentence also produce examples, whose target is the ``<PAD>`` id.

    Args:
        texts: Iterable of strings to tokenize.
        max_length: Sequence length passed to the tokenizer.

    Returns:
        Tuple ``(train_set, tokenizer)``.
    """
    tokenizer = TextTokenizer(max_length=max_length)
    tokenizer.fit(texts)

    examples = []
    for text in texts:
        token_ids = tokenizer.encode(text)
        scaled = tokenizer.normalize_tokens(token_ids)

        for next_pos in range(1, len(token_ids)):
            zero_padding = [0] * (max_length - next_pos)
            examples.append({
                'input': scaled[:next_pos] + zero_padding,
                'output': token_ids[next_pos]
            })

    return examples, tokenizer

# Build the dataset and the fitted tokenizer
train_set, tokenizer = create_training_set(texts)

# Model architecture: input width 50 matches create_training_set's default
# max_length; output width is the vocabulary size.
nn = carbono(debug=True)
layer_specs = [
    (50, 256, 'relu'),
    (256, 512, 'relu'),
    (512, 256, 'relu'),
    (256, tokenizer.vocab_size, 'softmax'),
]
for in_features, out_features, activation in layer_specs:
    nn.layer(in_features, out_features, activation)

# Train the model
train_options = {
    'epochs': 1000,
    'learningRate': 0.0002,
    'printEveryEpochs': 100,
    'optimizer': 'adam',
    'lossFunction': 'cross-entropy'
}
training_summary = nn.train(train_set, train_options)

# Save model to text_model.uai
nn.save("text_model")

In [16]:
# Model architecture: 10 inputs -> 256 hidden -> 2 outputs
nn = carbono(debug=True)
for in_features, out_features, activation in [(10, 256, 'tanh'), (256, 2, 'sigmoid')]:
    nn.layer(in_features, out_features, activation)

# Four one-hot inputs (position i is hot in example i), each paired with a
# two-element numeric target vector.
targets = [[0, 1], [0, 1], [1, 0], [1, 0]]
train_set = [
    {'input': [int(col == row) for col in range(10)], 'output': target}
    for row, target in enumerate(targets)
]

# Train the model ('lossFunction' is omitted, so train() uses its default)
train_options = {
    'epochs': 1000,
    'learningRate': 0.001,
    'printEveryEpochs': 100,
    'optimizer': 'adam'
}
training_summary = nn.train(train_set, train_options)

# Save the model to numbers2.uai and show a download link
model_name = "numbers2"
nn.save(model_name)
download(model_name)

Added layer: 10 → 256 with tanh activation
Added layer: 256 → 2 with sigmoid activation
Epoch [100/1000], Loss: 0.019082
Epoch [200/1000], Loss: 0.004546
Epoch [300/1000], Loss: 0.002052
Epoch [400/1000], Loss: 0.001175
Epoch [500/1000], Loss: 0.000763
Epoch [600/1000], Loss: 0.000536
Epoch [700/1000], Loss: 0.000397
Epoch [800/1000], Loss: 0.000306
Epoch [900/1000], Loss: 0.000243
Epoch [1000/1000], Loss: 0.000197
