In [3]:
from IPython.display import FileLink, display
import torch.nn as torch_nn
from torch.utils.data import DataLoader
import numpy as np
import os, subprocess, time, json, torch
import struct

def download(download_file_name):
    """Show a notebook download link for ``<download_file_name>.uai``.

    The process working directory is switched to ``/kaggle/working/`` first,
    and the link is created for the file name relative to that directory.
    """
    os.chdir("/kaggle/working/")
    link = FileLink(f"{download_file_name}.uai")
    display(link)

class carbono:
    def __init__(self, debug=True):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.layers = []
        self.model = None
        self.debug = debug
        self.labels = None
        self.details = {}

    def save_pytorch(self, filename='model'):
        filename = filename + '.pt'
        """Save model in PyTorch format"""
        torch.save(self.model.state_dict(), filename)
    
    def load_pytorch(self, filename='model.pt'):
        """Load model from PyTorch format"""
        if self.model is None:
            raise ValueError("Model architecture must be defined before loading weights")
        self.model.load_state_dict(torch.load(filename))
    
    def layer(self, input_size, output_size, activation='tanh'):
        """Add a layer to the network, similar to carbono.js"""
        self.layers.append({
            'input_size': input_size,
            'output_size': output_size,
            'activation': activation
        })

        # Check if layers are compatible
        if len(self.layers) > 1:
            prev_layer = self.layers[-2]
            if prev_layer['output_size'] != input_size:
                raise ValueError(f"Layer input size {input_size} doesn't match previous layer output size {prev_layer['output_size']}")

        # Build/rebuild model when layer is added
        self._build_model()
        
        if self.debug:
            print(f"Added layer: {input_size} → {output_size} with {activation} activation")

    def _build_model(self):
        """Build PyTorch model from layers"""
        if not self.layers:
            return
    
        layers = []
        for i, layer_info in enumerate(self.layers):
            # Add linear layer
            layers.append(torch_nn.Linear(layer_info['input_size'], layer_info['output_size']))
            
            # Add activation, but skip softmax for the last layer
            if i < len(self.layers) - 1:  # Only add activation for non-final layers
                if layer_info['activation'] == 'tanh':
                    layers.append(torch_nn.Tanh())
                elif layer_info['activation'] == 'relu':
                    layers.append(torch_nn.ReLU())
                elif layer_info['activation'] == 'sigmoid':
                    layers.append(torch_nn.Sigmoid())
    
        self.model = torch_nn.Sequential(*layers).to(self.device)

    def train(self, train_set, options=None):
        if options is None:
            options = {}
    
        # Default options similar to carbono.js
        epochs = options.get('epochs', 200)
        learning_rate = options.get('learningRate', 0.212)
        print_every_epochs = options.get('printEveryEpochs', 10)
        early_stop_threshold = options.get('earlyStopThreshold', 1e-6)
        optimizer_type = options.get('optimizer', 'adam')
        loss_function = options.get('lossFunction', 'cross-entropy')
    
        # Convert data to PyTorch format
        if isinstance(train_set[0]['output'], str):
            unique_labels = list(set(item['output'] for item in train_set))
            self.labels = unique_labels
            
            num_classes = len(unique_labels)
            label_to_idx = {label: i for i, label in enumerate(unique_labels)}
            
            x_data = torch.tensor([item['input'] for item in train_set], dtype=torch.float32).to(self.device)
            # Change this part - use class indices instead of one-hot encoding
            y_data = torch.tensor([label_to_idx[item['output']] for item in train_set], dtype=torch.long).to(self.device)
        else:
            x_data = torch.tensor([item['input'] for item in train_set], dtype=torch.float32).to(self.device)
            y_data = torch.tensor([item['output'] for item in train_set], dtype=torch.float32).to(self.device)
    
        # Create DataLoader
        dataset = torch.utils.data.TensorDataset(x_data, y_data)
        train_loader = DataLoader(dataset, batch_size=32, shuffle=True)  # Remove generator parameter
    
        # Rest of the training code remains the same
        if loss_function == 'mse':
            criterion = torch_nn.MSELoss()
        elif loss_function == 'cross-entropy':
            criterion = torch_nn.CrossEntropyLoss()
    
        if optimizer_type == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        else:
            optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)
    
        start_time = time.time()
    
        for epoch in range(epochs):
            total_loss = 0
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
    
            avg_loss = total_loss / len(train_loader)
            
            if (epoch + 1) % print_every_epochs == 0 and self.debug:
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.6f}')
    
            if avg_loss < early_stop_threshold:
                if self.debug:
                    print(f'Early stopping at epoch {epoch+1} with loss: {avg_loss:.6f}')
                break
    
        training_time = (time.time() - start_time) * 1000
    
        total_params = sum(p.numel() for p in self.model.parameters())
        self.details = {
            'loss': avg_loss,
            'parameters': total_params,
            'training': {
                'time': training_time,
                'epochs': epoch + 1,
                'learningRate': learning_rate
            },
            'layers': self.layers
        }
        return self.details

    async def predict(self, input_data, tags=True):
        """Make predictions with support for URL inputs"""
        try:
            # Preprocess the input if it's a URL or raw data
            processed_input = await self.preprocess_data(input_data)
            
            # Convert to tensor and ensure correct shape
            with torch.no_grad():
                input_tensor = torch.tensor(processed_input, dtype=torch.float32).to(self.device)
                if len(input_tensor.shape) == 1:
                    input_tensor = input_tensor.unsqueeze(0)
                
                output = self.model(input_tensor)
                
                # Apply softmax if using classification
                if self.labels:
                    probabilities = torch.nn.functional.softmax(output, dim=1)
                    predictions = probabilities.cpu().numpy()
                else:
                    predictions = output.cpu().numpy()
    
                if self.labels and tags:
                    # Return labeled probabilities
                    return [
                        {
                            'label': self.labels[i],
                            'probability': float(prob)
                        }
                        for i, prob in enumerate(predictions[0])
                    ]
                
                return predictions[0].tolist()
                
        except Exception as e:
            if self.debug:
                print(f"Error during prediction: {str(e)}")
            raise


    def save(self, filename='model', useBinary=True):
        try:
            print("Starting save process...")
            if not self.model:
                raise ValueError("No model to save")
    
            # Extract current model state
            weights = []
            biases = []
            for layer in self.model:
                if isinstance(layer, torch_nn.Linear):
                    weights.append(layer.weight.detach().cpu().numpy().tolist())
                    biases.append(layer.bias.detach().cpu().numpy().tolist())
    
            print("Model state:", {
                "weightLayers": len(weights),
                "biasLayers": len(biases),
                "modelLayers": len(self.layers)
            })
    
            # Validate weights and biases
            if not weights or not biases:
                raise ValueError("Weights or biases are empty. Cannot save model.")
    
            # Prepare metadata
            if 'info' not in self.details:
                self.details['info'] = {
                    'name': filename,
                    'author': '',
                    'license': 'MIT',
                    'note': '',
                    'date': time.strftime('%Y-%m-%dT%H:%M:%SZ')
                }
    
            # Prepare layer info
            layer_info = {
                'weightShapes': [[len(layer), len(layer[0])] for layer in weights],
                'biasShapes': [len(layer) for layer in biases]
            }
    
            # Create metadata object
            metadata = {
                'layers': self.layers,
                'details': self.details,
                'layerInfo': layer_info
            }
            if self.labels:
                metadata['labels'] = self.labels
    
            if not useBinary:
                print("Using JSON mode")
                metadata['weights'] = weights
                metadata['biases'] = biases
                with open(f"{filename}.json", 'w') as f:
                    json.dump(metadata, f)
                return True
    
            print("Using binary compression mode")
    
            # Calculate total buffer sizes
            total_weights = sum(shape[0] * shape[1] for shape in layer_info['weightShapes'])
            total_biases = sum(shape for shape in layer_info['biasShapes'])
    
            # Convert metadata to bytes and calculate padding
            metadata_bytes = json.dumps(metadata).encode('utf-8')
            metadata_padding = (4 - (len(metadata_bytes) % 4)) % 4
    
            # Create header with sizes and padding info
            header = struct.pack('4I',
                len(metadata_bytes),    # metadata length
                metadata_padding,       # padding bytes
                total_weights,          # total weights
                total_biases           # total biases
            )
    
            print("Header values:", {
                'metadataLength': len(metadata_bytes),
                'padding': metadata_padding,
                'weightLength': total_weights,
                'biasLength': total_biases
            })
    
            # Calculate total size with padding
            total_size = (
                len(header) +                  # header size
                len(metadata_bytes) +          # metadata size
                metadata_padding +             # padding
                (total_weights * 4) +          # weights size (float32)
                (total_biases * 4)             # biases size (float32)
            )
    
            # Prepare weights and biases as flat arrays
            weight_data = struct.pack(f'{total_weights}f',
                *[w for layer in weights for row in layer for w in row])
            bias_data = struct.pack(f'{total_biases}f',
                *[b for layer in biases for b in layer])  # Changed 'bias' to 'layer'
    
            # Write the full binary file
            with open(f"{filename}.uai", 'wb') as f:
                f.write(header)
                f.write(metadata_bytes)
                f.write(b'\x00' * metadata_padding)
                f.write(weight_data)
                f.write(bias_data)
    
            print("Binary file created:", {
                'size': total_size,
                'path': f"{filename}.uai"
            })
    
            print("Save process completed successfully")
            return True
    
        except Exception as e:
            print("Save process failed:", str(e))
            raise
    
    def load(self, filename):
        try:
            print("Starting load process...")
    
            if not os.path.exists(filename):
                raise FileNotFoundError(f"File not found: {filename}")
    
            file_extension = os.path.splitext(filename)[1]
            useBinary = file_extension.lower() == '.uai'
    
            if not useBinary:
                # Handle JSON loading
                with open(filename, 'r') as f:
                    data = json.load(f)
                    self.layers = data['layers']
                    self.details = data['details']
                    if 'labels' in data:
                        self.labels = data['labels']
                    
                    self._build_model()
                    
                    # Load weights and biases
                    for i, layer in enumerate(self.model):
                        if isinstance(layer, torch_nn.Linear):
                            weight_tensor = torch.tensor(data['weights'][i], dtype=torch.float32)
                            bias_tensor = torch.tensor(data['biases'][i], dtype=torch.float32)
                            layer.weight.data = weight_tensor.to(self.device)
                            layer.bias.data = bias_tensor.to(self.device)
                    
                    print("JSON model loaded successfully")
                    return True
    
            # Binary loading
            with open(filename, 'rb') as f:
                # Read header (4 uint32 values)
                header = struct.unpack('4I', f.read(16))
                metadata_length, metadata_padding, weight_length, bias_length = header
    
                print("Header values:", {
                    'metadataLength': metadata_length,
                    'padding': metadata_padding,
                    'weightLength': weight_length,
                    'biasLength': bias_length
                })
    
                # Validate file size
                expected_size = (
                    16 +                    # header size
                    metadata_length +       # metadata size
                    metadata_padding +      # padding
                    (weight_length * 4) +   # weights size
                    (bias_length * 4)       # biases size
                )
                
                file_size = os.path.getsize(filename)
                if file_size != expected_size:
                    raise ValueError(f"Invalid file size: expected {expected_size}, got {file_size}")
    
                # Read metadata
                metadata_bytes = f.read(metadata_length)
                metadata = json.loads(metadata_bytes.decode('utf-8'))
    
                # Skip padding
                f.read(metadata_padding)
    
                # Read weights and biases
                weight_data = f.read(weight_length * 4)
                bias_data = f.read(bias_length * 4)
    
                # Unpack weights and biases
                weights = struct.unpack(f'{weight_length}f', weight_data)
                biases = struct.unpack(f'{bias_length}f', bias_data)
    
                # Load metadata into model
                self.layers = metadata['layers']
                self.details = metadata['details']
                if 'labels' in metadata:
                    self.labels = metadata['labels']
    
                # Rebuild model architecture
                self._build_model()
    
                # Reshape and assign weights and biases
                weight_idx = 0
                bias_idx = 0
                layer_idx = 0
    
                for layer in self.model:
                    if isinstance(layer, torch_nn.Linear):
                        weight_shape = metadata['layerInfo']['weightShapes'][layer_idx]
                        bias_shape = metadata['layerInfo']['biasShapes'][layer_idx]
    
                        # Reshape and assign weights
                        layer_weights = weights[weight_idx:weight_idx + (weight_shape[0] * weight_shape[1])]
                        weight_tensor = torch.tensor(layer_weights, dtype=torch.float32).reshape(weight_shape[0], weight_shape[1])
                        layer.weight.data = weight_tensor.to(self.device)
                        weight_idx += weight_shape[0] * weight_shape[1]
    
                        # Reshape and assign biases
                        layer_biases = biases[bias_idx:bias_idx + bias_shape]
                        bias_tensor = torch.tensor(layer_biases, dtype=torch.float32)
                        layer.bias.data = bias_tensor.to(self.device)
                        bias_idx += bias_shape
    
                        layer_idx += 1
    
                print("Binary model loaded successfully")
                return True
    
        except Exception as e:
            print("Load process failed:", str(e))
            raise
    
        

    def info(self, info_updates):
        """Update model metadata"""
        if 'info' not in self.details:
            self.details['info'] = {}
        self.details['info'].update(info_updates)

In [4]:
# Example usage: train a 10-class emoji classifier and export it as emoji.uai.
# Each sample is a 10-element one-hot input paired with an emoji string; string
# outputs make train() treat the task as classification over the emoji labels.
train_set = [
    {'input': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'output': '😀'},  # Smiling Face
    {'input': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], 'output': '😊'},  # Smiling Face with Smiling Eyes
    {'input': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], 'output': '😂'},  # Face with Tears of Joy
    {'input': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], 'output': '😍'},  # Smiling Face with Heart-Eyes
    {'input': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], 'output': '😎'},  # Smiling Face with Sunglasses
    {'input': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 'output': '😢'},  # Crying Face
    {'input': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], 'output': '😡'},  # Pouting Face
    {'input': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], 'output': '😴'},  # Sleeping Face
    {'input': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0], 'output': '🤔'},  # Thinking Face
    {'input': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'output': '🤢'},  # Nauseated Face
]

# Create the network wrapper (debug=True prints layer and epoch progress)
nn = carbono(debug=True)

# Add layers with specified architecture: 10 -> 128 -> 1024 -> 128 -> 10
nn.layer(10, 128, 'sigmoid')
nn.layer(128, 1024, 'relu')
nn.layer(1024, 128, 'relu')
nn.layer(128, 10, 'softmax')  # Output layer: 10 outputs, one per emoji class; _build_model adds no activation module after the last layer

# Train the model with adjusted parameters
training_summary = nn.train(train_set, {
    'epochs': 100,
    'learningRate': 0.0002,
    'printEveryEpochs': 10,
    'optimizer': 'adam',
    'lossFunction': 'cross-entropy'
})

# Export in the binary format (writes emoji.uai) and show a download link;
# download() switches the working directory to /kaggle/working/ first.
model_name = "emoji"
nn.save(model_name, True)
download(model_name)

Added layer: 10 → 128 with sigmoid activation
Added layer: 128 → 1024 with relu activation
Added layer: 1024 → 128 with relu activation
Added layer: 128 → 10 with softmax activation
Epoch [10/100], Loss: 2.275840
Epoch [20/100], Loss: 2.247234
Epoch [30/100], Loss: 2.211915
Epoch [40/100], Loss: 2.166773
Epoch [50/100], Loss: 2.108056
Epoch [60/100], Loss: 2.031236
Epoch [70/100], Loss: 1.932660
Epoch [80/100], Loss: 1.810511
Epoch [90/100], Loss: 1.662430
Epoch [100/100], Loss: 1.488777
Starting save process...
Model state: {'weightLayers': 4, 'biasLayers': 4, 'modelLayers': 4}
Using binary compression mode
Header values: {'metadataLength': 1075, 'padding': 1, 'weightLength': 264704, 'biasLength': 1290}
Binary file created: {'size': 1065068, 'path': 'emoji.uai'}
Save process completed successfully
