In [26]:
from IPython.display import FileLink, display
import torch.nn as torch_nn
from torch.utils.data import DataLoader
import numpy as np
import os, subprocess, time, json, torch
import struct

import requests
from PIL import Image
import io
import librosa
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def download(download_file_name):
    """Show a notebook download link for ``<download_file_name>.uai``.

    Changes the process working directory to ``/kaggle/working/`` and then
    displays an IPython ``FileLink`` for the file name relative to it.
    """
    os.chdir("/kaggle/working/")
    link = FileLink("{}.uai".format(download_file_name))
    display(link)

class carbono:
    """Small fully-connected network builder/trainer backed by PyTorch.

    Layers are declared with :meth:`layer`, trained with :meth:`train` (or
    :meth:`train_from_urls` for remote image/audio/text samples) and queried
    with :meth:`predict`. Models can be exported/imported either as a PyTorch
    state dict or as a ``.uai`` file (JSON metadata + raw float32 payload).

    Attributes are plain instance fields:
      * ``device``  - ``'cuda'`` when available, otherwise ``'cpu'``.
      * ``layers``  - list of ``{'input_size', 'output_size', 'activation'}``.
      * ``model``   - the ``torch.nn.Sequential`` built from ``layers``.
      * ``labels``  - class names for classification, ``None`` for regression.
      * ``details`` - statistics of the last training run plus optional info.
    """

    # Seconds to wait on a remote resource; without a timeout a stalled
    # server would block preprocessing forever.
    REQUEST_TIMEOUT = 30

    # File extension -> modality lookup used when the Content-Type header
    # is missing or unhelpful.
    _EXTENSION_KINDS = {
        'jpg': 'image', 'jpeg': 'image', 'png': 'image',
        'gif': 'image', 'bmp': 'image', 'webp': 'image',
        'wav': 'audio', 'mp3': 'audio', 'ogg': 'audio',
        'aac': 'audio', 'flac': 'audio',
        'txt': 'text', 'csv': 'text', 'json': 'text',
        'html': 'text', 'xml': 'text',
    }

    def __init__(self, debug=True):
        """Create an empty network; *debug* enables progress printing."""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.layers = []
        self.model = None
        self.debug = debug
        self.labels = None
        self.details = {}

    def _is_url(self, input_str):
        """Return True when *input_str* parses as a URL with scheme and host."""
        from urllib.parse import urlparse

        try:
            result = urlparse(input_str)
        except (TypeError, ValueError, AttributeError):
            # Non-string input or a malformed URL: simply "not a URL".
            return False
        return all([result.scheme, result.netloc])

    def _infer_content_type(self, url, content_type=None):
        """Infer ``'image'``, ``'audio'`` or ``'text'`` for a resource.

        The Content-Type header wins when it names one of the three supported
        families; otherwise the file extension is used.

        Raises:
            ValueError: if neither the header nor the extension is recognised.
        """
        from urllib.parse import urlparse

        if content_type and 'binary/octet-stream' not in content_type:
            declared = content_type.strip().lower()
            for kind in ('image', 'audio', 'text'):
                if declared.startswith(kind + '/'):
                    return kind

        # Look at the URL path first so a query string or fragment
        # ("a.jpg?size=1") does not hide the extension, then fall back to the
        # raw URL to keep accepting what the previous heuristic accepted.
        try:
            path = urlparse(url).path
        except ValueError:
            path = ''
        for candidate in (path, url):
            extension = candidate.rsplit('.', 1)[-1].lower()
            kind = self._EXTENSION_KINDS.get(extension)
            if kind is not None:
                return kind

        raise ValueError(f"Unable to infer content type for {url}")

    def _preprocess_image(self, content):
        """Decode image bytes into a flat 32x32 RGB vector scaled to [0, 1]."""
        img = Image.open(io.BytesIO(content))
        # Resize before the RGB conversion (same order as always, so features
        # of previously trained models stay reproducible).
        img = img.resize((32, 32))
        img = img.convert('RGB')
        img_array = np.array(img)
        return img_array.reshape(-1) / 255.0

    def _preprocess_audio(self, content):
        """Turn audio bytes into a flattened, min-max scaled mel spectrogram.

        Raises:
            ValueError: if the audio cannot be decoded or processed.
        """
        try:
            y, sr = librosa.load(io.BytesIO(content), sr=44100, duration=5)
            # Extract mel spectrogram features
            mel_spect = librosa.feature.melspectrogram(y=y, sr=sr)
            mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)
            lowest = mel_spect_db.min()
            span = mel_spect_db.max() - lowest
            if span == 0:
                # Constant spectrogram (e.g. digital silence): avoid 0/0 -> NaN.
                normalized = np.zeros_like(mel_spect_db)
            else:
                normalized = (mel_spect_db - lowest) / span
            return normalized.flatten()
        except Exception as e:
            raise ValueError(f"Error preprocessing audio: {str(e)}") from e

    def _preprocess_text(self, content):
        """Decode UTF-8 bytes and return the TF-IDF vector of that document.

        NOTE(review): the vectorizer is fitted on this single document, so the
        meaning of each feature position differs between documents - confirm
        this is intended.
        """
        text = content.decode('utf-8')
        vectorizer = TfidfVectorizer(max_features=1024)
        tfidf_matrix = vectorizer.fit_transform([text])
        return tfidf_matrix.toarray()[0]

    def _pad_or_truncate(self, data, target_size):
        """Zero-pad at the end, or cut, *data* to exactly *target_size* items."""
        if len(data) > target_size:
            return data[:target_size]
        elif len(data) < target_size:
            return np.pad(data, (0, target_size - len(data)))
        return data

    async def preprocess_data(self, input_data):
        """Download and vectorise *input_data* when it is a URL.

        Anything that is not a URL string is returned unchanged. URL content
        is converted according to its inferred type and then padded/truncated
        to the input size of the first layer (when layers exist).

        Raises:
            ValueError: on any download, type-inference or decoding failure.
        """
        if isinstance(input_data, str) and self._is_url(input_data):
            try:
                response = requests.get(input_data, timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
                content_type = response.headers.get('Content-Type', '')
                inferred_type = self._infer_content_type(input_data, content_type)

                if inferred_type == 'image':
                    processed = self._preprocess_image(response.content)
                elif inferred_type == 'audio':
                    processed = self._preprocess_audio(response.content)
                elif inferred_type == 'text':
                    processed = self._preprocess_text(response.content)
                else:
                    raise ValueError(f"Unsupported data type for {input_data}")

                # Get target size from first layer if available
                target_size = self.layers[0]['input_size'] if self.layers else len(processed)
                return self._pad_or_truncate(processed, target_size)

            except Exception as e:
                raise ValueError(f"Error preprocessing data from {input_data}: {str(e)}") from e
        return input_data

    async def train_from_urls(self, train_set_urls, options=None):
        """Train on ``[{'url': ..., 'output': ...}, ...]`` samples.

        Samples that fail to download or preprocess are skipped (and reported
        when ``debug`` is on); the rest is handed to :meth:`train`.
        """
        processed_train_set = []
        for data in train_set_urls:
            try:
                processed_input = await self.preprocess_data(data['url'])
                processed_train_set.append({
                    'input': processed_input,
                    'output': data['output']
                })
            except Exception as e:
                # Deliberate best-effort: one bad sample must not abort the run.
                if self.debug:
                    print(f"Error processing {data['url']}: {str(e)}")
                continue

        return self.train(processed_train_set, options)

    def save_pytorch(self, filename='model'):
        """Save the model's state dict to ``<filename>.pt``.

        Raises:
            ValueError: if no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before saving weights")
        torch.save(self.model.state_dict(), filename + '.pt')

    def load_pytorch(self, filename='model.pt'):
        """Load a state dict from *filename* into the already-built model.

        Raises:
            ValueError: if no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before loading weights")
        # map_location lets a checkpoint written on a GPU host load on CPU.
        self.model.load_state_dict(torch.load(filename, map_location=self.device))

    def layer(self, input_size, output_size, activation='tanh'):
        """Add a layer to the network, similar to carbono.js.

        Raises:
            ValueError: if *input_size* differs from the previous layer's
                output size. The rejected layer is not recorded.
        """
        # Validate before mutating so a rejected layer leaves no trace.
        if self.layers:
            prev_layer = self.layers[-1]
            if prev_layer['output_size'] != input_size:
                raise ValueError(f"Layer input size {input_size} doesn't match previous layer output size {prev_layer['output_size']}")

        self.layers.append({
            'input_size': input_size,
            'output_size': output_size,
            'activation': activation
        })

        # Build/rebuild model when layer is added
        self._build_model()

        if self.debug:
            print(f"Added layer: {input_size} → {output_size} with {activation} activation")

    def _build_model(self):
        """Build the ``torch.nn.Sequential`` model from ``self.layers``.

        Every layer becomes a ``Linear`` module. Non-final layers are followed
        by their tanh/relu/sigmoid activation; any other activation name adds
        no module. The final layer is always left linear (raw logits), softmax
        being applied by the loss during training and by :meth:`predict`.
        """
        if not self.layers:
            self.model = None
            return

        activations = {
            'tanh': torch_nn.Tanh,
            'relu': torch_nn.ReLU,
            'sigmoid': torch_nn.Sigmoid,
        }
        modules = []
        last_index = len(self.layers) - 1
        for i, layer_info in enumerate(self.layers):
            modules.append(torch_nn.Linear(layer_info['input_size'], layer_info['output_size']))
            if i < last_index:
                activation_cls = activations.get(layer_info['activation'])
                if activation_cls is not None:
                    modules.append(activation_cls())

        self.model = torch_nn.Sequential(*modules).to(self.device)

    def train(self, train_set, options=None):
        """Train the model on ``[{'input': ..., 'output': ...}, ...]``.

        String outputs are treated as class labels (classification); anything
        else is used as a float target (regression).

        Supported *options* keys: ``epochs`` (200), ``learningRate`` (0.212),
        ``printEveryEpochs`` (10), ``earlyStopThreshold`` (1e-6),
        ``optimizer`` (``'adam'``, anything else selects SGD) and
        ``lossFunction`` (``'cross-entropy'`` or ``'mse'``).

        Returns:
            The ``details`` dict (final loss, parameter count, timing, layers).

        Raises:
            ValueError: if no layer exists, the training set is empty or the
                loss function is unknown.
        """
        options = options or {}

        # Default options
        epochs = options.get('epochs', 200)
        learning_rate = options.get('learningRate', 0.212)
        print_every_epochs = options.get('printEveryEpochs', 10)
        early_stop_threshold = options.get('earlyStopThreshold', 1e-6)
        optimizer_type = options.get('optimizer', 'adam')
        loss_function = options.get('lossFunction', 'cross-entropy')

        # Validate everything up front, before any instance state is touched.
        if self.model is None:
            raise ValueError("Add at least one layer before training")
        if loss_function == 'mse':
            criterion = torch_nn.MSELoss()
        elif loss_function == 'cross-entropy':
            criterion = torch_nn.CrossEntropyLoss()
        else:
            raise ValueError(f"Unsupported loss function: {loss_function}")

        if not isinstance(train_set, list):
            train_set = list(train_set)
        if not train_set:
            raise ValueError("Training set is empty")

        # Going through one contiguous NumPy array is much faster than
        # handing torch a Python list of arrays.
        x_data = torch.tensor(
            np.asarray([item['input'] for item in train_set], dtype=np.float32)
        ).to(self.device)

        if isinstance(train_set[0]['output'], str):
            # Unique labels in order of first appearance.
            self.labels = list(dict.fromkeys(item['output'] for item in train_set))
            label_to_idx = {label: i for i, label in enumerate(self.labels)}
            y_data = torch.tensor(
                [label_to_idx[item['output']] for item in train_set],
                dtype=torch.long,
            ).to(self.device)

            if self.debug:
                print(f"Training with {len(self.labels)} classes: {self.labels}")
        else:
            y_data = torch.tensor(
                [item['output'] for item in train_set], dtype=torch.float32
            ).to(self.device)
            self.labels = None

        dataset = torch.utils.data.TensorDataset(x_data, y_data)
        train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

        if optimizer_type == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        else:
            optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

        start_time = time.time()

        # Pre-set so that ``epochs <= 0`` still yields a well-formed result.
        avg_loss = None
        epochs_run = 0

        for epoch in range(epochs):
            total_loss = 0
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

            avg_loss = total_loss / len(train_loader)
            epochs_run = epoch + 1

            if self.debug and print_every_epochs and epochs_run % print_every_epochs == 0:
                print(f'Epoch [{epochs_run}/{epochs}], Loss: {avg_loss:.6f}')

            if avg_loss < early_stop_threshold:
                if self.debug:
                    print(f'Early stopping at epoch {epochs_run} with loss: {avg_loss:.6f}')
                break

        training_time = (time.time() - start_time) * 1000  # milliseconds

        total_params = sum(p.numel() for p in self.model.parameters())
        self.details = {
            'loss': avg_loss,
            'parameters': total_params,
            'training': {
                'time': training_time,
                'epochs': epochs_run,
                'learningRate': learning_rate
            },
            'layers': self.layers
        }

        if self.debug:
            if self.labels is not None:
                print(f"Training completed. Model has {len(self.labels)} classes: {self.labels}")
            else:
                # Regression run: there are no labels to report.
                print("Training completed.")

        return self.details

    async def predict(self, input_data, tags=True):
        """Run the model on *input_data* (a feature vector or a URL).

        Returns:
            Classification with ``tags=True``: a list of
            ``{'label', 'probability'}`` dicts sorted by descending
            probability. Classification with ``tags=False``: the list of
            softmax probabilities. Regression: the raw output list.

        Raises:
            Whatever preprocessing or the forward pass raises. With ``debug``
            on, diagnostic information is printed first.
        """
        output = None
        try:
            if self.model is None:
                raise ValueError("Model architecture must be defined before predicting")

            # Preprocess the input if it's a URL or raw data
            processed_input = await self.preprocess_data(input_data)

            with torch.no_grad():
                input_tensor = torch.tensor(processed_input, dtype=torch.float32).to(self.device)
                if len(input_tensor.shape) == 1:
                    # Single sample: add the batch dimension.
                    input_tensor = input_tensor.unsqueeze(0)

                output = self.model(input_tensor)

                # For regression, return raw outputs
                if self.labels is None:
                    return output.cpu().numpy()[0].tolist()

                # Apply softmax for classification
                probabilities = torch.nn.functional.softmax(output, dim=1)
                predictions = probabilities.cpu().numpy()[0]

                if not tags:
                    return predictions.tolist()

                # Ensure predictions match number of labels
                predictions = predictions[:len(self.labels)]
                result = [
                    {'label': label, 'probability': float(predictions[i])}
                    for i, label in enumerate(self.labels)
                ]
                return sorted(result, key=lambda x: x['probability'], reverse=True)

        except Exception as e:
            if self.debug:
                print(f"Error during prediction: {str(e)}")
                if output is not None:
                    # Only available when the forward pass itself succeeded.
                    print(f"Shape of output: {output.shape}")
                print(f"Number of labels: {len(self.labels) if self.labels else 'No labels'}")
            # Always propagate: returning None silently would hide the failure.
            raise

    def save(self, filename='model'):
        """Export the model to ``<filename>.uai`` in carbono.js format.

        File layout: UTF-8 JSON metadata, a separator line, then all weight
        matrices followed by all bias vectors as native-order float32 values.

        Raises:
            ValueError: if no layer has been added yet.
        """
        if self.model is None:
            raise ValueError("Model architecture must be defined before saving")
        filename = filename + '.uai'

        weight_arrays = []
        bias_arrays = []
        for module in self.model:
            if isinstance(module, torch_nn.Linear):
                weight_arrays.append(module.weight.detach().cpu().numpy().astype(np.float32))
                bias_arrays.append(module.bias.detach().cpu().numpy().astype(np.float32))

        # Row-major float32 dump: all weights first, then all biases.
        weight_bin = b''.join(w.tobytes() for w in weight_arrays)
        bias_bin = b''.join(b.tobytes() for b in bias_arrays)

        metadata = {
            'layers': self.layers,
            'details': self.details,
            'layerInfo': {
                'weightShapes': [list(w.shape) for w in weight_arrays],
                'biasShapes': [len(b) for b in bias_arrays]
            },
            'labels': self.labels
        }

        separator = b'\n---BINARY_SEPARATOR---\n'
        binary_data = json.dumps(metadata).encode('utf-8') + separator + weight_bin + bias_bin

        with open(filename, 'wb') as f:
            f.write(binary_data)

    def load(self, filename):
        """Load a model from a ``.uai`` file written by :meth:`save`.

        The file is parsed and validated completely before any instance state
        is replaced, so a failed load leaves the object untouched.

        Raises:
            ValueError: if the separator is missing, the payload size does not
                match the declared shapes, or the number of stored shapes does
                not match the declared layers.
        """
        with open(filename, 'rb') as f:
            data = f.read()

        separator = b'\n---BINARY_SEPARATOR---\n'
        metadata_bytes, found, binary_data = data.partition(separator)
        if not found:
            raise ValueError("Invalid file format")

        metadata = json.loads(metadata_bytes.decode('utf-8'))
        layers = metadata['layers']
        details = metadata['details']
        weight_shapes = metadata['layerInfo']['weightShapes']
        bias_shapes = metadata['layerInfo']['biasShapes']

        weight_size = sum(shape[0] * shape[1] for shape in weight_shapes)
        bias_size = sum(bias_shapes)
        expected_bytes = (weight_size + bias_size) * 4  # float32 = 4 bytes
        if len(binary_data) != expected_bytes:
            raise ValueError(
                f"Invalid file format: expected {expected_bytes} bytes of "
                f"weights, found {len(binary_data)}"
            )
        if not (len(weight_shapes) == len(bias_shapes) == len(layers)):
            raise ValueError("Invalid file format: layer/shape count mismatch")

        # Copy so the values are not tied to the read-only bytes buffer.
        values = np.frombuffer(binary_data, dtype=np.float32).copy()
        weights = values[:weight_size]
        biases = values[weight_size:]

        self.layers = layers
        self.details = details
        self.labels = metadata.get('labels', None)
        self._build_model()

        linear_layers = [
            module for module in (self.model or [])
            if isinstance(module, torch_nn.Linear)
        ]

        weight_index = 0
        bias_index = 0
        for module, weight_shape, bias_shape in zip(linear_layers, weight_shapes, bias_shapes):
            rows, cols = weight_shape[0], weight_shape[1]
            weight_tensor = torch.tensor(
                weights[weight_index:weight_index + rows * cols], dtype=torch.float32
            ).reshape(rows, cols)
            module.weight.data = weight_tensor.to(self.device)
            weight_index += rows * cols

            bias_tensor = torch.tensor(
                biases[bias_index:bias_index + bias_shape], dtype=torch.float32
            )
            module.bias.data = bias_tensor.to(self.device)
            bias_index += bias_shape

        if self.debug:
            print("Model loaded successfully!")

    def info(self, info_updates):
        """Merge *info_updates* into ``details['info']`` (created on demand)."""
        self.details.setdefault('info', {}).update(info_updates)

In [29]:
# Demo cell: build a two-layer classifier, train it on four remote samples
# (two images, one audio clip, one text file), print the predictions for the
# same four URLs, then export the model and show a download link.
# NOTE: this cell uses top-level ``await``, which needs an environment that
# supports it (e.g. an IPython/Jupyter cell); it is not valid in a plain script.
model = carbono()
# 1024 inputs: preprocess_data pads/truncates every sample to the first
# layer's input size.
model.layer(1024, 24, 'sigmoid')
# The final layer's activation name is only recorded: _build_model adds no
# activation module after the last layer, and predict() applies softmax itself.
model.layer(24, 4, 'softmax')

# One sample per class; 'output' strings become the class labels.
train_set_urls = [
    {'url': 'https://cdn.pixabay.com/photo/2018/03/31/06/31/dog-3277416_1280.jpg', 'output': 'dog image'},
    {'url': 'https://cdn.pixabay.com/photo/2014/11/30/14/11/cat-551554_1280.jpg', 'output': 'cat image'},
    {'url': 'https://cdn.jsdelivr.net/gh/lunu-bounir/audio-equalizer/test/left.ogg', 'output': 'left audio'},
    {'url': 'https://raw.githubusercontent.com/appvoid/carbono/refs/heads/main/examples.md', 'output': 'markdown text'},
]

# Downloads and preprocesses each URL, then trains; URLs that fail are skipped.
await model.train_from_urls(train_set_urls, {
    'epochs': 10,
    'optimizer': 'adam',
    'learningRate': 0.1,
    'printEveryEpochs': 1
})

# Iterate over each item in the train_set_urls
# (this evaluates on the training samples themselves, so it only shows that
# the model fitted them, not how well it generalises).
for item in train_set_urls:
    url = item['url']
    expected_output = item['output']
    
    # Predict the output for the current URL (downloads the resource again)
    result = await model.predict(url)
    
    # Print the prediction along with the expected output
    print(f'Expected Output: {expected_output}')
    print(f'Prediction: {result}')
    print('---')

# Export and download model
# save() writes "<model_name>.uai" relative to the current working directory;
# download() changes into /kaggle/working/ and links to the same file name.
model_name = "multimodal"
model.save(model_name)
download(model_name)

Added layer: 1024 → 24 with sigmoid activation
Added layer: 24 → 4 with softmax activation
Training with 4 classes: ['dog image', 'cat image', 'left audio', 'markdown text']
Epoch [1/10], Loss: 1.436800
Epoch [2/10], Loss: 1.398230
Epoch [3/10], Loss: 0.947650
Epoch [4/10], Loss: 0.709429
Epoch [5/10], Loss: 0.595756
Epoch [6/10], Loss: 0.472842
Epoch [7/10], Loss: 0.351180
Epoch [8/10], Loss: 0.240556
Epoch [9/10], Loss: 0.153561
Epoch [10/10], Loss: 0.094467
Training completed. Model has 4 classes: ['dog image', 'cat image', 'left audio', 'markdown text']
Expected Output: dog image
Prediction: [{'label': 'dog image', 'probability': 0.9542640447616577}, {'label': 'left audio', 'probability': 0.03245037421584129}, {'label': 'markdown text', 'probability': 0.010960397310554981}, {'label': 'cat image', 'probability': 0.00232517602853477}]
---
Expected Output: cat image
Prediction: [{'label': 'cat image', 'probability': 0.9913267493247986}, {'label': 'dog image', 'probability': 0.00400280