In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
import wandb
import h5py
import os
import time
import json
from datetime import datetime

In [2]:
class Generator(nn.Module):
    """Conditional generator: noise + covariates -> synthetic sample.

    Every categorical covariate is looked up in its own embedding table; the
    looked-up vectors are concatenated with the noise vector and the numeric
    covariates and pushed through a stack of Linear -> BatchNorm1d -> ReLU
    layers followed by a final Linear projection to ``x_dim``.
    """

    def __init__(self, x_dim, vocab_sizes, nb_numeric, h_dims, z_dim):
        """
        Args:
            x_dim: Width of the generated output vector.
            vocab_sizes: Number of levels of each categorical covariate
                (one entry per covariate).
            nb_numeric: Number of numeric covariate columns.
            h_dims: Widths of the hidden layers, in order.
            z_dim: Width of the latent noise vector.
        """
        super().__init__()

        # Embedding width is capped at 50 and never exceeds the vocabulary size.
        emb_widths = [min(50, n_levels) for n_levels in vocab_sizes]
        self.embeddings = nn.ModuleList([
            nn.Embedding(n_levels, width)
            for n_levels, width in zip(vocab_sizes, emb_widths)
        ])

        # Layer widths, starting with the concatenated input
        # (noise + all embeddings + numeric covariates).
        widths = [z_dim + sum(emb_widths) + nb_numeric, *h_dims]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.BatchNorm1d(fan_out))
            stack.append(nn.ReLU())

        # Final projection to data space (no activation).
        stack.append(nn.Linear(widths[-1], x_dim))

        self.network = nn.Sequential(*stack)

    def forward(self, z, cat_covs, num_covs):
        """Generate one output row per input row.

        Column ``i`` of ``cat_covs`` is looked up in the ``i``-th embedding
        table; ``z``, the embeddings and ``num_covs`` are concatenated along
        dim 1 before entering the network.
        """
        lookups = []
        for col, table in enumerate(self.embeddings):
            lookups.append(table(cat_covs[:, col]))
        condition = torch.cat(lookups, dim=1)

        return self.network(torch.cat((z, condition, num_covs), dim=1))

In [3]:
class Discriminator(nn.Module):
    """Conditional critic: (sample, covariates) -> one unbounded score.

    Categorical covariates are embedded (one table per covariate), joined
    with the sample and the numeric covariates, and passed through a stack of
    Linear -> LeakyReLU(0.2) -> Dropout(0.3) layers and a final Linear layer
    with a single output unit.
    """

    def __init__(self, x_dim, vocab_sizes, nb_numeric, h_dims):
        """
        Args:
            x_dim: Width of the data vector being scored.
            vocab_sizes: Number of levels of each categorical covariate
                (one entry per covariate).
            nb_numeric: Number of numeric covariate columns.
            h_dims: Widths of the hidden layers, in order.
        """
        super().__init__()

        # Embedding width is capped at 50 and never exceeds the vocabulary size.
        emb_widths = [min(50, n_levels) for n_levels in vocab_sizes]
        self.embeddings = nn.ModuleList([
            nn.Embedding(n_levels, width)
            for n_levels, width in zip(vocab_sizes, emb_widths)
        ])

        # Layer widths, starting with the concatenated input
        # (sample + all embeddings + numeric covariates).
        widths = [x_dim + sum(emb_widths) + nb_numeric, *h_dims]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.LeakyReLU(0.2))
            stack.append(nn.Dropout(0.3))

        # Single raw score per row (no sigmoid).
        stack.append(nn.Linear(widths[-1], 1))

        self.network = nn.Sequential(*stack)

    def forward(self, x, cat_covs, num_covs):
        """Score each row of ``x`` given its covariates.

        Column ``i`` of ``cat_covs`` is looked up in the ``i``-th embedding
        table; ``x``, the embeddings and ``num_covs`` are concatenated along
        dim 1 before entering the network.
        """
        lookups = []
        for col, table in enumerate(self.embeddings):
            lookups.append(table(cat_covs[:, col]))
        condition = torch.cat(lookups, dim=1)

        return self.network(torch.cat((x, condition, num_covs), dim=1))


In [4]:
class _RowIndexedDataset(torch.utils.data.Dataset):
    """Wrap a map-style dataset so each item also carries its row index."""

    def __init__(self, base):
        self.base = base

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        # base[idx] is expected to be a tuple whose first element is the sample
        # (this is what TensorDataset returns).
        return self.base[idx][0], idx


def _with_row_indices(dataloader, n_rows):
    """Return a loader yielding (real_data, row_idx) batches, or None.

    The returned loader reuses the original loader's batch sampler, so batch
    size, shuffling and drop_last behaviour are unchanged. None is returned
    when the dataset length is unknown or differs from ``n_rows``, i.e. when
    dataset rows cannot be matched one-to-one with covariate rows.
    """
    dataset = getattr(dataloader, 'dataset', None)
    batch_sampler = getattr(dataloader, 'batch_sampler', None)
    if dataset is None or batch_sampler is None:
        return None
    try:
        if len(dataset) != n_rows:
            return None
    except TypeError:
        # Datasets without a length (e.g. iterable-style) cannot be aligned.
        return None
    return DataLoader(_RowIndexedDataset(dataset), batch_sampler=batch_sampler)


def train_gan(generator, discriminator, dataloader, cat_covs, num_covs, 
              config, device, score_fn=None, save_fn=None):
    """
    Train the conditional GAN with a Wasserstein-style critic loss plus a
    gradient-penalty term, printing progress as it goes.

    Args:
        generator: Generator module, called as generator(z, cat_covs, num_covs).
        discriminator: Critic module, called as discriminator(x, cat_covs, num_covs).
        dataloader: Loader over the real samples. Batches may be ``(real_data,)``
            or ``(real_data, row_idx)``. When the loader's dataset has exactly
            as many items as ``cat_covs`` has rows, row ``i`` of the covariates
            is assumed to describe dataset item ``i`` and every real sample is
            paired with its own covariates.
        cat_covs: Array-like of integer-coded categorical covariates
            (rows = samples, one column per categorical variable).
        num_covs: Array-like of numeric covariates (rows = samples).
        config: Mapping with keys 'lr', 'epochs', 'nb_critic', 'latent_dim'
            and, optionally, 'lambda_gp' (gradient-penalty weight, default 10).
        device: Device on which the models live and batches are placed.
        score_fn: Optional callable ``score_fn(generator)``, evaluated every
            10th epoch.
        save_fn: Optional callable ``save_fn(generator, discriminator, epoch)``,
            invoked every 20th epoch.
    """
    
    # Optimizers
    g_optimizer = optim.RMSprop(generator.parameters(), lr=config['lr'])
    d_optimizer = optim.RMSprop(discriminator.parameters(), lr=config['lr'])
    
    # Gradient-penalty weight: honour the configured value, default to 10
    lambda_gp = config.get('lambda_gp', 10)
    
    # Convert covariates to tensors and move to device
    cat_covs = torch.tensor(cat_covs, dtype=torch.long).to(device)
    num_covs = torch.tensor(num_covs, dtype=torch.float32).to(device)
    
    total_batches = len(dataloader)
    
    # Pair each real sample with its own covariates whenever rows can be matched.
    batches = _with_row_indices(dataloader, cat_covs.size(0))
    if batches is None:
        batches = dataloader
        print("Warning: dataset rows cannot be matched to covariate rows; "
              "covariates are drawn at random for batches without row indices.")
    
    print(f"Starting training for {config['epochs']} epochs...")
    print(f"Total batches per epoch: {total_batches}")
    print(f"Using device: {device}")
    
    for epoch in range(config['epochs']):
        # A score_fn/save_fn may have switched the models to eval mode.
        generator.train()
        discriminator.train()
        
        d_losses = []
        g_losses = []
        print(f"\nEpoch [{epoch+1}/{config['epochs']}]")
        
        for batch_idx, batch in enumerate(batches):
            if torch.is_tensor(batch):
                batch = (batch,)
            
            # Move real data to device
            real_data = batch[0].to(device)
            batch_size = real_data.size(0)
            
            if len(batch) > 1:
                # Row indices supplied: use the covariates of these exact samples
                batch_indices = batch[1].to(device=device, dtype=torch.long)
            else:
                # No row indices available: fall back to a random draw
                batch_indices = torch.randint(0, cat_covs.size(0), (batch_size,)).to(device)
            batch_cat_covs = cat_covs[batch_indices]
            batch_num_covs = num_covs[batch_indices]
            
            # Train Discriminator
            for _ in range(config['nb_critic']):
                d_optimizer.zero_grad()
                
                # Generate fake data; no generator gradients are needed here
                z = torch.randn(batch_size, config['latent_dim'], device=device)
                with torch.no_grad():
                    fake_data = generator(z, batch_cat_covs, batch_num_covs)
                
                # Calculate discriminator output for real and fake data
                real_validity = discriminator(real_data, batch_cat_covs, batch_num_covs)
                fake_validity = discriminator(fake_data, batch_cat_covs, batch_num_covs)
                
                # Calculate gradient penalty
                # NOTE(review): compute_gradient_penalty is not defined in the
                # visible cells; it must already exist in the kernel namespace.
                gp = compute_gradient_penalty(
                    discriminator,
                    real_data,
                    fake_data,
                    batch_cat_covs,
                    batch_num_covs,
                    device)
                
                # Calculate discriminator loss with gradient penalty
                d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gp
                
                d_loss.backward()
                d_optimizer.step()
                
                d_losses.append(d_loss.item())
            
            # Train Generator
            g_optimizer.zero_grad()
            
            # Generate fake data
            z = torch.randn(batch_size, config['latent_dim'], device=device)
            fake_data = generator(z, batch_cat_covs, batch_num_covs)
            
            # Calculate generator loss
            fake_validity = discriminator(fake_data, batch_cat_covs, batch_num_covs)
            g_loss = -torch.mean(fake_validity)
            
            g_loss.backward()
            g_optimizer.step()
            
            g_losses.append(g_loss.item())
            
            # Print progress every 10 batches (d_loss is the last critic step's loss)
            if batch_idx % 10 == 0:
                print(f"  Batch [{batch_idx}/{total_batches}] " \
                      f"D_loss: {d_loss.item():.4f}, " \
                      f"G_loss: {g_loss.item():.4f}")
        
        # Print epoch summary
        avg_d_loss = np.mean(d_losses)
        avg_g_loss = np.mean(g_losses)
        print(f"\nEpoch {epoch+1} Summary:")
        print(f"  Average D_loss: {avg_d_loss:.4f}")
        print(f"  Average G_loss: {avg_g_loss:.4f}")
        
        # Log metrics
        if wandb.run is not None:
            wandb.log({
                'epoch': epoch,
                'd_loss': avg_d_loss,
                'g_loss': avg_g_loss
            })
        
        # Evaluate and save model if needed
        if score_fn is not None and epoch % 10 == 0:
            score = score_fn(generator)
            print(f'Epoch {epoch}: Score = {score:.4f}')
        
        if save_fn is not None and epoch % 20 == 0:
            save_fn(generator, discriminator, epoch)

In [5]:
def main(selected_categories=None, data_path=None):
    """
    Train the GAN with selected categorical variables
    Args:
        selected_categories: List of column names to use as categorical variables.
                           If None, uses all columns except 'cell_id'
        data_path: Directory holding 'combined_normalized_data.h5' and
                   'combined_metadata.csv'; checkpoints are written to its
                   'saved_models' sub-directory. If None, the original
                   hard-coded project directory is used.
    Raises:
        ValueError: If a selected category is not a metadata column, if no
                    category is selected, or if the metadata row count does
                    not match the number of rows of the expression matrix.
    """
    # Configuration
    CONFIG = {
        'epochs': 100,
        'latent_dim': 64,
        'batch_size': 32,
        'nb_layers': 3,
        'hdim': 256,
        'lr': 1e-4,
        'nb_critic': 5,
        'lambda_gp': 10  # Gradient penalty coefficient
    }
    
    # Device configuration
    if torch.cuda.is_available():
        device = torch.device('cuda')
    elif torch.backends.mps.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
    
    print(f"Using device: {device}")
    
    # Load data
    if data_path is None:
        data_path = "/Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/20_1_2025__normalized/"
    
    # Load expression matrix
    # NOTE(review): everything below treats rows of this array as samples
    # (cells) and the last axis as features (genes) - confirm this matches
    # how the HDF5 file was written.
    with h5py.File(os.path.join(data_path, 'combined_normalized_data.h5'), 'r') as f:
        x_train = f['matrix'][:]
    
    # Load all categorical variables from single file
    cat_data = pd.read_csv(os.path.join(data_path, 'combined_metadata.csv'), sep=';')
    print("Categorical data shape:", cat_data.shape)
    print("Available categorical variables:", [col for col in cat_data.columns if col != 'cell_id'])
    
    # Covariates are matched to expression rows by position, so counts must agree
    if x_train.shape[0] != len(cat_data):
        raise ValueError(
            f"Expression matrix has {x_train.shape[0]} rows but metadata has "
            f"{len(cat_data)} rows; they must describe the same samples")
    
    # Determine which categories to use
    if selected_categories is None:
        # Use all columns except cell_id
        categories_to_use = [col for col in cat_data.columns if col != 'cell_id']
    else:
        # Validate selected categories
        invalid_categories = [cat for cat in selected_categories if cat not in cat_data.columns]
        if invalid_categories:
            raise ValueError(f"Invalid categories: {invalid_categories}")
        categories_to_use = list(selected_categories)
    
    if not categories_to_use:
        raise ValueError("At least one categorical variable is required")
    
    print(f"\nUsing categorical variables: {categories_to_use}")
    
    # Create dictionaries and inverse mappings for categorical variables
    cat_dicts = []
    encoded_covs = []
    
    # Process each selected column as a categorical variable
    for column in categories_to_use:
        # Get the column data
        cat_vec = cat_data[column]
        print(f"\nProcessing categorical variable: {column}")
        
        # Sorted unique levels; a level's position in this array is its integer code
        dict_inv = np.array(sorted(set(cat_vec.values)))
        dict_map = {t: i for i, t in enumerate(dict_inv)}
        cat_dicts.append(dict_inv)
        
        # Convert categorical variables to an integer column vector
        encoded = cat_vec.map(dict_map).to_numpy().reshape(-1, 1)
        encoded_covs.append(encoded)
        
        print(f"Categories in {column}:", dict_inv)
        print(f"Number of categories:", len(dict_inv))
    
    # Combine all categorical covariates
    cat_covs = np.hstack(encoded_covs)
    print("\nCombined categorical covariates shape:", cat_covs.shape)
    
    # Load numerical covariates (currently empty)
    num_covs = np.zeros((x_train.shape[0], 0))
    
    # Convert data to a PyTorch tensor; kept on CPU for the DataLoader
    x_train = torch.tensor(x_train, dtype=torch.float32)
    
    # Create data loader
    train_dataset = TensorDataset(x_train)
    train_loader = DataLoader(
        train_dataset, 
        batch_size=CONFIG['batch_size'],
        shuffle=True,
        drop_last=True
    )
    
    # Initialize models
    vocab_sizes = [len(c) for c in cat_dicts]
    print("\nVocabulary sizes for categorical variables:", vocab_sizes)
    nb_numeric = num_covs.shape[-1]
    x_dim = x_train.shape[-1]
    h_dims = [CONFIG['hdim']] * CONFIG['nb_layers']
    
    generator = Generator(
        x_dim=x_dim,
        vocab_sizes=vocab_sizes,
        nb_numeric=nb_numeric,
        h_dims=h_dims,
        z_dim=CONFIG['latent_dim']).to(device)
    
    discriminator = Discriminator(
        x_dim=x_dim,
        vocab_sizes=vocab_sizes,
        nb_numeric=nb_numeric,
        h_dims=h_dims).to(device)
    
    # One timestamp and one folder per training run, so that all checkpoints
    # of the run are stored together
    run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    categories_str = "+".join(categories_to_use)
    run_dir = os.path.join(data_path, "saved_models", f"run_{run_timestamp}_{categories_str}")
    
    # Define save function
    def save_models(generator, discriminator, epoch):
        """Write the model config and both state dicts for (0-based) `epoch`."""
        os.makedirs(run_dir, exist_ok=True)

        # Save model initialization parameters
        model_config = {
            'x_dim': x_dim,
            'vocab_sizes': vocab_sizes,
            'nb_numeric': nb_numeric,
            'h_dims': h_dims,
            'z_dim': CONFIG['latent_dim'],
            'categories': categories_to_use,
            'training_config': CONFIG}
        config_path = os.path.join(run_dir, 'model_config.json')
        with open(config_path, 'w') as f:
            json.dump(model_config, f, indent=4)
        
        # Save generator
        generator_path = os.path.join(run_dir, f"generator_{run_timestamp}_{categories_str}_epoch_{epoch+1}.pt")
        torch.save(generator.state_dict(), generator_path)
        
        # Save discriminator
        discriminator_path = os.path.join(run_dir, f"discriminator_{run_timestamp}_{categories_str}_epoch_{epoch+1}.pt")
        torch.save(discriminator.state_dict(), discriminator_path)
        
        print(f"\nModels saved at epoch {epoch + 1}:")
        print(f"Generator: {generator_path}")
        print(f"Discriminator: {discriminator_path}")
        
        # Log to wandb
        if wandb.run is not None:
            wandb.save(generator_path)
            wandb.save(discriminator_path)

    # Initialize wandb with unique run name
    run_name = f"run_{int(time.time())}"  # Uses timestamp for unique name
    wandb.init(
        project='adversarial_gene_expr',
        config=CONFIG,
        name=run_name,
        reinit=True  # Ensures new run each time
    )
    
    try:
        # Add selected categories to wandb config
        wandb.config.update({'selected_categories': categories_to_use})
        
        # Train model
        train_gan(
            generator=generator,
            discriminator=discriminator,
            dataloader=train_loader,
            cat_covs=cat_covs,
            num_covs=num_covs,
            config=CONFIG,
            device=device,
            save_fn=save_models
        )
        
        # Periodic checkpoints do not necessarily include the last epoch, so
        # always store the final weights as well
        if CONFIG['epochs'] > 0:
            save_models(generator, discriminator, CONFIG['epochs'] - 1)
    finally:
        # Close the wandb run even if training is interrupted or fails
        wandb.finish()

if __name__ == '__main__':
    # Entry point when this cell/script is executed directly: train a GAN
    # conditioned on the 'dataset' and 'cell_type' metadata columns.
    main(selected_categories=['dataset','cell_type'])
    
    # To condition on every metadata column except 'cell_id' instead, call:
    # main()

Using device: mps
Categorical data shape: (41588, 4)
Available categorical variables: ['dataset', 'cluster', 'cell_type']

Using categorical variables: ['dataset', 'cell_type']

Processing categorical variable: dataset
Categories in dataset: ['dataset1' 'dataset2' 'dataset3' 'dataset4' 'dataset5' 'dataset6'
 'dataset7']
Number of categories: 7

Processing categorical variable: cell_type
Categories in cell_type: ['B cells' 'Dendritic cells' 'Endothelial cells' 'Erythrocytes'
 'Fibroblasts' 'Granulocytes' 'Macrophages' 'Monocytes' 'NK cells'
 'T cells']
Number of categories: 10

Combined categorical covariates shape: (41588, 2)

Vocabulary sizes for categorical variables: [7, 10]


[34m[1mwandb[0m: Currently logged in as: [33mguyshani3[0m ([33mguyshani-tel-aviv-university[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Starting training for 100 epochs...
Total batches per epoch: 1299
Using device: mps

Epoch [1/100]
  Batch [0/1299] D_loss: -0.5605, G_loss: 0.3044
  Batch [10/1299] D_loss: -2.8216, G_loss: 1.5951
  Batch [20/1299] D_loss: -2.2083, G_loss: 1.3289
  Batch [30/1299] D_loss: -2.0722, G_loss: 1.2758
  Batch [40/1299] D_loss: -1.0944, G_loss: 0.0772
  Batch [50/1299] D_loss: -1.0580, G_loss: -0.3197
  Batch [60/1299] D_loss: -0.9674, G_loss: 0.3070
  Batch [70/1299] D_loss: -1.5179, G_loss: 0.1291
  Batch [80/1299] D_loss: -0.6571, G_loss: 0.1424
  Batch [90/1299] D_loss: -1.0714, G_loss: -0.0338
  Batch [100/1299] D_loss: -1.0053, G_loss: -0.4887
  Batch [110/1299] D_loss: -1.5703, G_loss: -0.2438
  Batch [120/1299] D_loss: -1.1552, G_loss: 1.2693
  Batch [130/1299] D_loss: -1.2954, G_loss: 0.7968
  Batch [140/1299] D_loss: -1.1264, G_loss: 1.4317
  Batch [150/1299] D_loss: -0.9933, G_loss: 1.5298
  Batch [160/1299] D_loss: -0.9129, G_loss: -0.2407
  Batch [170/1299] D_loss: -0.8028, G_lo




Epoch 1 Summary:
  Average D_loss: -0.2844
  Average G_loss: 0.2044

Models saved at epoch 1:
Generator: /Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/20_1_2025__normalized/saved_models/run_20250120_142110_dataset+cell_type/generator_20250120_142110_dataset+cell_type_epoch_1.pt
Discriminator: /Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/20_1_2025__normalized/saved_models/run_20250120_142110_dataset+cell_type/discriminator_20250120_142110_dataset+cell_type_epoch_1.pt

Epoch [2/100]
  Batch [0/1299] D_loss: -1.7853, G_loss: -1.0287
  Batch [10/1299] D_loss: -0.2461, G_loss: 0.5622
  Batch [20/1299] D_loss: 0.0149, G_loss: 0.2563
  Batch [30/1299] D_loss: -0.0716, G_loss: -0.6569
  Batch [40/1299] D_loss: -0.0680, G_loss: 0.3703
  Batch [50/1299] D_loss: -0.1452, G_loss: -0.9772
  Batch [60/1299] D_loss: -0.4741, G_loss: -0.7263
  Batch [70/1299] D_loss: -0.6283, G_loss: 0.1565
  Batch [80/1299] D_loss: -0.3867, G_loss: -0.5935
  Batch [90/1299] D_loss: -0.6718, G_loss: -0.484

In [10]:
### Functions for data generation
def inspect_generator_dims(generator):
    """
    Summarise the input-side dimensions of a generator.

    Parameters:
        generator: Object exposing ``embeddings`` (an iterable of embedding
            layers) and ``network`` (indexable, whose first element is a
            linear layer).

    Returns:
        dict with
            'embedding_dims': width of each embedding table,
            'total_embedding_dim': sum of those widths,
            'first_layer_in_dim': input width of the first network layer,
            'recommended_latent_dim': first-layer input width minus the total
                embedding width (numeric covariates are not accounted for).
    """
    widths = []
    for table in generator.embeddings:
        widths.append(table.embedding_dim)
    emb_total = sum(widths)

    first_in = generator.network[0].in_features

    report = dict(
        embedding_dims=widths,
        total_embedding_dim=emb_total,
        first_layer_in_dim=first_in,
    )
    report['recommended_latent_dim'] = first_in - emb_total
    return report

def generate_expression_profiles(generator, n_samples, dataset_category, device=None, debug=False):
    """
    Generate gene expression profiles using the trained cWGAN generator
    
    Parameters:
        generator: Trained Generator model
        n_samples: Number of profiles to generate
        dataset_category: Category index to condition on. A single integer
            for a generator with one categorical variable, or a sequence of
            integers with one entry per categorical variable (in the order
            of ``generator.embeddings``).
        device: Device to run generation on ('cuda', 'mps', or 'cpu').
            If None, the device of the generator's parameters is used.
        debug: If True, print debugging information
    
    Returns:
        numpy array of generated expression profiles with shape (n_samples, n_genes)
    
    Raises:
        ValueError: If the number of category indices does not match the
            number of categorical variables of the generator, or an index is
            outside the vocabulary of its embedding table.
    
    Note:
        The generator is switched to eval mode and left in eval mode.
    """
    # Set generator to eval mode
    generator.eval()
    
    # Generate on the device the model lives on unless told otherwise
    if device is None:
        first_param = next(generator.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    
    # Inspect dimensions
    dims = inspect_generator_dims(generator)
    
    if debug:
        print("Generator dimensions:")
        for k, v in dims.items():
            print(f"{k}: {v}")
    
    # Normalise the condition to one integer per categorical variable
    if isinstance(dataset_category, (int, np.integer)):
        category_ids = [int(dataset_category)]
    else:
        category_ids = [int(c) for c in dataset_category]
    
    if len(category_ids) != len(generator.embeddings):
        raise ValueError(
            f"Generator has {len(generator.embeddings)} categorical variable(s) "
            f"but {len(category_ids)} category index/indices were given")
    
    # Fail early and clearly instead of inside the embedding lookup
    for position, (category_id, emb) in enumerate(zip(category_ids, generator.embeddings)):
        if not 0 <= category_id < emb.num_embeddings:
            raise ValueError(
                f"Category index {category_id} for categorical variable {position} "
                f"is outside [0, {emb.num_embeddings - 1}]")
    
    # Create latent vectors
    latent_dim = dims['recommended_latent_dim']
    z = torch.randn(n_samples, latent_dim, device=device)
    
    if debug:
        print(f"\nLatent vector shape: {z.shape}")
    
    # Create categorical condition tensor: the same condition on every row
    cat_covs = torch.tensor([category_ids], dtype=torch.long, device=device).repeat(n_samples, 1)
    
    if debug:
        print(f"Categorical covariates shape: {cat_covs.shape}")
    
    # Create empty numeric covariates tensor
    num_covs = torch.zeros((n_samples, 0), device=device)
    
    # Generate samples
    try:
        with torch.no_grad():
            # Get embeddings
            embeddings = [emb(cat_covs[:, i]) for i, emb in enumerate(generator.embeddings)]
            embedded = torch.cat(embeddings, dim=1)
            
            if debug:
                print(f"Embedded shape: {embedded.shape}")
            
            # Concatenate inputs
            gen_input = torch.cat([z, embedded, num_covs], dim=1)
            
            if debug:
                print(f"Generator input shape: {gen_input.shape}")
                print(f"First layer input dim: {generator.network[0].in_features}")
                print(f"First layer weight shape: {generator.network[0].weight.shape}")
            
            # Generate samples
            fake_samples = generator.network(gen_input)
            
    except RuntimeError as e:
        # Print the architecture to help diagnose shape/device mismatches, then re-raise
        print("\nError during generation:")
        print(e)
        print("\nGenerator architecture:")
        print(generator)
        raise
    
    # Convert to numpy array
    return fake_samples.cpu().numpy()

def generate_and_save_profiles(generator, n_samples_per_category, save_path, device=None, debug=False,
                               category_names=None):
    """
    Generate expression profiles for every level of the generator's first
    categorical variable and save them to file
    
    Parameters:
        generator: Trained Generator model
        n_samples_per_category: Number of samples to generate per category
        save_path: Path prefix; '<save_path>_profiles.npy' and
            '<save_path>_categories.txt' are written
        device: Device to run generation on ('cuda', 'mps', or 'cpu').
            If None, the device of the generator's parameters is used.
        debug: If True, print debugging information
        category_names: Optional label for each category index. Defaults to
            'dataset1', 'dataset2', ... (one per category).
    
    Returns:
        (all_samples, all_categories): the stacked numpy array of generated
        profiles and the list with one label per generated row.
    
    Raises:
        ValueError: If category_names does not have one entry per category.
    """
    # Generate on the device the model lives on unless told otherwise
    if device is None:
        first_param = next(generator.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    
    # Number of categories comes from the model rather than a fixed constant
    n_categories = generator.embeddings[0].num_embeddings
    if category_names is None:
        category_names = [f'dataset{k+1}' for k in range(n_categories)]
    elif len(category_names) != n_categories:
        raise ValueError(
            f"Expected {n_categories} category names, got {len(category_names)}")
    
    all_samples = []
    all_categories = []
    
    # Generate samples for each category
    for category, name in enumerate(category_names):
        if debug:
            print(f"\nGenerating samples for {name}")
        
        samples = generate_expression_profiles(
            generator, 
            n_samples_per_category, 
            category, 
            device,
            debug=debug
        )
        all_samples.append(samples)
        all_categories.extend([f'{name}'] * n_samples_per_category)
    # Print saved data path
    print("Save location: "+str(save_path))

    # Combine all samples
    all_samples = np.vstack(all_samples)
    
    # Save generated profiles
    np.save(f'{save_path}_profiles.npy', all_samples)
    
    # Save category labels, one per line, in the same order as the rows
    with open(f'{save_path}_categories.txt', 'w') as f:
        for category in all_categories:
            f.write(f'{category}\n')
            
    return all_samples, all_categories

In [16]:
# Generate data

# Set directories
# 2 hidden layers
#run_dir = "/Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/13_1_2025__normalized/saved_models/run_20250113_114232_dataset/"
#generator_model = "generator_20250113_114232_dataset.pt"
# 3 hidden layers
run_dir = "/Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/13_1_2025__normalized/saved_models/run_20250113_135205_dataset/"
generator_model = "generator_20250113_135205_dataset_epoch_51.pt"

# Device configuration
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
    
print(f"Using device: {device}")

# Load the configuration stored next to the checkpoints
config_path = os.path.join(run_dir, 'model_config.json')
with open(config_path, 'r') as f:
    model_config = json.load(f)
    
# Rebuild the models with the saved configuration so that the state dicts fit
generator = Generator(
    x_dim=model_config['x_dim'],
    vocab_sizes=model_config['vocab_sizes'],
    nb_numeric=model_config['nb_numeric'],
    h_dims=model_config['h_dims'],
    z_dim=model_config['z_dim']).to(device)
    
discriminator = Discriminator(
    x_dim=model_config['x_dim'],
    vocab_sizes=model_config['vocab_sizes'],
    nb_numeric=model_config['nb_numeric'],
    h_dims=model_config['h_dims']).to(device)



# Only the generator weights are loaded; the discriminator stays freshly initialised
#discriminator_path = os.path.join(run_dir, "discriminator.pt")
#discriminator.load_state_dict(torch.load(discriminator_path, map_location=device, weights_only=True))
generator_path = os.path.join(run_dir, generator_model)
generator.load_state_dict(torch.load(generator_path, map_location=device, weights_only=True))


# Generate on the device selected above (the function's own default is not
# guaranteed to match the device the model was moved to)
all_samples, categories = generate_and_save_profiles(
    generator,
    n_samples_per_category=1000,
    save_path=run_dir+'generated_data',
    device=device,
    debug=False  # Set to True to print shape diagnostics
)

Using device: mps
Save location: /Users/guyshani/Documents/PHD/Aim_2/10x_data_mouse/13_1_2025__normalized/saved_models/run_20250113_135205_dataset/generated_data


In [17]:
## Load generated data
# Read back the generated expression matrix (one row per generated sample)
profiles = np.load(run_dir + 'generated_data_profiles.npy')

# Read the labels: one per line, in the same order as the matrix rows
with open(run_dir + 'generated_data_categories.txt', 'r') as f:
    categories = list(map(str.strip, f))

# Build a DataFrame with the labels as a trailing 'dataset' column
df = pd.DataFrame(profiles).assign(dataset=categories)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,dataset
0,0.029197,0.320732,0.105634,0.059878,-0.033626,0.315196,0.355873,0.098052,1.415885,0.138555,...,0.038606,0.017874,0.049724,0.033215,0.174404,0.011404,0.013922,0.012524,-0.024358,dataset1
1,0.795014,11.064900,7.698052,6.852327,0.702508,1.186466,0.304192,6.257954,3.651256,5.293654,...,-0.029625,-0.164511,-0.066902,-0.005725,-0.053271,0.016862,0.056550,-0.123670,-0.149712,dataset1
2,0.100273,2.477551,1.751069,1.583150,-0.010378,0.883565,0.253218,1.477944,1.379792,1.206260,...,0.014636,-0.010550,0.019563,-0.004031,-0.040558,0.042852,0.056843,-0.005354,0.013900,dataset1
3,-0.034639,1.667600,1.251640,1.145788,-0.147350,-0.009043,0.021922,1.158313,0.585592,0.775481,...,-0.008856,-0.005037,-0.006993,0.000234,0.022809,0.020079,0.043271,-0.002714,0.044753,dataset1
4,0.099730,2.358865,1.671180,1.510806,-0.010824,0.802133,0.232048,1.419516,1.297776,1.144986,...,0.013183,-0.012915,0.016879,-0.003016,-0.038905,0.038836,0.054853,-0.007304,0.010891,dataset1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,0.168555,3.913768,2.765483,2.494067,-0.015362,0.981787,0.322876,2.348282,1.871573,1.913221,...,0.005220,-0.019285,-0.025493,-0.022838,-0.026965,0.032246,0.085965,0.015422,0.032537,dataset7
6996,2.949266,6.560752,4.519499,4.007252,0.760103,-5.030645,0.517229,4.105344,0.180469,3.276859,...,-0.082215,-0.041214,0.152816,0.310174,0.466038,-0.133744,-0.015720,-0.038499,0.360135,dataset7
6997,0.381713,5.291831,3.660735,3.238459,0.432806,1.683521,0.287335,2.875701,2.568585,2.648107,...,-0.003065,0.001313,-0.005536,-0.044282,0.051976,0.068840,0.014409,-0.000424,-0.000549,dataset7
6998,-1.652966,-0.480020,-0.284708,-0.158320,-0.800277,0.109397,-0.567243,-0.265171,-0.451420,0.389981,...,-0.055769,0.094848,0.044443,-0.125318,-0.165968,0.050426,0.132208,0.117812,-0.112642,dataset7
