In [6]:
 #Model Comparison for Tune Similarity
# This notebook compares different models on tune similarity tasks using positive/negative sets

import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
import collections
import random
from typing import List, Tuple, Dict, Any
import warnings
from tf_sentence_transformers import SentenceTransformer
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

print("Libraries imported successfully!")

## 1. Data Loading and Preparation

# Load your tokenized dataset
df = pd.read_pickle('/home/devcontainers/git/Tune_Similarity/notebooks/embedding/data/tokenized_dataset.pkl')

# Load the vocabulary
with open("/home/devcontainers/git/Tune_Similarity/notebooks/embedding/note_vocab.pkl", "rb") as f:
    vocab_list = pickle.load(f)

# Create note lookup layer
note_lookup = tf.keras.layers.StringLookup(
    vocabulary=vocab_list,
    mask_token=None,
    oov_token="[UNK]"
)

print(f"Dataset shape: {df.shape}")
print(f"Vocabulary size: {len(vocab_list)}")
print(f"Number of unique tunes: {df.tune_id.nunique()}")
print(f"Number of unique tune types: {df.type.nunique()}")

## 2. Create Positive and Negative Sets

def create_positive_negative_sets(df: pd.DataFrame, 
                                 min_settings_per_tune: int = 2,
                                 max_pairs_per_tune: int = 10,
                                 negative_ratio: float = 2.0) -> Tuple[List, List]:
    """
    Create positive pairs (same tune_id) and negative pairs (different tune_id, potentially different type)
    
    Returns:
        positive_pairs: List of tuples (idx1, idx2) where both belong to same tune
        negative_pairs: List of tuples (idx1, idx2) where they belong to different tunes
    """
    # Group by tune_id
    tune_groups = df.groupby('tune_id').groups
    
    # Filter tunes with at least min_settings_per_tune settings
    valid_tunes = {tune_id: indices for tune_id, indices in tune_groups.items() 
                   if len(indices) >= min_settings_per_tune}
    
    print(f"Found {len(valid_tunes)} tunes with at least {min_settings_per_tune} settings")
    
    positive_pairs = []
    negative_pairs = []
    
    # Create positive pairs (same tune, different settings)
    for tune_id, indices in valid_tunes.items():
        indices_list = list(indices)
        # Create all possible pairs, but limit to max_pairs_per_tune
        for i in range(len(indices_list)):
            for j in range(i+1, len(indices_list)):
                positive_pairs.append((indices_list[i], indices_list[j]))
                if len(positive_pairs) % max_pairs_per_tune == 0:
                    break
            if len(positive_pairs) % max_pairs_per_tune == 0:
                break
    
    # Create negative pairs (different tunes)
    all_indices = [idx for indices in valid_tunes.values() for idx in indices]
    tune_id_map = {idx: df.loc[idx, 'tune_id'] for idx in all_indices}
    
    target_negative_pairs = int(len(positive_pairs) * negative_ratio)
    
    while len(negative_pairs) < target_negative_pairs:
        idx1, idx2 = random.sample(all_indices, 2)
        if tune_id_map[idx1] != tune_id_map[idx2]:
            negative_pairs.append((idx1, idx2))
    
    print(f"Created {len(positive_pairs)} positive pairs and {len(negative_pairs)} negative pairs")
    
    return positive_pairs, negative_pairs

# Create the positive and negative sets
positive_pairs, negative_pairs = create_positive_negative_sets(df)

Libraries imported successfully!
Dataset shape: (50473, 14)
Vocabulary size: 64
Number of unique tunes: 22027
Number of unique tune types: 12
Found 9305 tunes with at least 2 settings
Created 32944 positive pairs and 65888 negative pairs


In [None]:
# load up both oob model and custom embedding model
oob_embed = SentenceTransformer.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2"
)


In [17]:
# Try this immediate fix for your current model
def load_with_custom_objects(model_path):
    """
    Load model with custom objects to handle Lambda layers
    """
    
    custom_objects = {
        # Handle the Lambda layer functions
        'lambda_2': lambda x: tf.expand_dims(x, -1),
        'lambda_3': lambda z: tf.math.l2_normalize(z, axis=1),
        
        # If you have custom loss function
        'loss': lambda y_true, y_pred: tf.constant(0.0),  # Dummy loss for loading
    }
    
    try:
        model = tf.keras.models.load_model(
            model_path, 
            custom_objects=custom_objects, 
            compile=False  # Don't compile, just load architecture and weights
        )
        print("Model loaded successfully!")
        return model
    except Exception as e:
        print(f"Loading failed: {e}")
        return None

# Try loading your model
model_path = '/home/devcontainers/git/Tune_Similarity/notebooks/embedding/saved_models/tune_embedder_v0.keras'
model = load_with_custom_objects(model_path)

if model is not None:
    print("✓ Model loaded successfully!")
    print(f"Model summary:")
    model.summary()
else:
    print("✗ Model loading failed")

Loading failed: Exception encountered when calling Lambda.call().

[1mWe could not automatically infer the shape of the Lambda's output. Please specify the `output_shape` argument for this Lambda layer.[0m

Arguments received by Lambda.call():
  • args=('<KerasTensor shape=(None, None), dtype=float32, sparse=False, ragged=False, name=durations>',)
  • kwargs={'mask': 'None'}
✗ Model loading failed
