In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [58]:
movies = [
    "tt0114709", # Toy Story
    "tt0120363", # Toy Story 2
    "tt0441773", # Kung Fu Panda
    "tt0347149", # Howl's moving castle
    "tt1375666", # Inception
    "tt0816692", # Interstellar
    "tt0074103", # L'aile ou la cuisse
    "tt0250223", # Asterix & Obelix, Mission cleopatra
    "tt1675434", # Intouchables
    "tt1074638", # Skyfall
]

movies = [
    "tt1119646", # hangover
    "tt0892769", # how to train your dargon,
    "tt0371746", # iron man
    "tt0430651", # survive style
    "tt0876563", # ponyo
    "tt1436045", # 13 assassins
    "tt1675434", # Intouchables
    "tt1673702", # cat in paris
    "tt0414852", # district 13
]

In [54]:
encodings = np.load('./SAE_embedding.npy', allow_pickle=True)
movie_mapping_SAE = np.load('./mapping_SAE.npy', allow_pickle=True).item()
reverse_movie_mapping_SAE = np.load('./reverse_mapping_SAE.npy', allow_pickle=True).item()


In [55]:
def inverse_imdb_transform(imdb):
    return int(imdb[2:])

def formating_imdbId(x) :
    x= round(x)
    x_str= str(x)
    while len(x_str) < 7 :
        x_str= '0' + x_str
    return 'tt' + x_str

In [85]:
def recommend_SAE(imdb_ids, isin=None, return_similarities=False, min_similarity=0.0):
    """
    Recommend movies based on a list of IMDB IDs, excluding the input movies from recommendations.
    
    Parameters:
    -----------
    imdb_ids : list
        List of IMDB IDs to base recommendations on
    isin : list, optional
        List of IMDB IDs to restrict recommendations to
    return_similarities : bool, optional
        Whether to return similarity scores along with recommendations
    min_similarity : float, optional
        Minimum similarity threshold (0.0 to 1.0) for recommendations
        
    Returns:
    --------
    list or tuple
        If return_similarities is False, returns list of recommended IMDB IDs
        If return_similarities is True, returns tuple of (recommendations, similarities)
    """
    encodings_ = encodings
    input_movies = [inverse_imdb_transform(id) for id in imdb_ids]
    
    if isin is not None:
        isin_imdb_ids = [inverse_imdb_transform(id) for id in isin]
        encodings_ = encodings[[movie_mapping_SAE[id] for id in isin_imdb_ids]]
    
    similarities = np.zeros(encodings_.shape[0])
    
    # Calculate similarities
    for movie in input_movies:
        id = movie_mapping_SAE[movie]
        embedding = encodings[id].reshape(1,-1)
        similarities += cosine_similarity(encodings_, embedding).ravel()
    
    # Normalize similarities by number of input movies
    similarities = similarities / len(input_movies)
    
    # Set similarity scores of input movies to negative infinity to exclude them
    if isin is not None:
        for movie in input_movies:
            for i, isin_movie in enumerate(isin_imdb_ids):
                if movie == isin_movie:
                    similarities[i] = float('-inf')
    else:
        for movie in input_movies:
            similarities[movie_mapping_SAE[movie]] = float('-inf')
    
    # Apply similarity threshold
    similarities[similarities < min_similarity] = float('-inf')
    
    # Sort and get recommendations
    similar_indices = np.argsort(similarities)[::-1]
    
    # Filter out movies below threshold
    valid_indices = similar_indices[similarities[similar_indices] > float('-inf')]
    
    if isin is not None:
        recommendations = [isin[rec_id] for rec_id in valid_indices[:5]]
    else:
        recommendations = [formating_imdbId(reverse_movie_mapping_SAE[rec_id]) 
                         for rec_id in valid_indices[:5]]
    
    if return_similarities:
        if isin is not None:
            filtered_similarities = similarities[valid_indices[:5]]
        else:
            filtered_similarities = similarities[valid_indices[:5]]
        return recommendations, filtered_similarities
    
    return recommendations

In [95]:
list_, sim = recommend_SAE([movies[2]], movies, return_similarities=True)
list_

['tt0892769', 'tt1119646', 'tt0414852', 'tt1436045', 'tt0430651']

In [69]:
movie_names_df = pd.read_csv('movie_names.csv', index_col=0)
movie_names = {id: movie_names_df[movie_names_df['imdb_id'] == id]['title'].item() for id in movies}
movie_names

{'tt1119646': 'The Hangover',
 'tt0892769': 'How to Train Your Dragon',
 'tt0371746': 'Iron Man',
 'tt0430651': 'Survive Style 5+',
 'tt0876563': 'Ponyo',
 'tt1436045': '13 Assassins',
 'tt1675434': 'The Intouchables',
 'tt1673702': 'A Cat in Paris',
 'tt0414852': 'District B13'}

In [93]:
import json

# Create JSON structure from existing recommendations
recommendations_json = {}

for r in range(1, 9):  # up to 8 movies
    for combo in combinations(movies, r):
        # Get recommendations for this combination
        recs = recommend_SAE(list(combo), movies, min_similarity=0.5)
        
        # Format input movies and create key
        input_movies = [movie_names[movie_id] for movie_id in combo]
        key = " + ".join(sorted(input_movies))
        
        # Format recommended movies
        rec_movies = [movie_names[rec_id] for rec_id in recs]
        
        # Store in dictionary
        recommendations_json[key] = {
            "input_movies": input_movies,
            "recommended_movies": rec_movies
        }

# Save to JSON file
with open('movie_recommendations.json', 'w') as f:
    json.dump(recommendations_json, f, indent=4)

In [78]:
import numpy as np
import networkx as nx
import plotly.graph_objects as go
from sklearn.manifold import MDS

def create_3d_visualization(movies, similarities, movie_names=None):
    """
    Create a 3D visualization of movie similarities using MDS for dimensionality reduction
    
    Parameters:
    movies: list of movie IDs
    similarities: numpy array of similarity scores between movies
    movie_names: optional dictionary mapping movie IDs to readable names
    """
    # Convert similarities to distances (higher similarity = lower distance)
    distances = 1 - similarities
    
    # Use MDS to convert distances to 3D coordinates
    mds = MDS(n_components=3, dissimilarity='precomputed', random_state=42)
    coords_3d = mds.fit_transform(distances)
    
    # Create edges between movies that are similar enough (you can adjust the threshold)
    threshold = np.percentile(similarities, 75)  # Top 25% of similarities
    edges = []
    edge_weights = []
    
    for i in range(len(movies)):
        for j in range(i+1, len(movies)):
            if similarities[i,j] > threshold:
                edges.append((i, j))
                edge_weights.append(similarities[i,j])
    
    # Create the 3D visualization using Plotly
    edge_trace = go.Scatter3d(
        x=[], y=[], z=[],
        line=dict(width=1, color='rgb(200,200,200)'),
        hoverinfo='none',
        mode='lines'
    )
    
    # Add edges to the visualization
    for edge in edges:
        x0, y0, z0 = coords_3d[edge[0]]
        x1, y1, z1 = coords_3d[edge[1]]
        edge_trace['x'] += (x0, x1, None)
        edge_trace['y'] += (y0, y1, None)
        edge_trace['z'] += (z0, z1, None)
    
    # Create nodes
    node_trace = go.Scatter3d(
        x=coords_3d[:,0],
        y=coords_3d[:,1],
        z=coords_3d[:,2],
        mode='markers+text',
        marker=dict(
            size=10,
            color='rgb(255, 127, 14)',
            line=dict(width=1, color='rgb(50,50,50)')
        ),
        text=[movie_names.get(movie_id, movie_id) if movie_names else movie_id for movie_id in movies],
        hoverinfo='text'
    )
    
    # Create the figure
    fig = go.Figure(data=[edge_trace, node_trace])
    
    # Update layout
    fig.update_layout(
        title='3D Movie Similarity Visualization',
        showlegend=False,
        scene=dict(
            xaxis=dict(showticklabels=False),
            yaxis=dict(showticklabels=False),
            zaxis=dict(showticklabels=False)
        ),
        margin=dict(l=0, r=0, t=40, b=0)
    )
    
    return fig

# Create similarity matrix from your recommend_SAE function
# This is just an example - you'll need to adapt this to your actual data
n_movies = len(movies)
similarities = np.zeros((n_movies, n_movies))
for i, movie1 in enumerate(movies):
    rec, sim = recommend_SAE([movie1], movies, return_similarities=True)
    similarities[i] = sim
    similarities[i][i] = 0.0


# Create and show the visualization
fig = create_3d_visualization(movies, similarities, movie_names)
fig.show()

(9, 50)
(9, 50)
(9, 50)
(9, 50)
(9, 50)
(9, 50)
(9, 50)
(9, 50)
(9, 50)


In [94]:
distances = 1 - similarities

# Use MDS to convert distances to 3D coordinates
mds = MDS(n_components=3, dissimilarity='precomputed', random_state=42)
coords_3d = mds.fit_transform(distances)

# Create edges between movies that are similar enough
threshold = np.percentile(similarities, 75)  # Top 25% of similarities
links = []
for i in range(len(movies)):
    for j in range(i+1, len(movies)):
        if similarities[i,j] > threshold:
            links.append({
                "source": movies[i],
                "target": movies[j],
                "value": float(similarities[i,j])  # Convert to float for JSON serialization
            })

# Create nodes with names and coordinates
nodes = []
for i, movie_id in enumerate(movies):
    nodes.append({
        "id": movie_id,
        "name": movie_names.get(movie_id, movie_id),
        "group": 1,  # You can modify this if you want to group movies
        "x": float(coords_3d[i,0]),
        "y": float(coords_3d[i,1]),
        "z": float(coords_3d[i,2])
    })

# Create the final graph data structure
graph_data = {
    "nodes": nodes,
    "links": links
}

# Save to JSON file
with open('graph-data.json', 'w') as f:
    json.dump(graph_data, f, indent=2)

# Print first few nodes and links as a sample
print("Sample of the generated JSON:")
print("\nFirst 2 nodes:")
print(json.dumps(graph_data["nodes"][:2], indent=2))
print("\nFirst 2 links:")
print(json.dumps(graph_data["links"][:2], indent=2))

Sample of the generated JSON:

First 2 nodes:
[
  {
    "id": "tt1119646",
    "name": "The Hangover",
    "group": 1,
    "x": -0.42934324271585556,
    "y": 0.0486620362777406,
    "z": -0.1693967812764746
  },
  {
    "id": "tt0892769",
    "name": "How to Train Your Dragon",
    "group": 1,
    "x": -0.26008028988181264,
    "y": -0.1362499910951961,
    "z": -0.24641066830352898
  }
]

First 2 links:
[
  {
    "source": "tt1119646",
    "target": "tt0892769",
    "value": 0.7814019322395325
  },
  {
    "source": "tt1119646",
    "target": "tt0371746",
    "value": 0.7243795394897461
  }
]


In [91]:
from itertools import combinations

def get_all_combination_recommendations(movies, movie_names, max_size=8):
    """
    Generate recommendations for all possible combinations of movies up to max_size.
    
    Args:
        movies: List of movie IDs
        movie_names: Dictionary mapping movie IDs to movie names
        max_size: Maximum number of movies to combine (default: 8)
    
    Returns:
        Dictionary mapping combination strings to recommendation strings
    """
    results = {}
    
    # Generate all possible combinations from 1 to max_size movies
    for r in range(1, max_size + 1):
        for combo in combinations(movies, r):
            # Get recommendations for this combination
            recs = recommend_SAE(list(combo), movies, min_similarity=0.5)
            
            # Format the input movies
            input_movies = [movie_names[movie_id] for movie_id in combo]
            input_str = " + ".join(input_movies)
            
            # Format the recommended movies
            rec_movies = [movie_names[rec_id] for rec_id in recs]
            rec_str = " | ".join(rec_movies)
            
            # Store the result
            results[input_str] = rec_str
    
    return results

# Generate all recommendations
all_recommendations = get_all_combination_recommendations(movies, movie_names)

# Print results in a formatted way
for input_movies, recommendations in all_recommendations.items():
    print(f"\n[{input_movies}] : [{recommendations}]")


[The Hangover] : [How to Train Your Dragon | Iron Man]

[How to Train Your Dragon] : [The Hangover | Iron Man | A Cat in Paris | Ponyo]

[Iron Man] : [How to Train Your Dragon | The Hangover | District B13]

[Survive Style 5+] : [The Intouchables | A Cat in Paris | Ponyo]

[Ponyo] : [A Cat in Paris | 13 Assassins | District B13 | Survive Style 5+ | The Intouchables]

[13 Assassins] : [District B13 | Ponyo]

[The Intouchables] : [Survive Style 5+ | A Cat in Paris | Ponyo]

[A Cat in Paris] : [Ponyo | The Intouchables | District B13 | Survive Style 5+ | How to Train Your Dragon]

[District B13] : [13 Assassins | A Cat in Paris | Iron Man | Ponyo]

[The Hangover + How to Train Your Dragon] : [Iron Man]

[The Hangover + Iron Man] : [How to Train Your Dragon]

[The Hangover + Survive Style 5+] : [Iron Man | How to Train Your Dragon | The Intouchables]

[The Hangover + Ponyo] : [How to Train Your Dragon | A Cat in Paris]

[The Hangover + 13 Assassins] : [Iron Man | District B13 | How to Tra