In [2]:
import heapq
from itertools import combinations

import numpy as np
import pandas as pd

In [3]:
def load_movie_data(file_path):
    """Read a CSV file and return the values of its ``movie`` column.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; handed straight to ``pandas.read_csv``.

    Returns
    -------
    list
        The ``movie`` column as a plain Python list, in file order.

    Raises
    ------
    KeyError
        If the file has no column named ``movie``.
    """
    review_table = pd.read_csv(file_path)
    title_column = review_table["movie"]
    return title_column.tolist()

def generate_random_vector(dimensions, rng=None):
    """Return a 1-D array of ``dimensions`` uniform samples from ``[0, 1)``.

    Parameters
    ----------
    dimensions : int
        Length of the vector to generate.
    rng : numpy.random.Generator, optional
        Random generator to draw from. Passing a seeded generator makes the
        result reproducible. When omitted, NumPy's legacy global RNG is used
        (``np.random.rand``), exactly as before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(dimensions,)``.
    """
    if rng is None:
        # Legacy path: draws from (and advances) NumPy's global random state.
        return np.random.rand(dimensions)
    return rng.random(dimensions)


In [4]:
def calculate_angle_similarity(vector1, vector2):
    """Return the angle, in radians, between two vectors.

    The angle is the arccosine of the cosine similarity, so it lies in
    ``[0, pi]``; a smaller angle means the vectors point in more similar
    directions.

    Parameters
    ----------
    vector1, vector2 : array_like
        Vectors of equal length (expected to be 1-D).

    Returns
    -------
    float
        The angle in radians, or ``nan`` when either vector has zero
        magnitude, because the angle is undefined in that case.
    """
    dot_product = np.dot(vector1, vector2)
    magnitude_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)

    if magnitude_product == 0:
        # Avoid a 0/0 division (and its RuntimeWarning); the angle is undefined.
        return np.nan

    # Floating-point rounding can yield a ratio like 1.0000000000000002 for
    # (anti-)parallel vectors; arccos would return nan for it, so clamp the
    # cosine back into its mathematical domain first.
    cosine = np.clip(dot_product / magnitude_product, -1.0, 1.0)
    return np.arccos(cosine)

def find_top_similar_movies(movie_vectors, num_top_pairs):
    """Return the movie pairs whose vectors are separated by the smallest angles.

    Every unordered pair of keys in ``movie_vectors`` is scored with
    ``calculate_angle_similarity``; the pairs with the smallest angles are
    returned.

    Parameters
    ----------
    movie_vectors : dict
        Maps each movie identifier to its vector.
    num_top_pairs : int
        Maximum number of pairs to return. A value of zero or less yields an
        empty list.

    Returns
    -------
    list of tuple
        ``(movie1, movie2, angle)`` tuples sorted by ascending angle. Pairs
        with equal angles keep the order in which the pairs were generated.
        Pairs whose angle is ``nan`` have no defined position in the ranking.
    """
    # Score pairs lazily so only the current best ``num_top_pairs`` candidates
    # are held in memory, rather than all n*(n-1)/2 pairs.
    scored_pairs = (
        (movie1, movie2, calculate_angle_similarity(movie_vectors[movie1], movie_vectors[movie2]))
        for movie1, movie2 in combinations(movie_vectors, 2)
    )
    # nsmallest returns the same items, in the same order, as
    # sorted(scored_pairs, key=...)[:num_top_pairs].
    return heapq.nsmallest(num_top_pairs, scored_pairs, key=lambda pair: pair[2])

In [5]:
def main(movie_data_file="moviereviews.csv", num_dimensions=300, num_top_pairs=3, seed=None):
    """Assign each movie a random vector and print the most similar pairs.

    Movie titles are loaded from ``movie_data_file``, each title is given a
    random vector of ``num_dimensions`` components, and the
    ``num_top_pairs`` pairs with the smallest angle between their vectors are
    printed.

    Parameters
    ----------
    movie_data_file : str
        Path of the CSV file passed to ``load_movie_data``.
    num_dimensions : int
        Length of the random vector generated for every movie.
    num_top_pairs : int
        Number of closest pairs to print.
    seed : int, optional
        When given, NumPy's global RNG is seeded with it before the vectors
        are generated so repeated runs print the same pairs. When omitted,
        the RNG is left untouched and every run differs.

    Notes
    -----
    Titles are used as dictionary keys, so a title that appears more than once
    in the file ends up with a single vector.
    """
    if seed is not None:
        # generate_random_vector(num_dimensions) draws from NumPy's global RNG,
        # so seeding it here is what makes the run repeatable.
        np.random.seed(seed)

    movies = load_movie_data(movie_data_file)

    movie_vectors = {movie: generate_random_vector(num_dimensions) for movie in movies}

    top_similar_movies = find_top_similar_movies(movie_vectors, num_top_pairs)

    print("Top similar movie pairs:")
    for movie1, movie2, angle in top_similar_movies:
        print(f"Movies '{movie1}' and '{movie2}' with angle similarity of {angle:.2f} radians.")


if __name__ == "__main__":
    main()

Top similar movie pairs:
Movies 'The Dark Knight' and 'It happened one night' with angle similarity of 0.66 radians.
Movies 'The Dark Knight' and 'Gone with the wind' with angle similarity of 0.67 radians.
Movies 'It happened one night' and 'Before Sunrise' with angle similarity of 0.67 radians.
