In [1]:
import pandas as pd
import numpy as np
import graphistry
import os
from dotenv import load_dotenv
from dotenv import find_dotenv

## Load Credentials 

In [2]:
# Locate the nearest .env file and load its variables into the process
# environment; verbose=True is passed through to python-dotenv.
load_dotenv(dotenv_path=find_dotenv(), verbose=True)

# Register with Graphistry using the API key stored in the environment.
# Request a key at: https://www.graphistry.com/api-request
graphistry.register(key=os.environ.get("graphistry_api_key"))

In [3]:
def get_movie(df, movie_id):
    """
    Look up the rows of a DataFrame that belong to one movie.

    INPUT
        df: Pandas DataFrame with a "movieId" column
        movie_id: movie ID to look up

    OUTPUT
        DataFrame containing every row of df whose "movieId" equals
        movie_id (empty if there is no match)
    """
    is_requested = df["movieId"] == movie_id
    return df.loc[is_requested]

def get_ratings(ratings_df, movies_df, movie_id):
    """
    Print aggregated rating data for a single movie.

    INPUT
        ratings_df: Pandas DataFrame of ratings with "movieId" and "rating" columns
        movies_df: Pandas DataFrame of movies with "movieId" and "title" columns
        movie_id: movie ID from movies DataFrame

    OUTPUT
        None. Prints the movie title(s), the number of reviews and the
        average rating (nan when the movie has no reviews).
    """
    # Narrow both frames to the requested movie before joining, so the merge
    # only handles the relevant rows instead of the whole ratings table.
    movie_reviews = ratings_df[ratings_df["movieId"] == movie_id]
    movie_info = movies_df[movies_df["movieId"] == movie_id]
    movie = pd.merge(movie_reviews, movie_info, on="movieId", how="outer")

    movie_name = movie["title"].unique()
    # The outer join keeps a placeholder row (rating = NaN) for a movie that
    # has no ratings; count only real ratings so that case reports 0, not 1.
    movie_ratings = int(movie["rating"].count())
    print("{} has {} reviews".format(movie_name, movie_ratings))

    avg_review = movie["rating"].mean()
    print("----------")
    print("Average Review: {} of 5".format(avg_review))

## Import MovieLens Dataset 

In [None]:
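# Download the dataset before running the cells below (shell command);
# the loaders below expect it extracted under ../data/ml-20m/:
# wget http://files.grouplens.org/datasets/movielens/ml-20m.zip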

In [4]:
# Movie metadata table from the MovieLens 20M download.
movies = pd.read_csv(filepath_or_buffer="../data/ml-20m/movies.csv")

In [5]:
# Ratings table from the MovieLens 20M download.
ratings = pd.read_csv(filepath_or_buffer="../data/ml-20m/ratings.csv")

In [6]:
# Down-sample to 100,000 ratings. The fixed seed makes the sample, and
# therefore the graph and per-movie summaries computed from it, reproducible
# across kernel restarts.
ratings = ratings.sample(n=100000, random_state=42)

## Plot Ratings Graph 

In [7]:
# Draw the user -> movie graph, one edge per rating, weighted by the rating.
(
    graphistry
    .bind(source="userId", destination="movieId", edge_weight="rating")
    .plot(ratings)
)

In [8]:
# Summarise the sampled ratings for movie 6538.
get_ratings(ratings_df=ratings, movies_df=movies, movie_id=6538)

['Swimming Pool (2003)'] has 9 reviews
----------
Average Review: 3.2777777777777777 of 5
