# **AI@GT Applied Research Fall 2025 Project**

##### Referenced Tutorial: https://www.youtube.com/watch?v=eyEabQRBMQA

## Importing Libraries and Movie Data

In [10]:
! pip install datasets pandas numpy scikit-learn
! jupyter labextension install @jupyter-widgets/jupyterlab-manager

(Deprecated) Installing extensions with the jupyter labextension install command is now deprecated and will be removed in a future major version of JupyterLab.

Users should manage prebuilt extensions with package managers like pip and conda, and extension authors are encouraged to distribute their extensions as prebuilt packages


In [11]:
# Importing Movie Data

import pandas as pd
from datasets import load_dataset

# Load the "ashraq/movielens_ratings" dataset through `datasets.load_dataset`
# and materialise its "train" split as a pandas DataFrame.
dataset = load_dataset("ashraq/movielens_ratings")
train_data = dataset["train"].to_pandas()

# Preview the first rows as the cell's output.
train_data.head()

Unnamed: 0,imdbId,tmdbId,movie_id,user_id,rating,title,genres,posters
0,tt2096673,150540,2307,11923,3.5,Inside Out (2015),Adventure|Animation|Children|Comedy|Drama|Fantasy,https://m.media-amazon.com/images/M/MV5BOTgxMD...
1,tt5160928,393732,7157,32503,3.0,Mean Dreams (2017),Thriller,https://m.media-amazon.com/images/M/MV5BMDM2OD...
2,tt3498820,271110,1398,20241,3.0,Captain America: Civil War (2016),Action|Sci-Fi|Thriller,https://m.media-amazon.com/images/M/MV5BMjQ0MT...
3,tt3040964,278927,2625,42975,5.0,The Jungle Book (2016),Adventure|Drama|Fantasy,https://m.media-amazon.com/images/M/MV5BMTc3NT...
4,tt5308322,440021,8457,31336,3.0,Happy Death Day (2017),Horror|Mystery|Thriller,https://m.media-amazon.com/images/M/MV5BYzZhY2...


## Search Functionality

In [15]:
# Preprocess the data
import re

def clean_title(title):
    """Return ``title`` with every character other than ASCII letters,
    digits and spaces removed.

    Parameters
    ----------
    title : str
        Raw movie title, e.g. ``"Toy Story (1995)"``.

    Returns
    -------
    str
        The title with punctuation and other symbols stripped out.
    """
    disallowed_chars = "[^a-zA-Z0-9 ]"
    stripped = re.sub(disallowed_chars, "", title)
    return stripped

In [17]:
# Store a cleaned copy of every title in a new "clean_title" column;
# the TF-IDF vectorizer below is fitted on this column.
train_data["clean_title"] = train_data["title"].map(clean_title)

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the title index: TF-IDF over single words and adjacent word pairs
# (ngram_range=(1, 2)), fitted on the cleaned titles.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(train_data["clean_title"])

In [20]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, top_n=5):
    """Return the rows of ``train_data`` whose titles best match ``title``.

    The query is cleaned with ``clean_title``, vectorised with the fitted
    ``vectorizer`` and compared against every indexed title by cosine
    similarity.

    Parameters
    ----------
    title : str
        Free-text title query.
    top_n : int, default 5
        Maximum number of distinct movies to return.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``train_data``, one per distinct
        ``movie_id``, ordered best match first (so ``.iloc[0]`` is the
        closest title).
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Rank every row best-first. np.argpartition leaves the selected slice in
    # arbitrary order, so it cannot be used to find the single best match.
    # The stable sort keeps the original row order among equal scores.
    ranked_rows = np.argsort(-similarity, kind="stable")

    # train_data holds one row per rating, so a movie's title occurs many
    # times with identical similarity. Keep only the highest-ranked row of
    # each movie so the result lists distinct movies.
    ranked_movie_ids = train_data["movie_id"].to_numpy()[ranked_rows]
    _, first_seen = np.unique(ranked_movie_ids, return_index=True)
    best_rows = ranked_rows[np.sort(first_seen)[:top_n]]

    return train_data.iloc[best_rows]

In [23]:
import ipywidgets as widgets
from IPython.display import display

# Output area in which the search callback renders its results.
movie_list = widgets.Output()

# Text box for the title query, pre-filled with an example title.
movie_input = widgets.Text(
    description="Movie Title:",
    value="Toy Story",
    disabled=False,
)

def on_type(data):
    """Refresh the search results shown in ``movie_list``.

    ``data`` is the change notification passed by the observed widget;
    its ``"new"`` entry is the text currently in the box. The output area
    is always cleared first, and results are shown only once the text is
    longer than five characters.
    """
    with movie_list:
        movie_list.clear_output()
        typed = data["new"]
        # Too short to search yet: leave the output area empty.
        if len(typed) <= 5:
            return
        display(search(typed))

# Call on_type whenever the text box's "value" trait changes,
# then render the text box followed by its results area.
movie_input.observe(on_type, names="value")
display(movie_input, movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()

## Recommendation System

In [26]:
def find_similar_movies(movie_id, ratings=None, min_rating=3, min_share=0.1, top_n=10):
    """Recommend movies that people who liked ``movie_id`` like unusually often.

    A rating counts as a "like" when it is at least ``min_rating``. The same
    rule is applied to both the similar-user group and the overall
    population so that the two shares being compared are measured the same way.

    Parameters
    ----------
    movie_id : int
        Identifier of the movie to base recommendations on.
    ratings : pandas.DataFrame, optional
        Ratings table with at least ``movie_id``, ``user_id`` and ``rating``
        columns. Defaults to the notebook-level ``train_data``.
    min_rating : float, default 3
        Lowest rating that counts as a like.
    min_share : float, default 0.1
        A candidate is kept only if more than this fraction of the similar
        users liked it.
    top_n : int, default 10
        Number of highest-scoring movies to return.

    Returns
    -------
    pandas.DataFrame
        One row per recommended movie, sorted by ``score`` descending, with
        columns ``similar`` (share of similar users who liked it), ``all``
        (share of all users who liked any candidate and also liked it),
        ``score`` (``similar / all``) plus the movie-level columns of
        ``ratings``. The per-rating columns ``user_id`` and ``rating`` are
        omitted because each output row describes a movie, not a rating.
    """
    if ratings is None:
        ratings = train_data

    liked = ratings[ratings["rating"] >= min_rating]

    # Users who liked the target movie. Both conditions are applied as
    # separate masks; writing `a & b >= 3` would parse as `(a & b) >= 3`.
    similar_users = liked.loc[liked["movie_id"] == movie_id, "user_id"].unique()

    # Share of those users that liked each movie, keeping only movies liked
    # by more than `min_share` of them.
    similar_user_recs = liked.loc[liked["user_id"].isin(similar_users), "movie_id"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Share of everyone (among users who liked at least one candidate) that
    # liked each candidate movie.
    all_users = liked[liked["movie_id"].isin(similar_user_recs.index)]
    all_users_recs = all_users["movie_id"].value_counts() / len(all_users["user_id"].unique())

    rec_percentages = pd.concat([similar_user_recs, all_users_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    # A score above 1 means similar users like the movie more than people in general.
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    # `ratings` has one row per rating; collapse it to one row per movie so
    # the merge attaches metadata without multiplying the result rows.
    movies = ratings.drop_duplicates(subset="movie_id").drop(
        columns=["user_id", "rating"], errors="ignore"
    )
    return rec_percentages.head(top_n).merge(movies, left_index=True, right_on="movie_id")

In [28]:
# Output area in which the recommendation callback renders its table.
recommendation_list = widgets.Output()

# Text box for the movie to base recommendations on, pre-filled with an example title.
movie_name_input = widgets.Text(
    description="Movie Title:",
    value="Toy Story",
    disabled=False,
)

def on_type(data):
    """Refresh the recommendations shown in ``recommendation_list``.

    ``data`` is the change notification passed by the observed widget;
    its ``"new"`` entry is the text currently in the box. The output area
    is always cleared first. Once the text is longer than five characters,
    the first row returned by ``search`` supplies the ``movie_id`` that is
    passed to ``find_similar_movies``, and the result is displayed.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data["new"]
        # Too short to search yet: leave the output area empty.
        if len(typed) <= 5:
            return
        matches = search(typed)
        first_match_id = matches.iloc[0]["movie_id"]
        display(find_similar_movies(first_match_id))

# Call on_type whenever the text box's "value" trait changes,
# then render the text box followed by the recommendation area.
movie_name_input.observe(on_type, names="value")
display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()