In [52]:
# ----------------------------
# Movie Class Definition
# ----------------------------

In [53]:
class Movie:
    """A single dataset row with its fields normalised for comparison.

    Attributes:
        index: identifier supplied by the caller, stored unchanged.
        title: title with surrounding whitespace removed.
        genre: genre with surrounding whitespace removed.
        rating: rating converted with ``float``.
        review: review text converted to lower case.
        advice: advice text converted to lower case.
        influenced: value converted with ``int``.
        suggested: ``True`` only when the raw value, stripped and
            lower-cased, equals ``"yes"``.

    Raises:
        ValueError: if ``rating`` or ``influenced`` cannot be converted.
    """

    def __init__(self, index, title, genre, rating, review, advice, influenced, suggested):
        self.index = index

        # Label-like fields are trimmed but keep their original case.
        self.title, self.genre = title.strip(), genre.strip()
        self.rating = float(rating)

        # Free-text fields are lower-cased so word comparisons ignore case.
        self.review, self.advice = review.lower(), advice.lower()
        self.influenced = int(influenced)

        # Anything other than "yes" (ignoring case and padding) counts as False.
        normalised_flag = suggested.strip().lower()
        self.suggested = (normalised_flag == "yes")

In [54]:
# ----------------------------
# Load CSV Dataset Without pandas
# ----------------------------

In [55]:
def parse_csv_line(line):
    """Split one CSV record into a list of field strings.

    A comma ends the current field unless it appears inside a
    double-quoted section. Quote characters are not copied to the
    output, except that a doubled quote (``""``) inside a quoted section
    produces a single literal ``"``.

    Args:
        line: the text of one record.

    Returns:
        list[str]: the fields in order. There is always at least one
        element; an empty ``line`` yields ``['']``.
    """
    columns = []
    buffer = []          # characters of the field currently being read
    quoted = False
    length = len(line)
    pos = 0
    while pos < length:
        ch = line[pos]
        pos += 1         # pos now points at the character after ch
        if ch == '"':
            if quoted and pos < length and line[pos] == '"':
                # Escaped quote: emit one quote and consume its partner.
                buffer.append('"')
                pos += 1
            else:
                quoted = not quoted
        elif ch == ',' and not quoted:
            columns.append(''.join(buffer))
            buffer = []
        else:
            buffer.append(ch)
    # The last field has no trailing comma, so flush it explicitly.
    columns.append(''.join(buffer))
    return columns

In [56]:
def load_dataset(filepath):
    """Read the movie CSV at ``filepath`` into a list of ``Movie`` objects.

    The first line is treated as a header and discarded. Every other
    non-blank line is split with ``parse_csv_line`` and its first seven
    fields are read, in order, as title, genre, rating, review, advice,
    influenced and suggested. Rows with fewer than seven fields, or with
    values that ``Movie`` rejects, are skipped.

    ``index`` values are assigned consecutively from 0 to the rows that
    load successfully, so ``movies[k].index == k`` in the returned list.

    Note: the file is read line by line, so a quoted field that contains
    a line break is seen as two separate (likely malformed) rows.

    Args:
        filepath: path of a UTF-8 encoded CSV file.

    Returns:
        list[Movie]: the successfully parsed rows, in file order.

    Raises:
        OSError: if the file cannot be opened.

    Warns:
        UserWarning: once, if at least one non-blank data row was skipped.
    """
    import warnings  # function-scope import: this file has no shared import block

    movies = []
    skipped = 0
    with open(filepath, 'r', encoding='utf-8') as f:
        next(f, None)  # discard the header row (no-op on an empty file)
        for line in f:
            text = line.strip()
            if not text:
                continue  # blank line, not a data row
            parts = parse_csv_line(text)
            if len(parts) < 7:
                skipped += 1
                continue  # skip malformed rows
            title, genre, rating, review, advice, influenced, suggested = parts[:7]
            try:
                movie = Movie(len(movies), title, genre, rating, review, advice, influenced, suggested)
            except (ValueError, TypeError):
                # Only conversion failures are treated as bad data. A bare
                # ``except`` here would also hide programming errors (for
                # example ``Movie`` not being defined yet) and make every row
                # vanish without a trace.
                skipped += 1
                continue  # skip rows with bad values
            movies.append(movie)

    if skipped:
        warnings.warn(
            "load_dataset: skipped %d of %d data row(s) in %r "
            "(too few fields or unconvertible values)"
            % (skipped, skipped + len(movies), filepath),
            stacklevel=2,
        )
    return movies

In [57]:
# ----------------------------
# Similarity Function
# ----------------------------

In [58]:
def compute_similarity(m1, m2):
    """Compute a directed similarity score from ``m1`` to ``m2``.

    The score is built up as follows:
      * +5 when both movies have the same ``genre``;
      * +2 for every word of ``m1.advice`` that also occurs in ``m2.advice``;
      * +1 for every word of ``m1.review`` that also occurs in ``m2.review``;
      * minus the absolute difference of the two ``rating`` values;
      * +2 when ``m2.suggested`` is true.

    Words are whitespace-separated tokens. A word repeated in ``m1`` is
    counted once per occurrence, and only ``m2.suggested`` is consulted,
    so the result is not symmetric: ``compute_similarity(a, b)`` may
    differ from ``compute_similarity(b, a)``.

    Args:
        m1: object with ``genre``, ``advice``, ``review`` and ``rating``.
        m2: object with the same attributes plus ``suggested``.

    Returns:
        The numeric score (may be negative).
    """
    score = 0

    if m1.genre == m2.genre:
        score += 5

    # Sets give O(1) membership tests; the counts are unchanged because we
    # still iterate over every occurrence in m1's word lists.
    advice_vocab = set(m2.advice.split())
    review_vocab = set(m2.review.split())

    advice_overlap = sum(1 for word in m1.advice.split() if word in advice_vocab)
    score += 2 * advice_overlap

    review_overlap = sum(1 for word in m1.review.split() if word in review_vocab)
    score += review_overlap

    score -= abs(m1.rating - m2.rating)

    if m2.suggested:
        score += 2

    return score

In [59]:
# ----------------------------
# Build Graph as Adjacency List
# ----------------------------

In [60]:
def build_graph(movies, threshold):
    """Build a weighted adjacency list over the positions in ``movies``.

    For every ordered pair of distinct positions ``(src, dst)`` the
    similarity ``compute_similarity(movies[src], movies[dst])`` is
    evaluated, and an edge ``src -> dst`` is kept when that score is at
    least ``threshold``.

    Each direction is tested independently, so both ``src -> dst`` and
    ``dst -> src`` can be present; the result is not guaranteed to be
    acyclic.

    Args:
        movies: sequence of movie objects accepted by ``compute_similarity``.
        threshold: minimum score required to keep an edge.

    Returns:
        dict[int, list[tuple[int, score]]]: one entry per position, each
        mapping to its ``(neighbour_position, score)`` pairs in ascending
        neighbour order.
    """
    adjacency = {}
    for src, origin in enumerate(movies):
        edges = []
        for dst, target in enumerate(movies):
            if src == dst:
                continue  # no self-loops
            weight = compute_similarity(origin, target)
            if weight >= threshold:
                edges.append((dst, weight))
        adjacency[src] = edges
    return adjacency

In [61]:
# ----------------------------
# Topological Sort
# ----------------------------

In [62]:
def topological_sort(graph, num_nodes):
    """Return a topological ordering of nodes ``0 .. num_nodes - 1``.

    Implements Kahn's algorithm: nodes with no incoming edges are emitted
    first, and a node becomes eligible once all of its predecessors have
    been emitted. Edge weights are ignored.

    Nodes that lie on a cycle, or that can only be reached through one,
    never become eligible and are left out, so the returned list can be
    shorter than ``num_nodes``. No error is raised in that case.

    Args:
        graph: dict mapping a node to a list of ``(neighbour, weight)``
            pairs. Nodes missing from the dict are treated as having no
            outgoing edges.
        num_nodes: number of nodes; every neighbour id must be below it.

    Returns:
        list[int]: the emitted nodes in order.
    """
    from collections import deque  # function-scope import: file has no import block

    indegree = [0] * num_nodes
    for edges in graph.values():
        for target, _ in edges:
            indegree[target] += 1

    # deque.popleft() is O(1); list.pop(0) shifts every remaining element.
    ready = deque(node for node in range(num_nodes) if indegree[node] == 0)

    order = []
    while ready:
        current = ready.popleft()
        order.append(current)
        for neighbor, _ in graph.get(current, ()):
            indegree[neighbor] -= 1
            if indegree[neighbor] == 0:
                ready.append(neighbor)

    return order

In [63]:
# ----------------------------
# Find Longest Path in DAG
# ----------------------------

In [64]:
def find_longest_path(movies, graph, start_title):
    """Find the highest-weight chain of movies starting at ``start_title``.

    The start movie is the first entry of ``movies`` whose title equals
    ``start_title`` ignoring case. Nodes are then relaxed in the order
    returned by ``topological_sort``, and the chain ending at the node
    with the greatest accumulated weight is returned.

    Graph node ids are positions in ``movies``, so the start node is the
    matching movie's position in the list rather than its ``index``
    attribute.

    The result is only a true longest path when ``graph`` is acyclic:
    nodes that ``topological_sort`` leaves out are never expanded.

    Args:
        movies: sequence of objects with a ``title`` attribute.
        graph: dict mapping a position to ``(neighbour, weight)`` pairs.
        start_title: title of the movie the chain must start from.

    Returns:
        list[str]: titles along the chain, beginning with the start
        movie, or ``["Start movie not found."]`` when no title matches.
    """
    n = len(movies)
    wanted = start_title.lower()
    start = -1
    for position, movie in enumerate(movies):
        if movie.title.lower() == wanted:
            start = position
            break
    if start == -1:
        return ["Start movie not found."]

    order = topological_sort(graph, n)

    # -inf marks "not reachable from start"; unlike a large finite sentinel
    # it can never be beaten into a bogus finite distance by adding weights.
    unreachable = float('-inf')
    dist = [unreachable] * n
    prev = [-1] * n
    dist[start] = 0

    for u in order:
        if dist[u] == unreachable:
            continue  # nothing to propagate from a node start cannot reach
        for v, weight in graph.get(u, []):
            candidate = dist[u] + weight
            if candidate > dist[v]:
                dist[v] = candidate
                prev[v] = u

    # Strict '>' keeps the start node (then the lowest position) on ties.
    max_index = start
    for i in range(n):
        if dist[i] > dist[max_index]:
            max_index = i

    # Walk predecessor links back to the start, then flip into forward order.
    path = []
    while max_index != -1:
        path.append(movies[max_index].title)
        max_index = prev[max_index]

    path.reverse()
    return path

In [65]:
# ----------------------------
# Main Execution
# ----------------------------

In [69]:
def main(filepath="Netflix Life Impact Dataset (NLID).csv", start_title="Parasite", threshold=3.0):
    """Load the dataset, build the similarity graph and print the chain.

    All arguments are optional; calling ``main()`` with none of them uses
    the values that were previously hard-coded in the body.

    Args:
        filepath: path of the CSV file passed to ``load_dataset``.
        start_title: title of the movie the chain should start from.
        threshold: minimum similarity score for ``build_graph`` to keep
            an edge.

    Returns:
        None. The chain is printed one title per line. When the start
        title is not found, the single line printed is the message
        returned by ``find_longest_path``.
    """
    movies = load_dataset(filepath)
    graph = build_graph(movies, threshold)
    path = find_longest_path(movies, graph, start_title)

    print("Longest recommendation chain:")
    for title in path:
        print("->", title)

In [70]:
# Run the pipeline: load the dataset, build the graph and print the chain.
main()

Longest recommendation chain:
-> Start movie not found.
