In [1]:
import pandas as pd
import numpy as np
import json
import re
import random

In [2]:
# Load the Netflix titles catalog from a local CSV file.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs on the author's machine; consider a configurable, relative data path.
df = pd.read_csv(r"D:\Bruno Serra PC\Documents\Bruno\GitHub\chat_bot\mid\netflix_titles.csv")

# Show the column names of the loaded catalog.
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [3]:
def read_json(filepath):
    """Load the chatbot intents from a JSON file into a DataFrame.

    Parameters
    ----------
    filepath : str or path-like
        Path to a JSON document whose top-level object has an ``"intents"``
        key holding a list of intent records.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the ``"intents"`` list.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    KeyError
        If the document has no ``"intents"`` key.
    """
    # Decode explicitly as UTF-8: without it, open() falls back to the locale
    # encoding (e.g. cp1252 on Windows) and non-ASCII characters in patterns
    # or responses are garbled or raise UnicodeDecodeError.
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
    return pd.DataFrame(data["intents"])

# Load the intents file (path is relative to the current working directory).
df_intents = read_json("intents.json")

# Transform the DataFrame into a list of dictionaries, one per intent.
# Each dictionary can then be accessed by index ([0], etc.).
list_dict_intents = df_intents.to_dict(orient="records")

# Print the full list of intents.
print(list_dict_intents)

[{'tag': 'greeting', 'patterns': ['Hi', 'How are you?', 'Hello', 'Good day', 'Whats up'], 'responses': ["Hello again! My name is Muvie and I'm here to help you select a movie or TV show. First, tell me if you want to watch a movie or a TV show."]}, {'tag': 'inform_content_type', 'patterns': ['movie', 'tv show'], 'responses': ['Great. You chose content_type. What is the max duration_or_seasons you expect to watch? REMEMBER to tell me if you are talking about hours or minutes!']}, {'tag': 'duration', 'patterns': ['hours', 'seconds', 'minutes', 'seasons'], 'responses': ["Great. I'll search for a program with duration_selected. Now, tell me if you want this tittle to be released after or before a specific year. Please type: after <year> or before <year>"]}, {'tag': 'country', 'patterns': ['United States', 'United Kingdom', 'Canada', 'India', 'France', 'Germany', 'Spain', 'Italy', 'Portugal', 'Brazil', 'Mexico', 'Argentina', 'Chile', 'Colombia', 'Japan', 'South Korea', 'China', 'Hong Kong',

In [4]:
def build_regex2int(list_dict_intents):
    """Build a mapping from a regex string to the intent tag it identifies.

    All patterns of an intent are escaped and joined into one alternation
    that must match as a whole "word": it may not be directly preceded or
    followed by a word character.

    Parameters
    ----------
    list_dict_intents : list of dict
        Intent records, each with a ``"tag"`` and a list of ``"patterns"``.

    Returns
    -------
    dict
        ``{regex_string: tag}``. Intents without any non-blank pattern are
        left out. If two intents produce the same regex, the later one wins.
    """
    regex2int = {}

    for intent in list_dict_intents:
        tag = intent["tag"]

        # Drop blank patterns: an empty alternative matches any input and
        # would make this intent answer everything.
        patterns = [p for p in intent["patterns"] if p and p.strip()]
        if not patterns:
            continue

        # Join the patterns into a single regex.
        # Lookarounds are used instead of \b because \b needs a word character
        # next to it: a pattern ending in punctuation, such as "How are you?",
        # could never match at the end of the input with \b. For patterns that
        # start and end with a word character the two forms are equivalent.
        regex_pattern = r"(?<!\w)(" + "|".join(map(re.escape, patterns)) + r")(?!\w)"
        regex2int[regex_pattern] = tag

    return regex2int


def build_int2res(list_dict_intents):
    """Map every intent tag to its list of candidate responses.

    Parameters
    ----------
    list_dict_intents : list of dict
        Intent records, each with a ``"tag"`` and a ``"responses"`` entry.

    Returns
    -------
    dict
        ``{tag: responses}``; when a tag appears more than once, the last
        record's responses are kept.
    """
    return {entry["tag"]: entry["responses"] for entry in list_dict_intents}


In [5]:
# Lookup tables read by the chatbot at module level:
# regex pattern -> intent tag, and intent tag -> candidate responses.
regex2int = build_regex2int(list_dict_intents)
int2res = build_int2res(list_dict_intents)

In [6]:
# Display the catalog loaded above (the notebook renders a truncated preview).
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies","On a photo shoot in Ghana, an American model s..."
1,s9,TV Show,The Great British Baking Show,Andy Devonshire,"Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho...",United Kingdom,"September 24, 2021",2021,TV-14,9 Seasons,"British TV Shows, Reality TV",A talented batch of amateur bakers face off in...
2,s10,Movie,The Starling,Theodore Melfi,"Melissa McCarthy, Chris O'Dowd, Kevin Kline, T...",United States,"September 24, 2021",2021,PG-13,104 min,"Comedies, Dramas",A woman adjusting to life after a loss contend...
3,s13,Movie,Je Suis Karl,Christian Schwochow,"Luna Wedler, Jannis Niewöhner, Milan Peschel, ...","Germany, Czech Republic","September 23, 2021",2021,TV-MA,127 min,"Dramas, International Movies",After most of her family is murdered in a terr...
4,s25,Movie,Jeans,S. Shankar,"Prashanth, Aishwarya Rai Bachchan, Sri Lakshmi...",India,"September 21, 2021",1998,TV-14,166 min,"Comedies, International Movies, Romantic Movies",When the father of the man she loves insists t...
...,...,...,...,...,...,...,...,...,...,...,...,...
5327,s8802,Movie,Zinzana,Majid Al Ansari,"Ali Suliman, Saleh Bakri, Yasa, Ali Al-Jabri, ...","United Arab Emirates, Jordan","March 9, 2016",2015,TV-MA,96 min,"Dramas, International Movies, Thrillers",Recovering alcoholic Talal wakes up inside a s...
5328,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
5329,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
5330,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [7]:
def select_movie(df, user_state):
    """Filter the titles catalog according to the preferences in ``user_state``.

    Parameters
    ----------
    df : pandas.DataFrame or None
        Catalog with at least the columns ``type``, ``country``, ``duration``
        and ``release_year``. When ``None``, the catalog is read from the CSV
        path hard-coded below, so callers that pass ``df=None`` keep working.
    user_state : dict
        Search preferences. A missing or ``None`` entry disables its filter:

        * ``content_type``: ``"movie"`` or ``"tv show"``.
        * ``after_before_year`` / ``year``: ``"after"`` or ``"before"`` plus
          the reference year (strict comparison).
        * ``country``: text looked up, case-insensitively and literally,
          inside the ``country`` column.
        * ``duration``: ``"<n> minutes"`` or ``"<n> hours"`` for movies (a bare
          number is read as hours), ``"<n> seasons"`` for TV shows; interpreted
          as an upper limit.

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows; the input frame is never modified.
        ``type`` and ``duration`` are cast to ``str``, ``release_year`` to a
        numeric dtype, and rows without a usable release year are dropped.
        When a movie duration limit is applied, the result also carries a
        ``duration_minutes`` column.

    Raises
    ------
    ValueError
        If ``duration`` does not start with an integer.
    """
    if df is None:
        # NOTE(review): machine-specific absolute path, kept only as a fallback.
        df = pd.read_csv(r"D:\Bruno Serra PC\Documents\Bruno\GitHub\chat_bot\mid\netflix_titles.csv")

    # Work on a copy so the caller's frame (and the notebook's displayed `df`)
    # is left untouched.
    catalog = df.copy()
    catalog["type"] = catalog["type"].astype(str)
    catalog["duration"] = catalog["duration"].astype(str)
    catalog["release_year"] = pd.to_numeric(catalog["release_year"], errors="coerce")
    catalog = catalog.dropna(subset=["release_year"])

    content_type = user_state.get("content_type")
    if content_type == "movie":
        catalog = catalog[catalog["type"] == "Movie"]
    elif content_type == "tv show":
        catalog = catalog[catalog["type"] == "TV Show"]

    # Skip the year filter when no year was captured; comparing against None
    # would raise a TypeError.
    year = user_state.get("year")
    direction = user_state.get("after_before_year")
    if year is not None:
        if direction == "after":
            catalog = catalog[catalog["release_year"] > year]
        elif direction == "before":
            catalog = catalog[catalog["release_year"] < year]

    country = user_state.get("country")
    if country is not None:
        wanted = str(country).strip().lower()
        # Fill missing countries BEFORE the string cast; casting first turns
        # NaN into the text "nan", which could then match user input.
        known_countries = catalog["country"].fillna("").astype(str).str.lower()
        # regex=False: user text is matched literally, so input such as "c++"
        # or "(" cannot raise re.error or act as a pattern.
        catalog = catalog[known_countries.str.contains(wanted, regex=False)]

    duration = user_state.get("duration")
    if duration is not None and content_type in ("movie", "tv show"):
        tokens = str(duration).split()
        limit = int(tokens[0])
        unit = tokens[1].lower() if len(tokens) > 1 else ""

        # Leading number of the catalog's duration text ("125 min", "9 Seasons").
        amounts = (
            catalog["duration"]
            .str.extract(r"(\d+)", expand=False)
            .astype(float)
        )

        if content_type == "movie":
            # Only convert when the limit is not already in minutes; multiplying
            # every value by 60 would turn "90 minutes" into 5400 minutes.
            max_minutes = limit if unit.startswith("min") else limit * 60
            # assign() returns a new frame, avoiding a write into a filtered slice.
            catalog = catalog.assign(duration_minutes=amounts)
            catalog = catalog[catalog["duration_minutes"] <= max_minutes]
        else:
            # TV shows: the limit is a maximum number of seasons.
            catalog = catalog[amounts <= limit]

    return catalog



In [8]:
# Example query: movies from Chile released after 2000 with a "2 hours" duration limit.
user_state = {'content_type': 'movie', 'duration': '2 hours', 'year': 2000, 'after_before_year': 'after', 'country': "chile"}

# Run the filter for this query and display the matching titles.
select_movie(df, user_state)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,duration_minutes
609,s1262,Movie,Fuga,Pablo Larraín,"Benjamín Vicuña, Gastón Pauls, Alfredo Castro,...","Argentina, Chile","February 26, 2021",2006,TV-MA,112 min,"Dramas, Independent Movies, International Movies",A struggling music student discovers and tries...,112.0
613,s1267,Movie,Tony Manero,Pablo Larraín,"Alfredo Castro, Paola Lattus, Héctor Morales, ...","Chile, Brazil","February 26, 2021",2008,TV-MA,97 min,"Dramas, Independent Movies, International Movies",Amid the violence of Pinochet's reign in 1978 ...,97.0
1238,s2345,Movie,Nobody Knows I'm Here,Gaspar Antillo,"Jorge García, Millaray Lobos, Luis Gnecco, Ale...",Chile,"June 24, 2020",2020,TV-MA,92 min,"Dramas, Independent Movies, International Movies","A former child singer, traumatized by his expe...",92.0
1332,s2486,Movie,Mujeres arriba,Andrés Feddersen,"Loretto Bernal, Natalia Valdebenito, Alison Ma...",Chile,"May 21, 2020",2019,TV-14,98 min,"Comedies, International Movies, Romantic Movies",A trio of best friends — a carefree bacheloret...,98.0
1793,s3263,Movie,The Green Inferno,Eli Roth,"Ignacia Allamand, Daryl Sabara, Nicolás Martín...","United States, Chile","November 16, 2019",2013,R,101 min,"Horror Movies, Independent Movies",Determined to save an Amazon tribe being squee...,101.0
1931,s3550,Movie,Mi amigo Alexis,Alejandro Fernández Almendras,"Luciano González, Alexis Sánchez, Daniel Muñoz...",Chile,"August 30, 2019",2019,TV-PG,102 min,"Children & Family Movies, Comedies, Dramas",When a young soccer fan and his idol forge an ...,102.0
2056,s3833,Movie,Dry Martina,Che Sandoval,"Antonella Costa, Patricio Contreras, Geraldine...","Chile, Argentina","May 11, 2019",2018,TV-MA,100 min,"Dramas, Independent Movies, International Movies",An odd encounter with a fan and a tryst with t...,100.0
2620,s4809,Movie,Jani Dueñas: Grandes fracasos de ayer y hoy,"Raúl Campos, Jan Suter",Jani Dueñas,Chile,"June 29, 2018",2018,TV-MA,77 min,Stand-Up Comedy,Chilean comedian and personality Jani Dueñas p...,77.0
2633,s4831,Movie,The last hour,Eduardo Mendoza de Echave,"Pietro Sibille, Nidia Bermejo, Toño Vega, Tomm...","Chile, Peru","June 15, 2018",2017,TV-MA,119 min,"Dramas, International Movies","In this compelling true story, two Peruvian de...",119.0
2738,s5003,Movie,Natalia Valdebenito: El especial,"Raúl Campos, Jan Suter",Natalia Valdebenito,Chile,"March 2, 2018",2018,TV-MA,69 min,Stand-Up Comedy,A Chilean comedian fuses activism with irrever...,69.0


In [None]:
# Function to get user input and respond
def chatbot():
    """Run the interactive Muvie session on standard input/output.

    The conversation collects, in order: content type (movie / TV show),
    maximum duration, a release-year constraint (``after <year>`` or
    ``before <year>``) and a country. It then calls ``select_movie`` and
    prints up to five randomly sampled matches. After every search, whether
    or not it found anything, the conversation state is cleared and the first
    question is asked again, so a new search can start right away.

    Typing ``bye`` ends the session; ``restart`` or ``reset`` clears the
    state at any point.

    Relies on the module-level names ``int2res``, ``regex2int`` and
    ``select_movie``. Returns ``None``.
    """

    def new_user_state():
        """Return an empty set of search preferences."""
        return {
            "content_type": None,
            "duration": None,
            "year": None,
            "after_before_year": None,
            "country": None
        }

    # User welcome and termination instruction.
    print("Chatbot: Welcome!")
    print("Chatbot: My name is Muvie and I'm here to help you select a movie or TV show.")
    print("Chatbot: If you want to end this session, just type 'bye'.")
    print("Chatbot: Do you want to watch a movie or a TV show?")

    # History of raw inputs for the current conversation (recorded only).
    user_input_queue = []

    # State variables
    content_type = None
    duration_selected = None
    expecting_duration = False
    expecting_year = False
    expecting_country = False

    # User state (used later to filter the dataset)
    user_state = new_user_state()

    def start_over():
        """Clear all conversation state and ask the opening question again."""
        nonlocal content_type, duration_selected
        nonlocal expecting_duration, expecting_year, expecting_country
        nonlocal user_state, user_input_queue

        content_type = None
        duration_selected = None
        expecting_duration = False
        expecting_year = False
        expecting_country = False
        user_state = new_user_state()
        user_input_queue = []

        print("Chatbot: Do you want to watch a movie or a TV show?")

    # Main loop:
    while True:

        # Get user input
        user_input = input("You: ")

        # Append user input into a list.
        user_input_queue.append(user_input)

        lowered = user_input.lower()
        # Commands tolerate surrounding whitespace (" bye " still exits).
        command = lowered.strip()

        # Check if the user wants to exit
        if command == "bye":
            print("Chatbot: Thank you for your interaction.")
            break

        # Restart conversation
        if command in ["restart", "reset"]:
            print("Chatbot: Ok, lets restart.")
            start_over()
            continue

        # Handle year input (after/before)
        if expecting_year:
            year_match = re.search(r"\b(after|before)\s+(\d{4})\b", lowered)

            if year_match:
                user_state["after_before_year"] = year_match.group(1)
                user_state["year"] = int(year_match.group(2))

                print(
                    f"Chatbot: Got it. I'll search for titles {user_state['after_before_year']} {user_state['year']}."
                )

                expecting_year = False
                expecting_country = True
                print("Chatbot: Finally, tell me the country you would like the movie or TV show")
                continue
            else:
                print("Chatbot: Please type `after <year>` or `before <year>` (e.g., after 2015).")
                continue

        # Handle country input
        if expecting_country:
            user_state["country"] = user_input.strip()

            var_output = random.choice(int2res["country"])
            print("Chatbot:", var_output)

            # Call the recommendation function
            df_result = select_movie(df=None, user_state=user_state)

            if df_result.empty:
                print("Chatbot: Sorry, I couldn't find any titles with these characteristics.")

                print("Chatbot: Let's start over again.")
                start_over()
                continue

            # show up to 5 results
            recommendations = df_result.sample(min(5, len(df_result)))

            # Announce the number actually shown (there may be fewer than 5).
            shown = len(recommendations)
            plural = "s" if shown != 1 else ""
            print(f"Chatbot: Here are {shown} recommendation{plural} for you:")

            for _, row in recommendations.iterrows():
                print(
                    f"- {row['title']}, released in {row['release_year']}, "
                    f"from {row['country']} with a duration of {row['duration']}."
                )

            # The search is finished: clear the state so a new one can begin.
            # Without this, duration_selected stays set and a second search can
            # never capture a duration again.
            start_over()
            continue

        # Detect content type from user input
        # The "r" prefix makes a raw string, so backslashes are not treated as escapes.
        # \b represents a word boundary.
        # Therefore \bmovie\b means that "movie" must be a word by itself.
        if re.search(r"\bmovie\b", lowered):
            content_type = "movie"
            user_state["content_type"] = "movie"

        elif re.search(r"\btv show\b|\bseries\b", lowered):
            content_type = "tv show"
            user_state["content_type"] = "tv show"

        # Extract numeric duration
        # \b is the word boundary.
        # \d+ = \d (any digit [0-9]), + (one or more).
        number_match = re.search(r"\b\d+\b", lowered)

        # Only treat numbers as duration if we are explicitly expecting it
        if (
            number_match
            and content_type is not None
            and expecting_duration
            and duration_selected is None
        ):
            value = number_match.group()

            # Movie logic: check explicit unit
            if content_type == "movie":
                if "minute" in lowered:
                    duration_selected = f"{value} minutes"
                    user_state["duration"] = duration_selected
                elif "hour" in lowered:
                    duration_selected = f"{value} hours"
                    user_state["duration"] = duration_selected
                else:
                    # fallback when no unit is specified.
                    print("Chatbot: Please tell me if the duration is in hours or minutes.")
                    continue

            # TV show logic: always seasons
            elif content_type == "tv show":
                duration_selected = f"{value} seasons"
                user_state["duration"] = duration_selected

            # Respond immediately using duration intent
            var_output = random.choice(int2res["duration"])

            # Handle duration_selected placeholder
            if "duration_selected" in var_output:
                var_output = var_output.replace("duration_selected", duration_selected)

            print("Chatbot:", var_output)

            # Duration successfully captured
            expecting_duration = False
            expecting_year = True
            continue

        # Search for a pattern in the intents
        response_found = False

        # Iterate over regex -> tag mapping
        for regex, tag in regex2int.items():

            # Check if user input matches the regex
            if re.search(regex, user_input, re.IGNORECASE):

                var_output = random.choice(int2res[tag])

                # Handle content_type placeholder
                if "content_type" in var_output and content_type is not None:
                    var_output = var_output.replace("content_type", content_type)

                # Handle duration_or_seasons placeholder
                if "duration_or_seasons" in var_output and content_type is not None:
                    if content_type == "movie":
                        var_output = var_output.replace("duration_or_seasons", "duration")
                    elif content_type == "tv show":
                        var_output = var_output.replace("duration_or_seasons", "seasons")

                    # chatbot is now waiting for duration input
                    expecting_duration = True

                print("Chatbot:", var_output)

                response_found = True
                break

        if not response_found:
            print("Chatbot: Sorry, could you rephrase that again?")


# Run the chatbot: starts an interactive session that reads from input()
# until the user types 'bye'.
chatbot()

Chatbot: Welcome!
Chatbot: My name is Muvie and I'm here to help you select a movie or TV show. If you want to end this session, just type 'bye'.
Chatbot: Do you want to watch a movie or a TV show?
Chatbot: Great. You chose movie. What is the max duration you expect to watch? REMEMBER to tell me if you are talking about hours or minutes!
{'content_type': 'movie', 'duration': None, 'year': None, 'after_before_year': None, 'country': None}
Chatbot: Ok, lets restart.
Chatbot: Do you want to watch a movie or a TV show?
Chatbot: Great. You chose movie. What is the max duration you expect to watch? REMEMBER to tell me if you are talking about hours or minutes!
{'content_type': 'movie', 'duration': None, 'year': None, 'after_before_year': None, 'country': None}
Chatbot: Thank you for your interaction.
