In [None]:
from langchain.agents.agent_types import AgentType

from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

import pandas as pd
import re
import ast
import numpy as np
df = pd.read_csv('MovieSummaries/plot_summaries.txt',
                 sep="	", header = None)
df.columns = ['Wikipedia_movie_ID','summary']


meta_data = pd.read_csv('MovieSummaries/movie.metadata.tsv' ,
                        sep='\t', header = None)
meta_data.columns = ['Wikipedia_movie_ID', 'Freebase_movie_ID', 'movie_name',
                     'relase_date','box_office_revenue','language','countries','country', 'genres']
# join meta_data and df on Wikipedia_movie_ID
df = pd.merge(df, meta_data[['Wikipedia_movie_ID','relase_date', 'movie_name', 'genres']],
              on = 'Wikipedia_movie_ID')
actors_info =  pd.read_csv('MovieSummaries//character.metadata.tsv' ,
                           sep='\t', header = None)
actors_info.columns = ["Wikipedia_movie_ID", "Freebase Movie ID","Release Date",
                       "Character Name", "Actor DOB", "Actor gender", "Actor height",
                       "Actor ethnicity", "Actor Name", "Actor age at movie release", "Freebase character map",
                       "Temp1", "Temp2"]
df = pd.merge(df, # join with the list of the actor names of the movie
         actors_info.groupby('Wikipedia_movie_ID')['Actor Name'].agg(list).reset_index()
          , how='left', on='Wikipedia_movie_ID')

def extract_genres(genres):
    genres = list(ast.literal_eval(genres).values())
    genres = [re.split('/|&', genre) for genre in genres]
    flattened_genres = [item.strip() for sublist in genres for item in sublist]
    return list(set(flattened_genres))

df['genres'] = df['genres'].apply(extract_genres)

def expand_summary(summary, movie_name, genres, actors_names):
    str_genres = " the movie has the genres of " + ', '.join(genres)
    str_movie_name = 'the movie ' + movie_name + ' is a movie '

    if isinstance(actors_names, float) and pd.isna(actors_names):
        return str_movie_name+ 'in which ' + summary + str_genres
    else:
        actors_str = ''
        if isinstance(actors_names, list):
            valid_actors = [str(name) for name in actors_names if not pd.isna(name)]
            actors_str = ' of the actors ' + ', '.join(valid_actors) + ' in which '

        return str_movie_name + actors_str.lower() + summary + str_genres


df['expanded_summary'] = df.apply(lambda x: expand_summary(x['summary'],
                                                           x['movie_name'],
                                                           x['genres'],
                                                           x['Actor Name']),axis=1)



from langchain_community.llms import Ollama


llm = Ollama(model='llama3')





import os
import glob
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
import ollama
import time
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.embeddings import OllamaEmbeddings
import re

class RAGSystem:
    def __init__(self):
        self.vector_store = None
        self.chat_history = [{'role': 'system', 'content': "Ollama model"}]

    def get_vector_store(self, text_chunks: list):
        embed = OllamaEmbeddings(model='nomic-embed-text')
        return FAISS.from_texts(text_chunks, embedding=embed)

    def initialize_rag(self,df):
        text_chunks = df[:10000]['expanded_summary'].tolist()
        self.vector_store = self.get_vector_store(text_chunks)
        return self.vector_store

rag_system = RAGSystem()
vectorstore = rag_system.initialize_rag(df)
vectorstore.index.ntotal

from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
llm = Ollama(model="llama3-chatqa", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Act like a movie recommender with the information you have
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

from langchain.chains import RetrievalQA


qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )

query = "can you suggest a good thriller movie with little bit of description about the movie"
result = qa_chain({"query": query})



In [35]:
result

{'query': 'can you suggest a good thriller movie with little bit of description about the movie',
 'result': 'definitely! i would recommend watching "The Caller" (2014) by Matthew Bogart and Jayson Krause. Here is a short summary of the plot: The story starts out in a small town where Min Soo-Ah, who has the disability of not being able to see, works as a detective. One day when she was on her way home from visiting the orphanage she had grown up in, she got into an argument with another driver and was stranded by the side of the road. A stranger came out of nowhere and offered her a ride back.'}

In [37]:
vectorstore.save_local("faiss_index")

In [44]:
df = pd.read_csv('Downloads/MovieSummaries/plot_summaries.txt',
                 sep="	", header = None)

df.columns = ['Wikipedia_movie_ID','summary']
df.head()

Unnamed: 0,Wikipedia_movie_ID,summary
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha..."
1,31186339,The nation of Panem consists of a wealthy Capi...
2,20663735,Poovalli Induchoodan is sentenced for six yea...
3,2231378,"The Lemon Drop Kid , a New York City swindler,..."
4,595909,Seventh-day Adventist Church pastor Michael Ch...


In [45]:
meta_data = pd.read_csv('Downloads/MovieSummaries/movie.metadata.tsv' ,
                        sep='\t', header = None)
meta_data.columns = ['Wikipedia_movie_ID', 'Freebase_movie_ID', 'movie_name',
                     'relase_date','box_office_revenue','language','countries','country', 'genres']

meta_data.head()

Unnamed: 0,Wikipedia_movie_ID,Freebase_movie_ID,movie_name,relase_date,box_office_revenue,language,countries,country,genres
0,975900,/m/03vyhn,Ghosts of Mars,2001-08-24,14010832.0,98.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/01jfsb"": ""Thriller"", ""/m/06n90"": ""Science..."
1,3196793,/m/08yl5d,Getting Away with Murder: The JonBenét Ramsey ...,2000-02-16,,95.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02n4kr"": ""Mystery"", ""/m/03bxz7"": ""Biograp..."
2,28463795,/m/0crgdbh,Brun bitter,1988,,83.0,"{""/m/05f_3"": ""Norwegian Language""}","{""/m/05b4w"": ""Norway""}","{""/m/0lsxr"": ""Crime Fiction"", ""/m/07s9rl0"": ""D..."
3,9363483,/m/0285_cd,White Of The Eye,1987,,110.0,"{""/m/02h40lc"": ""English Language""}","{""/m/07ssc"": ""United Kingdom""}","{""/m/01jfsb"": ""Thriller"", ""/m/0glj9q"": ""Erotic..."
4,261236,/m/01mrr1,A Woman in Flames,1983,,106.0,"{""/m/04306rv"": ""German Language""}","{""/m/0345h"": ""Germany""}","{""/m/07s9rl0"": ""Drama""}"


In [46]:
# join meta_data and df on Wikipedia_movie_ID
df = pd.merge(df, meta_data[['Wikipedia_movie_ID','relase_date', 'movie_name', 'genres']],
              on = 'Wikipedia_movie_ID')

df.head()

Unnamed: 0,Wikipedia_movie_ID,summary,relase_date,movie_name,genres
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha...",1990-09-07,Taxi Blues,"{""/m/07s9rl0"": ""Drama"", ""/m/03q4nz"": ""World ci..."
1,31186339,The nation of Panem consists of a wealthy Capi...,2012-03-12,The Hunger Games,"{""/m/03btsm8"": ""Action/Adventure"", ""/m/06n90"":..."
2,20663735,Poovalli Induchoodan is sentenced for six yea...,2000,Narasimham,"{""/m/04t36"": ""Musical"", ""/m/02kdv5l"": ""Action""..."
3,2231378,"The Lemon Drop Kid , a New York City swindler,...",1951-03-08,The Lemon Drop Kid,"{""/m/06qm3"": ""Screwball comedy"", ""/m/01z4y"": ""..."
4,595909,Seventh-day Adventist Church pastor Michael Ch...,1988-11-03,A Cry in the Dark,"{""/m/0lsxr"": ""Crime Fiction"", ""/m/07s9rl0"": ""D..."


In [47]:
actors_info =  pd.read_csv('Downloads/MovieSummaries//character.metadata.tsv' ,
                           sep='\t', header = None)
actors_info.columns = ["Wikipedia_movie_ID", "Freebase Movie ID","Release Date",
                       "Character Name", "Actor DOB", "Actor gender", "Actor height",
                       "Actor ethnicity", "Actor Name", "Actor age at movie release", "Freebase character map",
                       "Temp1", "Temp2"]

actors_info.head()

Unnamed: 0,Wikipedia_movie_ID,Freebase Movie ID,Release Date,Character Name,Actor DOB,Actor gender,Actor height,Actor ethnicity,Actor Name,Actor age at movie release,Freebase character map,Temp1,Temp2
0,975900,/m/03vyhn,2001-08-24,Akooshay,1958-08-26,F,1.62,,Wanda De Jesus,42.0,/m/0bgchxw,/m/0bgcj3x,/m/03wcfv7
1,975900,/m/03vyhn,2001-08-24,Lieutenant Melanie Ballard,1974-08-15,F,1.78,/m/044038p,Natasha Henstridge,27.0,/m/0jys3m,/m/0bgchn4,/m/0346l4
2,975900,/m/03vyhn,2001-08-24,Desolation Williams,1969-06-15,M,1.727,/m/0x67,Ice Cube,32.0,/m/0jys3g,/m/0bgchn_,/m/01vw26l
3,975900,/m/03vyhn,2001-08-24,Sgt Jericho Butler,1967-09-12,M,1.75,,Jason Statham,33.0,/m/02vchl6,/m/0bgchnq,/m/034hyc
4,975900,/m/03vyhn,2001-08-24,Bashira Kincaid,1977-09-25,F,1.65,,Clea DuVall,23.0,/m/02vbb3r,/m/0bgchp9,/m/01y9xg


In [48]:
df = pd.merge(df, # join with the list of the actor names of the movie
         actors_info.groupby('Wikipedia_movie_ID')['Actor Name'].agg(list).reset_index()
          , how='left', on='Wikipedia_movie_ID')

df.head()

Unnamed: 0,Wikipedia_movie_ID,summary,relase_date,movie_name,genres,Actor Name
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha...",1990-09-07,Taxi Blues,"{""/m/07s9rl0"": ""Drama"", ""/m/03q4nz"": ""World ci...","[Natalia Koliakanova, Pyotr Mamonov, Hal Singe..."
1,31186339,The nation of Panem consists of a wealthy Capi...,2012-03-12,The Hunger Games,"{""/m/03btsm8"": ""Action/Adventure"", ""/m/06n90"":...","[Jacqueline Emerson, Jennifer Lawrence, Josh H..."
2,20663735,Poovalli Induchoodan is sentenced for six yea...,2000,Narasimham,"{""/m/04t36"": ""Musical"", ""/m/02kdv5l"": ""Action""...","[Thilakan, Sai Kumar, Kalabhavan Mani, nan, Bh..."
3,2231378,"The Lemon Drop Kid , a New York City swindler,...",1951-03-08,The Lemon Drop Kid,"{""/m/06qm3"": ""Screwball comedy"", ""/m/01z4y"": ""...","[Jane Darwell, Bob Hope, Marilyn Maxwell, Ann ..."
4,595909,Seventh-day Adventist Church pastor Michael Ch...,1988-11-03,A Cry in the Dark,"{""/m/0lsxr"": ""Crime Fiction"", ""/m/07s9rl0"": ""D...","[Frank Holden, Sam Neill, Meryl Streep, Deborr..."


In [49]:

def extract_genres(genres):
    genres = list(ast.literal_eval(genres).values())
    genres = [re.split('/|&', genre) for genre in genres]
    flattened_genres = [item.strip() for sublist in genres for item in sublist]
    return list(set(flattened_genres))

df['genres'] = df['genres'].apply(extract_genres)

def expand_summary(summary, movie_name, genres, actors_names):
    str_genres = " the movie has the genres of " + ', '.join(genres)
    str_movie_name = 'the movie ' + movie_name + ' is a movie '

    if isinstance(actors_names, float) and pd.isna(actors_names):
        return str_movie_name+ 'in which ' + summary + str_genres
    else:
        actors_str = ''
        if isinstance(actors_names, list):
            valid_actors = [str(name) for name in actors_names if not pd.isna(name)]
            actors_str = ' of the actors ' + ', '.join(valid_actors) + ' in which '

        return str_movie_name + actors_str.lower() + summary + str_genres


df['expanded_summary'] = df.apply(lambda x: expand_summary(x['summary'],
                                                           x['movie_name'],
                                                           x['genres'],
                                                           x['Actor Name']),axis=1)

In [50]:
df

Unnamed: 0,Wikipedia_movie_ID,summary,relase_date,movie_name,genres,Actor Name,expanded_summary
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha...",1990-09-07,Taxi Blues,"[Drama, World cinema]","[Natalia Koliakanova, Pyotr Mamonov, Hal Singe...",the movie Taxi Blues is a movie of the actors...
1,31186339,The nation of Panem consists of a wealthy Capi...,2012-03-12,The Hunger Games,"[Adventure, Action, Drama, Science Fiction]","[Jacqueline Emerson, Jennifer Lawrence, Josh H...",the movie The Hunger Games is a movie of the ...
2,20663735,Poovalli Induchoodan is sentenced for six yea...,2000,Narasimham,"[Drama, Action, Musical, Bollywood]","[Thilakan, Sai Kumar, Kalabhavan Mani, nan, Bh...",the movie Narasimham is a movie of the actors...
3,2231378,"The Lemon Drop Kid , a New York City swindler,...",1951-03-08,The Lemon Drop Kid,"[Screwball comedy, Comedy]","[Jane Darwell, Bob Hope, Marilyn Maxwell, Ann ...",the movie The Lemon Drop Kid is a movie of th...
4,595909,Seventh-day Adventist Church pastor Michael Ch...,1988-11-03,A Cry in the Dark,"[Drama, Crime Fiction, Courtroom Drama, World ...","[Frank Holden, Sam Neill, Meryl Streep, Deborr...",the movie A Cry in the Dark is a movie of the...
...,...,...,...,...,...,...,...
42199,34808485,"The story is about Reema , a young Muslim scho...",2012-02-17,Oomakkuyil Padumbol,[Children's],[Shankar],the movie Oomakkuyil Padumbol is a movie of t...
42200,1096473,"In 1928 Hollywood, director Leo Andreyev look...",1928,The Last Command,"[Drama, Period piece, Political drama, Black-a...","[Emil Jannings, Evelyn Brent, William Powell, ...",the movie The Last Command is a movie of the ...
42201,35102018,American Luthier focuses on Randy Parsons’ tra...,2011-10-04,Randy Parsons: American Luthier,"[Documentary, Biographical film, Music, Short ...",,the movie Randy Parsons: American Luthier is a...
42202,8628195,"Abdur Rehman Khan , a middle-aged dry fruit se...",1961-12-14,Kabuliwala,[Drama],"[Laxmi, Balraj Sahni, Sonu, Usha Kiran, Padma,...",the movie Kabuliwala is a movie of the actors...
