# This notebook takes the column names and values mentioned in a natural-language question,
# uses OpenAI models to prepare the corresponding SQL query,
# runs the prepared query against the database,
# and converts the response back into a natural-language description.

# Installing dependencies

In [1]:
!pip install langchain langchain-experimental openai pymysql



# Importing Necessary Packages

In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

from langchain.utilities import SQLDatabase
from langchain.llms import OpenAI
from langchain_experimental.sql import SQLDatabaseChain
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

In [2]:
# Read the key from the environment so it is never saved with the notebook;
# fall back to an interactive, non-echoing prompt when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
# temperature=0 asks the model for its least random output.
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)

  llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)


# Database Setup

In [3]:
def retrieve_from_db(query: str) -> str:
    """Run a natural-language question through the SQL chain and return its answer.

    Relies on the notebook-level ``db_chain``, which must be defined before
    this function is called.

    Args:
        query: The user's question in plain English.

    Returns:
        The chain's ``result`` text with surrounding whitespace stripped.
    """
    # Use ``invoke`` rather than calling the chain object directly; the direct
    # call is deprecated in LangChain. "query" is SQLDatabaseChain's input key.
    chain_output = db_chain.invoke({"query": query})
    return chain_output["result"].strip()

In [7]:
# Connection settings come from the environment (falling back to the original
# local defaults) so that no credential is stored in the notebook.
host = os.environ.get("MYSQL_HOST", "localhost")
port = os.environ.get("MYSQL_PORT", "3306")
username = os.environ.get("MYSQL_USER", "root")
password = os.environ.get("MYSQL_PASSWORD", "")
database_schema = os.environ.get("MYSQL_DATABASE", "mydb")

# URL-quote the credentials so characters such as "@" or ":" cannot break the
# URI. The password part is omitted entirely when none is configured.
credentials = quote_plus(username)
if password:
    credentials += f":{quote_plus(password)}"
mysql_uri = f"mysql+pymysql://{credentials}@{host}:{port}/{database_schema}"

# Expose only the movies table to the chain and include two sample rows in the
# table description.
db = SQLDatabase.from_uri(mysql_uri, include_tables=['telugumovies_dataset'],sample_rows_in_table_info=2)
# verbose=True prints the generated SQL and the raw SQL result for each question.
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [8]:
def generate(query: str) -> str:
    """Answer a question about the movie table in natural language.

    The question is first sent through ``retrieve_from_db`` to obtain the
    database-backed answer, which is then passed to the chat model as context
    together with the original question.

    Args:
        query: The user's question in plain English.

    Returns:
        The text content of the chat model's reply.
    """
    db_context = retrieve_from_db(query)

    system_message = """You are a movie database expert specializing in Telugu cinema.
        Your task is to answer users' questions by providing relevant information from a database of Telugu movies.
        The database contains the following information for each movie:
        - Name of the movie
        - Year of release
        - Certificate given by the censor board
        - Movie genre
        - A brief description or plot of the movie
        - Duration in minutes
        - IMDb Rating
        - Number of people who rated the movie
        
        Example:
        
        Input:
        Which Telugu movies released in 2021 have the highest IMDb ratings?
        
        Context:
        The Telugu movies released in 2021 with the highest IMDb ratings are:
        1. Movie A - IMDb Rating: 8.5
        2. Movie B - IMDb Rating: 8.3
        
        Output:
        The highest-rated Telugu movies released in 2021 are Movie A (IMDb Rating: 8.5) and Movie B (IMDb Rating: 8.3).
        """

    human_qry_template = HumanMessagePromptTemplate.from_template(
        """Input:
        {human_input}
        
        Context:
        {db_context}
        
        Output:
        """
    )
    messages = [
      SystemMessage(content=system_message),
      human_qry_template.format(human_input=query, db_context=db_context)
    ]
    # Use ``invoke`` rather than calling the model object directly; the direct
    # call is deprecated in LangChain.
    response = llm.invoke(messages).content
    return response

# Ask the queries

In [9]:
# Basic aggregate question; in the recorded run the chain answers it with a COUNT query.
generate("How many movies are there?")

  db_context = db_chain(query)




[1m> Entering new SQLDatabaseChain chain...[0m
How many movies are there?
SQLQuery:[32;1m[1;3mSELECT COUNT(`ID`) AS TotalMovies FROM telugumovies_dataset;[0m
SQLResult: [33;1m[1;3m[(1404,)][0m
Answer:[32;1m[1;3mThere are 1404 movies in the dataset.[0m
[1m> Finished chain.[0m


  response = llm(messages).content


'There are 1404 movies in the dataset.'

In [10]:
# Filtered count; in the recorded run the generated SQL matches the genre with LIKE '%Action%'.
generate("How many movies are there with Action Genre?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many movies are there with Action Genre?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) AS TotalMovies
FROM telugumovies_dataset
WHERE `Genre` LIKE '%Action%';[0m
SQLResult: [33;1m[1;3m[(634,)][0m
Answer:[32;1m[1;3mThere are 634 movies with the Action genre.[0m
[1m> Finished chain.[0m


'There are 634 movies with the Action genre.'

In [11]:
# Count filtered by release year; the recorded SQL compares Year to the string '2015'.
generate("How many movies in year 2015?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many movies in year 2015?
SQLQuery:[32;1m[1;3mSELECT COUNT(`ID`) AS `TotalMovies2015` FROM telugumovies_dataset WHERE `Year` = '2015';[0m
SQLResult: [33;1m[1;3m[(61,)][0m
Answer:[32;1m[1;3mTotal number of movies in the year 2015 is 61.[0m
[1m> Finished chain.[0m


'There were a total of 61 movies released in the year 2015.'

In [12]:
# NOTE(review): unbounded result set. In the recorded run the printed SQLResult is
# cut off and the final answer names only a few titles ("and many more").
generate("which movies having UA certificate?")



[1m> Entering new SQLDatabaseChain chain...[0m
which movies having UA certificate?
SQLQuery:[32;1m[1;3mSELECT `movie`, `Year`, `Certificate` 
FROM telugumovies_dataset 
WHERE `Certificate` = 'UA';[0m
SQLResult: [33;1m[1;3m[('Bahubali: The Beginning', '2015', 'UA'), ('Baahubali 2: The Conclusion', '2017', 'UA'), ('1 - Nenokkadine', '2014', 'UA'), ('Dhoom:3', '2013', 'UA'), ('Dhoom:2', '2006', 'UA'), ('Eega', '2012', 'UA'), ('Rangasthalam', '2018', 'UA'), ('War', '2019', 'UA'), ('Bharat Ane Nenu', '2018', 'UA'), ('Saaho', '2019', 'UA'), ('Dookudu', '2011', 'UA'), ('Sarkar', '2018', 'UA'), ('The Ghazi Attack', '2017', 'UA'), ('Srimanthudu', '2015', 'UA'), ('Veer - Vivegam', '2017', 'UA'), ('Manam', '', 'UA'), ('Bigil', '2019', 'UA'), ('Geetha Govindam', '2018', 'UA'), ('Spyder', '2017', 'UA'), ('Nannaku Prematho', '2016', 'UA'), ('Dabangg 3', '2019', 'UA'), ('Race Gurram', '2014', 'UA'), ('Khaleja', '2010', 'UA'), ('Yennai Arindhaal', '2015', 'UA'), ('Goodachari', '2018', 'UA'), 

'The Telugu movies with a \'UA\' certificate in the dataset include "Bahubali: The Beginning", "Baahubali 2: The Conclusion", "Rangasthalam", "Saaho", "Sye Raa Narasimha Reddy", and many more.'

In [13]:
# Single-row lookup; the recorded SQL uses ORDER BY Rating DESC LIMIT 1.
generate("which movie have the highest rating?")



[1m> Entering new SQLDatabaseChain chain...[0m
which movie have the highest rating?
SQLQuery:[32;1m[1;3mSELECT `movie`, `Rating` 
FROM telugumovies_dataset 
ORDER BY `Rating` DESC 
LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Amaran', Decimal('9.8'))][0m
Answer:[32;1m[1;3mThe movie with the highest rating is "Amaran" with a rating of 9.8.[0m
[1m> Finished chain.[0m


'The movie with the highest rating is "Amaran" with a rating of 9.8.'

In [14]:
# NOTE(review): asks for every title in the table. In the recorded run the SQLResult
# dump is truncated and the final answer lists only five names.
generate("give me all movies names ?")



[1m> Entering new SQLDatabaseChain chain...[0m
give me all movies names ?
SQLQuery:[32;1m[1;3mSELECT `movie` FROM telugumovies_dataset;[0m
SQLResult: [33;1m[1;3m[('Bahubali: The Beginning',), ('Baahubali 2: The Conclusion',), ('1 - Nenokkadine',), ('Dhoom:3',), ('Dhoom:2',), ('Eega',), ('Krrish 3',), ('Arjun Reddy',), ('Rangasthalam',), ('Magadheera',), ('War',), ('Bharat Ane Nenu',), ('Saaho',), ('Theri',), ('Dookudu',), ('Pokiri',), ('Sarkar',), ('Athadu',), ('The Ghazi Attack',), ('Kabali',), ('MSG: The Messenger of God',), ('Nanban',), ('Srimanthudu',), ('Veer - Vivegam',), ('Billa 2',), ('Manam',), ('7 Aum Arivu',), ('Bigil',), ('Business Man',), ('Geetha Govindam',), ('Mahanati',), ('Spyder',), ('Nannaku Prematho',), ('Dabangg 3',), ('MSG 2 the Messenger',), ('Manikarnika: The Queen of Jhansi',), ('Race Gurram',), ('Okkadu',), ('Bommarillu',), ('Atharintiki Daaredi',), ('Khaleja',), ('Yennai Arindhaal',), ('Thalaivaa',), ('Kaala',), ('Bairavaa',), ('Goodachari',), ('Puli

'The movies are:\n1. Bahubali: The Beginning\n2. Baahubali 2: The Conclusion\n3. 1 - Nenokkadine\n4. Dhoom:3\n5. Dhoom:2'

In [15]:
# NOTE(review): in the recorded run the chain added LIMIT 5 to the query, and the
# final answer mentions only one of the five returned titles.
generate("which movies comes under the thriller genre?")



[1m> Entering new SQLDatabaseChain chain...[0m
which movies comes under the thriller genre?
SQLQuery:[32;1m[1;3mSELECT `movie`, `Year`, `Rating`
FROM telugumovies_dataset
WHERE `Genre` LIKE '%Thriller%'
LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('1 - Nenokkadine', '2014', Decimal('8.1')), ('Dhoom:3', '2013', Decimal('5.4')), ('Dhoom:2', '2006', Decimal('6.5')), ('War', '2019', Decimal('6.5')), ('Saaho', '2019', Decimal('5.2'))][0m
Answer:[32;1m[1;3m1 - Nenokkadine, Dhoom:3, Dhoom:2, War, Saaho[0m
[1m> Finished chain.[0m


'The Telugu movie "1 - Nenokkadine" falls under the thriller genre.'

In [16]:
# NOTE(review): ambiguous question. In the recorded run the chain interpreted
# "best overview" as the overview of the top-rated movie.
generate("what is the best overview of the among all movies?")



[1m> Entering new SQLDatabaseChain chain...[0m
what is the best overview of the among all movies?
SQLQuery:[32;1m[1;3mSELECT `movie`, `Overview` 
FROM telugumovies_dataset 
ORDER BY `Rating` DESC 
LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Amaran', 'action-packed film inspired by the life of Major Mukund Varadarajan')][0m
Answer:[32;1m[1;3mThe best overview among all movies is "action-packed film inspired by the life of Major Mukund Varadarajan" from the movie "Amaran".[0m
[1m> Finished chain.[0m


'The best overview among all movies is "action-packed film inspired by the life of Major Mukund Varadarajan" from the movie "Amaran".'

In [20]:
# Schema question; in the recorded run it is answered via INFORMATION_SCHEMA.COLUMNS.
generate("how many columns are there and their names ?")



[1m> Entering new SQLDatabaseChain chain...[0m
how many columns are there and their names ?
SQLQuery:[32;1m[1;3mSELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'telugumovies_dataset';[0m
SQLResult: [33;1m[1;3m[('ID',), ('movie',), ('Year',), ('Certificate',), ('Genre',), ('Overview',), ('Runtime',), ('Rating',), ('No.of.Ratings',)][0m
Answer:[32;1m[1;3mThere are 9 columns in the "telugumovies_dataset" table. Their names are: ID, movie, Year, Certificate, Genre, Overview, Runtime, Rating, No.of.Ratings.[0m
[1m> Finished chain.[0m


'There are 9 columns in the "telugumovies_dataset" table. Their names are: ID, movie, Year, Certificate, Genre, Overview, Runtime, Rating, No.of.Ratings.'

In [21]:
# Row count phrased as "rows"; in the recorded run the chain answers with COUNT(*).
generate("how many rows are there in the given dataset ?")



[1m> Entering new SQLDatabaseChain chain...[0m
how many rows are there in the given dataset ?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) AS total_rows FROM telugumovies_dataset;[0m
SQLResult: [33;1m[1;3m[(1404,)][0m
Answer:[32;1m[1;3mThere are 1404 rows in the given dataset.[0m
[1m> Finished chain.[0m


'There are 1404 rows in the given dataset.'

In [None]:
# NOTE(review): in the recorded run the generated SQL is `LIMIT 1400, 1` with no
# ORDER BY, i.e. it skips 1400 rows and returns the next one in unspecified
# order -- not necessarily the row whose ID is 1400.
generate("In 1400 row which movie is there  and provide me  thier rating,genre,year,overview?")



[1m> Entering new SQLDatabaseChain chain...[0m
in 1400 row which movie is there  and provide me  thier rating,genre,year,overview?
SQLQuery:[32;1m[1;3mSELECT `movie`, `Rating`, `Genre`, `Year`, `Overview` 
FROM telugumovies_dataset
LIMIT 1400, 1;[0m
SQLResult: [33;1m[1;3m[('Hi Nanna', Decimal('9.0'), 'Action', '2024', 'action-packed film')][0m
Answer:[32;1m[1;3mMovie: Hi Nanna
Rating: 9.0
Genre: Action
Year: 2024
Overview: action-packed film[0m
[1m> Finished chain.[0m


'In row 1400, the movie "Hi Nanna" is listed with the following details:\n- Rating: 9.0\n- Genre: Action\n- Year: 2024\n- Overview: An action-packed film'

In [25]:
# Lookup by title; in the recorded run the "row" reported in the answer is the
# value of the ID column (411).
generate("life is beautiful movie present which row ?")



[1m> Entering new SQLDatabaseChain chain...[0m
life is beautiful movie present which row ?
SQLQuery:[32;1m[1;3mSELECT * 
FROM telugumovies_dataset 
WHERE movie = 'Life is Beautiful'[0m
SQLResult: [33;1m[1;3m[(411, 'Life Is Beautiful', '2012', 'U', 'Drama, Romance            ', '    It is a coming of age story of six youngsters, set in the beautiful nostalgic world of a working-middle class neighborhood. The film tracks their journey through the different seasons, ...                See full summary\xa0»', 167, Decimal('6.4'), 671)][0m
Answer:[32;1m[1;3mThe movie "Life is Beautiful" is present in the row with ID 411 in the telugumovies_dataset table.[0m
[1m> Finished chain.[0m


'The movie "Life is Beautiful" is present in row 411 of the telugumovies_dataset table.'

In [30]:
# NOTE(review): vague question. In the recorded run the chain ran
# SELECT DISTINCT Certificate, Genre and the final answer is not meaningful.
generate("what is the same thing among all movies ?")



[1m> Entering new SQLDatabaseChain chain...[0m
what is the same thing among all movies ?
SQLQuery:[32;1m[1;3mSELECT DISTINCT `Certificate`, `Genre` FROM telugumovies_dataset;[0m
SQLResult: [33;1m[1;3m[('UA', 'Action, Drama            '), ('UA', 'Action, Thriller            '), ('UA', 'Action, Fantasy            '), ('U', 'Action, Sci-Fi            '), ('A', 'Action, Drama, Romance            '), ('A', 'Action, Drama, Fantasy            '), ('U', 'Action, Drama            '), ('UA', 'Action, Comedy            '), ('A', 'Action, Thriller            '), ('U', 'Action, Thriller            '), ('UA', 'Action, Thriller, War            '), ('U', 'Action, Crime, Drama            '), ('U', 'Action, Comedy, Drama            '), ('U', 'Comedy, Drama            '), ('UA', 'Comedy, Drama, Fantasy            '), ('U', 'Action, Sci-Fi, Thriller            '), ('UA', 'Action, Drama, Sport            '), ('A', 'Action, Crime            '), ('UA', 'Drama, Romance            '), ('U', 'Biography

'The common thing among all movies is the combination of `Certificate` and `Genre`.'

In [None]:
# Lookup by title returning ID and overview; in the recorded run the final answer
# paraphrases the overview and omits the row ID.
generate("arjun reddy movie row and its overview?")



[1m> Entering new SQLDatabaseChain chain...[0m
arjun reddy movie row  and its overview?
SQLQuery:[32;1m[1;3mSELECT `ID`, `movie`, `Overview` 
FROM telugumovies_dataset 
WHERE movie = 'Arjun Reddy'
LIMIT 1;[0m
SQLResult: [33;1m[1;3m[(8, 'Arjun Reddy', '    Arjun Reddy, a short tempered house surgeon gets used to drugs and drinks when his girlfriend is forced to marry another person.')][0m
Answer:[32;1m[1;3mArjun Reddy movie row and its overview:
Arjun Reddy, a short tempered house surgeon gets used to drugs and drinks when his girlfriend is forced to marry another person.[0m
[1m> Finished chain.[0m


'The movie "Arjun Reddy" is about a short-tempered house surgeon who turns to drugs and alcohol after his girlfriend is forced to marry someone else.'