# Coreference

In [None]:
from pycorenlp import StanfordCoreNLP

nlp = StanfordCoreNLP('http://localhost:9010')


def resolve_coreferences(corenlp_output):
    """
    Replace pronominal mentions with their antecedent, in place.

    For every coreference chain in ``corenlp_output['corefs']`` the antecedent
    is the mention flagged ``isRepresentativeMention``; when no mention in the
    chain carries that flag, the first mention is used instead. Every other
    mention whose ``type`` is ``'PRONOMINAL'`` gets the ``word`` of its first
    token overwritten with the antecedent's ``text``.

    Args:
        corenlp_output: Parsed CoreNLP JSON (a dict) providing the ``corefs``
            and ``sentences`` keys. The dict is modified in place.

    Returns:
        None.
    """
    sentences = corenlp_output['sentences']
    for mentions in corenlp_output['corefs'].values():
        if not mentions:
            # Nothing to resolve in an empty chain.
            continue

        # Prefer the mention CoreNLP marked as representative: the first
        # mention of a chain can itself be a pronoun ("When he arrived, John ...").
        antecedent = next(
            (m for m in mentions if m.get('isRepresentativeMention')),
            mentions[0],
        )

        for mention in mentions:
            if mention is antecedent or mention['type'] != 'PRONOMINAL':
                continue
            # sentNum and startIndex are 1-based; the token lists are 0-based.
            tokens = sentences[mention['sentNum'] - 1]['tokens']
            tokens[mention['startIndex'] - 1]['word'] = antecedent['text']


def get_resolved_text(corenlp_output):
    """
    Rebuild the text from (coreference-resolved) CoreNLP output.

    Each token contributes its ``word`` followed by its ``after`` string. A
    possessive pronoun (lemma in hers/his/their/theirs, or POS tag ``PRP$``)
    whose ``word`` was replaced gets ``'s`` appended so the substituted
    antecedent stays possessive ("his dog" -> "John's dog"). Possessives that
    were left untouched are emitted as-is instead of becoming e.g. "their's".

    Args:
        corenlp_output: Parsed CoreNLP JSON (a dict) with a ``sentences`` list
            whose tokens provide ``word``, ``lemma``, ``pos`` and ``after``.

    Returns:
        The reconstructed text as a single string.
    """
    possessive_lemmas = {'hers', 'his', 'their', 'theirs'}
    pieces = []
    for sentence in corenlp_output['sentences']:
        for token in sentence['tokens']:
            word = token['word']
            is_possessive = token['lemma'] in possessive_lemmas or token['pos'] == 'PRP$'
            if is_possessive:
                # A token counts as replaced when its word no longer matches
                # the original surface text. Without 'originalText' we cannot
                # tell, so keep the historical behaviour and add the suffix.
                original = token.get('originalText')
                if original is None or original != word:
                    word += "'s"
            pieces.append(word + token['after'])
    return "".join(pieces)


def rewrite_query(conversational_history, current_question):
    """
    Rewrite the current question with coreferences resolved from the history.

    The history and the current question are joined with single spaces and
    annotated as one document so coreference chains can span turns. Only the
    sentences that belong to the current question are returned.

    Args:
        conversational_history: List of earlier utterances (strings).
        current_question: The utterance to rewrite.

    Returns:
        The resolved current question, stripped of surrounding whitespace. If
        no sentence can be attributed to the current question, the whole
        resolved conversation is returned instead.

    Raises:
        RuntimeError: If the CoreNLP client does not return parsed JSON.
    """
    # Combine conversational history and current question
    conversation = " ".join(conversational_history + [current_question])
    # The current question is the tail of the joined text, so this is the
    # character offset at which it starts.
    question_start = len(conversation) - len(current_question)

    # Annotate the conversation using CoreNLP
    output = nlp.annotate(conversation, properties={
        'annotators': 'dcoref',
        'outputFormat': 'json',
        'ner.useSUTime': 'false'
    })
    # NOTE(review): pycorenlp appears to hand back the raw response text when
    # the server reply is not valid JSON (e.g. an error page); fail clearly
    # instead of with "string indices must be integers" further down.
    if not isinstance(output, dict):
        raise RuntimeError(f"CoreNLP did not return JSON output: {output!r}")

    # Resolve coreferences
    resolve_coreferences(output)

    # Keep only the sentences that start inside the current question.
    # assumes characterOffsetBegin counts the same units as Python's len()
    # (true for plain ASCII/BMP text) - TODO confirm for other input.
    question_sentences = [
        sentence for sentence in output['sentences']
        if sentence['tokens']
        and sentence['tokens'][0].get('characterOffsetBegin', -1) >= question_start
    ]
    if not question_sentences:
        return get_resolved_text(output).strip()

    return get_resolved_text({'sentences': question_sentences}).strip()


# Demo 1: the current question contains a pronoun ("him") and a temporal
# reference ("that year") that only make sense given the earlier turns.
# Running this cell requires the CoreNLP server configured for `nlp` above.
conversational_history = [
    "What are the names of all the movies released in 2000?",
    "Show me the names of all actors who acted in movies directed by Steven Spielberg."
]
current_question = "Show me all the movies directed by him in that year."

# Resolve the references through CoreNLP coreference annotation
rewritten_question = rewrite_query(conversational_history, current_question)

print("Rewritten Question:", rewritten_question)


Rewritten Question: What are the names of all the movies released in 2000? Show me the names of all actors who acted in movies directed by Steven Spielberg. Show me all the movies directed by Steven Spielberg in that year.


In [None]:
# Demo 2: the current question refers back with a noun phrase ("that movie")
# rather than a pronoun.
conversational_history = [
    "How many movies did Netflix produce?",
    "How many actors from China have acted in movie  Rush Hour 3?."
]
current_question = "Show me all the movies  directed by the director of that movie."

# Resolve the references through CoreNLP coreference annotation
rewritten_question = rewrite_query(conversational_history, current_question)

print("Rewritten Question:", rewritten_question)

Rewritten Question: How many movies did Netflix produce? How many actors from China have acted in movie  Rush Hour 3?. Show me all the movies  directed by the director of that movie.


# Context Tracking

In [None]:
import spacy

# Load SpaCy's language model
nlp = spacy.load("en_core_web_sm")

class ContextTracker:
    """Remember entities and years seen in a dialogue and use them to
    rewrite context-dependent queries.

    The state lives in ``self.context``:

    * ``entities``  - maps entity text to its spaCy label (non-DATE entities).
    * ``years``     - four-digit year strings, in order of first appearance.
    * ``directors`` - every proper-noun token seen so far, in order.
    * ``relationships`` / ``temporal`` - initialised but not written by
      this class.

    Both methods run the module-level spaCy pipeline ``nlp`` on the query.
    """

    # Pronouns that resolve_query swaps for the most recently stored entity.
    _PRONOUNS = frozenset({"him", "her", "it", "that", "this"})
    # Demonstratives dropped when they introduce a resolvable "year"
    # ("that year" -> "2000" rather than "that 2000").
    _YEAR_DEMONSTRATIVES = frozenset({"that", "this"})

    def __init__(self):
        self.context = {"entities": {}, "relationships": {}, "temporal": None, "directors": [], "years": []}

    @staticmethod
    def _is_year(text):
        """Return True when *text* looks like a concrete year (four digits)."""
        return len(text) == 4 and text.isdigit()

    def update_context(self, query):
        """Extract and update context from the query.

        Args:
            query: The utterance to analyse.

        Returns:
            The spaCy ``Doc`` produced for ``query``.
        """
        doc = nlp(query)

        # Named entities other than dates are remembered with their label.
        # Dates are handled token by token below so that only concrete years
        # are stored; phrases such as "that year" are references to a year,
        # not values, and storing them made "that year" resolve to itself.
        for ent in doc.ents:
            if ent.label_ != "DATE":
                self.context["entities"][ent.text] = ent.label_

        years = self.context["years"]
        directors = self.context["directors"]
        for token in doc:
            if self._is_year(token.text) and token.text not in years:
                years.append(token.text)
            # NOTE(review): every proper noun is recorded here, not only
            # people or directors - e.g. company and place names end up in
            # this list as well.
            if token.pos_ == "PROPN":
                directors.append(token.text)

        return doc

    def resolve_query(self, query):
        """Resolve context-dependent parts of the query.

        The query is rebuilt token by token, so a replacement only ever
        affects the matching token itself and never a substring of another
        word (e.g. "it" inside "with", "year" inside "years").

        Replacements, in order of precedence:

        * "that"/"this" directly before "year" is dropped when a year is known;
        * the pronouns him/her/it/that/this become the most recently added
          entity, or the literal "unknown" when no entity is stored;
        * "year" becomes the most recently stored year;
        * "director" becomes all stored proper-noun tokens joined by spaces.

        Args:
            query: The utterance to rewrite.

        Returns:
            The rewritten query string.
        """
        doc = nlp(query)
        entities = self.context["entities"]
        years = self.context["years"]
        directors = self.context["directors"]

        pieces = []
        for token in doc:
            lowered = token.text.lower()

            if years and lowered in self._YEAR_DEMONSTRATIVES:
                next_index = token.i + 1
                if next_index < len(doc) and doc[next_index].text.lower() == "year":
                    # The following "year" token is replaced by the year
                    # itself; the demonstrative would be left dangling.
                    continue

            replacement = token.text
            if token.pos_ == "PRON" and lowered in self._PRONOUNS:
                # Dicts keep insertion order, so the last key is the entity
                # that was first seen most recently.
                replacement = list(entities)[-1] if entities else "unknown"
            elif lowered == "year" and years:
                replacement = years[-1]
            elif lowered == "director" and directors:
                # NOTE(review): joins *all* recorded proper nouns, which only
                # yields a sensible name while a single person has been
                # mentioned so far.
                replacement = " ".join(directors)

            # whitespace_ restores the original spacing after each token.
            pieces.append(replacement + token.whitespace_)

        return "".join(pieces)


# Initialize the context tracker (its context accumulates across all queries
# passed to it from here on)
tracker = ContextTracker()

# Dialogue simulation: the last query depends on the first two
queries = [
    "What are the names of all the movies released in 2000?",
    "Show me the names of all actors who acted in movies directed by Steven Spielberg.",
    "Show me all the movies directed by him in that year.",
]

# Process each query. The context is updated *before* resolving, so anything
# extracted from the current query is already visible to resolve_query.
for query in queries:
    tracker.update_context(query)  # Update the context with the query
    resolved_query = tracker.resolve_query(query)  # Resolve the query using the context
    print(f"Original Query: {query}")
    print(f"Resolved Query: {resolved_query}")
    print("Context:", tracker.context)
    print("-" * 50)


Original Query: What are the names of all the movies released in 2000?
Resolved Query: What are the names of all the movies released in 2000?
Context: {'entities': {}, 'relationships': {}, 'temporal': None, 'directors': [], 'years': ['2000']}
--------------------------------------------------
Original Query: Show me the names of all actors who acted in movies directed by Steven Spielberg.
Resolved Query: Show me the names of all actors who acted in movies directed by Steven Spielberg.
Context: {'entities': {'Steven Spielberg': 'PERSON'}, 'relationships': {}, 'temporal': None, 'directors': ['Steven', 'Spielberg'], 'years': ['2000']}
--------------------------------------------------
Original Query: Show me all the movies directed by him in that year.
Resolved Query: Show me all the movies directed by Steven Spielberg in that year.
Context: {'entities': {'Steven Spielberg': 'PERSON'}, 'relationships': {}, 'temporal': None, 'directors': ['Steven', 'Spielberg'], 'years': ['2000', 'that yea

In [None]:
# Second dialogue. The same `tracker` instance is reused, so the context
# collected from the previous dialogue is still present.
queries = [
    "How many movies did production house Netflix produce?",
    "How many actors from China have acted in movie Rush Hour 3?.",
    "Show me all the movies  directed by the director of above movie."

]
for query in queries:
    tracker.update_context(query)  # Update the context with the query
    resolved_query = tracker.resolve_query(query)  # Resolve the query using the context
    print(f"Original Query: {query}")
    print(f"Resolved Query: {resolved_query}")
    print("Context:", tracker.context)
    print("-" * 50)

Original Query: How many movies did production house Netflix produce?
Resolved Query: How many movies did production house Netflix produce?
Context: {'entities': {'Steven Spielberg': 'PERSON', 'Netflix': 'GPE'}, 'relationships': {}, 'temporal': None, 'directors': ['Steven', 'Spielberg', 'Netflix'], 'years': ['2000', 'that year', 'year']}
--------------------------------------------------
Original Query: How many actors from China have acted in movie Rush Hour 3?.
Resolved Query: How many actors from China have acted in movie Rush Hour 3?.
Context: {'entities': {'Steven Spielberg': 'PERSON', 'Netflix': 'GPE', 'China': 'GPE', 'Rush Hour 3': 'WORK_OF_ART'}, 'relationships': {}, 'temporal': None, 'directors': ['Steven', 'Spielberg', 'Netflix', 'China', 'Rush', 'Hour'], 'years': ['2000', 'that year', 'year', '3']}
--------------------------------------------------
Original Query: Show me all the movies  directed by the director of above movie.
Resolved Query: Show me all the movies  directe

In [None]:
  !pip install -U -q "google-generativeai>=0.7.2"

In [None]:
import google.generativeai as genai
from google.colab import userdata

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
def get_model_resp(prompt, model_name='models/gemini-1.5-flash', temperature=0):
    """Send ``prompt`` to a Gemini model and return the generated text.

    Args:
        prompt: The prompt passed to ``generate_content``.
        model_name: Name of the model to instantiate. Defaults to the model
            this notebook was written against.
        temperature: Sampling temperature for the generation config; the
            default of 0 matches the previous hard-coded value.

    Returns:
        The response text on success. On any exception the error is printed
        and ``False`` is returned, so callers can guard with ``if response:``.
    """
    try:
        model = genai.GenerativeModel(model_name)
        response = model.generate_content(
            prompt,
            generation_config=genai.GenerationConfig(temperature=temperature),
        )
        # Reading .text stays inside the try block so that a failure while
        # accessing the response is reported the same way as a failed request.
        return response.text
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure.
        print(e)
        return False


# Smoke test for the model wrapper. get_model_resp returns False when the
# request fails, in which case nothing is printed here.
response = get_model_resp("Explain how AI works")
if response:

  print(response)

Artificial intelligence (AI) is a broad field encompassing many techniques, but at its core, it aims to create systems that can perform tasks that typically require human intelligence.  These tasks include learning, reasoning, problem-solving, perception, and natural language understanding.  There's no single "how it works" answer, as different AI approaches use different methods. However, we can break it down into key concepts:

**1. Data is King:**  AI systems learn from data.  The more relevant and high-quality data they are trained on, the better they perform. This data can be anything from images and text to sensor readings and financial transactions.

**2. Algorithms are the Engine:**  Algorithms are sets of rules and statistical techniques that AI systems use to process data and learn patterns.  These algorithms are the "brains" of the system, determining how it analyzes information and makes decisions.  Different algorithms are suited for different tasks.

**3. Learning Paradig

In [None]:

def get_convo_prompt(history,current_question):
    """Build the question-reformulation prompt for the LLM.

    The prompt asks the model to decide whether ``current_question`` depends
    on the earlier conversation and, if so, to rewrite it as a self-contained
    question. The model is told to answer with a JSON object holding the
    keys ``thoughts`` and ``question``.

    Args:
        history: Previous conversation turns; interpolated into the prompt
            with ``str()`` formatting (a list is rendered as its repr).
        current_question: The question that may need reformulating.

    Returns:
        The complete prompt string.
    """
    prompt = f'''Role: You are a very advanced reasoning agent who understands the context of the conversation and can answer the questions based on the previous chain of question and answers.

  Goal:
  Given the previous chain of question and answers and current question, you need to decide if the current question is dependent on context/data/answer of previous question or not. To do this you need to think step by step and understand the context of the conversation.
  You should log your step by step thinking in the "thoughts" field of the output.

  If yes then you need to reformulate the current question by including the context/data points/data points in answers/questions of previous conversational history in the current question and output the reformulated question. Follow below mentioned guidelines to do so:
  1) If current question is related to previous context, you should not always add context/data points/data points in answers/questions of previous conversational history in the current question. You should first decide if current question is self contained with respect to information present in it or not.
  2) If current question is self contained wrt to information and no information is to be added from previous conversational history then output the question as it is in the "question" field of the output. Record reasoning behind this decision in the "thoughts" field of the output.
  3) If current question is not self contained wrt to information and needs information to be added from previous conversational history, then reformulate the current question by including the data points in answers/questions of previous conversational history and output the reformulated question in the "question" field of the output.

  If the current question is not dependent on context/data/answer of previous question then you should output the current question as it is in the "question" field of the output.

  Input Guidelines: You will be given previous chain of questions and answers as "History" and current question as "current question".

  Output guidelines:
    -Do not add any extra information from previous conversational history to reformulate the current question if it is not needed.
    -Your reformulated question is going to be executed on TEXT2SQL system and hence formulate question accordingly. Reformulated question should not contain any clarifying extra information or extra context information from the conversation history other than most important required information.
    -Please always respond with a valid well-formed JSON object with the following format.
    -Do not include newline characters in output. Dont include word json in output.
      {{
        "thoughts": "<your reasoning about whether the current question is dependent on context/data/answer of previous question, and about how it was reformulated if it is>",
        "question": "<the current question, reformulated only if needed>"
      }}

  History: {history}
  current question: {current_question}
  Output:

  '''
    return prompt

In [None]:
# LLM-based rewrite of the same dialogue used in the earlier demos.
conversational_history = [
    "What are the names of all the movies released in 2000?",
    "Show me the names of all actors who acted in movies directed by Steven Spielberg."
]
current_question = "Show me all the movies directed by him in that year."
import json
prompt= get_convo_prompt(conversational_history,current_question)
resp = get_model_resp(prompt)
# The prompt asks for a JSON object, so the reply is parsed into a dict.
# get_model_resp returns False on failure, which json.loads rejects with a
# TypeError.
resp = json.loads(resp)


In [None]:

# `resp` is the dict parsed from the model's JSON reply in the previous cell.
print("Rewritten Question:",resp["question"])
print("Thoughts:",resp["thoughts"])

Rewritten Question: Show me all the movies directed by Steven Spielberg that were released in 2000.
Thoughts: The current question "Show me all the movies directed by him in that year." is dependent on the previous questions.  "Him" refers to Steven Spielberg from the second question, and "that year" refers to 2000 from the first question.  Therefore, the question needs to be reformulated to include this context.


In [None]:
import json

# LLM-based rewrite where the current question refers back with a noun
# phrase ("above movie") instead of a pronoun.
conversational_history = [
    "How many movies did production house Netflix produce?",
    "How many actors from China have acted in movie Rush Hour 3?."
]
current_question = "Show me all the movies  directed by the director of above movie."


prompt = get_convo_prompt(conversational_history, current_question)
# A single request is enough; the call used to be issued twice, with the
# first response discarded.
resp = get_model_resp(prompt)
# get_model_resp returns False (after printing the error) when the request
# fails; only parse and print a real reply.
if resp:
    resp = json.loads(resp)
    print("Rewritten Question:",resp["question"])
    print("Thoughts:",resp["thoughts"])

Rewritten Question: Show me all the movies directed by the director of Rush Hour 3.
Thoughts: The current question "Show me all the movies directed by the director of above movie" is dependent on the previous question "How many actors from China have acted in movie Rush Hour 3?" because "above movie" refers to Rush Hour 3.  Therefore, the question needs to be reformulated to include this context.
