In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import requests
import json
# from googlesearch import search  # For web search in CRAG

# Read the IPL match records into a DataFrame.
# The path is relative, so it resolves against the current working directory.
MATCHES_CSV = "OneDrive/Desktop/matches.csv"
ipl_data = pd.read_csv(MATCHES_CSV)

# Lower-case month names in calendar order.
month = [
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
]

# Lookup from 1-based month number (1 -> "january", ..., 12 -> "december").
# Built with enumerate so no loop index leaks into the module namespace.
months = dict(enumerate(month, start=1))

from sentence_transformers import SentenceTransformer

# Pre-trained Sentence-BERT encoder, shared by the dataset rows and the questions.
model = SentenceTransformer('all-MiniLM-L6-v2')


def _describe_match(row):
    """Render one match row as a single comma-separated line of "Label: value" pairs.

    The date appears twice: verbatim, and spelled out as
    "<field 0> of <month name> <field 2>", where the month name is looked up
    in ``months`` using the second '/'-separated field of the date.
    """
    date_parts = row['date'].split('/')
    spelled_date = f"{date_parts[0]} of {months[int(date_parts[1])]} {date_parts[2]}"
    fields = [
        f"Season: {row['season']}",
        f"Date: {row['date']} or {spelled_date}",
        f"City: {row['city']}",
        f"Match: {row['team1']} vs {row['team2']}",
        f"Toss Winner: {row['toss_winner']}",
        f"Decision: {row['toss_decision']}",
        f"Result: {row['result']}",
        f"Duckworth-Lewis-Stern applied: {row['dl_applied']}",
        f"Winner: {row['winner']}",
        f"Win By Runs: {row['win_by_runs']}",
        f"Win By Wickets: {row['win_by_wickets']}",
        f"Player of the Match: {row['player_of_match']}",
        f"Venue: {row['venue']}",
        f"Umpire1: {row['umpire1']}",
        f"Umpire2: {row['umpire2']}",
    ]
    return ", ".join(fields)


# One retrieval document per match, stored alongside the raw columns.
ipl_data['context'] = ipl_data.apply(_describe_match, axis=1)

# Embeddings of every context line. The name is a leftover from an earlier
# TF-IDF version; the values now come from the sentence encoder above.
tfidf_matrix = model.encode(ipl_data['context'])

# Function to retrieve relevant context
def retrieve_context(question, k=5):
    """Return the context lines of the ``k`` matches most similar to ``question``.

    The question is embedded with ``model`` and compared by cosine similarity
    against the precomputed ``tfidf_matrix`` embeddings.

    Args:
        question: Free-text question to look up.
        k: Maximum number of context lines to return.

    Returns:
        A list of context strings ordered from most to least similar. The list
        is empty when ``k`` is zero or negative, and holds every row when ``k``
        exceeds the number of rows.
    """
    # Guard first: with k == 0 the slice below becomes [-0:], which selects
    # every row instead of none, and negative k yields equally surprising slices.
    if k <= 0:
        return []

    question_vec = model.encode([question])
    scores = cosine_similarity(question_vec, tfidf_matrix).flatten()
    # argsort is ascending, so take the last k entries and reverse them.
    top_indices = scores.argsort()[-k:][::-1]
    return ipl_data.iloc[top_indices]['context'].tolist()

# Function to query Phi3 via Ollama API
# def query_phi3(prompt):
#     url = "http://localhost:11434/api/generate"  # Replace with your server URL if using ngrok
#     headers = {"Content-Type": "application/json"}
#     payload = {"model": "phi3", "prompt": prompt}

#     response = requests.post(url, json=payload, headers=headers)
#     if response.status_code == 200:
#         return response.json()["response"]
#     else:
#         return "Error querying Phi3 model."

# def query_phi3(prompt):
#     url = "http://localhost:11434/api/generate/"  # Replace with ngrok URL if applicable
#     headers = {"Content-Type": "application/json"}
#     payload = {"model": "phi3", "prompt": prompt}

#     try:
#         response = requests.post(url, json=payload, headers=headers)
#         response.raise_for_status()  # Raise an error for bad responses
#         return response.json().get("response", "No response from Phi3 model.")
#     except requests.exceptions.RequestException as e:
#         return f"Error querying Phi3 model: {e}"

def query_phi3(prompt):
    """Send ``prompt`` to the local Ollama ``phi3`` model and return its full answer.

    The endpoint is expected to stream one JSON object per line, each carrying
    a fragment of the answer under the ``"response"`` key; the fragments are
    concatenated in arrival order.

    Args:
        prompt: Complete prompt text to send to the model.

    Returns:
        The concatenated answer text (possibly empty), or a string starting
        with "Error querying Phi3 model:" / "Error parsing JSON response:"
        when the request or the decoding fails.
    """
    url = "http://localhost:11434/api/generate"  # Ensure this is the correct endpoint
    headers = {"Content-Type": "application/json"}
    payload = {"model": "phi3", "prompt": prompt}

    try:
        # stream=True plus the context manager reads the body incrementally and
        # releases the connection even if parsing fails part-way through.
        with requests.post(url, json=payload, headers=headers, stream=True) as response:
            response.raise_for_status()  # Handle HTTP errors
            fragments = []
            # Parse whole lines rather than fixed-size byte chunks: a chunk
            # boundary can fall inside a multi-byte UTF-8 character, and
            # rewriting "}" to stitch objects together alters any generated
            # text that itself contains braces.
            for line in response.iter_lines():
                if not line:
                    continue  # skip blank keep-alive lines
                fragments.append(json.loads(line).get("response", ""))
            return "".join(fragments)
    except requests.exceptions.RequestException as e:
        return f"Error querying Phi3 model: {e}"
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return f"Error parsing JSON response: {e}"


# Function to perform a web search
def web_search(question, num_results=3):
    """Run a web search for ``question`` and return the results, one per line.

    The module-level ``googlesearch`` import is commented out, so ``search`` is
    imported here on demand; otherwise any call fails with ``NameError``.

    Args:
        question: Query text passed to the search backend.
        num_results: Number of results requested from the backend.

    Returns:
        The search results joined with newlines, or an empty string when the
        optional ``googlesearch`` package is not installed.
    """
    try:
        from googlesearch import search  # optional dependency
    except ImportError:
        import warnings

        warnings.warn(
            "googlesearch is not installed; web search is unavailable.",
            RuntimeWarning,
            stacklevel=2,
        )
        return ""

    results = search(question, num_results=num_results)
    return "\n".join(results)

# RAG system function
def answer_question_rag(question):
    """Answer ``question`` from the IPL dataset using retrieval plus the Phi3 model.

    Retrieves the most similar match descriptions, prints them, and asks the
    model to answer using only that context.

    Returns:
        The model's reply, or ``None`` when nothing was retrieved or the reply
        is empty, "i don't know" or "not found" (ignoring case and surrounding
        whitespace).
    """
    context = retrieve_context(question)
    print("The context retrieved is: ", context)
    if not context:
        return None

    full_context = "\n".join(context)
    prompt = (
        "Using the following IPL dataset context, answer the question:\n\n"
        f"{full_context}\n\nQuestion: {question}\nAnswer:"
    )
    response = query_phi3(prompt)

    # Replies that mean the model could not answer from the supplied context.
    non_answers = ("i don't know", "not found", "")
    if response.strip().lower() in non_answers:
        return None
    return response

# CRAG system function (with web search fallback)
def answer_question_crag(question):
    """Answer ``question`` with RAG first, falling back to a web search.

    Returns:
        The RAG answer when there is one; otherwise the model's answer based on
        web search results; otherwise a fixed apology string when the search
        returns nothing.
    """
    # First choice: the dataset-backed answer.
    rag_response = answer_question_rag(question)
    if rag_response:
        return rag_response

    # Fallback: ground the model on web search results instead.
    web_results = web_search(question)
    if not web_results:
        return "I'm sorry, I couldn't find the answer to your question."

    prompt = (
        "Using the following web search results, answer the question:\n\n"
        f"{web_results}\n\nQuestion: {question}\nAnswer:"
    )
    return query_phi3(prompt)

# Sample query for the demo run; the text is sent to the retriever exactly as written.
question = (
    "In what locations did kolkatta knight riders and chennai super kings "
    "play a match in the year 2010?"
)

# Dataset-only (RAG) answer, with a fallback message when nothing usable came back.
print("Using RAG system...")
rag_answer = answer_question_rag(question)
print("RAG Answer:", rag_answer or "No answer found in the dataset.")

# CRAG system response
# print("\nUsing CRAG system...")
# crag_answer = answer_question_crag(question)
# print("CRAG Answer:", crag_answer)


Using RAG system...
The context retrieved is:  ['Season: 2010, Date: 13/04/10 or 13 of april 10, City: Chennai, Match: Kolkata Knight Riders vs Chennai Super Kings, Toss Winner: Kolkata Knight Riders, Decision: bat, Result: normal, Duckworth-Lewis-Stern applied: 0, Winner: Chennai Super Kings, Win By Runs: 0, Win By Wickets: 9, Player of the Match: R Ashwin, Venue: MA Chidambaram Stadium, Chepauk, Umpire1: SS Hazare, Umpire2: SJA Taufel', 'Season: 2008, Date: 26/04/08 or 26 of april 08, City: Chennai, Match: Kolkata Knight Riders vs Chennai Super Kings, Toss Winner: Kolkata Knight Riders, Decision: bat, Result: normal, Duckworth-Lewis-Stern applied: 0, Winner: Chennai Super Kings, Win By Runs: 0, Win By Wickets: 9, Player of the Match: JDP Oram, Venue: MA Chidambaram Stadium, Chepauk, Umpire1: BF Bowden, Umpire2: AV Jayaprakash', 'Season: 2008, Date: 18/05/08 or 18 of may 08, City: Kolkata, Match: Kolkata Knight Riders vs Chennai Super Kings, Toss Winner: Kolkata Knight Riders, Decisio