In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [3]:
import os
import re
import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import torch
from transformers import pipeline

In [4]:
# Load the AG News training split and keep only the article text in a single-column DataFrame.
dataset = load_dataset("ag_news", split="train")
df = pd.DataFrame({'text': dataset['text']})

README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [5]:
def split_into_passages(text, chunk_size=150):
    """Split ``text`` into consecutive passages of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields (whitespace-delimited); each
    passage is re-joined with single spaces, so the original spacing is not
    preserved.

    Args:
        text: The string to chunk.
        chunk_size: Maximum number of words per passage. Must be >= 1.

    Returns:
        A list of passage strings in document order. Empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. (A negative value
            would otherwise silently produce no passages at all.)
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

# Chunk every article, then flatten to one row per passage.
# An article with no words yields an empty list, which `explode` turns into NaN;
# drop those rows so only real strings are handed to the embedding model.
# NOTE: this cell is not idempotent -- re-running it re-chunks the already exploded frame.
df['passages'] = df['text'].apply(split_into_passages)
df = df.explode('passages').dropna(subset=['passages']).reset_index(drop=True)

In [6]:
# Load the 'all-MiniLM-L6-v2' sentence-transformers model; it is used for both
# the passage embeddings and the query embedding at retrieval time.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
def create_embeddings(texts):
    """Embed ``texts`` with the notebook-level ``embedding_model``.

    Args:
        texts: The strings to encode.

    Returns:
        Whatever ``embedding_model.encode`` produces when asked for NumPy output.
    """
    vectors = embedding_model.encode(texts, convert_to_numpy=True)
    return vectors

# Embed every passage in a single call. `retrieve_passages` later maps FAISS row ids
# back to `df` rows by position, so `df` must not be reordered after this point.
embeddings = create_embeddings(df['passages'].tolist())

Batches:   0%|          | 0/3751 [00:00<?, ?it/s]

In [8]:
# Build a flat L2 FAISS index sized to the embedding dimension and add all passage vectors.
d = embeddings.shape[1]  # embedding dimensionality (second axis of the matrix)
index = faiss.IndexFlatL2(d)
index.add(embeddings)

In [9]:
def retrieve_passages(query, top_k=3):
    """Return the passages whose embeddings are nearest to ``query``.

    The query is embedded with the notebook-level ``embedding_model``, searched
    against the notebook-level FAISS ``index``, and the resulting row ids are
    looked up positionally in ``df['passages']``.

    Args:
        query: The question or search string.
        top_k: Maximum number of passages to return.

    Returns:
        A list of at most ``top_k`` passage strings, nearest first. It is
        shorter than ``top_k`` when the index holds fewer vectors.
    """
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when it finds fewer than top_k neighbours.
    # Passing -1 to `iloc` would silently return the *last* passage, so drop
    # those placeholders before the lookup.
    hit_rows = [int(row) for row in indices[0] if row >= 0]
    return df.iloc[hit_rows]['passages'].tolist()

In [10]:
# Build the Hugging Face text2text-generation pipeline around FLAN-T5 Large;
# `generate_answer` calls this object to produce the final answer.
generator = pipeline('text2text-generation', model='google/flan-t5-large')

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cuda:0


In [11]:
def generate_answer(query, passages, max_length=200):
    """Generate an answer to ``query`` grounded in the retrieved ``passages``.

    The passages are joined with newlines into a single context string, placed
    in a fixed instruction prompt after the question, and sent to the
    notebook-level ``generator`` pipeline.

    Args:
        query: The user's question.
        passages: Iterable of passage strings to use as context.
        max_length: Value forwarded to the generator's ``max_length`` argument.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        The ``'generated_text'`` field of the first sequence returned by the
        generator.
    """
    context = '\n'.join(passages)
    prompt = (
        "Answer the question based on the context.\n"
        f"Question: {query}\n"
        f"Context: {context}"
    )
    response = generator(prompt, max_length=max_length, num_return_sequences=1)
    return response[0]['generated_text']

In [12]:
def rag_pipeline(query):
    """Run retrieval and generation for ``query``, printing each stage.

    Prints the query, then the retrieved passages (newline-joined), then the
    generated answer. Nothing is returned.
    """
    print("User: ", query)

    retrieved = retrieve_passages(query)
    passages_text = '\n'.join(retrieved)
    print("\nRetrieved Passages:\n", passages_text)

    print("\nAnswer: ", generate_answer(query, retrieved))

In [13]:
# Smoke-test the full retrieve-then-generate pipeline with a sample question.
rag_pipeline("What caused the 2008 financial crisis?")

User:  What caused the 2008 financial crisis?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Retrieved Passages:
 Weak GM, Ford Sales Spur Production Cuts? (Reuters) Reuters - Hurricane Charley's damaging path\through Florida and the growing ineffectiveness of sales\incentives caused U.S. car and trucks sales to slip in August,\raising the likelihood of costly cuts in vehicle production,\analysts said.
Financier was grim reaper: Market-timing crime profited after 9/11 On September 13, 2001, most Americans were still reeling from the shock of the terrorist attacks on New York and the Pentagon two days before.
Finance: Lessons of the Past Five Years Five years ago, the great bull market of the #39;90s was about to come to a crashing halt, but no one knew that yet. In fact, many investors were still buying as if the gains would continue forever.

Answer:  Market-timing crime


In [16]:
import evaluate

# Load metrics
# NOTE(review): this needs the `evaluate` package, whose install cell appears further
# down in the notebook as currently ordered -- confirm it is installed before this runs.
# NOTE(review): `f1_metric` is not referenced anywhere else in the visible notebook, and
# `evaluate_normalized_exact_match` loads its own exact-match handle -- confirm whether
# these two module-level handles are still needed.
exact_match_metric = evaluate.load("exact_match")
f1_metric = evaluate.load("f1")

Downloading builder script:   0%|          | 0.00/5.67k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

In [15]:
!pip install evaluate


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [21]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=2be2f4e08cff22c885b2e1dc0abce5e4f207aa18401052145722e555eaa31c95
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [22]:
import evaluate

# Load the ROUGE metric once; `evaluate_model` reuses this handle on every call.
rouge = evaluate.load("rouge")

def evaluate_model(predictions, references):
    """Score ``predictions`` against ``references`` with ROUGE and print the result.

    Uses the notebook-level ``rouge`` metric handle. The computed scores are
    printed, not returned.
    """
    rouge_scores = rouge.compute(references=references, predictions=predictions)
    print("ROUGE Scores:", rouge_scores)

# Example prediction and reference: a single prediction scored against a single reference string.
evaluate_model(
    ["The financial crisis was caused by risk-taking and regulatory failure."],
    ["The 2008 financial crisis was caused by excessive risk-taking and the failure of regulatory oversight."]
)


ROUGE Scores: {'rouge1': 0.8148148148148148, 'rouge2': 0.48, 'rougeL': 0.7407407407407406, 'rougeLsum': 0.7407407407407406}


In [23]:
import evaluate

# Load the BLEU metric once; `evaluate_bleu` reuses this handle on every call.
bleu = evaluate.load("bleu")

def evaluate_bleu(predictions, references):
    """Score ``predictions`` against ``references`` with BLEU and print the score.

    Each reference string is wrapped in its own one-element list before being
    passed to the notebook-level ``bleu`` metric. Only the ``'bleu'`` entry of
    the result is printed; nothing is returned.
    """
    wrapped_references = []
    for reference in references:
        wrapped_references.append([reference])

    bleu_result = bleu.compute(predictions=predictions, references=wrapped_references)
    print(f"BLEU Score: {bleu_result['bleu']}")
    
# Example Prediction and Reference: a single prediction scored against a single reference string.
evaluate_bleu(
    ["The financial crisis was caused by risk-taking and regulatory failure."],
    ["The 2008 financial crisis was caused by excessive risk-taking and the failure of regulatory oversight."]
)


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

BLEU Score: 0.2867745499592883


In [26]:
import re

def normalize_text(text):
    """Normalize ``text`` for lenient string comparison.

    Steps, in order: lower-case, delete every character that is neither a
    word character nor whitespace, then collapse each run of whitespace to a
    single space and trim both ends.

    Whitespace is cleaned up *after* punctuation removal on purpose: deleting
    punctuation can leave stray or doubled spaces behind (``"hello !"``,
    ``"a - b"``), which would otherwise cause spurious mismatches.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())  # Remove punctuation
    return ' '.join(without_punctuation.split())

def evaluate_normalized_exact_match(predictions, references):
    """Print the exact-match score of ``predictions`` vs ``references`` after normalization.

    Both lists are passed element-wise through ``normalize_text``. The
    ``exact_match`` metric is then loaded via ``evaluate.load`` and applied to
    the normalized strings. The score is printed, not returned.
    """
    cleaned_predictions = list(map(normalize_text, predictions))
    cleaned_references = list(map(normalize_text, references))

    metric = evaluate.load("exact_match")
    scores = metric.compute(
        predictions=cleaned_predictions,
        references=cleaned_references,
    )
    print(f"Exact Match Score (Normalized): {scores['exact_match']}")

# Example: the prediction omits words that appear in the reference, so the two strings
# still differ after normalization.
evaluate_normalized_exact_match(
    ["The financial crisis was caused by risk-taking and regulatory failure."],
    ["The financial crisis was caused by excessive risk-taking and the failure of regulatory oversight."]
)



Exact Match Score (Normalized): 0.0
