In [1]:
import json
import re
import sqlite3
import time
from contextlib import closing
from datetime import datetime
from urllib.parse import quote

import requests
import torch
from bs4 import BeautifulSoup
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

2025-04-26 17:02:20.530840: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-26 17:02:20.553311: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745667140.581033   27714 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745667140.590670   27714 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745667140.613059   27714 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [None]:
import os

# Create the cache directory and the two (initially empty) SQLite cache files.
# Written to be idempotent so the cell survives "Restart Kernel -> Run All":
# the directory may already exist, and existing caches must not be truncated.
os.makedirs("data", exist_ok=True)
for cache_file in ("data/wikipedia_cache.db", "data/scholar_cache.db"):
    # Append mode creates the file when missing and leaves existing content intact.
    with open(cache_file, "a"):
        pass

0

In [2]:
def get_db_connection(db_path):
    """Open and return a SQLite connection for the database at ``db_path``.

    The caller owns the returned connection and is responsible for closing it.
    """
    connection = sqlite3.connect(db_path)
    return connection

In [None]:
class WikiRetriever:
    """Search Wikipedia for a query and return the matching articles as sections.

    Parsed articles are stored in a SQLite table (``articles``) keyed by title,
    so an article is downloaded and parsed at most once per cache file.
    """

    API_URL = 'https://en.wikipedia.org/w/api.php'
    ARTICLE_BASE_URL = 'https://en.wikipedia.org/wiki/'
    REQUEST_TIMEOUT = 15  # seconds, applied to every HTTP request
    # CSS classes marking page furniture that must not end up in the article text.
    _BOILERPLATE_CLASSES = ('infobox', 'navbox')

    def __init__(self, db_path="data/wikipedia_cache.db"):
        """Create the cache table if needed and open an HTTP session.

        Args:
            db_path: Path of the SQLite file used as article cache.

        Raises:
            Exception: If the cache database cannot be initialised.
        """
        self.db_path = db_path
        self._init_db()
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def _init_db(self):
        """Create the ``articles`` table when it does not exist yet."""
        try:
            # closing() guarantees the connection is released even if a statement fails.
            with closing(get_db_connection(self.db_path)) as conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS articles (
                        title TEXT PRIMARY KEY,
                        sections TEXT,
                        retrieved_at TIMESTAMP
                    )
                ''')
                conn.commit()
        except sqlite3.Error as e:
            # Same exception type as before; chained so the root cause stays visible.
            raise Exception(f"Database error: {e}") from e

    def retrieve(self, query, max_results=3):
        """Return ``(articles, sources)`` for ``query``.

        Args:
            query: Free-text search string sent to the Wikipedia search API.
            max_results: Maximum number of titles requested from the search API.

        Returns:
            A tuple ``(articles, sources)``: ``articles`` is a list of
            ``{'title': str, 'sections': dict}`` and ``sources`` the list of the
            corresponding titles. Both are empty when nothing was found.
        """
        titles = self._search_titles(query, max_results)
        if not titles:
            print("No titles found for the query.")
            return [], []

        # Articles are fetched one after another (no concurrency).
        articles = []
        sources = []
        for title in titles:
            sections = self._get_or_fetch_article(title)
            if sections:
                articles.append({'title': title, 'sections': sections})
                sources.append(title)

        return articles, sources

    def _get_or_fetch_article(self, title):
        """Return the sections for ``title`` from the cache, downloading on a miss."""
        sections = self._get_cached(title)
        if not sections:
            sections = self._fetch_and_cache(title)
        return sections

    def _search_titles(self, query, limit):
        """Return up to ``limit`` article titles matching ``query`` (``[]`` on any failure)."""
        params = {'action': 'query', 'list': 'search', 'srsearch': query,
                  'format': 'json', 'srlimit': limit}
        try:
            resp = self.session.get(self.API_URL, params=params, timeout=self.REQUEST_TIMEOUT)
            if resp.status_code != 200:
                return []
            search_results = resp.json().get('query', {}).get('search', [])
            return [item['title'] for item in search_results]
        except Exception:
            # Best effort: a failed search is reported to the caller as "no titles".
            return []

    def _get_cached(self, title):
        """Return the cached sections dict for ``title`` or ``None`` if absent/unreadable."""
        try:
            with closing(get_db_connection(self.db_path)) as conn:
                row = conn.execute(
                    "SELECT sections FROM articles WHERE title=?", (title,)
                ).fetchone()
            return json.loads(row[0]) if row else None
        except Exception:
            # Best effort: an unreadable cache entry is treated as a cache miss.
            return None

    @classmethod
    def _article_url(cls, title):
        """Build the article URL for ``title``.

        Spaces become underscores and the result is percent-encoded so that
        characters such as ``?``, ``#`` or ``%`` in a title stay part of the
        path instead of starting a query string or fragment.
        """
        return cls.ARTICLE_BASE_URL + quote(title.replace(' ', '_'), safe='/:')

    def _fetch_and_cache(self, title):
        """Download, parse and cache the article ``title``.

        Returns:
            The sections dict, or ``None`` when the page could not be fetched
            or has no ``mw-content-text`` element.
        """
        try:
            resp = self.session.get(self._article_url(title), timeout=self.REQUEST_TIMEOUT)
            if resp.status_code != 200:
                return None

            sections = self._parse_sections(resp.text)
            if sections is None:
                return None

            self._save_to_cache(title, sections)
            return sections
        except Exception:
            # Best effort: network or parse failures mean "no article".
            return None

    @classmethod
    def _parse_sections(cls, html):
        """Split an article page into ``{section name: text}``.

        Text before the first heading is stored under ``'Introduction'``.
        Returns ``None`` when the page has no ``mw-content-text`` element.
        """
        soup = BeautifulSoup(html, 'html.parser')
        content = soup.find(id='mw-content-text')
        if not content:
            return None

        current = 'Introduction'
        sections = {current: ''}

        for el in content.find_all(['h2', 'h3', 'p', 'ul', 'ol']):
            if cls._is_boilerplate(el):
                continue

            if el.name.startswith('h'):
                # Older markup wraps the title in <span class="mw-headline">;
                # newer markup puts the title directly inside the <h2>/<h3>.
                heading = el.find(class_='mw-headline')
                if heading is None:
                    heading = el
                name = heading.get_text().strip()
                if name:
                    current = name
                    sections[current] = ''
            elif el.name == 'p':
                text = cls._clean_text(el)
                if text:
                    sections[current] += text + ' '
            else:  # 'ul' / 'ol'
                for li in el.find_all('li', recursive=False):
                    sections[current] += '• ' + cls._clean_text(li) + '\n'

        return sections

    @classmethod
    def _is_boilerplate(cls, el):
        """Return True if ``el`` is, or sits inside, an infobox/navbox element."""
        own_classes = el.get('class') or []
        if any(name in own_classes for name in cls._BOILERPLATE_CLASSES):
            return True
        # The marker class is normally on an enclosing table/div, not on the
        # <p>/<ul> itself, so the ancestors have to be checked as well.
        return el.find_parent(class_=list(cls._BOILERPLATE_CLASSES)) is not None

    @staticmethod
    def _clean_text(el):
        """Return the stripped text of ``el`` with ``<sup class="reference">`` markers removed."""
        for sup in el.find_all('sup', class_='reference'):
            sup.decompose()
        return el.get_text().strip()

    def _save_to_cache(self, title, sections):
        """Insert or replace the cache row for ``title``; errors are printed, not raised."""
        try:
            with closing(get_db_connection(self.db_path)) as conn:
                conn.execute(
                    "INSERT OR REPLACE INTO articles VALUES (?, ?, ?)",
                    (title, json.dumps(sections), datetime.now().isoformat())
                )
                conn.commit()
        except sqlite3.Error as e:
            print(f"Error caching article {title}: {str(e)}")

    def close(self):
        """Close the HTTP session; safe to call more than once."""
        if self.session:
            self.session.close()
            self.session = None


In [None]:
class ScholarRetriever:
    """Look up academic papers in a local SQLite cache.

    This class never contacts Google Scholar: ``retrieve`` only reads the
    ``papers`` and ``queries`` tables of the cache file. An HTTP session and a
    ``request_delay`` value are created in ``__init__`` but are not used by any
    method of this class.
    """

    _PAPER_FIELDS = ('title', 'abstract', 'authors', 'year', 'url', 'citation_count')
    _SELECT_PAPER = "SELECT title, abstract, authors, year, url, citation_count FROM papers"

    def __init__(self, db_path="data/scholar_cache.db"):
        """Create the cache tables if needed and open an HTTP session.

        Args:
            db_path: Path of the SQLite file used as paper cache.
        """
        self.db_path = db_path
        self._init_db()
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        self.request_delay = 5  # seconds between requests

    def _init_db(self):
        """Create the ``papers`` and ``queries`` tables; errors are printed, not raised."""
        try:
            # closing() releases the connection even when a statement fails.
            with closing(sqlite3.connect(self.db_path)) as conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS papers (
                        title TEXT PRIMARY KEY,
                        abstract TEXT,
                        authors TEXT,
                        year TEXT,
                        url TEXT,
                        citation_count INTEGER,
                        retrieved_at TIMESTAMP
                    )
                ''')
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS queries (
                        query TEXT PRIMARY KEY,
                        paper_titles TEXT,
                        retrieved_at TIMESTAMP
                    )
                ''')
                conn.commit()
        except sqlite3.Error as e:
            print(f"Database error: {e}")

    def retrieve(self, query, max_results=3):
        """Return ``(papers, sources)`` for ``query`` from the cache.

        An exact match in the ``queries`` table is tried first; if it yields
        nothing, paper titles are searched keyword by keyword.

        Args:
            query: Free-text question or search string.
            max_results: Maximum number of papers to return.

        Returns:
            ``papers`` is a list of dicts with the keys ``title``, ``abstract``,
            ``authors``, ``year``, ``url`` and ``citation_count``; ``sources`` is
            a list of display labels, ``"<title> (<year>)"`` or just the title
            when no year is stored.
        """
        papers = self._get_cached_papers_for_query(query, max_results)

        if not papers:
            # Fall back to a looser keyword match against cached titles.
            papers = self._keyword_search_cache(query, max_results)

        sources = [self._format_source(p) for p in papers]
        return papers, sources

    @staticmethod
    def _format_source(paper):
        """Build the display label of a paper, omitting the year when it is missing."""
        year = paper.get('year')
        return f"{paper['title']} ({year})" if year else f"{paper['title']}"

    @classmethod
    def _row_to_paper(cls, row):
        """Convert a ``_SELECT_PAPER`` result row into a paper dict."""
        return dict(zip(cls._PAPER_FIELDS, row))

    @staticmethod
    def _escape_like(text):
        """Escape ``\\``, ``%`` and ``_`` so ``text`` matches literally in a LIKE pattern."""
        return text.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')

    def _get_cached_papers_for_query(self, query, limit):
        """Return the papers recorded for exactly this ``query`` (at most ``limit``)."""
        try:
            with closing(sqlite3.connect(self.db_path)) as conn:
                c = conn.cursor()

                c.execute("SELECT paper_titles FROM queries WHERE query=?", (query,))
                row = c.fetchone()
                if not row:
                    return []

                papers = []
                # paper_titles is stored as a JSON list of titles.
                for title in json.loads(row[0]):
                    c.execute(self._SELECT_PAPER + " WHERE title=?", (title,))
                    paper_row = c.fetchone()
                    if paper_row:
                        papers.append(self._row_to_paper(paper_row))

                return papers[:limit]

        except Exception as e:
            print(f"Error retrieving from cache: {str(e)}")
            return []

    def _keyword_search_cache(self, query, limit):
        """Return up to ``limit`` cached papers whose title contains a query keyword.

        Keywords are the lower-cased, whitespace-separated words of ``query``
        longer than two characters; they are tried in order until ``limit``
        distinct papers have been collected.
        """
        try:
            keywords = [k for k in query.lower().split() if len(k) > 2]
            papers = []
            seen_titles = set()

            with closing(sqlite3.connect(self.db_path)) as conn:
                c = conn.cursor()

                for keyword in keywords:
                    # Escape LIKE wildcards so '%' or '_' typed by the user match literally.
                    pattern = f"%{self._escape_like(keyword)}%"
                    c.execute(
                        self._SELECT_PAPER +
                        " WHERE lower(title) LIKE ? ESCAPE '\\' LIMIT ?",
                        (pattern, limit)
                    )

                    for row in c.fetchall():
                        if row[0] not in seen_titles and len(papers) < limit:
                            papers.append(self._row_to_paper(row))
                            seen_titles.add(row[0])

                    if len(papers) >= limit:
                        break

            return papers

        except Exception as e:
            print(f"Error searching cache: {str(e)}")
            return []

    def close(self):
        """Close the HTTP session; safe to call more than once."""
        if self.session:
            self.session.close()
            self.session = None

In [5]:
class LLMGenerator:
    """Answer questions from retrieved documents with a causal language model.

    The retrieved documents are flattened into a text context, truncated to
    ``max_context_tokens`` tokens, and placed in a prompt for a
    ``text-generation`` pipeline. When the model cannot be loaded or generation
    fails, a plain-text fallback answer built from document snippets is returned.
    """

    DEFAULT_CONTEXT_TOKENS = 1024
    SNIPPET_CHARS = 200  # maximum snippet length used by the fallback answer

    def __init__(
        self,
        model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        max_context_tokens: int = None
    ):
        """Load tokenizer, model and generation pipeline.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            max_context_tokens: Token budget for the retrieved context; the same
                value is used as ``max_new_tokens``. Defaults to 1024.

        Loading errors are printed and leave ``self.pipeline`` as ``None``, in
        which case ``generate`` uses the fallback answer.
        """
        print(f"Loading model {model_name}")

        # Determine device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Set before loading so the attributes exist even if loading fails.
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.max_context_tokens = max_context_tokens or self.DEFAULT_CONTEXT_TOKENS
        self.max_new_tokens = self.max_context_tokens

        try:
            # Load tokenizer and model
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else None,
                device_map="auto" if self.device == "cuda" else None
            )

            print(f"Using max_context_tokens = max_new_tokens = {self.max_context_tokens}")

            # device_map is only meaningful on GPU; on CPU nothing is passed
            # (the previous value -1 is a `device` index, not a device map).
            placement = {"device_map": "auto"} if self.device == "cuda" else {}

            # Initialize text-generation pipeline
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_new_tokens=self.max_new_tokens,
                do_sample=True,
                temperature=0.3,
                **placement
            )
            print("Model and pipeline initialized successfully")

        except Exception as e:
            print(f"Error loading model: {e}")
            self.pipeline = None

    def _build_context(self, retrieved):
        """Flatten retrieved documents into one context string.

        Items with a ``sections`` dict contribute one ``Section:`` entry per
        non-empty section; any other item is rendered via ``str(item)``.
        Section values that are ``None`` are treated as empty and skipped.
        """
        parts = []
        for item in retrieved:
            parts.append(f"Document: {item['title']}\n")

            sections = item.get('sections')
            if isinstance(sections, dict):
                for section_name, section_text in sections.items():
                    # Values read from the cache may be None or non-string.
                    section_text = "" if section_text is None else str(section_text)
                    if not section_text.strip():
                        continue
                    parts.append(f"Section: {section_name}\n{section_text}\n\n")
            else:
                # For items without explicit sections
                parts.append(f"Content: {str(item)}\n\n")

        return "".join(parts)

    def generate(self, question: str, retrieved: list) -> str:
        """Generate an answer to ``question`` from ``retrieved`` documents.

        Args:
            question: The user question.
            retrieved: List of document dicts (``title`` plus ``sections`` or
                other fields).

        Returns:
            The model's answer, a fixed message when ``retrieved`` is empty, or
            the fallback answer when the pipeline is unavailable, fails, or
            produces no text.
        """
        if not retrieved:
            return "No relevant information found to answer the question."
        if not self.pipeline:
            return self._fallback_generate(question, retrieved)

        # Build the context and truncate it to the token budget.
        context = self._build_context(retrieved)
        tokens = self.tokenizer.encode(
            context,
            truncation=True,
            max_length=self.max_context_tokens
        )
        context = self.tokenizer.decode(tokens, skip_special_tokens=True)

        prompt = (
            f"Answer this question based on the provided context:\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {question}\n\n"
            f"Answer:"
        )

        try:
            # Ask only for the completion: searching the full text for
            # "Answer:" breaks when the retrieved context itself contains it.
            output = self.pipeline(
                prompt,
                max_new_tokens=self.max_new_tokens,
                temperature=0.3,
                return_full_text=False
            )[0]['generated_text']

            # Defensive: drop the prompt if it was echoed back anyway.
            if output.startswith(prompt):
                output = output[len(prompt):]

            # Cut off any follow-up question the model starts to invent.
            answer = output.split("Question:", 1)[0].strip()
            if answer:
                return answer
            return self._fallback_generate(question, retrieved)

        except Exception as e:
            print(f"Error during generation: {e}")
            return self._fallback_generate(question, retrieved)

    @classmethod
    def _snippet(cls, text) -> str:
        """Return ``text`` cut to ``SNIPPET_CHARS`` characters, with ``...`` if shortened."""
        text = "" if text is None else str(text)
        return text[:cls.SNIPPET_CHARS] + '...' if len(text) > cls.SNIPPET_CHARS else text

    def _fallback_generate(self, question: str, retrieved: list) -> str:
        """Build an answer from snippets of the first two documents, without the model.

        For a document with a ``sections`` dict the first non-empty section is
        used (empty string if there is none); otherwise its ``abstract``.
        """
        answer = f"Based on the available information about '{question}':\n\n"
        for idx, art in enumerate(retrieved[:2], start=1):
            sections = art.get('sections')
            if isinstance(sections, dict):
                # The first non-empty section; an empty dict must not raise.
                source_text = next(
                    (str(text) for text in sections.values() if text and str(text).strip()),
                    ''
                )
            else:
                source_text = art.get('abstract') or ''
            answer += f"[{idx}] {self._snippet(source_text)}\n\n"
        return answer

In [6]:
class RAGSystem:
    """Synchronous retrieval-augmented generation: retrieve documents, then answer."""

    def __init__(self, wiki_retriever=None, scholar_retriever=None, generator=None):
        """Wire up the retrievers and the generator.

        Args:
            wiki_retriever: Object with ``retrieve(query, max_results)`` and
                ``close()``; a ``WikiRetriever`` is created when omitted.
            scholar_retriever: Same protocol; a ``ScholarRetriever`` is created
                when omitted.
            generator: Object with ``generate(question, retrieved)``; an
                ``LLMGenerator`` is created when omitted.
        """
        self.wiki_retriever = wiki_retriever if wiki_retriever is not None else WikiRetriever()
        self.scholar_retriever = scholar_retriever if scholar_retriever is not None else ScholarRetriever()
        self.generator = generator if generator is not None else LLMGenerator()

    def answer(self, question, use_wiki=True, use_scholar=True, max_wiki=2, max_scholar=2):
        """Answer ``question`` using retrieved context.

        Args:
            question: The user question; also used as the retrieval query.
            use_wiki: Whether to query the Wikipedia retriever.
            use_scholar: Whether to query the scholar retriever.
            max_wiki: Maximum number of Wikipedia articles; ``0`` skips the source.
            max_scholar: Maximum number of papers; ``0`` skips the source.

        Returns:
            ``{'answer': str, 'sources': list}``. When nothing was retrieved the
            answer is a fixed message and ``sources`` is empty.
        """
        retrieved = []
        sources = []

        # Get Wikipedia results if requested (asking for 0 results is a no-op).
        if use_wiki and max_wiki > 0:
            wiki_articles, wiki_sources = self.wiki_retriever.retrieve(question, max_wiki)
            retrieved.extend(wiki_articles)
            sources.extend(wiki_sources)

        # Get Scholar results if requested
        if use_scholar and max_scholar > 0:
            scholar_papers, scholar_sources = self.scholar_retriever.retrieve(question, max_scholar)

            # Present each paper in the same {'title', 'sections'} shape as a wiki article.
            for paper in scholar_papers:
                # `or` rather than a .get() default: the keys exist but may hold
                # None, which the default of .get() would not replace.
                retrieved.append({
                    'title': paper['title'],
                    'sections': {
                        'Abstract': paper.get('abstract') or 'No abstract available.',
                        'Authors': paper.get('authors') or 'Unknown authors.',
                        'Year': str(paper.get('year') or 'Unknown year.')
                    }
                })
            sources.extend(scholar_sources)

        if not retrieved:
            return {
                'answer': "No relevant documents found for the query.",
                'sources': []
            }

        # Generate answer
        answer_text = self.generator.generate(question, retrieved)

        return {
            'answer': answer_text,
            'sources': sources
        }

    def close(self):
        """Close both retrievers, even if closing the first one raises."""
        try:
            self.wiki_retriever.close()
        finally:
            self.scholar_retriever.close()

In [7]:
# Build the RAG system once and reuse it in the cells below. Constructing it
# creates both retrievers and an LLMGenerator, which loads the language model.
rag = RAGSystem()

Loading model TinyLlama/TinyLlama-1.1B-Chat-v1.0
Using device: cuda


Device set to use cuda:0


Using max_context_tokens = max_new_tokens = 1024
Model and pipeline initialized successfully


In [8]:
# Demo questions answered from Wikipedia context only.
q1 = "what is machine learning"
q2 = "What is the difference between supervised and unsupervised learning?"
q3 = "What is the Turing test?"
q4 = "What is the difference between AI and ML?"
q5 = "What is the difference between AI and AGI?"

questions = [q1, q2, q3, q4, q5]

for question in questions:
    print(f"Question: {question}")
    # Scholar retrieval is switched off explicitly: the previous call enabled it
    # while asking for zero papers, which could never contribute any context.
    result = rag.answer(question, use_wiki=True, use_scholar=False, max_wiki=2)
    print(f"Answer: {result['answer']}")

Question: what is machine learning
Answer: Machine learning is a subset of artificial intelligence that involves the use of algorithms and data to learn from experience and make predictions. It is a rapidly growing field with many applications in fields such as healthcare, finance, and marketing.
Question: What is the difference between supervised and unsupervised learning?
Answer: Supervised learning is a type of learning where the model is trained on a labeled dataset. In contrast, unsupervised learning is a type of learning where the model is trained on unlabeled data without any ground-truth labels. Supervised learning involves training a model on a labeled dataset, while unsupervised learning involves training a model on unlabeled data without any ground-truth labels.
Question: What is the Turing test?
Answer: The Turing test is a test of a machine's ability to exhibit intelligent behavior equivalent to that of a human. The test was introduced by Alan Turing in his 1950 paper "Com

In [11]:
# Interactive question loop. Type "exit" to stop; EOF or a kernel interrupt
# also ends the loop cleanly instead of raising a traceback.
while True:
    try:
        input_text = input("Enter Question (type exit to exit): ").strip()
    except (EOFError, KeyboardInterrupt):
        break
    if input_text.lower() == 'exit':
        break
    if not input_text:
        # Nothing to search for; prompt again rather than sending an empty query.
        continue
    result = rag.answer(input_text, max_wiki=2, max_scholar=2)
    print(f"Question: {input_text}")
    print(f"Answer: {result['answer']}")

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Question: what is logistic regression
Answer: Logistic regression is a statistical model used to predict the probability of an outcome based on a set of independent variables. It is a special case of linear regression, where the dependent variable is a binary or categorical variable, and the independent variables are binary or ordinal variables. The logistic model is a generalization of the linear model, where the dependent variable is a binary or categorical variable, and the independent variables are binary or ordinal variables. The logistic model is used to make a classifier, where the output is a probability of the dependent variable taking on a particular value. The logistic model is used to model the probability of an outcome based on a set of independent variables. The logistic model is used in a wide range of applications, including classification, regression, and modeling.
