research_finder/
├── requirements.txt
├── main.py
└── research_finder/
    ├── __init__.py
    ├── aggregator.py
    ├── exporter.py
    └── searchers/
        ├── __init__.py
        ├── base_searcher.py
        ├── semantic_scholar.py
        ├── arxiv.py
        └── google_scholar.py

Setup and Installation

In [1]:
# Install required libraries if you haven't already
# Note: In a Jupyter environment, you can run shell commands with '!'
!pip install requests pandas feedparser scholarly

# Standard library imports
import logging
from typing import List, Dict, Any

# Setup basic logging to see output from our classes
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

print("Libraries installed and logging configured.")

Libraries installed and logging configured.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


The Base Class (BaseSearcher)

In [2]:
# research_finder/searchers/base_searcher.py

from abc import ABC, abstractmethod

class BaseSearcher(ABC):
    """Abstract base class for all article searchers."""
    
    def __init__(self, name: str):
        self.name = name
        self.results: List[Dict[str, Any]] = []

    @abstractmethod
    def search(self, query: str, limit: int) -> None:
        """
        Performs a search and populates the self.results list.
        Each result should be a dictionary.
        """
        pass

    def get_results(self) -> List[Dict[str, Any]]:
        """Returns the list of standardized results."""
        return self.results

    def clear_results(self) -> None:
        """Clears the stored results."""
        self.results = []
        
print("BaseSearcher class defined.")

BaseSearcher class defined.


Semantic Scholar Searcher

In [3]:
# research_finder/searchers/semantic_scholar.py

import requests
from IPython.display import display, Markdown # For nice output in notebooks

class SemanticScholarSearcher(BaseSearcher):
    """Searcher for the Semantic Scholar API."""
    
    BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"

    def __init__(self):
        super().__init__("Semantic Scholar")
        self.logger = logging.getLogger(self.name)

    def search(self, query: str, limit: int = 10) -> None:
        self.logger.info(f"Searching for: '{query}' with limit {limit}")
        self.clear_results()
        params = {
            'query': query,
            'limit': limit,
            'fields': 'title,authors,year,abstract,url,citationCount,tldr'
        }
        try:
            response = requests.get(self.BASE_URL, params=params)
            response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
            data = response.json()
            
            for item in data.get('data', []):
                authors = [author.get('name') for author in item.get('authors', [])]
                abstract = item.get('tldr', {}).get('text') or item.get('abstract')

                paper = {
                    'Title': item.get('title'),
                    'Authors': ', '.join(authors),
                    'Year': item.get('year'),
                    'Abstract': abstract,
                    'URL': item.get('url'),
                    'Source': self.name
                }
                self.results.append(paper)
            self.logger.info(f"Found {len(self.results)} papers.")
        except requests.exceptions.RequestException as e:
            self.logger.error(f"API request failed: {e}")

print("SemanticScholarSearcher class defined.")

SemanticScholarSearcher class defined.


arXiv Searcher

In [4]:
# research_finder/searchers/arxiv.py

import requests
import feedparser

class ArxivSearcher(BaseSearcher):
    """Searcher for the arXiv API."""
    
    BASE_URL = "http://export.arxiv.org/api/query"

    def __init__(self):
        super().__init__("arXiv")
        self.logger = logging.getLogger(self.name)

    def search(self, query: str, limit: int = 10) -> None:
        self.logger.info(f"Searching for: '{query}' with limit {limit}")
        self.clear_results()
        params = {
            'search_query': f'all:"{query}"',
            'start': 0,
            'max_results': limit
        }
        try:
            response = requests.get(self.BASE_URL, params=params)
            response.raise_for_status()
            feed = feedparser.parse(response.content)

            for entry in feed.entries:
                authors = [author.name for author in entry.authors]
                paper = {
                    'Title': entry.title,
                    'Authors': ', '.join(authors),
                    'Year': entry.published.split('-')[0],
                    'Abstract': entry.summary,
                    'URL': entry.link,
                    'Source': self.name
                }
                self.results.append(paper)
            self.logger.info(f"Found {len(self.results)} papers.")
        except requests.exceptions.RequestException as e:
            self.logger.error(f"API request failed: {e}")
        except Exception as e:
            self.logger.error(f"Failed to parse arXiv response: {e}")

print("ArxivSearcher class defined.")

ArxivSearcher class defined.


The Aggregator

In [5]:
# research_finder/aggregator.py

class Aggregator:
    """Aggregates results from multiple searchers."""
    
    def __init__(self):
        self.searchers: List[BaseSearcher] = []
        self.logger = logging.getLogger("Aggregator")

    def add_searcher(self, searcher: BaseSearcher) -> None:
        """Adds a searcher instance to the list."""
        if isinstance(searcher, BaseSearcher):
            self.searchers.append(searcher)
            self.logger.info(f"Added searcher: {searcher.name}")
        else:
            self.logger.error(f"Failed to add searcher: {searcher} is not a valid BaseSearcher instance.")

    def run_all_searches(self, query: str, limit: int) -> List[dict]:
        """
        Runs the search query on all added searchers and returns combined results.
        """
        self.logger.info(f"--- Starting search for '{query}' ---")
        all_results = []
        for searcher in self.searchers:
            try:
                searcher.search(query, limit)
                all_results.extend(searcher.get_results())
            except Exception as e:
                self.logger.error(f"An error occurred with searcher '{searcher.name}': {e}")
        
        self.logger.info(f"--- Search complete. Total results found: {len(all_results)} ---")
        return all_results

print("Aggregator class defined.")

Aggregator class defined.


The Exporter

In [6]:
# research_finder/exporter.py

import pandas as pd

class Exporter:
    """Handles exporting data to various formats."""

    def __init__(self):
        self.logger = logging.getLogger("Exporter")

    def to_csv(self, data: List[Dict[str, Any]], filename: str) -> None:
        """Exports a list of dictionaries to a CSV file."""
        if not data:
            self.logger.warning("No data provided to export.")
            return
        
        try:
            df = pd.DataFrame(data)
            # Ensure a consistent column order
            df = df[['Title', 'Authors', 'Year', 'Source', 'URL', 'Abstract']]
            df.to_csv(filename, index=False, encoding='utf-8')
            self.logger.info(f"Successfully exported {len(data)} results to {filename}")
        except Exception as e:
            self.logger.error(f"Failed to export to CSV: {e}")

print("Exporter class defined.")

Exporter class defined.


Main Execution and Testing

In [7]:
# --- Main Execution Logic ---

# 1. Define your search parameters
SEARCH_QUERY = "quantum computing"
OUTPUT_FILE = "notebook_test_results.csv"
RESULTS_PER_SOURCE = 5

# 2. Initialize the components
aggregator = Aggregator()
exporter = Exporter()

# 3. Add the searchers you want to use
# To debug, you can comment out one of these lines.
aggregator.add_searcher(SemanticScholarSearcher())
aggregator.add_searcher(ArxivSearcher())

# 4. Run the searches
all_articles = aggregator.run_all_searches(SEARCH_QUERY, RESULTS_PER_SOURCE)

# 5. Display a sample of the results in the notebook
if all_articles:
    print("\n--- Sample of Results Found ---")
    df_display = pd.DataFrame(all_articles)
    display(df_display.head()) # Shows first 5 rows
    
    # 6. Export all results to the CSV file
    exporter.to_csv(all_articles, OUTPUT_FILE)
else:
    print("No articles found to export.")


2025-09-30 16:23:27,907 - Aggregator - INFO - Added searcher: Semantic Scholar
2025-09-30 16:23:27,907 - Aggregator - INFO - Added searcher: arXiv
2025-09-30 16:23:27,908 - Aggregator - INFO - --- Starting search for 'quantum computing' ---
2025-09-30 16:23:27,908 - Semantic Scholar - INFO - Searching for: 'quantum computing' with limit 5
2025-09-30 16:23:29,690 - Aggregator - ERROR - An error occurred with searcher 'Semantic Scholar': 'NoneType' object has no attribute 'get'
2025-09-30 16:23:29,690 - arXiv - INFO - Searching for: 'quantum computing' with limit 5
2025-09-30 16:23:30,444 - arXiv - INFO - Found 5 papers.
2025-09-30 16:23:30,445 - Aggregator - INFO - --- Search complete. Total results found: 5 ---



--- Sample of Results Found ---


Unnamed: 0,Title,Authors,Year,Abstract,URL,Source
0,Pulse controlled noise suppressed quantum comp...,"Lu-Ming Duan, Guang-Can Guo",1998,To make arbitrarily accurate quantum computati...,http://arxiv.org/abs/quant-ph/9807072v1,arXiv
1,Unconventional Quantum Computing Devices,Seth Lloyd,2000,This paper investigates a variety of unconvent...,http://arxiv.org/abs/quant-ph/0003151v1,arXiv
2,Photonic Quantum Computers,M. AbuGhanem,2024,In the pursuit of scalable and fault-tolerant ...,http://arxiv.org/abs/2409.08229v1,arXiv
3,Quantum computer and its quasiclassical model,Timur F. Kamalov,2001,Could the theories with hidden variables be em...,http://arxiv.org/abs/quant-ph/0109152v1,arXiv
4,Quantum Computing for Multi Period Asset Alloc...,"Queenie Sun, Nicholas Grablevsky, Huaizhang De...",2024,Portfolio construction has been a long-standin...,http://arxiv.org/abs/2410.11997v1,arXiv


2025-09-30 16:23:30,466 - Exporter - INFO - Successfully exported 5 results to notebook_test_results.csv
