In [None]:
%load_ext dotenv
%dotenv

: 

In [27]:
import os

SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")

In [28]:
from serpapi import GoogleSearch
params = {
    "engine": "google_scholar",
    "q": "Exploring ChatGPT and its impact on society MA Haque, S Li",
    "api_key": SERPAPI_API_KEY
}

search = GoogleSearch(params)
results = search.get_dict()
organic_results = results.get("organic_results", [])

print(f"Found {len(organic_results)} results")
print("\n" + "="*80 + "\n")

for i, result in enumerate(organic_results[:5], 1):  # Show first 5 results
    print(f"Result {i}:")
    print(f"Title: {result.get('title', 'N/A')}")
    print(f"Authors: {result.get('publication_info', {}).get('authors', 'N/A')}")
    print(f"Year: {result.get('year', 'N/A')}")
    print(f"Link: {result.get('link', 'N/A')}")
    print(f"Snippet: {result.get('snippet', 'N/A')}")
    print("-" * 60)


Found 1 results


Result 1:
Title: Exploring ChatGPT and its impact on society
Authors: [{'name': 'MA Haque', 'link': 'https://scholar.google.com/citations?user=FRrXc0wAAAAJ&hl=en&oi=sra', 'serpapi_scholar_link': 'https://serpapi.com/search.json?author_id=FRrXc0wAAAAJ&engine=google_scholar_author&hl=en', 'author_id': 'FRrXc0wAAAAJ'}, {'name': 'S Li', 'link': 'https://scholar.google.com/citations?user=H8UOWqoAAAAJ&hl=en&oi=sra', 'serpapi_scholar_link': 'https://serpapi.com/search.json?author_id=H8UOWqoAAAAJ&engine=google_scholar_author&hl=en', 'author_id': 'H8UOWqoAAAAJ'}]
Year: N/A
Link: https://link.springer.com/article/10.1007/s43681-024-00435-4
Snippet: Artificial intelligence has been around for a while, but suddenly it has received more attention than ever before. Thanks to innovations from companies like Google, Microsoft, Meta, and other major brands in technology. OpenAI, though, has triggered the button with its ground-breaking invention—“ChatGPT”. ChatGPT is a Large Language 

In [29]:
paper_to_analyze  = organic_results[0]
print(paper_to_analyze)

{'position': 1, 'title': 'Exploring ChatGPT and its impact on society', 'result_id': 'NBzk5ix8knYJ', 'link': 'https://link.springer.com/article/10.1007/s43681-024-00435-4', 'snippet': 'Artificial intelligence has been around for a while, but suddenly it has received more attention than ever before. Thanks to innovations from companies like Google, Microsoft, Meta, and other major brands in technology. OpenAI, though, has triggered the button with its ground-breaking invention—“ChatGPT”. ChatGPT is a Large Language Model (LLM) based on Transformer architecture that has the ability to generate human-like responses in a conversational context. It uses deep learning algorithms to generate natural language responses to input text. Its large number of parameters, contextual generation, and open-domain training make it a versatile and effective tool for a wide range of applications, from chatbots to customer service to language translation. It has the potential to revolutionize various indust

In [23]:
paper_to_analyze_pdf_url = ''
for resource in paper_to_analyze.get('resources', []):
    if resource.get('file_format', '').lower() == 'pdf':
        paper_to_analyze_pdf_url = resource.get('link', '')
        break

print(paper_to_analyze_pdf_url)

https://arxiv.org/pdf/2403.14643?


In [49]:
import pandas as pd
from serpapi import GoogleSearch
import math

def get_cites_page(cites_id: str, start: int = 0, num: int = 20):
    params = {
        "engine": "google_scholar",
        "cites": cites_id,
        "api_key": SERPAPI_API_KEY,
        "num": num,
        "start": start,
    }
    search = GoogleSearch(params)
    cite_results = search.get_dict()

    citing_papers = cite_results.get("organic_results", []) or []
    total_results = (cite_results.get("search_information", {}) or {}).get("total_results", 0)

    rows = []
    for paper in citing_papers:
        pdf_url = ""
        for resource in paper.get("resources", []) or []:
            # SerpApi exposes file format/type for resources like PDFs
            fmt = (resource.get("file_format") or resource.get("type") or "").lower()
            if fmt == "pdf":
                pdf_url = resource.get("link", "")
                break

        pub_info = paper.get("publication_info", {}) or {}
        inline_links = paper.get("inline_links", {}) or {}
        cited_by = inline_links.get("cited_by", {}) or {}

        rows.append({
            "title": paper.get("title"),
            "result_id": paper.get("result_id"),
            "link": paper.get("link"),
            "snippet": paper.get("snippet"),
            "publication_summary": pub_info.get("summary"),
            "cites_id": cited_by.get("cites_id"),
            "pdf_url": pdf_url,
        })

    df = pd.DataFrame(rows)
    return df, int(total_results)

def get_all_cites(cites_id: str, per_page: int = 20, max_results: int | None = None):
    # First page to learn total_results
    first_df, total = get_cites_page(cites_id, start=0, num=per_page)
    if max_results is not None:
        total = min(total, max_results)

    pages = 1 if total <= per_page else math.ceil(total / per_page)
    dfs = [first_df]

    fetched = len(first_df)
    start = per_page
    while fetched < total:
        remaining = total - fetched
        num = per_page if remaining > per_page else remaining
        page_df, _ = get_cites_page(cites_id, start=start, num=num)
        if page_df.empty:
            break
        dfs.append(page_df)
        fetched += len(page_df)
        start += num

    all_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame(columns=[
        "title","result_id","link","snippet","publication_summary","cites_id","pdf_url"
    ])
    return all_df, total

In [50]:
cites_id = paper_to_analyze.get("inline_links", {}).get("cited_by", {}).get("cites_id", "")
df_all, total_results = get_all_cites(cites_id, per_page=20, max_results=None)
print("Total citing results:", total_results)
df_all.head()


Total citing results: 68


Unnamed: 0,title,result_id,link,snippet,publication_summary,cites_id,pdf_url
0,ChatGPT: Perspectives from human–computer inte...,L0bsiVYsSXAJ,https://www.frontiersin.org/articles/10.3389/f...,The release of GPT-4 has garnered widespread a...,"J Liu - Frontiers in Artificial Intelligence, ...",8091046955740579375,https://www.frontiersin.org/journals/artificia...
1,The sociolinguistic foundations of language mo...,8GGORR0MaSsJ,https://www.frontiersin.org/journals/artificia...,"In this article, we introduce a sociolinguisti...","J Grieve, S Bartl, M Fuoli, J Grafmiller… - Fr...",3128044736046064112,https://www.frontiersin.org/journals/artificia...
2,Co-creation in action: Bridging the knowledge ...,PiJOikFaq3EJ,https://www.sciencedirect.com/science/article/...,The increasing significance of artificial inte...,"EI Yuwono, D Tjondronegoro, C Riverola… - Comp...",8190739584843129406,
3,"Exploring the influence, implications and chal...",uSS0SsDihNAJ,https://www.emerald.com/insight/content/doi/10...,Purpose This paper aims to investigate the ben...,"MI Alhusban, IN Khatatbeh… - … Review: An Inte...",15025383572329735353,
4,Exploring the paradoxical use of ChatGPT in ed...,Eh8UqkqYDRoJ,https://www.sciencedirect.com/science/article/...,ChatGPT's impact on education is both signific...,"WL Hsu, ADK Silalahi - Computers and Education...",1877324066131812114,


In [51]:
len(df_all)

68

In [52]:
df_all.to_csv("cites.csv", index=False)