In [1]:
# Load the python-dotenv IPython extension and run it so that variables from a
# local .env file are exported into the process environment. The API token used
# later in this notebook is read from the environment via os.getenv.
%load_ext dotenv
%dotenv

In [None]:
import pandas as pd
from openai import OpenAI


In [None]:
# Endpoint passed to the OpenAI client as `base_url` when requesting the summary.
DEEPSEEK_BASE_URL = "https://corellm.wb.ru/deepseek/v1"

# Model identifier sent with the chat-completion request.
DEEPSEEK_MODEL_NAME = "DeepSeek-R1"

In [None]:
import os

# API key handed to the OpenAI client; this is None when the environment
# variable is not set.
RWB_CORELLM_TOKEN = os.environ.get("RWB_CORELLM_TOKEN")

# Title/author string of the cited paper; it is interpolated verbatim into the
# summary prompt.
SOURCE_ARTICLE_NAME = "Exploring ChatGPT and its impact on society MA Haque, S Li"

In [9]:
# Read the per-paper extraction results (one row per citing paper, including the
# extracted PDF text and the model-written digest column).
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which usually
# means the CSV was written together with its index - consider index_col=0.
df = pd.read_csv(filepath_or_buffer='pdf_extracted_text_with_digests.csv')

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,result_id,pdf_content,content_length,has_error,DeepSeek-R1_digest
0,VdBeSlL9_TMJ,**What do Learners See in ChatGPT?**\n\n\n**Ch...,330705,False,"The article references ""Exploring ChatGPT and ..."
1,tEdhwBtai2UJ,Volume 6 Issue 4 Year 2025 Pages 755-765\nISSN...,54918,False,"However, some previous studies also identify c..."
2,JYbhWHRjZR8J,"# **Knowledge Building, Multimodal Digital Sto...",495882,False,The article references the source 'Exploring C...
3,vvWK674pjjMJ,Digital Policy Hub — Working Paper\n\n# Digita...,38979,False,The article references 'Exploring ChatGPT and ...
4,X0Vwc_5HcwcJ,# **From Boundary Object to Boundary Vision:**...,516407,False,"The article ""Exploring ChatGPT and its impact ..."
5,Vh3K8GLTg9sJ,Zeitschrift für Arbeitswissenschaft\n\n[https:...,59655,False,"The article references ""Haque und Li 2024"" in ..."
6,BBM06Azg7dsJ,## **OPEN**\n\n\n\n[www.nature.com/scientificr...,84634,False,"The article references ""Exploring ChatGPT and ..."
7,8GGORR0MaSsJ,"OPEN ACCESS\n\n\nEDITED BY\n\nMeishan Zhang,\n...",120205,False,"Based on the provided article, the text that r..."
8,upmOB6l0mqcJ,Postdigital Science and Education\nhttps://doi...,87427,False,The article references 'Exploring ChatGPT and ...
9,L0bsiVYsSXAJ,"OPEN ACCESS\n\n\nEDITED BY\n\nSirje Virkus,\nT...",135047,False,The article references 'Exploring ChatGPT and ...


In [10]:
# Column that holds the model-written digest for each paper. It is derived from
# the model name so it matches the CSV header ("DeepSeek-R1_digest"); defining
# it here keeps the cell runnable on a fresh kernel instead of depending on a
# `digest_col` variable left over from an earlier session.
digest_col = f"{DEEPSEEK_MODEL_NAME}_digest"

digests = df[digest_col]

# A digest is usable when it is present, non-blank, and does not contain one of
# the two marker phrases below (presumably written by the upstream digest step
# when nothing was found or the request failed - TODO confirm).
# regex=False: the markers are plain substrings, not patterns.
valid_digests_mask = (
    digests.notna()
    & ~digests.str.contains('No references found', na=False, regex=False)
    & ~digests.str.contains('Error after', na=False, regex=False)
    & (digests.str.strip() != '')
)

# .copy() so later edits to the subset never write back into `df`.
valid_digests_df = df[valid_digests_mask].copy()

print(f"Found {len(valid_digests_df)} valid digests with references")

if not valid_digests_df.empty:
    # One text blob for the prompt, with an explicit separator between papers.
    all_digests_text = "\n\n--- DIGEST SEPARATOR ---\n\n".join(valid_digests_df[digest_col].tolist())
else:
    print("❌ No valid digests found to analyze")
    all_digests_text = ""


🔍 EXTRACTING VALID DIGESTS
Found 29 valid digests with references


In [12]:
# Ask the model for one synthesized summary of the source article, based on all
# valid digests. `comprehensive_summary` is assigned on every path: the model's
# answer on success, otherwise a short explanatory message.
has_digests = len(valid_digests_df) > 0 and bool(all_digests_text.strip())

if not has_digests:
    print("❌ Cannot generate summary - no valid digests available")
    comprehensive_summary = "No valid digests found to generate summary"
elif not RWB_CORELLM_TOKEN:
    # Guard against a missing token explicitly: with api_key=None the OpenAI
    # client falls back to the OPENAI_API_KEY environment variable, which would
    # either raise outside any handler or send an unrelated key to this endpoint.
    print("❌ Error generating summary: RWB_CORELLM_TOKEN is not set")
    comprehensive_summary = "Error generating summary: RWB_CORELLM_TOKEN is not set"
else:
    summary_prompt = f"""
You are an expert academic researcher. I will provide you with multiple text excerpts from various academic papers that all reference the same source article: "{SOURCE_ARTICLE_NAME}".

Your task is to create a comprehensive summary about this source article based on how it is referenced and discussed across these different papers.

Instructions:
1. Analyze all the provided excerpts to understand how the source article is being referenced
2. Identify the main themes, topics, and contributions of the source article
3. Note how different papers interpret or build upon the source article
4. Create a coherent summary that synthesizes the information from all references
5. Focus on what the source article appears to be about based on these references

Here are the excerpts from papers that reference "{SOURCE_ARTICLE_NAME}":

{all_digests_text}

Please provide a comprehensive summary of the source article based on these references.
"""

    try:
        # Built inside the try so configuration errors are reported the same
        # way as request failures.
        client = OpenAI(
            api_key=RWB_CORELLM_TOKEN,
            base_url=DEEPSEEK_BASE_URL,
        )

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert academic researcher specializing in synthesizing information from multiple sources to create comprehensive summaries."
                },
                {
                    "role": "user",
                    "content": summary_prompt
                }
            ],
            model=DEEPSEEK_MODEL_NAME,
            stream=False,
            temperature=0.3
        )

        comprehensive_summary = chat_completion.choices[0].message.content
        # `content` may be None or empty; treat that as a failure rather than
        # printing and storing a meaningless value.
        if not comprehensive_summary:
            raise ValueError("model returned an empty response")

        print("SUMMARY")
        print(comprehensive_summary)

    except Exception as e:
        # Best effort: keep the notebook running and record why the summary is
        # missing.
        print(f"❌ Error generating summary: {e}")
        comprehensive_summary = f"Error generating summary: {e}"


SUMMARY
Based on analysis of 30+ referencing excerpts across diverse academic papers, the source article **"Exploring ChatGPT and its impact on society" (Haque & Li, 2024)** appears to be a comprehensive interdisciplinary analysis addressing the following key themes:

### Core Themes and Contributions
1. **Environmental Impact**  
   - **Energy Consumption**: Extensively cited for quantifying ChatGPT's massive energy demands, including:
     - Millions in electricity costs for training GPT-3
     - Significant carbon emissions from non-renewable energy sources
     - Per-query energy costs scaling with user adoption
   - **Sustainability Concerns**: Highlighted as a critical critique of AI scalability, emphasizing trade-offs between capability growth and ecological consequences.

2. **Labor Market Disruption**  
   - **Job Displacement**: Central to discussions on automation risks, with estimates that ChatGPT could substitute "up to one-fourth of current work" (linked to Goldman Sachs 