## tweet generator

In [4]:
import glob
import os

import docx2txt
import openai
import pandas as pd
import PyPDF2
from tqdm import tqdm

# The OpenAI key is taken from the OPENAI_API_KEY environment variable;
# the lookup yields None when that variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")


In [31]:
def read_pdf(pdf_file):
    """Return the text of a PDF, page by page, up to its first "References" page.

    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
    The extracted text of each page is appended in order. The first page
    whose text contains the substring "References" is still included, and
    reading stops right after it.

    As a side effect, the number of pages read *before* that stopping page
    is printed (or the total number of pages when no page contains
    "References").

    Parameters
    ----------
    pdf_file : path to the PDF file, passed to ``open``.

    Returns
    -------
    str
        Concatenated text of the pages that were read.
    """
    collected = []
    pages_before_stop = 0

    with open(pdf_file, "rb") as handle:
        for page in PyPDF2.PdfReader(handle).pages:
            page_text = page.extract_text()

            # Decide first, append second: the stopping page is kept in the
            # output but is not counted in the printed total.
            reached_references = "References" in page_text
            collected.append(page_text)
            if reached_references:
                break

            pages_before_stop += 1

    print(pages_before_stop)

    return "".join(collected)


### Locate the example and target publications

In [51]:
# Folder that holds the publication PDFs. It defaults to the original local
# folder; set the TWEET_GENERATOR_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.getenv("TWEET_GENERATOR_DATA_DIR", "/Users/d3y010/Desktop/tweet_generator")

# example publication (its text is the worked example in the single-shot prompt)
example_one_file = os.path.join(DATA_DIR, "s41558-023-01616-5.pdf")

# Yan et al paper (the publication the tweets are generated for)
target_paper = os.path.join(DATA_DIR, "s41597-023-02049-7.pdf")


### Prompt engineering

In [52]:
# System message: fixes the persona the model adopts for each request
system_scope = (
    "You are a technical science editor who specializes in social media content creation.  "
    "You are constructing twitter highlight thread from recent publications."
)


### Zero-shot prompt

In [60]:

# Only the first 10,000 characters of the extracted paper text go into the prompt
text = read_pdf(target_paper)[:10000]

# Instructions for the model, followed by a placeholder ({0}) for the paper text
prompt = """
- Generate five tweets that summarize key findings in the publication.
- Tweets should consider how the findings relates to MultiSector Dynamics.
- MultiSector Dynamics is defined as the study of how complex Earth, environmental, infrastructure, governance, and socioeconomic systems coevolve in response to current and rapidly changing influences and stressors. MSD is a transdisciplinary research area that seeks to advance our understanding of how human-Earth system feedback shapes interdependent pathways of societal change across scales and uncertainties. These insights provide a basis for advancing a more resilient, adaptive, and sustainable society.

###
PROMPT: {0}

"""

# One system message (persona) and one user message (instructions + paper text)
system_message = {"role": "system", "content": system_scope}
user_message = {"role": "user", "content": prompt.format(text)}
messages = [system_message, user_message]

response = openai.ChatCompletion.create(
    messages=messages,
    model="gpt-4",
    temperature=0.5,
    max_tokens=1000,
)

# Text of the first returned choice, split into one list entry per line
reply_text = response["choices"][0]["message"]["content"]
thread = reply_text.split("\n")

thread


['Tweet 1:',
 "🌊 New study characterizes the uncertainty in the Community Land Model version 5's hydrological applications in the US. These insights will help improve drought and flood vulnerability assessments. #MultiSectorDynamics #ClimateChange",
 '',
 'Tweet 2:',
 '💧 Researchers use 5 meteorological datasets to evaluate the uncertainty in hydrological parameters of the Community Land Model version 5, a key tool for simulating the terrestrial system. #EarthSystemModel #WaterResources',
 '',
 'Tweet 3:',
 '🌐 The study provides a benchmark dataset of CLM5 default hydrological performance, parameter sensitivities for 28 hydrological metrics, and large-ensemble outputs for CLM5 hydrological predictions. #Hydrology #ClimateModel',
 '',
 'Tweet 4:',
 "🔍 The 28 error metrics in the study offer a diagnostic evaluation of CLM5's hydrological predictability, supporting a wide range of applications such as flood and drought prediction, and reservoir management. #WaterManagement #Resilience",
 

### Single-shot prompt (one worked example)

In [None]:
# First 4,000 characters of the example publication: the "PROMPT" half of the worked example
example_text = read_pdf(example_one_file)[:4000]

# First 8,000 characters of the paper the tweets are generated for
text = read_pdf(target_paper)[:8000]

# Instructions, one worked example ({0} plus a hand-written response), then the
# placeholder ({1}) for the target paper text
prompt = """
- Generate five tweets that summarize key findings in the publication.
- Tweets should consider how the findings relates to MultiSector Dynamics.
- MultiSector Dynamics is defined as the study of how complex Earth, environmental, infrastructure, governance, and socioeconomic systems coevolve in response to current and rapidly changing influences and stressors. MSD is a transdisciplinary research area that seeks to advance our understanding of how human-Earth system feedback shapes interdependent pathways of societal change across scales and uncertainties. These insights provide a basis for advancing a more resilient, adaptive, and sustainable society.

The following are example prompts with appropriate responses:

PROMPT: {0}
RESPONSE: 
[Tweet 1/5] Adaptation planning & #MultiSectorDynamics research require an understanding of how local flood risks will evolve over time given global sea level rise. Hermans et al. develop new tools for providing decision-relevant info in Nature Climate Change (see thread)
[Tweet 2/5] The new paper by Hermans et al. provides decision-relevant info by estimating the global sea level rise that would lead to different local risk amplification factors (e.g., a 10x or 100x decrease in return period), plus uncertainty in these estimates.
[Tweet 3/5] The authors also incorporate globally-variable estimates of design specifications for infrastructure like seawalls and dykes, in order to ground the changing risk profiles in local context and understand when adaptation might be necessary.
[Tweet 4/5] This research could improve infrastructure adaptation by providing planners with locally-tailored and time-evolving estimates of changing flood risk profiles. It could also help #MultiSectorDynamics researchers improve the fidelity of risk management decisions in their models.
[Tweet 5/5] For more details, see the Nature Climate Change News & Views highlight written by David Johnson.

###
PROMPT: {1}

"""

# One system message (persona) and one user message (instructions + example + paper text)
system_message = {"role": "system", "content": system_scope}
user_message = {"role": "user", "content": prompt.format(example_text, text)}
messages = [system_message, user_message]

response = openai.ChatCompletion.create(
    messages=messages,
    model="gpt-4",
    temperature=0.5,
    max_tokens=1000,
)

# Text of the first returned choice, split into one list entry per line
reply_text = response["choices"][0]["message"]["content"]
thread = reply_text.split("\n")

thread
