#### Run the notebook in the given order

In [None]:
#libraries
import openai
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
import time
import re

In [None]:
import os

# Chat model used for every classification and tagging request.
model = "gpt-3.5-turbo"
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# saved inside the notebook. The "Insert key" placeholder remains the fallback
# for users who still paste their own key here.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Insert key")

In [None]:
# Previous sandbox endpoint, kept for reference:
#base_url = "http://sneed-vm/jina/journals/api/"
# Root URL to which each JINA VJ API helper appends its PHP endpoint name.
# NOTE(review): this is plain HTTP while the requests carry basic-auth
# credentials -- confirm the host is only reachable on a trusted network.
base_url = "http://sneed-dev/jina/journals/api/"

In [None]:
import getpass
import os

# Credentials must never be stored in the notebook itself. They are read from
# the JINA_VJ_USERNAME / JINA_VJ_PASSWORD environment variables, and the user
# is prompted for any value that is not set.
# NOTE(review): a real password was previously committed in this cell; it
# should be rotated.
_vj_username = os.environ.get("JINA_VJ_USERNAME") or input("JINA VJ username: ")
_vj_password = os.environ.get("JINA_VJ_PASSWORD") or getpass.getpass("JINA VJ password: ")
# Basic-auth object passed to every requests call made by the API helpers.
auth = HTTPBasicAuth(_vj_username, _vj_password)

In [None]:
# Relevance question sent first for every article. The article text (abstract,
# or title when no abstract exists) is appended directly after it, so the
# prompt ends with a space. The caller looks for "Yes" in the reply, so the
# instruction spells the expected answers exactly that way.
default_prompt = (
    "Would you classify the following abstract as being relevant to nuclear "
    "astrophysics? Reply Yes or No. Abstract: "
)

# Tagging question sent only for relevant articles. Each entry maps a tag name
# to the numeric tag ID; the numbers in the model's reply are parsed out and
# used as tag IDs. The article text is appended after the last line.
tags_prompt = """Given this list of tags, please only return the corresponding numbers that you think matches the text.
[
    'Abundances': 13,
    'Astrophysics Theory /Model': 19,
    'Codes, algorithms': 31,
    'Cosmo Chronology': 22,
    'Experimental Equipment': 28,
    'Nuclear Data-EXPT': 1,
    'Nuclear-Particle THEORY': 5,
    'Observation': 16,
    'Observational data analysis': 25,
    'S-factors, Reaction Rates, masses': 9,
    'AGB stars': 17,
    'Big Bang / cosmology': 2,
    'Black Holes': 40,
    'Core Collapse SN': 26,
    'Dwarf Galaxies': 36,
    'Evolving Stars': 14,
    'First stars': 6,
    'Gamma Ray Bursters': 29,
    'Globular clusters': 42,
    'Milky Way Galaxy': 44,
    'Neutron / Compact Stars': 38,
    'Neutron Star Mergers': 34,
    'Novae': 23,
    'Other galaxies': 46,
    'The Sun': 10,
    'Type Ia SN': 32,
    'White dwarfs': 47,
    'x-ray binaries': 20,
    'Chemical evolution': 24,
    'Cosmic rays': 33,
    'E-capture, beta decay B(GT), B(L=1)': 21,
    'Equation of state': 43,
    'Galactic Evolution / Structure': 27,
    'Gamma ray emission': 39,
    'Gravitational Waves': 30,
    'Hydrogen burning': 41,
    'i-Process': 197,
    'Magnetic fields': 37,
    'Meteoric grains': 45,
    'Neutrino cross sections, processes': 18,
    'p-Process': 15,
    'r-process': 11,
    'Rotation': 35,
    'rp-Process': 7,
    's-Process': 3,
    'Books, Book reviews': 12,
    'Popular audience': 8,
    'Reviews': 4,
    'Undecided': 207,
]
Here is the title or abstract:
"""

In [None]:
#JINA VJ API Functions
def get_import_dates(timeout=30):
    """Fetch the article import dates from the JINA VJ API.

    Args:
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        The ``"Results"`` entry of the JSON response, or ``None`` when the
        HTTP status is not OK or the payload has no ``"Results"`` entry.
    """
    url = base_url + "GetImportDates.php"
    response = requests.get(url, auth=auth, timeout=timeout)
    if not response.ok:
        return None
    # .get keeps the "None means failure" contract when the key is absent.
    return response.json().get("Results")

def get_external_journals(import_date, timeout=30):
    """Fetch the external journals recorded for one import date.

    Args:
        import_date: Value sent as the ``articleImportDate`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        The ``"Results"`` entry of the JSON response, or ``None`` when the
        HTTP status is not OK or the payload has no ``"Results"`` entry.
    """
    url = base_url + "GetExternalJournalsForImportDate.php"
    params = {"articleImportDate": import_date}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    if not response.ok:
        return None
    # .get keeps the "None means failure" contract when the key is absent.
    return response.json().get("Results")

def get_articles(import_date, journal_id, timeout=30):
    """Fetch the articles of one external journal for one import date.

    Args:
        import_date: Value sent as the ``articleImportDate`` query parameter.
        journal_id: Value sent as the ``extJournalID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        The ``"Results"`` entry of the JSON response, or ``None`` when the
        HTTP status is not OK or the payload has no ``"Results"`` entry.
    """
    url = base_url + "GetArticles.php"
    params = {"articleImportDate": import_date, "extJournalID": journal_id}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    if not response.ok:
        return None
    # .get keeps the "None means failure" contract when the key is absent.
    return response.json().get("Results")

def get_tags(journal_id, timeout=30):
    """Fetch the tags and categories defined for a journal.

    Args:
        journal_id: Value sent as the ``journalID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        The ``"Results"`` entry of the JSON response, or ``None`` when the
        HTTP status is not OK or the payload has no ``"Results"`` entry.
    """
    url = base_url + "GetTagsAndCategories.php"
    params = {"journalID": journal_id}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    if not response.ok:
        return None
    # .get keeps the "None means failure" contract when the key is absent.
    return response.json().get("Results")

def add_article_tag(journal_id, article_id, tag_id, timeout=30):
    """Ask the JINA VJ API to attach a tag to an article.

    Args:
        journal_id: Value sent as the ``journalID`` query parameter.
        article_id: Value sent as the ``articleID`` query parameter.
        tag_id: Value sent as the ``tagID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        ``True`` when the HTTP status of the response is OK, else ``False``.
    """
    url = base_url + "AddArticleTag.php"
    params = {"journalID": journal_id, "articleID": article_id, "tagID": tag_id}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    return response.ok

def mark_external_journal_as_reviewed(ext_journal_id, import_date, timeout=30):
    """Ask the JINA VJ API to flag an external journal as reviewed.

    Args:
        ext_journal_id: Value sent as the ``extJournalID`` query parameter.
        import_date: Value sent as the ``articleImportDate`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        ``True`` when the HTTP status of the response is OK, else ``False``.
    """
    url = base_url + "MarkExternalJournalAsReviewed.php"
    params = {"extJournalID": ext_journal_id, "articleImportDate": import_date}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    return response.ok


def delete_article_tag(journal_id, article_id, tag_id, timeout=30):
    """Ask the JINA VJ API to remove a tag from an article.

    Args:
        journal_id: Value sent as the ``journalID`` query parameter.
        article_id: Value sent as the ``articleID`` query parameter.
        tag_id: Value sent as the ``tagID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled server would block the
            notebook indefinitely.

    Returns:
        ``True`` when the HTTP status of the response is OK, else ``False``.
    """
    url = base_url + "DeleteArticleTag.php"
    params = {"journalID": journal_id, "articleID": article_id, "tagID": tag_id}
    response = requests.get(url, params=params, auth=auth, timeout=timeout)
    return response.ok

In [None]:
#function to call Chat GPT API
# Maximum number of attempts made for a single prompt.
max_retries = 10

def make_openai_request(prompt):
    """Send *prompt* to the chat model as one user message, retrying on errors.

    Errors raised by the OpenAI library (rate limits, timeouts, connection
    and server errors) are retried up to ``max_retries`` times with a
    10-second pause between attempts. Authentication and invalid-request
    errors are re-raised immediately because repeating the call cannot fix
    them.

    Args:
        prompt: Full text of the user message.

    Returns:
        The ChatCompletion response, or ``None`` when every attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}]
            )
        except (openai.error.AuthenticationError, openai.error.InvalidRequestError):
            # A bad key or malformed request fails identically on every retry.
            raise
        except (openai.error.OpenAIError, requests.exceptions.Timeout) as err:
            print(f"OpenAI request failed (attempt {attempt}/{max_retries}): {err}")
            # No point in waiting after the final attempt.
            if attempt < max_retries:
                time.sleep(10)
        else:
            # Pause after each successful call to space out requests.
            time.sleep(3)
            return response

    print(f"OpenAI request abandoned after {max_retries} failed attempts.")
    return None

In [None]:
#function for parsing numbers from Chat GPT's answer to tags prompt
def extract_numbers_from_string(input_string):
    """Return every run of decimal digits found in *input_string*.

    The runs are returned as strings, in order of appearance; an input
    without digits yields an empty list.
    """
    digit_run = re.compile(r'\d+')
    return [match.group(0) for match in digit_run.finditer(input_string)]

In [None]:
#function for looping for the articles and prompting Chat GPT
def fetch_articles_and_process_tags(ext_journal_ids, latest_import_date):
    """Classify and tag every article of the given external journals.

    For each journal the articles of ``latest_import_date`` are fetched. Each
    article's abstract (or its title when the abstract is missing or blank)
    is sent to the chat model with the relevance prompt. When the reply
    contains the word "yes" in any letter case, the tags prompt is sent and
    the resulting tags are stored through ``extract_and_process_tags``.

    A journal is marked as reviewed only when none of its model requests
    failed, so a journal with failures is not flagged as done. Journals
    without articles are skipped and not marked.

    Args:
        ext_journal_ids: Iterable of external journal IDs to process.
        latest_import_date: Import date passed to the article and
            review-marking endpoints.
    """
    for ext_journal_id in ext_journal_ids:
        articles = get_articles(latest_import_date, ext_journal_id)

        if not articles:
            continue

        all_requests_succeeded = True

        for article in articles:
            # The API may return None for either field; normalise to strings.
            abstract = article.get('articleAbstract') or ''
            title = article.get('articleTitle') or ''
            text = abstract if abstract.strip() else title

            if not text.strip():
                # Nothing to classify; an empty prompt would only yield noise.
                continue

            response = make_openai_request(default_prompt + text)
            if response is None:
                print("Warning: no relevance answer for article", article.get('articleID'))
                all_requests_succeeded = False
                continue

            answer = response['choices'][0]['message']['content'] or ''
            # Whole-word, case-insensitive match: the model may reply with
            # "yes", "Yes." or "YES".
            if not re.search(r'\byes\b', answer, re.IGNORECASE):
                continue

            tags_response = make_openai_request(tags_prompt + text)
            if tags_response is None:
                print("Warning: no tags answer for article", article.get('articleID'))
                all_requests_succeeded = False
                continue

            extract_and_process_tags(article, tags_response, ext_journal_id)

        if all_requests_succeeded:
            mark_external_journal_as_reviewed(ext_journal_id, latest_import_date)
        else:
            print("Warning: journal", ext_journal_id, "not marked as reviewed; some requests failed.")


In [None]:
#adding info to tag_info and adding the article tags
def extract_and_process_tags(article, tags_response, ext_journal_id):
    """Parse tag IDs from the model reply, submit them, and record them.

    Every number in the reply is treated as a tag ID; repeated numbers are
    submitted only once. When the reply contains no number, the article gets
    tag 207 ('Undecided' in the tags prompt). The IDs are stored in the
    module-level ``tag_info`` dictionary under
    ``tag_info[ext_journal_id][articleID]``, whether or not the API accepted
    them.

    Args:
        article: Article record; its ``'articleID'`` entry is read.
        tags_response: ChatCompletion response to the tags prompt.
        ext_journal_id: Journal ID passed to ``add_article_tag`` and used as
            the outer key in ``tag_info``.

    Returns:
        The list of integer tag IDs submitted for the article.
    """
    reply_text = tags_response['choices'][0]['message']['content'] or ''
    # dict.fromkeys drops repeated IDs while keeping first-seen order.
    tag_ids = list(dict.fromkeys(
        int(number) for number in extract_numbers_from_string(reply_text)
    ))

    if not tag_ids:
        tag_ids = [207]

    article_id = article['articleID']
    for tag_id in tag_ids:
        if not add_article_tag(ext_journal_id, article_id, tag_id):
            # Report rejected tags instead of ignoring the API result.
            print("Warning: could not add tag", tag_id, "to article", article_id)

    tag_info.setdefault(ext_journal_id, {})[article_id] = tag_ids

    return tag_ids

In [None]:
#empty dictionary for storing info
# Filled by extract_and_process_tags as
# {ext_journal_id: {articleID: [tag_id, ...]}}.
tag_info = {}

In [None]:
# Fetch the available import dates and report when the API returned nothing.
# This check only prints a message; it does not stop execution.
import_dates = get_import_dates()
if not import_dates:
    print("Error: Unable to fetch import dates.")

In [None]:
# Pick the import date whose articles will be processed.
import_dates = get_import_dates()
if not import_dates:
    # Fail with a clear message instead of a TypeError/IndexError on [0].
    raise RuntimeError("Unable to fetch import dates; cannot determine the latest import date.")
# NOTE(review): assumes the API lists the newest import date first -- confirm.
latest_import_date = import_dates[0]['articleImportDate']

In [None]:
# Collect the IDs of every external journal recorded for the chosen date.
journal_info_list = get_external_journals(latest_import_date)
if journal_info_list is None:
    # The helper returns None on failure; report it and continue with no
    # journals rather than crashing inside the comprehension.
    print("Error: Unable to fetch external journals.")
    journal_info_list = []
ext_journal_ids = [journal_info['extJournalID'] for journal_info in journal_info_list]

In [None]:
#calling fetch_articles_and_process_tags
# Runs classification and tagging for every journal ID collected for
# latest_import_date.
fetch_articles_and_process_tags(ext_journal_ids, latest_import_date)