In [73]:
import inspect
import os
# Setting this as environmental variables means I don't have to enter the api keys in each call to the APIs
import pprint

import ipywidgets as widgets
import requests
from bs4 import BeautifulSoup
from IPython.display import display
from langchain import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain.chains.api.base import APIChain
from langchain_core.prompts.base import BasePromptTemplate
from langchain_openai import OpenAI

In [87]:
def get_text_from_web(url, timeout=30):
    """Download a web page and return its text content with the markup removed.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The text of the page as one string. Each text fragment is stripped of
        surrounding whitespace and the fragments are joined by single spaces.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the timeout
            expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of returning the text of an
    # error page as if it were the requested document.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text(separator=' ', strip=True)

def get_text_from_file(file, encoding='utf-8'):
    """Read a text file and return its entire content as a single string.

    Args:
        file: Path of the file to read.
        encoding: Text encoding used to decode the file. Passed explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        The file content, with line breaks preserved.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    # Use a distinct name for the handle so the ``file`` argument is not shadowed.
    with open(file, 'r', encoding=encoding) as handle:
        return handle.read()

def count_tokens(chain, query):
    """Run ``query`` through ``chain`` and report the tokens consumed.

    Args:
        chain: Object exposing a ``run(query)`` method.
        query: Input handed unchanged to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returns.
    """
    # The callback context collects usage for every call made inside the block.
    with get_openai_callback() as cb:
        result = chain.run(query)
        # Report while the callback is still active.
        pprint.pprint(f'Spent a total of {cb.total_tokens} tokens')

    return result

## Templates

In [134]:
# Response prompt text used for the Daymet chain (wrapped in a PromptTemplate
# and passed as `api_response_prompt`).
# It contains four placeholders: {api_docs}, {question}, {api_url} and
# {api_response}. Besides asking for the requested data, it instructs the
# model to include the Daymet citation and to return every data point of the
# requested period.
my_response_template = '''
You are given the below API Documentation:
{api_docs}

Using this documentation, generate the full API url to call for answering the user question.
Pay attention to deliberately exclude any unnecessary pieces of data in the API call.
Question:{question}
API url: {api_url}

Here is the response from the API:
{api_response}

Provide the data that was requested by the user.
Provide the full citation to the user: How to cite: Thornton; M.M.; R. Shrestha; Y. Wei; P.E. Thornton; S-C. Kao; and B.E. Wilson. 2022. Daymet: Daily Surface Weather Data on a 1-km Grid for North America; Version 4 R1. ORNL DAAC; Oak Ridge; Tennessee; USA. https://doi.org/10.3334/ORNLDAAC/2129

Make sure the table has all the data points that the user wants. If the user asks for a month of data, such as July, then you should return all data points for all days of that month.

data:
'''


# Response prompt text used for the MODIS chain (wrapped in a PromptTemplate
# and passed as `api_response_prompt`).
# It contains four placeholders: {api_docs}, {question}, {api_url} and
# {api_response}. It asks the model to always report units and to adjust for
# an offset when one exists.
modis_response_template = '''
You are given the below API Documentation:
{api_docs}

Using this documentation, generate the full API url to call for answering the user question.
Pay attention to deliberately exclude any unnecessary pieces of data in the API call.
Question:{question}
API url: {api_url}

Here is the response from the API:
{api_response}

Use the response from the API to provide the data that was requested. Always report the units of the data and adjust for an offset if it exists.
Data:
'''

## API testing ground

In [133]:
# import requests
# header = {'Accept': 'application/json'}

# # Use following for a csv response:
# # header = {'Accept': 'text/csv'}
# my_lat = 34.334459834357986
# my_lon = -118.05277394802319
# my_url = 'https://modis.ornl.gov/rst/api/v1/ECO4ESIPTJPL/subset?latitude=35.958767&longitude=-84.287433&startDate=A2018049&endDate=A2018049&kmAboveBelow=0.1&kmLeftRight=0.1'
# npp_url = f'https://modis.ornl.gov/rst/api/v1/MOD17A3HGF/subset?latitude={my_lat}&longitude={my_lon}&product=MOD17A3HGF&startDate=A2002001&endDate=A2005365&kmAboveBelow=0&kmLeftRight=0'
# response = requests.get(
#     npp_url, headers=header)
# print(response.json())

## Prompts

In [135]:
# Both templates contain four placeholders: {api_docs}, {question}, {api_url}
# and {api_response}. All four must be declared, otherwise the declaration
# disagrees with the template text and template validation can reject it.
my_response_prompt = PromptTemplate(
    input_variables=['api_docs', 'question', 'api_url', 'api_response'],
    template=my_response_template,
)
modis_response_prompt = PromptTemplate(
    input_variables=['api_docs', 'question', 'api_url', 'api_response'],
    template=modis_response_template,
)

## MODIS API chain

**TODO:** Pick up by adding the other MODIS product names to the API docs text file (`modis_docs.txt`).

In [136]:
# Build an API chain from the MODIS API documentation stored in a local text file.
modis_docs = get_text_from_file('modis_docs.txt')
open_ai_llm = OpenAI()
modis_chain = APIChain.from_llm_and_api_docs(
    open_ai_llm,
    modis_docs,
    verbose=True,
    api_response_prompt=modis_response_prompt,
    # Only URLs under this prefix may be requested by the chain.
    limit_to_domains=['https://modis.ornl.gov/rst/api/v1/'],
)

inputs = {"question": "Can you get NPP data for Deukmejian Wilderness Park for 2010?"}

# Call the chain through its public entry point rather than the private
# `_call` method. The returned dict holds the answer under the 'output' key.
modis_result = modis_chain.invoke(inputs)
output = modis_result['output']
pprint.pprint(output)

## Daymet API chain

In [138]:
# Build an API chain from the Daymet API documentation stored in a local text file.
daymet_docs = get_text_from_file('daymet_docs.txt')
open_ai_llm = OpenAI()
api_chain = APIChain.from_llm_and_api_docs(
    open_ai_llm,
    daymet_docs,
    verbose=True,
    api_response_prompt=my_response_prompt,
    # Only URLs under this prefix may be requested by the chain.
    limit_to_domains=['https://daymet.ornl.gov/single-pixel/api/data'],
)

#vars(api_chain)

# Define your question
inputs = {"question": "Can you get solar radiation data from Daymet for Los Angeles for the whole month of July 2010?"}

# Ways to call the chain

# 1
#output = api_chain.run(inputs)

# 2 - public entry point; used instead of the private `_call` method.
# The returned dict holds the answer under the 'output' key.
daymet_result = api_chain.invoke(inputs)
output = daymet_result['output']

# 3
#count_tokens(api_chain, inputs)

pprint.pprint(output)

('year,yday,srad (W/m^2)\n'
 '2010,182,506.82\n'
 '2010,183,509.35\n'
 '2010,184,471.11\n'
 '2010,185,456.51\n'
 '2010,186,403.86\n'
 '2010,187,404.60\n'
 '2010,188,435.36\n'
 '2010,189,387.53\n'
 '2010,190,487.16\n'
 '2010,191,495.68\n'
 '2010,192,484.37\n'
 '2010,193,474.03\n'
 '2010,194,564.88\n'
 '2010,195,574.72\n'
 '2010,196,532.36\n'
 '2010,197,550.60\n'
 '2010,198,538.44\n'
 '2010,199,517.40\n'
 '2010,200,499.06\n'
 '2010,201,451.52\n'
 '2010,202,428.50\n'
 '2010,203,436.48\n'
 '2010,204,485.66\n'
 '2010,205,494.12\n'
 '2010,206,476.87\n'
 '2010,207,453.41\n'
 '2010,208,454.38\n'
 '2010')
