In [1]:
# imports
import pandas as pd
import numpy as np
import tiktoken
import openai
from openai.embeddings_utils import get_embedding

embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191

In [2]:
# load & inspect dataset
input_datapath = "data/article.csv"  # to save space, we provide a pre-filtered dataset
df = pd.read_csv(input_datapath, index_col=0, encoding='windows-1254')
#df = df[["Id",Topic", "Heading", "Text"]]
#df = df.dropna()
#df["combined"] = (
#    "Title: " + df.Heading.str.strip() + "; Content: " + df.Text.str.strip())

In [3]:
df["TH"]=( "Title: " + df.Topic.str.strip() + "; Content: " + df.Heading.str.strip())

In [4]:
df["THX"]=( "Title: " + df.Topic.str.strip() + "; Content: " + df.Heading.str.strip()+df.Text.str.strip())

In [5]:
encoding = tiktoken.get_encoding(embedding_encoding)

#df["n_tokens"] = df.combined.apply(lambda x: len(encoding.encode(x)))
#df = df[df.n_tokens <= max_tokens]

In [6]:
# Ensure you have your API key set in your environment per the README: https://github.com/openai/openai-python#usage

# This may take a few minutes
df["embedding"] = df.TH.apply(lambda x: get_embedding(x, engine=embedding_model))
df.to_csv("data/article_with_embeddings.csv",index=False)

In [None]:
#No run
datafile_path = "data/article_with_embeddings.csv"

df = pd.read_csv(datafile_path)
df["embedding"] = df.embedding.apply(eval).apply(np.array)
df.to_csv("data/article_with_embeddings.csv",index=False)

In [19]:
from openai.embeddings_utils import get_embedding, cosine_similarity

# search through the articles for a specific topic
def search_article(df, problem, n=8):
    problem_embedding = get_embedding(
        problem,
        engine="text-embedding-ada-002"
    )
    df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, problem_embedding))

    results = (
        df.sort_values("similarity", ascending=False)
        .head(n)
    )
    
    return results


results = search_article(df, "What is the best path forward for the continued development and implementation of Abandoned Farmland Restoration to effectively mitigate the effects of climate change? What steps need to be taken to ensure that this technology is adopted on a large scale and is effective? Be as specific as possible using scientific facts and numerical statistics to support your claims. List any companies and organizations that have been at the forefront of this solution. News articles that address these lessons will be very helpful. Cite your sources if possible.", n=8)


In [20]:
print(results)

                                         Topic  \
Id                                               
16  Best-Case Outcome for Farmland Restoration   
18  Best-Case Outcome for Farmland Restoration   
20  Best-Case Outcome for Farmland Restoration   
5              Why Restore Abandoned Farmland?   
17  Best-Case Outcome for Farmland Restoration   
19  Best-Case Outcome for Farmland Restoration   
4              Why Restore Abandoned Farmland?   
2              Why Restore Abandoned Farmland?   

                                              Heading  \
Id                                                      
16  In the best case scenario, these abandoned far...   
18  Around?31% of farmland is considered abandoned...   
20  Once those landowners are on board, people hav...   
5   Certain organisations have already pledged to ...   
17  Some of the soil in abandoned farmland might n...   
19      How likely is this outcome to happen, though?   
4   Farmland restoration is also a great wa

In [21]:
def query_message(
    query: str,
    df: pd.DataFrame) -> str:
    """Return a message for GPT, with relevant source texts pulled from a dataframe."""
    query="What is the best path forward for the continued development and implementation of Abandoned Farmland Restoration to effectively mitigate the effects of climate change? What steps need to be taken to ensure that this technology is adopted on a large scale and is effective? Be as specific as possible using scientific facts and numerical statistics to support your claims. List any companies and organizations that have been at the forefront of this solution. News articles that address these lessons will be very helpful. Cite your sources if possible."

    message = 'Use the below articles to answer the subsequent question.'
    question = f"\n\nQuestion: {query}"
    
    for index, row in df.iterrows():
        #print(row['THX'])
        message += row['THX']
    
    return message + question

In [22]:
def ask(
    query: str,
    df: pd.DataFrame = results) -> str:
    """Answers a query using GPT and a dataframe of relevant texts and embeddings."""
    message = query_message(query,df)
    messages = [
        {"role": "system", "content": "You answer questions."},
        {"role": "user", "content": message},
    ]
    print(messages)
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0
    )
    response_message = response["choices"][0]["message"]["content"]
    return response_message

In [23]:
ask("What is the best path forward for the continued development and implementation of Abandoned Farmland Restoration to effectively mitigate the effects of climate change? What steps need to be taken to ensure that this technology is adopted on a large scale and is effective? Be as specific as possible using scientific facts and numerical statistics to support your claims. List any companies and organizations that have been at the forefront of this solution.")

[{'role': 'system', 'content': 'You answer questions.'}, {'role': 'user', 'content': 'Use the below articles to answer the subsequent question.Title: Best-Case Outcome for Farmland Restoration; Content: In the best case scenario, these abandoned farmland restoration projects will turn into flourishing opportunities for food for everyone.?They¡¯ll provide all sorts of crops and meat to families that are hurting ¨C but there is a chance this outcome may not happen.Title: Best-Case Outcome for Farmland Restoration; Content: Around?31% of farmland is considered abandoned?worldwide.?The world could produce that same percentage more food if those farms could be rejuvenated and filled. That would be enough to feed more hungry families and ensure food insecurity is on its way out. Solving world hunger is one way to ensure everyone has a happy, long life and doesn¡¯t have to worry about where their next meal comes from.Title: Best-Case Outcome for Farmland Restoration; Content: Once those lando

'The best path forward for the continued development and implementation of Abandoned Farmland Restoration to effectively mitigate the effects of climate change is to encourage landowners to lease or sell their unused land for farming, provide financial incentives for farmers to use abandoned farmland, and use new technology to operate sustainably. According to the article "Best-Case Outcome for Farmland Restoration," around 31% of farmland is considered abandoned worldwide, and if those farms could be rejuvenated and filled, the world could produce that same percentage more food, which would be enough to feed more hungry families and ensure food insecurity is on its way out. \n\nTo ensure that this technology is adopted on a large scale and is effective, steps need to be taken to test the farmland to ensure it\'s suitable for crops and grazing, and if the soil is not good for growing, it can be used as a place to raise livestock for meat stores. Additionally, companies and organization