In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# This notebook's description  
In this notebook, simple prompts are created (with minimal data manipulation) and sent to OpenAI's gpt3.5-turbo model; the predicted answer for each record is then collected and the accuracy is calculated. For those who have not created prompts before, the section **"create prompt and request openai gpt3.5 turbo to solve problems"** provides guidance on prompt creation that may be helpful.  

**There are two points to note:**

1. **To execute this notebook in its entirety, you will need to obtain an API key from OpenAI.**
2. **This notebook is an attempt to use the results of gpt3.5 turbo as a personal benchmark. Even if you execute all the steps here, please be aware that it cannot be submitted.**

The gpt model generates responses in various forms, making their control challenging. Therefore, to stabilize the variation in response generation, it is effective to include phrases in the prompts that instruct the desired response format. Additionally, I believe it's important to adjust the value of the "temperature" parameter, which is one of the factors influencing response generation.  

Lastly, thank you very much for reading up to this point! If you found this helpful in any way, I would greatly appreciate it if you could upvote.  

**The following text has been added as an update on August 29th.**  
Additional methods to increase the percentage of correct answers were considered.  
Please take a look at the sections from **"I will attempt improvements by utilizing the Wikipedia API"** onward, as this approach yielded a somewhat higher percentage of correct answers than simply getting the answers from GPT 3.5!  

**The following text has been added as an update on September 5.**  
Additional methods to increase the percentage of correct answers were considered.
Please take a look at the sections from "**Extract wikipedia information related to a question using the langchain library**" onward. Using the langchain library, I investigated ways to extract information that is highly relevant to the question; including this in the prompt and generating responses with GPT 3.5 further increased the percentage of correct responses!

# check data

In [None]:
# Load the training questions, then show the table's dimensions and its first rows.
train_csv_path = "/kaggle/input/kaggle-llm-science-exam/train.csv"
train = pd.read_csv(train_csv_path)
print(train.shape)
train.head()

In [None]:
# Show the first training question followed by each of its five answer options.
i = 0
separator = "----------------------------------"
print("prompt question is: ", train.loc[i, "prompt"])
print(separator)
for option in ("A", "B", "C", "D", "E"):
    print("Option ", option, ": ", train.loc[i, option])
    print(separator)

Wow, this seems like a challenging problem indeed. It doesn't seem like something I could answer off the top of my head either.

# install library openai  
To send requests to gpt3.5 turbo, we need to install the openai library.

In [None]:
# request_gpt() calls openai.ChatCompletion.create, which was removed in openai 1.0,
# so install the pre-1.0 SDK. %pip (rather than !pip) installs into the running kernel's environment.
%pip install -q "openai<1"

# set OpenAI API-key  
In this notebook, I will be using OpenAI's API key. Once defined as a variable below, you will specify the API key from the variable 'apikey.' To obtain an OpenAI API key, you can create an account on OpenAI's official website: https://openai.com/ and generate it from your profile page.

In [None]:
import os

# Prefer the OPENAI_API_KEY environment variable so the real key never has to be
# saved inside the notebook. The original placeholder is kept as the fallback;
# replace it only if you cannot set the environment variable.
apikey = os.environ.get("OPENAI_API_KEY", "your-api-key")

# create prompt and request openai gpt3.5 turbo to solve problems

In [None]:
def request_gpt(model_name, messages_list, temperature=0):
    """Send one chat-completion request to OpenAI and return the raw response.

    `openai` is imported by a later cell of this notebook; that cell must have
    run before this function is called.

    Args:
        model_name: Name of the chat model to call, e.g. "gpt-3.5-turbo".
        messages_list: List of {"role": ..., "content": ...} message dicts.
        temperature: Sampling temperature forwarded to the API. Defaults to 0,
            the value that was previously hard-coded here.

    Returns:
        Whatever openai.ChatCompletion.create returns. Callers in this notebook
        read the generated text from response["choices"][0]["message"]["content"].
    """
    return openai.ChatCompletion.create(
        model=model_name,
        messages=messages_list,
        temperature=temperature,
    )

In [None]:
import os
import re
import time

import openai

# You can get an OpenAI API key after creating an account.
openai.api_key = apikey
answer_list=[]

# The model does not always reply with exactly "The answer is A", so locate the
# option letter with a pattern instead of reading a fixed character position.
answer_is_pattern=re.compile(r"answer is\s*:?\s*(?:Option\s*)?\(?([A-E])\b")
option_pattern=re.compile(r"\bOption\s*:?\s*\(?([A-E])\b")

for i in range(train.shape[0]):
    # Question and answer-format instruction first, then one line per option.
    # Real newlines ('\n') are used; an escaped '\\n' would send the two literal
    # characters backslash + n to the model.
    prompt_str=train.loc[i,"prompt"]+' Please select the most accurate option from the choices A to E above and answer just like "The answer is A" \n----------------------------------\n'
    for option in ["A","B","C","D","E"]:
        prompt_str=prompt_str+'Option ' + option + ' : ' + train.loc[i,option] + '\n'
    messages=[{"role": "user", "content": prompt_str}]
    # Requests sometimes fail, so retry once; a second failure is raised.
    # `except Exception` keeps Ctrl-C / kernel interrupts from triggering the retry.
    try:
        response=request_gpt("gpt-3.5-turbo",messages)
    except Exception:
        response=request_gpt("gpt-3.5-turbo",messages)
    content=response["choices"][0]["message"]["content"]
    # Progress check: print a sample response every 25 rows.
    if i%25==0:
        print("id",str(i)," responce is : ",content)
    # Prefer an explicit "answer is X"; fall back to "Option X"; record "" when
    # no option letter can be found, so the row is simply scored as wrong.
    found=answer_is_pattern.search(content) or option_pattern.search(content)
    answer_list.append(found.group(1) if found else "")
    # OpenAI enforces per-minute rate limits, so pause for 1 second between requests.
    # Reference information: https://platform.openai.com/docs/guides/rate-limits/overview
    time.sleep(1)
print("done")

train["prediction"]=answer_list
# Let's take a look at the answers for the first 10 questions
answer_list[:10]

# check accuracy score

In [None]:
from sklearn.metrics import accuracy_score
# Share of questions whose predicted letter equals the labelled answer.
baseline_accuracy = accuracy_score(train["answer"], train["prediction"])
print("accuracy score is : ", baseline_accuracy)

# Reflection-1  
An accuracy rate of about 70 percent is not too bad, considering that each question has five answer choices. However, given that gpt3.5-turbo is being used, the performance could probably be better.  


**The following text has been added as an update on August 29th.**  
So, it may sound simplistic, but I considered adding additional features.  
As for where to obtain information, I turned to Wikipedia.  
Fortunately, retrieving Wikipedia information isn't overly challenging, with APIs and libraries readily available.  
However, to fetch Wikipedia information, specific search keywords are required for each question.  
As these are unavailable, I've decided to have GPT-3.5 extract search keywords for me.

# install library wikipedia
To access and parse data from Wikipedia, we need to install the wikipedia library.

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q wikipedia

# I will attempt improvements by utilizing the Wikipedia API.

In [None]:
import wikipedia

# The language never changes, so configure it once instead of on every iteration.
wikipedia.set_lang("en")

wiki_info=[]
for i in range(train.shape[0]):
    # Ask GPT for a single Wikipedia search keyword based on the question text.
    prompt_str=train.loc[i,"prompt"]
    messages=[
        {"role": "user", "content": prompt_str},
        {"role": "user", "content": 'Create one most important keyword for searching the above question on Wikipedia. Keyword only, no explanations or additional text.'},
    ]

    # Retry once on failure; a second failure is raised.
    try:
        response=request_gpt("gpt-3.5-turbo",messages)
    except Exception:
        response=request_gpt("gpt-3.5-turbo",messages)

    keyword = response["choices"][0]["message"]["content"]
    # GPT sometimes wraps the keyword in double quotes; strip one pair.
    # Each end is compared with a single '"' character, and the length check
    # keeps an empty or one-character reply from raising or being emptied.
    if len(keyword)>=2 and keyword[0]=='"' and keyword[-1]=='"':
        keyword=keyword[1:-1]

    # Only the first 300 characters of the keyword are used as the search query.
    search_response = wikipedia.search(keyword[:300])

    # Use the content of the first search hit; if loading that page fails, try
    # the second hit. With no hits, or when both fail, no information is added.
    info_str=""
    for title in search_response[:2]:
        try:
            info_str = wikipedia.page(title).content
            break
        except Exception:
            continue
    wiki_info.append(info_str)

    # Display a sample of each intermediate result every 50 rows.
    if i%50==0:
        print("id is : ",str(i))
        print("prompt message is : " , messages)
        print('gpt3.5turbo answer(created keyword) : \n',response["choices"][0]["message"]["content"])
        print('wikipedia search result : \n',search_response)
        # The slice now matches the 200 characters announced in the message.
        print('wikipedia page content(Due to the large volume, only the first 200 characters will be displayed) : \n',info_str[:200])
        print()

    time.sleep(1)

# Add the information obtained from Wikipedia as a feature column.
train["wiki_info"]=wiki_info
train.head()

The amount of information that can be retrieved from wikipedia seems to vary from line to line.

In [None]:
import seaborn as sns
# Character count of the Wikipedia text per row: summary statistics, then a histogram.
wiki_info_lengths = train["wiki_info"].str.len()
print(wiki_info_lengths.describe())
sns.histplot(data=wiki_info_lengths)

# create prompt with wikipedia information to solve problems  
Try again! Request gpt3.5 turbo to solve the problems, this time with the Wikipedia information included.  
Because the prompts now contain many more characters, the 16k-context model ("gpt-3.5-turbo-16k") is used.

In [None]:
import re

answer_list=[]

# The model does not always reply with exactly "The answer is A", so locate the
# option letter with a pattern instead of reading a fixed character position.
answer_is_pattern=re.compile(r"answer is\s*:?\s*(?:Option\s*)?\(?([A-E])\b")
option_pattern=re.compile(r"\bOption\s*:?\s*\(?([A-E])\b")

for i in range(train.shape[0]):
    # Question and answer-format instruction first, then one line per option.
    prompt_str=train.loc[i,"prompt"]+" Please choose the most accurate option from the choices A to E above and answer in the format 'The answer is A'."
    prompt_str=prompt_str+'\n----------------------------------\n'
    for option in ["A","B","C","D","E"]:
        prompt_str=prompt_str+'Option ' + option + ' : ' + train.loc[i,option] + '\n'
    # Only the first 3000 characters of the Wikipedia text are supplied as context.
    info_str=train.loc[i,"wiki_info"][:3000]
    messages=[
        {"role": "assistant", "content": info_str},
        {"role": "user", "content": prompt_str},
    ]

    # Retry once on failure; a second failure is raised.
    try:
        response=request_gpt("gpt-3.5-turbo-16k",messages)
    except Exception:
        response=request_gpt("gpt-3.5-turbo-16k",messages)
    content=response["choices"][0]["message"]["content"]

    # Print a sample response every 50 rows.
    if i%50==0:
        print("id",str(i)," responce is : ",content)

    # Prefer an explicit "answer is X"; fall back to "Option X"; record "" when
    # no option letter can be found, so the row is simply scored as wrong.
    found=answer_is_pattern.search(content) or option_pattern.search(content)
    answer_list.append(found.group(1) if found else "")
    time.sleep(1)

print("done")
train["prediction_2"]=answer_list

# Let's take a look at the first rows, now including the new prediction column
train.head()

In [None]:
# Print a sample prompt string: after the loop above has finished, prompt_str holds
# the prompt built for the last row of train (question, format instruction, options).
print(prompt_str)

# check accuracy score again

In [None]:
# Accuracy of the answers generated with the Wikipedia text included in the request.
wiki_accuracy = accuracy_score(train["answer"], train["prediction_2"])
print("accuracy score is : ", wiki_accuracy)

Good! It is more accurate than before (correct response rate without wikipedia information = 0.72)!

# Reflection-2  
As might be expected, the accuracy was higher when the percentage of correct answers was calculated only for the rows to which wikipedia information could be added.  
What this seems to tell us is that sending a request to an LLM such as GPT3.5 with the relevant information attached is more accurate than sending only the question.  
Therefore, it can be inferred that the correct response rate is greatly affected by how much correct information (the information necessary to derive the correct answer, plus related information) is attached when making a request to an LLM such as GPT.  

**The following text has been added as an update on September 5.**  
The wikipedia information given in the above prompt (version with a correct response rate of 0.79) was given by extracting only the first 3,000 characters, as shown in the histogram diagram, because some of the lines have too many characters.  
How can we add more information than this to the "prompt" column?  
After some searching, it seems that the langchain library and OpenAI's embedding function can be used to extract highly relevant text. I decided to do some additional work on this.

# install library langchain and others

In [None]:
# The imports in the next code cell use the pre-0.1 langchain module layout
# (langchain.embeddings, langchain.vectorstores, langchain.schema.document),
# so stay on the 0.0.x series. %pip installs into the running kernel's environment.
%pip install -q "langchain<0.1" chromadb tiktoken

# Extract wikipedia information related to a question using the langchain library

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema.document import Document

# Splitter that choose_important_info() uses to cut Wikipedia text into chunks (chunk_size=1000).
text_splitter = CharacterTextSplitter(chunk_size=1000)
# Embedding client handed to Chroma.from_documents(); it is configured with the notebook-level `apikey`.
embeddings = OpenAIEmbeddings(openai_api_key=apikey)
def choose_important_info(wiki_info_str,prompt_str,max_chars=2000):
    """Return the chunks of a Wikipedia article that a similarity search finds for a question.

    The article text is split with the module-level `text_splitter`, loaded into
    a temporary Chroma collection using the module-level `embeddings`, and the
    chunks returned by a similarity search on `prompt_str` are collected, in the
    order the search returned them, until their combined length exceeds
    `max_chars`.

    Args:
        wiki_info_str: Full article text; may be empty.
        prompt_str: Question text used as the similarity-search query.
        max_chars: Stop collecting once the gathered text is longer than this
            many characters (the chunk that crosses the limit is kept).
            Defaults to 2000, the value that was previously hard-coded.

    Returns:
        The selected chunks joined with newlines, with duplicates removed, or ""
        when the article text yields no chunks.
    """
    docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(wiki_info_str)]
    if not docs:
        return ""

    vectorstore = Chroma.from_documents(docs, embeddings)
    try:
        candidates = vectorstore.similarity_search(prompt_str)
    finally:
        # from_documents() is called without a collection name, so every call
        # targets the same default collection. Drop it after the search so
        # chunks from this article cannot show up in searches for later questions.
        vectorstore.delete_collection()

    selected = []
    total_chars = 0
    for candidate in candidates:
        selected.append(candidate.page_content)
        total_chars += len(candidate.page_content)
        if total_chars > max_chars:
            break

    # dict.fromkeys removes duplicates while keeping the search order; a set
    # would make the order of the chunks vary from run to run.
    return '\n'.join(dict.fromkeys(selected))

# create prompt with Wikipedia information highly relevant to the question

In [None]:
import re

answer_list=[]

# The model does not always reply with exactly "The answer is A", so locate the
# option letter with a pattern instead of reading a fixed character position.
answer_is_pattern=re.compile(r"answer is\s*:?\s*(?:Option\s*)?\(?([A-E])\b")
option_pattern=re.compile(r"\bOption\s*:?\s*\(?([A-E])\b")

for i in range(train.shape[0]):
    # Question and answer-format instruction first, then one line per option.
    prompt_str=train.loc[i,"prompt"]+" Please choose the most accurate option from the choices A to E above and answer in the format 'The answer is A'."
    prompt_str=prompt_str+'\n----------------------------------\n'
    for option in ["A","B","C","D","E"]:
        prompt_str=prompt_str+'Option ' + option + ' : ' + train.loc[i,option] + '\n'
    # Context: the Wikipedia chunks selected for this question by similarity search.
    info_str=choose_important_info(train.loc[i,"wiki_info"],train.loc[i,"prompt"])
    # NOTE(review): the slice below truncates the question and options (not the
    # Wikipedia text) to 2000 characters — confirm that this is intended.
    messages=[
        {"role": "assistant", "content": info_str},
        {"role": "user", "content": prompt_str[:2000]},
    ]

    # Retry once on failure; a second failure is raised.
    try:
        response=request_gpt("gpt-3.5-turbo-16k",messages)
    except Exception:
        response=request_gpt("gpt-3.5-turbo-16k",messages)
    content=response["choices"][0]["message"]["content"]

    # Print a sample prompt and response every 50 rows.
    if i%50==0:
        print("prompt message is :",messages)
        print("id",str(i)," responce is : ",content)

    # Prefer an explicit "answer is X"; fall back to "Option X"; record "" when
    # no option letter can be found, so the row is simply scored as wrong.
    found=answer_is_pattern.search(content) or option_pattern.search(content)
    answer_list.append(found.group(1) if found else "")
    time.sleep(1)

print("done")
train["prediction_3"]=answer_list

# Let's take a look at the first rows, now including the new prediction column
train.head()

# check accuracy score again

In [None]:
# Accuracy of the answers generated with the question-relevant Wikipedia chunks.
relevant_wiki_accuracy = accuracy_score(train["answer"], train["prediction_3"])
print("accuracy score is : ", relevant_wiki_accuracy)

It's right on target! I was able to further increase the percentage of correct answers!

# Reflection-3  
At first, GPT generated answers very simply, using only the questions and choices from the training data set plus a few additional request sentences.  
Next, the first 3,000 characters of the related Wikipedia information were added to the request, and answers were generated by GPT.  
Finally, only the Wikipedia passages most relevant to each question were extracted and added, and GPT was used to generate answers again.  
I divided the process into these three phases and checked how the percentage of correct answers changed.  
As I had hoped, I was able to increase the correct answer rate from 0.72 to 0.785 to 0.84.  
Although I was able to examine what information is needed to increase the percentage of correct answers, I have not yet been able to turn this into a form that can be submitted, so I will consider this issue from now on.  
Also, in order to execute this notebook, it is necessary to obtain the API-KEY of OpenAI, but I would like to consider whether the same process can be done with another LLM (preferably one that does not require API-KEY, etc.).