<a target="_blank" href="https://colab.research.google.com/github/UpstageAI/cookbook/blob/main/cookbooks/upstage/Solar-Full-Stack LLM-101/05_3_OracleDB.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# EWHA bagging _ alpha

In [1]:
# set parameters

file = open("info/api.txt", "r")
api_key = file.read()
file.close()

file = open("info/datapath.txt", "r")
data_path = file.read()
file.close()

file = open("info/resultspath.txt", "r")
results_path = file.read()
file.close()

In [2]:
from langchain_upstage import UpstageEmbeddings
import time

# 쿼리 전용 임베딩 모델
query_embeddings = UpstageEmbeddings(api_key=api_key, model="solar-embedding-1-large-query")

# 문장 전용 임베딩 모델
passage_embeddings = UpstageEmbeddings(api_key=api_key, model="solar-embedding-1-large-passage")

In [3]:
# funcion to extract an answer from response

import re

def extract_answer(response):
    """
    extracts the answer from the response using a regular expression.
    expected format: "[ANSWER]: (A) convolutional networks"

    if there are any answers formatted like the format, it returns None.
    """
    pattern = r"\[ANSWER\]:\s*\((A|B|C|D|E)\)"  # Regular expression to capture the answer letter and text
    match = re.search(pattern, response)

    if match:
        return match.group(1) # Extract the letter inside parentheses (e.g., A)
    else:
        return extract_again(response)

def extract_again(response):
    pattern = r"\b[A-J]\b(?!.*\b[A-J]\b)"
    match = re.search(pattern, response)
    if match:
        return match.group(0)
    else:
        return None

## 1. build DB

In [4]:
from langchain_upstage import UpstageLayoutAnalysisLoader
import os
import numpy as np


UPSTAGE_API_KEY = api_key

# .npy 파일 로드 (타입==넘파이)
textbookDB = np.load(data_path+f'embedding/full_philosophy_textbook.npy')
textbookDB = textbookDB.tolist()

textbookDB_embed = np.load(data_path+f'embedding/full_philosophy_textbook_embed.npy')
textbookDB_embed = textbookDB_embed.tolist()

## 3. test set 갖고오기

In [5]:
# read samples.csv file
import pandas as pd

def read_data(data_path):
    data = pd.read_csv(data_path)
    prompts = data['prompts']
    answers = data['answers']
    # returns two lists: prompts and answers
    return prompts, answers

In [6]:
prompts, answers = read_data(os.path.join(data_path, 'mmlupro_test_philosophy.csv'))
testdata = pd.read_csv(data_path+'mmlupro_test_philosophy.csv')

In [7]:
nowtest = pd.DataFrame(columns=['index', 'embed_ques', 'question', 'prompts', 'answers', 'top1', 'top2', 'top3', 'top1_1pred','top1_2pred','top1_3pred', 'top2pred', 'top3pred', 'predict' ])

for index, row in testdata.iterrows():
    #if index == 100 : break # 일단 실험할 땐 100개 단위로 끊어서 가져옴
    q = row.prompts
    a = row.answers
    question = q.partition('(A)')[0]
    question = question.partition(') ')[2]
    q = q.partition(') ')[2]
    try : 
        embedded_query = query_embeddings.embed_query(question) # 질문만 받아와서 embedding 하기
        nowtest.loc[len(nowtest)] = {'index':index, 'embed_ques' : embedded_query, 'question' : question, 'prompts' : q, 'answers' : a}

    except :
        print(f'pass: {index}')
        continue 


In [8]:
nowtest

Unnamed: 0,index,embed_ques,question,prompts,answers,top1,top2,top3,top1_1pred,top1_2pred,top1_3pred,top2pred,top3pred,predict
0,0,"[0.0016546249389648438, 0.0062408447265625, -0...",Select the best translation into predicate log...,Select the best translation into predicate log...,A,,,,,,,,,
1,1,"[0.005649566650390625, -0.0133514404296875, -0...",Construct a complete truth table for the foll...,Construct a complete truth table for the foll...,D,,,,,,,,,
2,2,"[0.00476837158203125, -0.01258087158203125, -0...",Which of the following propositions is an imm...,Which of the following propositions is an imm...,I,,,,,,,,,
3,3,"[0.006565093994140625, -0.0103912353515625, -0...",Select the best translation into predicate log...,Select the best translation into predicate log...,A,,,,,,,,,
4,4,"[-0.010833740234375, -0.020233154296875, -0.01...",Select the best translation into predicate log...,Select the best translation into predicate log...,E,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,494,"[0.00557708740234375, -0.003238677978515625, -...",Mill claims that in deciding how to act:\n,Mill claims that in deciding how to act:\n(A) ...,C,,,,,,,,,
495,495,"[-0.0124053955078125, -0.0189208984375, 0.0102...",Of the two versions of the principle that Sing...,Of the two versions of the principle that Sing...,A,,,,,,,,,
496,496,"[-0.0109405517578125, -0.0105133056640625, -0....","According to Kant, laws of nature are laws acc...","According to Kant, laws of nature are laws acc...",C,,,,,,,,,
497,497,"[-0.017913818359375, -0.0139923095703125, -0.0...",Hare refers to people who endorse ideals witho...,Hare refers to people who endorse ideals witho...,A,,,,,,,,,


## 4. Prompt engineering

In [9]:
import numpy as np


for idx, row in nowtest.iterrows() : # 질문 받아오기 

    embed_ques= row.embed_ques

    # 유사도 기준 내림차순 정렬
    sorted_idx = (np.array(embed_ques) @ np.array(textbookDB_embed).T).argsort()[::-1]

    nowtest.loc[idx, 'top1'] = textbookDB[sorted_idx[0]]
    nowtest.loc[idx, 'top2'] = textbookDB[sorted_idx[1]]
    nowtest.loc[idx, 'top3'] = textbookDB[sorted_idx[2]]


without accepting something very like the view that persons (defined in terms of their
intellectual, moral, and religious capacities) have baseline intrinsic worth. “Person” is
not a biologically definable term. It is not at all clear to me that anything short of a
person has BIW, or that the critic can both deny this and adequately justify the claim
that things that are not persons possess intrinsic natural worth. But going into all of this
would require a book on moral philosophy.380 NOTES53 Claims about BIW starting at the level of persons (and stopping there as well since
there can be non-human as well as human persons) will be called “speciesist” and get
one booed in various contemporary circles. Nonetheless, name-calling aside, the
perspective that so limits BIW seems eminently defensible.
54 Alan Donagan’s The Theory of Morality (Chicago: University of Chicago Press, 1977)
and his Choice: The Essential Element in Action (London: Routledge and Kegan
Paul, 1987) are very good here

In [10]:
try : del [[bagging_pred]]
except : pass
bagging_pred = pd.DataFrame(columns=['questionNum', 'answer', 'top1_1pred', 'top1_2pred', 'top1_3pred', 'top2pred', 'top3pred', 'predict'])

In [11]:
############# first PREDICTION ##########

from langchain_core.prompts import PromptTemplate
from langchain_upstage import ChatUpstage

 
llm = ChatUpstage(api_key = api_key)

prompt_template = PromptTemplate.from_template(
    '''
    
    Please provide most correct answer. Let's think step by step.
    
    When translating the answer, DO NOT exlain anything. And you must also include the choice number like :
    Answer : (Number) the answer choice
    ---
    
    Question: {question}

    Context: {context1}

    Answer :
    ---
        
    '''

)
ko_chain1 = prompt_template | llm

for idx, row in nowtest.iterrows() :
    #if idx == 100 : break
    max_retries = 3  # 최대 재시도 횟수
    retries = 0
    while retries < max_retries:
        try:
            response = ko_chain1.invoke({"question": row.prompts, "context1": row.top1}) # 선지 전까지 받아오기
            nowtest.loc[idx, 'top1pred'] = response.content

            generated_answer = extract_answer(response.content)
            bagging_pred.loc[len(bagging_pred)] = {'questionNum': row.question, 'answer': row.answers, 'top1_1pred': generated_answer}
            break

        except Exception as e:  # API 호출 에러
            retries += 1
            print(f"Error occurred: {e}. Retrying idx:{idx} - {retries}/{max_retries} after 10 seconds...")
            time.sleep(10)  # 10초 대기 후 재시도
            if retries == max_retries:
                print(f"Failed after {max_retries} retries. Skipping this context.")
    


for i in range(2,4) : 
    for idx, row in nowtest.iterrows() :
        #if idx == 100 : break
        max_retries = 3  # 최대 재시도 횟수
        retries = 0
        while retries < max_retries:
            try:
                response = ko_chain1.invoke({"question": row.prompts, "context1": row.top1}) # 선지 전까지 받아오기
                nowtest.loc[idx, 'top1pred'] = response.content

                generated_answer = extract_answer(response.content)
                bagging_pred.loc[idx, f'top1_{i}pred'] = generated_answer
                break

            except Exception as e:  # API 호출 에러
                retries += 1
                print(f"Error occurred: {e}. Retrying idx:{idx} - {retries}/{max_retries} after 10 seconds...")
                time.sleep(10)  # 10초 대기 후 재시도
                if retries == max_retries:
                    print(f"Failed after {max_retries} retries. Skipping this context.")
            
        


  bagging_pred.loc[idx, f'top1_{i}pred'] = generated_answer
  bagging_pred.loc[idx, f'top1_{i}pred'] = generated_answer


In [12]:
bagging_pred

Unnamed: 0,questionNum,answer,top1_1pred,top1_2pred,top1_3pred,top2pred,top3pred,predict
0,Select the best translation into predicate log...,A,A,A,A,,,
1,Construct a complete truth table for the foll...,D,I,I,J,,,
2,Which of the following propositions is an imm...,I,B,B,F,,,
3,Select the best translation into predicate log...,A,A,D,D,,,
4,Select the best translation into predicate log...,E,D,D,D,,,
...,...,...,...,...,...,...,...,...
494,Mill claims that in deciding how to act:\n,C,C,H,F,,,
495,Of the two versions of the principle that Sing...,A,B,B,E,,,
496,"According to Kant, laws of nature are laws acc...",C,A,B,C,,,
497,Hare refers to people who endorse ideals witho...,A,D,D,D,,,


In [13]:
############# second PREDICTION ##########

from langchain_core.prompts import PromptTemplate
from langchain_upstage import ChatUpstage
import time
 
llm = ChatUpstage(api_key = api_key)
    
prompt_template = PromptTemplate.from_template(
    '''

    Please provide most correct answer. Let's think step by step.
    
    When translating the answer, DO NOT exlain anything. And you must also include the choice number like :
    Answer : (Number) the answer choice
    ---
    
    Question: {question}

    Context: {context1} {context2}

    Answer :
    ---
    '''

)
ko_chain2 = prompt_template | llm

for idx, row in nowtest.iterrows() :
    #if idx == 100 : break
    max_retries = 3  # 최대 재시도 횟수
    retries = 0
    while retries < max_retries:
        try:
            response = ko_chain2.invoke({"question": row.prompts, "context1": row.top1, "context2":row.top2})
            nowtest.loc[idx, 'top2pred'] = response.content

            generated_answer = extract_answer(response.content)
            bagging_pred.loc[idx, 'top2pred'] = generated_answer
            break

        except Exception as e:  # API 호출 에러
            retries += 1
            print(f"Error occurred: {e}. Retrying idx:{idx} - {retries}/{max_retries} after 10 seconds...")
            time.sleep(10)  # 10초 대기 후 재시도
            if retries == max_retries:
                print(f"Failed after {max_retries} retries. Skipping this context.")

    


  nowtest.loc[idx, 'top2pred'] = response.content
  bagging_pred.loc[idx, 'top2pred'] = generated_answer


Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying idx:2 - 1/3 after 10 seconds...
Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying idx:15 - 1/3 after 10 seconds...
Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying idx:28 - 1/3 after 10 seconds...

In [14]:
############# third PREDICTION ##########

from langchain_core.prompts import PromptTemplate
from langchain_upstage import ChatUpstage

 
llm = ChatUpstage(api_key = api_key)

prompt_template = PromptTemplate.from_template(
    '''
    Please provide most correct answer. Let's think step by step.
    
    When translating the answer, DO NOT exlain anything. And you must also include the choice number like :
    Answer : (Number) the answer choice
    ---
    
    Question: {question}

    Context: {context1} {context2} {context3}

    Answer :
    ---
        
    '''

)
ko_chain3 = prompt_template | llm

for idx, row in nowtest.iterrows() :
    #if idx == 100 : break
    max_retries = 3  # 최대 재시도 횟수
    retries = 0
    while retries < max_retries:
        try:
            response = ko_chain3.invoke({"question": row.prompts, "context1": row.top1, "context2":row.top2, "context3":row.top3}) # 선지 전까지 받아오기
            nowtest.loc[idx, 'top3pred'] = response.content

            generated_answer = extract_answer(response.content)
            bagging_pred.loc[idx, 'top3pred'] = generated_answer
            break

        except Exception as e:  # API 호출 에러
            retries += 1
            print(f"Error occurred: {e}. Retrying {retries}/{max_retries} after 10 seconds...")
            time.sleep(10)  # 10초 대기 후 재시도
            if retries == max_retries:
                print(f"Failed after {max_retries} retries. Skipping this context.")



  nowtest.loc[idx, 'top3pred'] = response.content
  bagging_pred.loc[idx, 'top3pred'] = generated_answer


Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying 1/3 after 10 seconds...
Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying 1/3 after 10 seconds...
Error occurred: Error code: 429 - {'error': {'message': 'You have reached API request limit. Please wait and try again later. If your use-case require a higher rate limit, please request at https://support.upstage.ai', 'type': 'too_many_requests', 'param': '', 'code': 'too_many_requests'}}. Retrying 1/3 after 10 seconds...
Error occurred: Error cod

In [15]:
bagging_pred

Unnamed: 0,questionNum,answer,top1_1pred,top1_2pred,top1_3pred,top2pred,top3pred,predict
0,Select the best translation into predicate log...,A,A,A,A,A,A,
1,Construct a complete truth table for the foll...,D,I,I,J,I,J,
2,Which of the following propositions is an imm...,I,B,B,F,J,B,
3,Select the best translation into predicate log...,A,A,D,D,A,A,
4,Select the best translation into predicate log...,E,D,D,D,D,D,
...,...,...,...,...,...,...,...,...
494,Mill claims that in deciding how to act:\n,C,C,H,F,H,H,
495,Of the two versions of the principle that Sing...,A,B,B,E,B,B,
496,"According to Kant, laws of nature are laws acc...",C,A,B,C,D,B,
497,Hare refers to people who endorse ideals witho...,A,D,D,D,D,D,


# bagging

In [16]:
from collections import Counter

final_pred = []
for idx, row in bagging_pred.iterrows() :

    pred = []
    pred.append(row.top1_1pred)
    pred.append(row.top1_2pred)
    pred.append(row.top1_3pred)
    pred.append(row.top2pred)
    pred.append(row.top3pred)

    counts = Counter(pred)

    prediction = counts.most_common(1)[0][0]
    if prediction == None :
        try : prediction = counts.most_common(2)[1][0]
        except : pass
    if prediction == None : prediction = 'A'
    final_pred.append(prediction)
    bagging_pred.loc[idx, 'predict'] = prediction

final_pred

  bagging_pred.loc[idx, 'predict'] = prediction


['A',
 'I',
 'B',
 'A',
 'D',
 'I',
 'B',
 'J',
 'A',
 'F',
 'H',
 'J',
 'F',
 'B',
 'B',
 'I',
 'C',
 'A',
 'D',
 'C',
 'G',
 'G',
 'D',
 'D',
 'A',
 'H',
 'H',
 'G',
 'B',
 'B',
 'A',
 'C',
 'A',
 'A',
 'H',
 'C',
 'D',
 'B',
 'F',
 'F',
 'C',
 'J',
 'B',
 'D',
 'I',
 'G',
 'I',
 'A',
 'F',
 'A',
 'A',
 'J',
 'H',
 'G',
 'C',
 'A',
 'H',
 'A',
 'F',
 'F',
 'H',
 'I',
 'B',
 'C',
 'A',
 'H',
 'J',
 'J',
 'C',
 'F',
 'G',
 'A',
 'C',
 'A',
 'G',
 'I',
 'F',
 'B',
 'A',
 'B',
 'I',
 'B',
 'G',
 'C',
 'H',
 'A',
 'G',
 'C',
 'C',
 'E',
 'B',
 'D',
 'E',
 'A',
 'F',
 'J',
 'A',
 'A',
 'D',
 'H',
 'I',
 'C',
 'C',
 'H',
 'I',
 'H',
 'B',
 'A',
 'A',
 'F',
 'C',
 'G',
 'I',
 'A',
 'I',
 'C',
 'J',
 'B',
 'F',
 'J',
 'H',
 'C',
 'J',
 'I',
 'A',
 'C',
 'B',
 'G',
 'H',
 'F',
 'F',
 'F',
 'J',
 'B',
 'J',
 'J',
 'B',
 'A',
 'A',
 'B',
 'A',
 'I',
 'E',
 'C',
 'D',
 'F',
 'I',
 'C',
 'F',
 'A',
 'I',
 'H',
 'I',
 'B',
 'B',
 'B',
 'C',
 'D',
 'H',
 'H',
 'C',
 'C',
 'A',
 'F',
 'A',
 'D',
 'D'

In [17]:
######### 정답 확인 + wrong 뽑아내기 ######

# print accuracy

cnt = 0
wrong = []
for idx, (answer, response) in enumerate(zip(answers, final_pred)):
    print("-"*10)
    try : generated_answer = extract_answer(response)
    except : pass
    print(response)
    # check
    if generated_answer:
        print(f"idx: {idx} | generated answer: {generated_answer}, answer: {answer}")
    else:
        print("extraction fail")

    if generated_answer == None:
        wrong.append(idx+1)
        bagging_pred.loc[idx, 'iswrong'] = 'X'
        continue
    
    if generated_answer in answer:
        cnt += 1
    else : 
        wrong.append(idx+1)
        bagging_pred.loc[idx, 'iswrong'] = 'X'
        
acc = cnt/len(answers)*100
print(f"acc: {acc}%")
print()
print("wrong:", wrong)
bagging_pred.loc[len(bagging_pred), 'predict'] = acc

----------
A
idx: 0 | generated answer: A, answer: A
----------
I
idx: 1 | generated answer: I, answer: D
----------
B
idx: 2 | generated answer: B, answer: I
----------
A
idx: 3 | generated answer: A, answer: A
----------
D
idx: 4 | generated answer: D, answer: E
----------
I
idx: 5 | generated answer: I, answer: I
----------
B
idx: 6 | generated answer: B, answer: D
----------
J
idx: 7 | generated answer: J, answer: J
----------
A
idx: 8 | generated answer: A, answer: D
----------
F
idx: 9 | generated answer: F, answer: H
----------
H
idx: 10 | generated answer: H, answer: D
----------
J
idx: 11 | generated answer: J, answer: G
----------
F
idx: 12 | generated answer: F, answer: B
----------
B
idx: 13 | generated answer: B, answer: D
----------
B
idx: 14 | generated answer: B, answer: E
----------
I
idx: 15 | generated answer: I, answer: D
----------
C
idx: 16 | generated answer: C, answer: I
----------
A
idx: 17 | generated answer: A, answer: A
----------
D
idx: 18 | generated answe

In [24]:
######### 탑 원으로만 확인  ######

# print accuracy
top1_pred = []
for idx, row in bagging_pred.iterrows() :
    top1 = row.top1_1pred
    if top1 == None : top1 = 'A'
    top1_pred.append(top1)

print(top1_pred)


cnt = 0
wrong = []
for idx, (answer, response) in enumerate(zip(answers, top1_pred)):
    print("-"*10)
    try : generated_answer = extract_answer(response)
    except : pass
    print(response)
    # check
    if generated_answer:
        print(f"idx: {idx} | generated answer: {generated_answer}, answer: {answer}")
    else:
        print("extraction fail")

    if generated_answer == None:
        wrong.append(idx+1)
        bagging_pred.loc[idx, 'iswrong'] = 'X'
        continue
    
    if generated_answer in answer:
        cnt += 1
    else : 
        wrong.append(idx+1)
        bagging_pred.loc[idx, 'iswrong'] = 'X'
        
acc = cnt/len(answer)*100
print(f"acc: {acc}%")
print()
print("wrong:", wrong)
bagging_pred.loc[len(bagging_pred), 'predict'] = acc

['A', 'I', 'B', 'A', 'D', 'I', 'B', 'J', 'A', 'I', 'H', 'J', 'F', 'B', 'B', 'I', 'C', 'I', 'D', 'C', 'G', 'G', 'D', 'D', 'A', 'D', 'H', 'G', 'B', 'B', 'A', 'C', 'A', 'A', 'H', 'C', 'D', 'B', 'F', 'F', 'C', 'A', 'B', 'D', 'I', 'G', 'I', 'A', 'F', 'A', 'A', 'G', 'H', 'G', 'C', 'A', 'H', 'A', 'F', 'F', 'H', 'C', 'E', 'C', 'A', 'H', 'J', 'F', 'C', 'F', 'G', 'A', 'A', 'D', 'G', 'I', 'J', 'B', 'A', 'B', 'I', 'G', 'G', 'C', 'H', 'A', 'G', 'C', 'B', 'E', 'B', 'D', 'E', 'A', 'F', 'J', 'A', 'A', 'J', 'H', 'I', 'C', 'C', 'H', 'I', 'H', 'B', 'A', 'A', 'F', 'C', 'G', 'I', 'A', 'I', 'C', 'J', 'B', 'F', 'B', 'H', 'C', 'J', 'I', 'A', 'C', 'E', 'G', 'H', 'F', 'F', 'F', 'J', 'C', 'J', 'J', 'B', 'A', 'A', 'B', 'A', 'I', 'D', 'C', 'D', 'G', 'I', 'C', 'F', 'A', 'I', 'H', 'I', 'B', 'B', 'B', 'C', 'B', 'H', 'H', 'C', 'C', 'A', 'F', 'A', 'D', 'F', 'F', 'D', 'C', 'C', 'F', 'C', 'F', 'I', 'G', 'J', 'H', 'B', 'F', 'E', 'B', 'F', 'B', 'B', 'F', 'A', 'A', 'F', 'H', 'C', 'H', 'I', 'C', 'G', 'G', 'E', 'J', 'D', 'G',

In [25]:
bagging_pred

Unnamed: 0,questionNum,answer,top1_1pred,top1_2pred,top1_3pred,top2pred,top3pred,predict,iswrong
0,Select the best translation into predicate log...,A,A,A,A,A,A,A,
1,Construct a complete truth table for the foll...,D,I,I,J,I,J,I,X
2,Which of the following propositions is an imm...,I,B,B,F,J,B,B,X
3,Select the best translation into predicate log...,A,A,D,D,A,A,A,
4,Select the best translation into predicate log...,E,D,D,D,D,D,D,X
...,...,...,...,...,...,...,...,...,...
497,Hare refers to people who endorse ideals witho...,A,D,D,D,D,D,D,X
498,Moore defines the naturalistic fallacy as the ...,D,D,D,D,D,D,D,
499,,,,,,,,32.865731,
500,,,,,,,,157.0,


In [20]:
#bagging_pred.to_csv(results_path+'ewha_bagging01.csv')

In [21]:
try : del[[wdf]]
except : pass
wdf = bagging_pred[bagging_pred['iswrong']=='X']
wdf.loc[len(bagging_pred), 'predict'] = acc

wdf

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wdf.loc[len(bagging_pred), 'predict'] = acc


Unnamed: 0,questionNum,answer,top1_1pred,top1_2pred,top1_3pred,top2pred,top3pred,predict,iswrong
1,Construct a complete truth table for the foll...,D,I,I,J,I,J,I,X
2,Which of the following propositions is an imm...,I,B,B,F,J,B,B,X
4,Select the best translation into predicate log...,E,D,D,D,D,D,D,X
6,Use indirect truth tables to determine whethe...,D,B,B,C,B,B,B,X
8,"According to Reiman, van den Haag's argument ...",D,A,A,A,A,A,A,X
...,...,...,...,...,...,...,...,...,...
494,Mill claims that in deciding how to act:\n,C,C,H,F,H,H,H,X
495,Of the two versions of the principle that Sing...,A,B,B,E,B,B,B,X
496,"According to Kant, laws of nature are laws acc...",C,A,B,C,D,B,B,X
497,Hare refers to people who endorse ideals witho...,A,D,D,D,D,D,D,X
