In [1]:
import pandas as pd

### We manually test every pipeline at each similarity threshold (from 0.1 to 0.9) and save the results for each threshold to a separate file

In [None]:
# Similarity threshold whose saved pipeline results are loaded below.
# Change it here (e.g. "0.2" ... "0.9") instead of editing every path.
# Kept as a string so the value is inserted into the file names verbatim.
THRESHOLD = "0.1"
# Directory holding the per-threshold CSVs (name spelled as it is on disk).
RESULTS_DIR = 'sythetic_testing'

# QB1-QB4 file names carry a "threshold_" infix; HB1/HB2/HYB do not.
RAGv1 = pd.read_csv(f'{RESULTS_DIR}/QB1_threshold_{THRESHOLD}.csv')
RAGv2 = pd.read_csv(f'{RESULTS_DIR}/QB2_threshold_{THRESHOLD}.csv')
RAGv3 = pd.read_csv(f'{RESULTS_DIR}/QB3_threshold_{THRESHOLD}.csv')
RAGv4 = pd.read_csv(f'{RESULTS_DIR}/QB4_threshold_{THRESHOLD}.csv')
RAGv5 = pd.read_csv(f'{RESULTS_DIR}/HB1_{THRESHOLD}.csv')
RAGv6 = pd.read_csv(f'{RESULTS_DIR}/HB2_{THRESHOLD}.csv')
RAGv7 = pd.read_csv(f'{RESULTS_DIR}/HYB_{THRESHOLD}.csv')
# baseline = pd.read_csv(f'{RESULTS_DIR}/baseline.csv')

In [7]:
from bs4 import BeautifulSoup

def clean_accepted_answer_column(df):
    """
    Add an 'Accepted Answer Cleaned' column with the plain text of 'Accepted Answer'.

    HTML markup is removed with BeautifulSoup's 'html.parser' and the result is
    stripped of leading/trailing whitespace. Missing values (None / NaN / pd.NA)
    become an empty string instead of the literal text 'nan' that str() would
    produce, so no fake answer is passed on downstream.

    Args:
        df: DataFrame containing an 'Accepted Answer' column.

    Returns:
        The same DataFrame object (modified in place) with the new column added.

    Raises:
        KeyError: if 'Accepted Answer' is not a column of df.
    """
    def _to_plain_text(value):
        # is_scalar guards pd.isna against list-like cells, for which it
        # would return an array rather than a single boolean.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return BeautifulSoup(str(value), 'html.parser').get_text().strip()

    df['Accepted Answer Cleaned'] = df['Accepted Answer'].apply(_to_plain_text)
    return df

In [8]:
# Add the HTML-free 'Accepted Answer Cleaned' column to RAGv3; it is read
# later as the reference answer shown to the judge.
RAGv3 = clean_accepted_answer_column(RAGv3)

In [12]:
import pandas as pd

def fix_if_unseen_by_context(df, col='Step3PROMPT'):
    """
    Recompute the 'IFUnseen' flag from the context embedded in a prompt column.

    For every row:
      * a flag that is already 0 stays 0;
      * if the prompt has no '### CONTEXT:' marker the flag becomes 1;
      * otherwise the text after '### CONTEXT:' (up to
        'Please provide your best answer below:' when present) is inspected:
        blank text, or text containing 'no relevant' (any case), gives 0,
        anything else gives 1.

    Marker matching is case-sensitive.

    Args:
        df: DataFrame with an 'IFUnseen' column and the prompt column `col`.
        col: name of the column holding the full prompt text.

    Returns:
        The same DataFrame object, with 'IFUnseen' overwritten in place.
    """
    context_marker = '### CONTEXT:'
    answer_marker = 'Please provide your best answer below:'

    def check_context(row):
        # Rows already flagged 0 are never changed.
        if row['IFUnseen'] == 0:
            return 0

        prompt = str(row[col])
        _, marker_found, after_marker = prompt.partition(context_marker)

        # Without a context marker we assume the prompt carries some content.
        if not marker_found:
            return 1

        # Everything up to the answer marker (or the whole tail if it is absent).
        context_body = after_marker.partition(answer_marker)[0].strip()

        has_content = bool(context_body) and "no relevant" not in context_body.lower()
        return 1 if has_content else 0

    df['IFUnseen'] = df.apply(check_context, axis=1)
    return df




In [13]:
# Recompute RAGv3's IFUnseen flag from the context found in each Step3PROMPT.
RAGv3 = fix_if_unseen_by_context(RAGv3, col='Step3PROMPT')

In [14]:
# Sanity check: distribution of the recomputed IFUnseen flag.
RAGv3['IFUnseen'].value_counts()

IFUnseen
0    385
Name: count, dtype: int64

In [15]:
def extract_answer(text):
    """
    Return the part of `text` that follows the last answer marker.

    The marker is 'Please provide your best answer below:'. When `text` is a
    string containing it, everything after its last occurrence is returned with
    surrounding whitespace removed. Any other value (non-string, or a string
    without the marker) is returned unchanged.
    """
    marker = "Please provide your best answer below:"
    if not isinstance(text, str):
        return text
    _, found, answer = text.rpartition(marker)
    return answer.strip() if found else text

# Overwrite RAGv1's Step2Response column, keeping only the text that follows
# the answer marker in each response.
RAGv1['Step2Response'] = RAGv1['Step2Response'].apply(extract_answer)

In [16]:
# Same clean-up for RAGv2, whose response column is named Step2Response_v2.
RAGv2['Step2Response_v2'] = RAGv2['Step2Response_v2'].apply(extract_answer)

In [17]:
import re

def is_context_empty(cell_text):
    """
    Tell whether the context section of a prompt is effectively empty.

    The context is the text between '### CONTEXT:' and
    'Please provide your best answer below:' (both matched case-insensitively);
    if the end marker is missing, everything after '### CONTEXT:' is used.

    Returns True if that text is:
      1) empty after trimming whitespace, OR
      2) exactly '[]' (with optional whitespace inside the brackets), OR
      3) contains 'no relevant' (case-insensitive).

    Returns False otherwise, or if '### CONTEXT:' is not found.

    Args:
        cell_text: the prompt; any value is accepted and converted with str().
    """
    marker_start = "### CONTEXT:"
    marker_end = "Please provide your best answer below:"

    # str() avoids errors on non-string cells (e.g. NaN); lower-case once so
    # that every search below is case-insensitive.
    text_lower = str(cell_text).lower()
    start_lower = marker_start.lower()
    end_lower = marker_end.lower()

    idx_start = text_lower.find(start_lower)
    if idx_start == -1:
        # '### CONTEXT:' not found at all
        return False

    # Move past the start marker.
    idx_start += len(start_lower)
    idx_end = text_lower.find(end_lower, idx_start)

    # Slice the lower-cased text, i.e. the very string the indices were
    # computed on. str.lower() can change a string's length (for example
    # 'İ' becomes two code points), so reusing these indices on the original
    # text could shift the window. All checks below are case-insensitive, so
    # working on the lower-cased slice loses nothing.
    if idx_end == -1:
        context = text_lower[idx_start:].strip()
    else:
        context = text_lower[idx_start:idx_end].strip()

    # 1) Nothing but whitespace between the markers.
    if not context:
        return True

    # 2) An empty list literal such as '[]' or '[   ]'.
    if re.fullmatch(r"\[\s*\]", context):
        return True

    # 3) A "no relevant ..." message counts as empty context.
    return "no relevant" in context


In [19]:
# Flag the RAGv4 rows whose prompt carries no usable context.
RAGv4['Context_Empty'] = RAGv4['Prompt'].apply(is_context_empty)

In [20]:
# Sanity check: how many RAGv4 prompts have an empty context.
RAGv4['Context_Empty'].value_counts()

Context_Empty
True     200
False    185
Name: count, dtype: int64

In [23]:
# Flag the RAGv5 rows whose prompt carries no usable context.
RAGv5['Context_Empty'] = RAGv5['Prompt'].apply(is_context_empty)

In [24]:
# Flag the RAGv6 rows whose prompt carries no usable context.
RAGv6['Context_Empty'] = RAGv6['Prompt'].apply(is_context_empty)

In [25]:
# Sanity check: how many RAGv6 prompts have an empty context.
RAGv6['Context_Empty'].value_counts()

Context_Empty
False    251
True     134
Name: count, dtype: int64

In [None]:
# baseline_output = baseline['Generated Response'].to_list()
# Pull each pipeline's answer column into a plain list. The column name
# differs between result files, so each one is spelled out explicitly.
RAG1_output = RAGv1['Step2Response'].to_list()
RAG2_output = RAGv2['Step2Response_v2'].to_list()
RAG3_output = RAGv3['Step3Response'].to_list()
RAG4_output = RAGv4['Response'].to_list()
RAG5_output = RAGv5['Response'].to_list()
RAG6_output = RAGv6['Response'].to_list()
RAG7_output = RAGv7['Response'].to_list()

In [27]:
# baseline

In [28]:
# Per-row "no context" flags, the question text and the reference answers.
# NOTE(review): questions come from RAGv6 and reference answers from RAGv3;
# this assumes every result file lists the same questions in the same row
# order - TODO confirm.
unseen_list = RAGv3['IFUnseen'].to_list()
RAG4_unseen = RAGv4['Context_Empty'].to_list()
RAG5_unseen = RAGv5['Context_Empty'].to_list()
RAG6_unseen = RAGv6['Context_Empty'].to_list()
questions = RAGv6['Question'].to_list()
accepted_answer  = RAGv3['Accepted Answer Cleaned'].to_list()
# questions = baseline['Title'].to_list()

In [None]:
# One row per question: every pipeline's answer, the cleaned accepted answer,
# and the "no context" flags. All lists must have the same length, otherwise
# pandas raises ValueError.
data = pd.DataFrame({
    "Question": questions,
    "RAG1": RAG1_output,
    "RAG2": RAG2_output,
    "RAG3": RAG3_output,
    "RAG4": RAG4_output,
    "RAG5": RAG5_output,
    "RAG6": RAG6_output,
    # Was RAG6_output: the RAG7 column silently duplicated RAG6's answers.
    "RAG7": RAG7_output,
    'Accepted': accepted_answer,
    # IFUnseen is taken from RAGv3 only and labelled as covering RAG1-3.
    "RAG1-3_unseen": unseen_list,
    "RAG4_unseen": RAG4_unseen,
    "RAG5_unseen": RAG5_unseen,
    "RAG6_unseen": RAG6_unseen,
})

In [30]:
from openai import OpenAI
import pandas as pd
import json
import time 
from tqdm import tqdm  

In [None]:
# Do not embed the API key in the notebook: with no api_key argument the
# OpenAI client reads it from the OPENAI_API_KEY environment variable.
client = OpenAI()

In [None]:


# 3. Define a function that builds the prompt for the judge LLM.
def build_judge_prompt(question_title, rag3, rag2, rag1, accepted_answer):
    """
    Build the user message sent to the judge LLM.

    The question title is wrapped in '[QUESTION TITLE]' lines, followed by four
    answer sections labelled APPROACH 1-4. The arguments fill the sections in
    signature order: rag3 -> APPROACH 1, rag2 -> APPROACH 2, rag1 -> APPROACH 3,
    accepted_answer -> APPROACH 4.

    Returns:
        The assembled prompt with leading/trailing whitespace removed.
    """
    lines = ["[QUESTION TITLE]", f"{question_title}", "[QUESTION TITLE]", ""]

    # Number the sections 1..4 in the order the answers were passed in.
    for number, answer in enumerate((rag3, rag2, rag1, accepted_answer), start=1):
        lines.append(f"[THE START OF APPROACH {number} ANSWERS]")
        lines.append(f"{answer}")
        lines.append(f"[THE END OF APPROACH {number} ANSWERS]")
        lines.append("")

    return "\n".join(lines).strip()


In [37]:
# System prompt for the judge LLM: asks for one overall 1-10 score per approach,
# all four on a single comma-separated first line.
# The separator sentence used to say "The two scores", contradicting the request
# for 4 values; it now says "four". Scores produced with the old wording were
# generated under a slightly different prompt.
SYS_PROMPT = """
We would like to request your feedback on the performance of four APPROACH in response to the question in [QUESTION TITLE]
Please rate the helpfulness, accuracy, level of details of the responses. 
If you find, there is incompleted code snippet in the response, deduct the overall score.
Each approach receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.

Please first output a single line containing only 4 values indicating the scores for APPROACH 1, 2, 3, and 4 respectively. 
The four scores are separated by a comma(,). 

"""

In [39]:

def _parse_scores(evaluation_text, expected=4):
    """Parse the judge's first line (e.g. "6, 7, 5, 4") into a list of floats.

    Returns a list of `expected` None values when the reply is missing, is not
    text, has a non-numeric field, or does not hold exactly `expected` values,
    so every row contributes one fixed-width entry.
    """
    placeholder = [None] * expected
    if not isinstance(evaluation_text, str) or not evaluation_text.strip():
        return placeholder
    first_line = evaluation_text.strip().splitlines()[0]
    try:
        scores = [float(part) for part in first_line.split(",")]
    except ValueError:
        return placeholder
    return scores if len(scores) == expected else placeholder


# Raw judge replies and parsed scores, one entry per row of `data`.
# Both lists are appended to on every iteration so they stay aligned with
# `data` (previously scores_list only received entries for failed rows).
evaluations = []
scores_list = []

for idx, row in tqdm(data.iterrows(), total=len(data)):
    # Build the prompt for the judge LLM. Argument order fixes the labels:
    # RAG3 -> APPROACH 1, RAG2 -> APPROACH 2, RAG1 -> APPROACH 3,
    # accepted answer -> APPROACH 4.
    # The accepted answer is passed straight from the row rather than through
    # a local named `accepted_answer`, which would overwrite the list of
    # reference answers used to build `data`.
    prompt_text = build_judge_prompt(
        row["Question"],
        row["RAG3"],
        row["RAG2"],
        row["RAG1"],
        row["Accepted"],
    )
    time.sleep(0.5)  # brief pause between requests
    try:
        response = client.chat.completions.create(
            model="gpt-4o",  # Replace with your actual model name
            messages=[
                {"role": "system", "content": SYS_PROMPT},
                {"role": "user", "content": prompt_text}
            ],
            temperature=0.5
        )

        evaluation_text = response.choices[0].message.content

        print(evaluation_text)
        evaluations.append(evaluation_text)
        scores_list.append(_parse_scores(evaluation_text))

    except Exception as e:
        # Best-effort run: API/network failures, rate limits, etc. are logged
        # and recorded as placeholders so one bad row does not abort the loop.
        print(f"OpenAI API error on row {idx}: {e}")
        evaluations.append("Error")
        scores_list.append([None, None, None, None])



  0%|          | 1/385 [00:01<07:20,  1.15s/it]

6, 7, 5, 4


  1%|          | 2/385 [00:02<06:55,  1.08s/it]

5, 7, 4, 6


  1%|          | 3/385 [00:03<06:53,  1.08s/it]

3, 8, 2, 1


  1%|          | 4/385 [00:04<06:59,  1.10s/it]

7, 8, 6, 5


  1%|▏         | 5/385 [00:05<06:49,  1.08s/it]

4, 5, 6, 5


  2%|▏         | 6/385 [00:06<07:16,  1.15s/it]

6, 5, 7, 4


  2%|▏         | 7/385 [00:07<07:21,  1.17s/it]

7, 6, 8, 7


  2%|▏         | 8/385 [00:09<07:17,  1.16s/it]

5, 6, 7, 3


  2%|▏         | 9/385 [00:10<07:14,  1.16s/it]

6, 7, 8, 5


  3%|▎         | 10/385 [00:11<07:44,  1.24s/it]

6, 7, 8, 5


  3%|▎         | 11/385 [00:12<07:30,  1.20s/it]

7, 9, 5, 3


  3%|▎         | 12/385 [00:13<07:04,  1.14s/it]

7, 8, 5, 6


  3%|▎         | 13/385 [00:14<06:46,  1.09s/it]

5, 6, 7, 3


  4%|▎         | 14/385 [00:15<06:41,  1.08s/it]

7, 6, 3, 5


  4%|▍         | 15/385 [00:16<06:36,  1.07s/it]

6, 5, 7, 3


  4%|▍         | 16/385 [00:17<06:37,  1.08s/it]

7, 3, 5, 4


  4%|▍         | 17/385 [00:19<06:43,  1.10s/it]

7, 6, 5, 4


  5%|▍         | 18/385 [00:20<06:50,  1.12s/it]

3, 7, 6, 2


  5%|▍         | 19/385 [00:21<06:43,  1.10s/it]

6, 7, 5, 2


  5%|▌         | 20/385 [00:22<06:50,  1.13s/it]

3, 6, 2, 7


  5%|▌         | 21/385 [00:23<07:29,  1.23s/it]

6, 4, 8, 2


  6%|▌         | 22/385 [00:25<07:45,  1.28s/it]

4, 6, 8, 2


  6%|▌         | 23/385 [00:26<07:43,  1.28s/it]

5, 7, 3, 2


  6%|▌         | 24/385 [00:27<07:13,  1.20s/it]

6, 7, 3, 8


  6%|▋         | 25/385 [00:28<07:10,  1.20s/it]

5, 4, 6, 5


  7%|▋         | 26/385 [00:29<06:51,  1.15s/it]

6, 6, 7, 4


  7%|▋         | 27/385 [00:30<06:44,  1.13s/it]

4, 2, 5, 3


  7%|▋         | 28/385 [00:32<06:47,  1.14s/it]

3, 4, 8, 5


  8%|▊         | 29/385 [00:33<07:15,  1.22s/it]

7, 8, 6, 7


  8%|▊         | 30/385 [00:34<06:57,  1.17s/it]

4, 8, 9, 3


  8%|▊         | 31/385 [00:35<07:17,  1.23s/it]

6, 5, 7, 8


  8%|▊         | 32/385 [00:37<07:06,  1.21s/it]

7, 6, 5, 4


  9%|▊         | 33/385 [00:38<07:21,  1.26s/it]

3, 2, 3, 1


  9%|▉         | 34/385 [00:39<07:12,  1.23s/it]

4, 2, 3, 3


  9%|▉         | 35/385 [00:40<07:02,  1.21s/it]

5, 6, 4, 3


  9%|▉         | 36/385 [00:41<06:52,  1.18s/it]

4, 6, 5, 3


 10%|▉         | 37/385 [00:43<06:55,  1.19s/it]

6, 8, 7, 4


 10%|▉         | 38/385 [00:44<06:54,  1.20s/it]

3, 7, 5, 2


 10%|█         | 39/385 [00:45<07:12,  1.25s/it]

6, 7, 7, 2


 10%|█         | 40/385 [00:46<07:09,  1.24s/it]

7, 8, 6, 3


 11%|█         | 41/385 [00:48<06:51,  1.20s/it]

6, 5, 4, 3


 11%|█         | 42/385 [00:49<06:45,  1.18s/it]

6, 8, 5, 4


 11%|█         | 43/385 [00:50<06:33,  1.15s/it]

6, 8, 5, 6


 11%|█▏        | 44/385 [00:51<06:22,  1.12s/it]

4, 6, 2, 1


 12%|█▏        | 45/385 [00:52<06:34,  1.16s/it]

7, 8, 6, 5


 12%|█▏        | 46/385 [00:54<07:00,  1.24s/it]

6, 5, 7, 5


 12%|█▏        | 47/385 [00:55<06:35,  1.17s/it]

7, 8, 7, 5


 12%|█▏        | 48/385 [00:56<07:10,  1.28s/it]

3, 4, 5, 2


 13%|█▎        | 49/385 [00:57<06:43,  1.20s/it]

7, 6, 5, 7


 13%|█▎        | 50/385 [00:59<07:40,  1.38s/it]

5, 7, 8, 3


 13%|█▎        | 51/385 [01:00<07:12,  1.29s/it]

5, 6, 3, 4


 14%|█▎        | 52/385 [01:01<06:46,  1.22s/it]

7, 6, 5, 4


 14%|█▍        | 53/385 [01:02<06:29,  1.17s/it]

6, 7, 7, 4


 14%|█▍        | 54/385 [01:03<06:14,  1.13s/it]

4, 5, 6, 2


 14%|█▍        | 55/385 [01:04<06:02,  1.10s/it]

4, 6, 7, 2


 15%|█▍        | 56/385 [01:05<05:58,  1.09s/it]

6, 8, 7, 5


 15%|█▍        | 57/385 [01:07<06:27,  1.18s/it]

7, 5, 6, 4


 15%|█▌        | 58/385 [01:08<06:22,  1.17s/it]

2, 3, 4, 1


 15%|█▌        | 59/385 [01:09<06:36,  1.22s/it]

7, 6, 5, 4


 16%|█▌        | 60/385 [01:10<06:23,  1.18s/it]

6, 7, 5, 6


 16%|█▌        | 61/385 [01:11<06:30,  1.20s/it]

4, 5, 7, 3


 16%|█▌        | 62/385 [01:13<06:16,  1.17s/it]

6, 7, 6, 5


 16%|█▋        | 63/385 [01:14<07:02,  1.31s/it]

6, 4, 5, 3


 17%|█▋        | 64/385 [01:15<06:46,  1.27s/it]

7, 6, 5, 4


 17%|█▋        | 65/385 [01:17<07:09,  1.34s/it]

6, 5, 4, 2


 17%|█▋        | 66/385 [01:18<06:57,  1.31s/it]

3, 6, 2, 4


 17%|█▋        | 67/385 [01:19<06:56,  1.31s/it]

6, 5, 4, 4


 18%|█▊        | 68/385 [01:21<07:10,  1.36s/it]

6, 7, 5, 4


 18%|█▊        | 69/385 [01:22<06:43,  1.28s/it]

6, 7, 5, 2


 18%|█▊        | 70/385 [01:23<06:25,  1.22s/it]

3, 4, 5, 2


 18%|█▊        | 71/385 [01:24<06:32,  1.25s/it]

4, 3, 7, 2


 19%|█▊        | 72/385 [01:26<06:22,  1.22s/it]

6, 9, 8, 4


 19%|█▉        | 73/385 [01:27<06:18,  1.21s/it]

6, 7, 5, 6


 19%|█▉        | 74/385 [01:28<06:10,  1.19s/it]

3, 5, 4, 2


 19%|█▉        | 75/385 [01:29<06:05,  1.18s/it]

7, 8, 4, 3


 20%|█▉        | 76/385 [01:30<06:12,  1.21s/it]

7, 8, 6, 5


 20%|██        | 77/385 [01:32<06:18,  1.23s/it]

7, 5, 6, 6


 20%|██        | 78/385 [01:33<06:39,  1.30s/it]

7, 6, 4, 3


 21%|██        | 79/385 [01:34<06:49,  1.34s/it]

7, 8, 9, 6


 21%|██        | 80/385 [01:36<06:53,  1.35s/it]

6, 7, 5, 3


 21%|██        | 81/385 [01:37<06:18,  1.25s/it]

7, 8, 7, 4


 21%|██▏       | 82/385 [01:38<06:04,  1.20s/it]

6, 5, 4, 3


 22%|██▏       | 83/385 [01:39<06:02,  1.20s/it]

5, 4, 7, 3


 22%|██▏       | 84/385 [01:40<05:57,  1.19s/it]

6, 5, 4, 3


 22%|██▏       | 85/385 [01:41<05:47,  1.16s/it]

5, 6, 7, 3


 22%|██▏       | 86/385 [01:43<06:08,  1.23s/it]

3, 7, 4, 2


 23%|██▎       | 87/385 [01:44<05:54,  1.19s/it]

5, 3, 2, 2


 23%|██▎       | 88/385 [01:45<05:43,  1.16s/it]

3, 7, 5, 2


 23%|██▎       | 89/385 [01:46<06:03,  1.23s/it]

6, 7, 5, 4


 23%|██▎       | 90/385 [01:47<05:49,  1.18s/it]

3, 4, 5, 2


 24%|██▎       | 91/385 [01:48<05:37,  1.15s/it]

7, 8, 6, 5


 24%|██▍       | 92/385 [01:50<05:40,  1.16s/it]

7, 6, 5, 4


 24%|██▍       | 93/385 [01:51<05:43,  1.17s/it]

6, 7, 6, 5


 24%|██▍       | 94/385 [01:52<05:32,  1.14s/it]

7, 6, 8, 9


 25%|██▍       | 95/385 [01:53<05:58,  1.24s/it]

5, 6, 7, 8


 25%|██▍       | 96/385 [01:55<06:23,  1.33s/it]

7, 8, 5, 6


 25%|██▌       | 97/385 [01:56<06:05,  1.27s/it]

6, 8, 3, 5


 25%|██▌       | 98/385 [01:58<06:24,  1.34s/it]

7, 8, 6, 6


 26%|██▌       | 99/385 [01:59<06:11,  1.30s/it]

6, 5, 7, 8


 26%|██▌       | 100/385 [02:00<05:51,  1.23s/it]

6, 3, 4, 5


 26%|██▌       | 101/385 [02:01<06:06,  1.29s/it]

3, 4, 3, 5


 26%|██▋       | 102/385 [02:02<05:44,  1.22s/it]

4, 7, 6, 8


 27%|██▋       | 103/385 [02:03<05:21,  1.14s/it]

4, 5, 6, 1


 27%|██▋       | 104/385 [02:05<05:47,  1.24s/it]

7, 5, 3, 4


 27%|██▋       | 105/385 [02:06<06:16,  1.34s/it]

6, 5, 6, 4


 28%|██▊       | 106/385 [02:08<06:25,  1.38s/it]

7, 5, 6, 6


 28%|██▊       | 107/385 [02:09<05:48,  1.25s/it]

3, 4, 5, 2


 28%|██▊       | 108/385 [02:10<05:59,  1.30s/it]

7, 6, 5, 4


 28%|██▊       | 109/385 [02:11<05:42,  1.24s/it]

6, 8, 7, 4


 29%|██▊       | 110/385 [02:13<05:58,  1.30s/it]

7, 6, 5, 4


 29%|██▉       | 111/385 [02:14<05:42,  1.25s/it]

7, 8, 6, 5


 29%|██▉       | 112/385 [02:15<05:30,  1.21s/it]

2, 3, 4, 1


 29%|██▉       | 113/385 [02:17<06:07,  1.35s/it]

6, 5, 7, 4


 30%|██▉       | 114/385 [02:18<05:44,  1.27s/it]

7, 8, 7, 6


 30%|██▉       | 115/385 [02:21<08:09,  1.81s/it]

6, 8, 5, 4


 30%|███       | 116/385 [02:22<07:42,  1.72s/it]

6, 7, 5, 4


 30%|███       | 117/385 [02:24<07:01,  1.57s/it]

3, 7, 8, 4


 31%|███       | 118/385 [02:25<06:35,  1.48s/it]

7, 8, 6, 5


 31%|███       | 119/385 [02:26<06:00,  1.35s/it]

7, 8, 6, 7


 31%|███       | 120/385 [02:27<06:16,  1.42s/it]

7, 6, 5, 8


 31%|███▏      | 121/385 [02:28<05:43,  1.30s/it]

3, 4, 5, 2


 32%|███▏      | 122/385 [02:30<06:08,  1.40s/it]

6, 7, 4, 5


 32%|███▏      | 123/385 [02:31<05:57,  1.36s/it]

6, 7, 5, 3


 32%|███▏      | 124/385 [02:33<06:13,  1.43s/it]

3, 4, 2, 1


 32%|███▏      | 125/385 [02:34<05:45,  1.33s/it]

6, 5, 4, 3


 33%|███▎      | 126/385 [02:35<05:27,  1.27s/it]

6, 8, 5, 3


 33%|███▎      | 127/385 [02:37<05:29,  1.28s/it]

6, 7, 6, 5


 33%|███▎      | 128/385 [02:38<05:52,  1.37s/it]

6, 5, 4, 7


 34%|███▎      | 129/385 [02:40<05:57,  1.40s/it]

3, 5, 7, 2


 34%|███▍      | 130/385 [02:41<06:04,  1.43s/it]

3, 4, 5, 2


 34%|███▍      | 131/385 [02:43<06:19,  1.49s/it]

5, 7, 3, 2


 34%|███▍      | 132/385 [02:44<06:33,  1.56s/it]

6, 8, 7, 3


 35%|███▍      | 133/385 [02:46<06:02,  1.44s/it]

5, 6, 4, 7


 35%|███▍      | 134/385 [02:47<05:36,  1.34s/it]

7, 8, 6, 5


 35%|███▌      | 135/385 [02:48<05:10,  1.24s/it]

5, 8, 7, 2


 35%|███▌      | 136/385 [02:49<04:54,  1.18s/it]

7, 6, 8, 5


 36%|███▌      | 137/385 [02:50<04:54,  1.19s/it]

6, 7, 7, 4


 36%|███▌      | 138/385 [02:51<05:16,  1.28s/it]

7, 5, 6, 4


 36%|███▌      | 139/385 [02:53<05:03,  1.23s/it]

6, 5, 7, 3


 36%|███▋      | 140/385 [02:54<04:53,  1.20s/it]

6, 7, 8, 5


 37%|███▋      | 141/385 [02:56<05:59,  1.47s/it]

3, 8, 5, 2


 37%|███▋      | 142/385 [02:57<06:02,  1.49s/it]

5, 8, 6, 7


 37%|███▋      | 143/385 [02:59<06:25,  1.59s/it]

7, 6, 5, 3


 37%|███▋      | 144/385 [03:00<05:52,  1.46s/it]

6, 7, 8, 5


 38%|███▊      | 145/385 [03:02<05:43,  1.43s/it]

3, 6, 2, 1


 38%|███▊      | 146/385 [03:03<05:23,  1.35s/it]

3, 5, 4, 2


 38%|███▊      | 147/385 [03:05<05:54,  1.49s/it]

3, 5, 4, 2


 38%|███▊      | 148/385 [03:08<07:50,  1.99s/it]

3, 6, 5, 2


 39%|███▊      | 149/385 [03:09<06:58,  1.77s/it]

4, 8, 5, 3


 39%|███▉      | 150/385 [03:10<06:00,  1.53s/it]

4, 4, 5, 3


 39%|███▉      | 151/385 [03:11<05:41,  1.46s/it]

6, 7, 6, 5


 39%|███▉      | 152/385 [03:16<09:01,  2.32s/it]

7, 6, 8, 5


 40%|███▉      | 153/385 [03:17<08:08,  2.10s/it]

3, 8, 6, 2


 40%|████      | 154/385 [03:19<07:35,  1.97s/it]

7, 8, 7, 5


 40%|████      | 155/385 [03:20<06:56,  1.81s/it]

5, 5, 4, 2


 41%|████      | 156/385 [03:22<06:33,  1.72s/it]

5, 6, 4, 3


 41%|████      | 157/385 [03:23<06:01,  1.58s/it]

6, 7, 4, 3


 41%|████      | 158/385 [03:25<06:05,  1.61s/it]

3, 5, 6, 2


 41%|████▏     | 159/385 [03:26<05:26,  1.45s/it]

6, 8, 7, 3


 42%|████▏     | 160/385 [03:27<05:00,  1.33s/it]

7, 8, 7, 6


 42%|████▏     | 161/385 [03:28<04:36,  1.24s/it]

3, 3, 4, 5


 42%|████▏     | 162/385 [03:29<04:37,  1.24s/it]

4, 4, 5, 2


 42%|████▏     | 163/385 [03:30<04:32,  1.23s/it]

7, 6, 5, 8


 43%|████▎     | 164/385 [03:32<04:34,  1.24s/it]

4, 3, 6, 7


 43%|████▎     | 165/385 [03:33<04:27,  1.22s/it]

6, 4, 5, 7


 43%|████▎     | 166/385 [03:34<04:22,  1.20s/it]

3, 4, 7, 1


 43%|████▎     | 167/385 [03:35<04:15,  1.17s/it]

2, 6, 5, 1


 44%|████▎     | 168/385 [03:36<04:04,  1.13s/it]

6, 7, 8, 5


 44%|████▍     | 169/385 [03:37<04:07,  1.15s/it]

5, 6, 4, 4


 44%|████▍     | 170/385 [03:39<04:14,  1.18s/it]

3, 5, 6, 4


 44%|████▍     | 171/385 [03:40<04:07,  1.16s/it]

3, 5, 4, 2


 45%|████▍     | 172/385 [03:45<08:50,  2.49s/it]

6, 5, 7, 4


 45%|████▍     | 173/385 [03:48<08:54,  2.52s/it]

7, 6, 8, 5


 45%|████▌     | 174/385 [03:49<07:18,  2.08s/it]

6, 7, 8, 3


 45%|████▌     | 175/385 [03:51<06:50,  1.95s/it]

7, 8, 6, 7


 46%|████▌     | 176/385 [03:52<05:53,  1.69s/it]

5, 7, 6, 3


 46%|████▌     | 177/385 [03:53<05:39,  1.63s/it]

4, 7, 2, 1


 46%|████▌     | 178/385 [03:55<05:43,  1.66s/it]

5, 4, 3, 2


 46%|████▋     | 179/385 [03:56<05:27,  1.59s/it]

6, 5, 7, 3


 47%|████▋     | 180/385 [03:58<05:23,  1.58s/it]

6, 5, 4, 3


 47%|████▋     | 181/385 [03:59<05:02,  1.48s/it]

7, 8, 9, 6


 47%|████▋     | 182/385 [04:00<04:42,  1.39s/it]

4, 5, 3, 2


 48%|████▊     | 183/385 [04:02<04:54,  1.46s/it]

7, 8, 7, 6


 48%|████▊     | 184/385 [04:03<04:25,  1.32s/it]

4, 7, 6, 3


 48%|████▊     | 185/385 [04:04<04:12,  1.26s/it]

7, 8, 9, 6


 48%|████▊     | 186/385 [04:05<04:08,  1.25s/it]

5, 8, 7, 6


 49%|████▊     | 187/385 [04:07<04:37,  1.40s/it]

3, 6, 7, 2


 49%|████▉     | 188/385 [04:09<04:41,  1.43s/it]

5, 8, 7, 4


 49%|████▉     | 189/385 [04:10<04:31,  1.39s/it]

7, 8, 7, 5


 49%|████▉     | 190/385 [04:11<04:13,  1.30s/it]

7, 8, 7, 5


 50%|████▉     | 191/385 [04:12<04:00,  1.24s/it]

4, 3, 5, 2


 50%|████▉     | 192/385 [04:13<04:09,  1.29s/it]

7, 6, 7, 3


 50%|█████     | 193/385 [04:15<04:00,  1.26s/it]

7, 7, 3, 5


 50%|█████     | 194/385 [04:16<03:59,  1.25s/it]

3, 7, 6, 4


 51%|█████     | 195/385 [04:21<07:15,  2.29s/it]

6, 7, 5, 4


 51%|█████     | 196/385 [04:22<06:02,  1.92s/it]

7, 8, 7, 5


 51%|█████     | 197/385 [04:23<05:27,  1.74s/it]

4, 7, 8, 2


 51%|█████▏    | 198/385 [04:24<05:05,  1.64s/it]

6, 7, 8, 5


 52%|█████▏    | 199/385 [04:27<05:46,  1.86s/it]

4, 5, 6, 7


 52%|█████▏    | 200/385 [04:28<05:13,  1.69s/it]

7, 8, 5, 4


 52%|█████▏    | 201/385 [04:29<04:34,  1.49s/it]

6, 8, 7, 5


 52%|█████▏    | 202/385 [04:31<05:19,  1.75s/it]

5, 4, 7, 1


 53%|█████▎    | 203/385 [04:32<04:41,  1.55s/it]

3, 4, 4, 3


 53%|█████▎    | 204/385 [04:34<04:14,  1.41s/it]

5, 7, 6, 8


 53%|█████▎    | 205/385 [04:35<04:09,  1.38s/it]

6, 4, 5, 3


 54%|█████▎    | 206/385 [04:36<04:20,  1.45s/it]

7, 5, 3, 6


 54%|█████▍    | 207/385 [04:38<04:10,  1.41s/it]

4, 3, 4, 2


 54%|█████▍    | 208/385 [04:39<04:04,  1.38s/it]

6, 9, 8, 5


 54%|█████▍    | 209/385 [04:42<05:19,  1.82s/it]

3, 6, 8, 2


 55%|█████▍    | 210/385 [04:43<05:05,  1.75s/it]

6, 5, 4, 3


 55%|█████▍    | 211/385 [04:45<04:51,  1.67s/it]

3, 7, 2, 1


 55%|█████▌    | 212/385 [04:47<04:51,  1.68s/it]

3, 7, 6, 2


 55%|█████▌    | 213/385 [04:48<04:30,  1.57s/it]

4, 6, 7, 2


 56%|█████▌    | 214/385 [04:49<04:12,  1.48s/it]

7, 6, 7, 8


 56%|█████▌    | 215/385 [04:51<04:46,  1.68s/it]

2, 3, 8, 1


 56%|█████▌    | 216/385 [04:53<04:16,  1.51s/it]

4, 8, 6, 2


 56%|█████▋    | 217/385 [04:54<03:49,  1.37s/it]

6, 7, 5, 3


 57%|█████▋    | 218/385 [04:55<03:34,  1.29s/it]

6, 5, 4, 6


 57%|█████▋    | 219/385 [04:56<03:42,  1.34s/it]

2, 3, 5, 1


 57%|█████▋    | 220/385 [04:57<03:35,  1.31s/it]

7, 6, 8, 3


 57%|█████▋    | 221/385 [04:59<03:26,  1.26s/it]

7, 8, 7, 5


 58%|█████▊    | 222/385 [05:00<03:18,  1.21s/it]

6, 7, 5, 2


 58%|█████▊    | 223/385 [05:01<03:33,  1.32s/it]

2, 2, 2, 1


 58%|█████▊    | 224/385 [05:02<03:21,  1.25s/it]

5, 6, 7, 4


 58%|█████▊    | 225/385 [05:04<03:29,  1.31s/it]

6, 5, 4, 7


 59%|█████▊    | 226/385 [05:05<03:19,  1.26s/it]

3, 6, 7, 2


 59%|█████▉    | 227/385 [05:06<03:14,  1.23s/it]

5, 4, 6, 7


 59%|█████▉    | 228/385 [05:07<03:10,  1.21s/it]

7, 6, 8, 6


 59%|█████▉    | 229/385 [05:08<03:08,  1.21s/it]

6, 8, 4, 5


 60%|█████▉    | 230/385 [05:14<06:13,  2.41s/it]

7, 8, 7, 3


 60%|██████    | 231/385 [05:15<05:22,  2.09s/it]

4, 5, 6, 2


 60%|██████    | 232/385 [05:16<04:33,  1.79s/it]

7, 8, 6, 5


 61%|██████    | 233/385 [05:17<03:59,  1.58s/it]

5, 6, 4, 3


 61%|██████    | 234/385 [05:18<03:47,  1.50s/it]

7, 8, 9, 6


 61%|██████    | 235/385 [05:20<03:42,  1.48s/it]

5, 3, 2, 6


 61%|██████▏   | 236/385 [05:21<03:35,  1.45s/it]

3, 8, 7, 2


 62%|██████▏   | 237/385 [05:22<03:16,  1.33s/it]

3, 4, 5, 2


 62%|██████▏   | 238/385 [05:24<03:18,  1.35s/it]

7, 6, 5, 6


 62%|██████▏   | 239/385 [05:25<03:01,  1.24s/it]

6, 7, 8, 4


 62%|██████▏   | 240/385 [05:26<02:51,  1.18s/it]

6, 4, 5, 3


 63%|██████▎   | 241/385 [05:27<02:55,  1.22s/it]

7, 8, 7, 6


 63%|██████▎   | 242/385 [05:28<03:00,  1.27s/it]

6, 6, 7, 3


 63%|██████▎   | 243/385 [05:29<02:50,  1.20s/it]

6, 6, 7, 7


 63%|██████▎   | 244/385 [05:31<03:07,  1.33s/it]

7, 9, 7, 5


 64%|██████▎   | 245/385 [05:32<03:07,  1.34s/it]

6, 7, 6, 5


 64%|██████▍   | 246/385 [05:34<02:56,  1.27s/it]

7, 8, 6, 2


 64%|██████▍   | 247/385 [05:35<02:43,  1.19s/it]

3, 5, 6, 4


 64%|██████▍   | 248/385 [05:36<02:42,  1.18s/it]

7, 8, 8, 6


 65%|██████▍   | 249/385 [05:37<02:40,  1.18s/it]

5, 6, 3, 8


 65%|██████▍   | 250/385 [05:39<03:12,  1.42s/it]

3, 8, 8, 2


 65%|██████▌   | 251/385 [05:40<03:06,  1.39s/it]

6, 7, 6, 3


 65%|██████▌   | 252/385 [05:42<03:03,  1.38s/it]

7, 6, 9, 8


 66%|██████▌   | 253/385 [05:43<02:52,  1.30s/it]

7, 6, 8, 5


 66%|██████▌   | 254/385 [05:44<03:02,  1.40s/it]

3, 8, 4, 1


 66%|██████▌   | 255/385 [05:45<02:49,  1.30s/it]

7, 6, 7, 5


 66%|██████▋   | 256/385 [05:47<02:51,  1.33s/it]

7, 6, 5, 4


 67%|██████▋   | 257/385 [05:48<02:57,  1.39s/it]

6, 7, 7, 5


 67%|██████▋   | 258/385 [05:50<03:02,  1.44s/it]

5, 6, 8, 3


 67%|██████▋   | 259/385 [05:51<03:01,  1.44s/it]

2, 4, 7, 3


 68%|██████▊   | 260/385 [05:53<03:10,  1.52s/it]

5, 4, 3, 2


 68%|██████▊   | 261/385 [05:54<03:07,  1.51s/it]

4, 5, 4, 3


 68%|██████▊   | 262/385 [05:56<03:04,  1.50s/it]

6, 7, 5, 3


 68%|██████▊   | 263/385 [05:57<03:03,  1.50s/it]

6, 7, 5, 4


 69%|██████▊   | 264/385 [05:58<02:42,  1.35s/it]

6, 7, 6, 5


 69%|██████▉   | 265/385 [06:00<02:32,  1.27s/it]

7, 8, 6, 5


 69%|██████▉   | 266/385 [06:01<02:24,  1.21s/it]

5, 9, 8, 3


 69%|██████▉   | 267/385 [06:02<02:20,  1.19s/it]

4, 5, 6, 3


 70%|██████▉   | 268/385 [06:03<02:15,  1.16s/it]

4, 5, 6, 3


 70%|██████▉   | 269/385 [06:04<02:12,  1.14s/it]

7, 6, 5, 8


 70%|███████   | 270/385 [06:05<02:07,  1.11s/it]

3, 7, 4, 2


 70%|███████   | 271/385 [06:06<02:02,  1.08s/it]

2, 7, 6, 1


 71%|███████   | 272/385 [06:07<02:01,  1.08s/it]

6, 8, 7, 5


 71%|███████   | 273/385 [06:08<02:01,  1.09s/it]

6, 7, 8, 4


 71%|███████   | 274/385 [06:10<02:09,  1.17s/it]

5, 8, 6, 3


 71%|███████▏  | 275/385 [06:11<02:08,  1.17s/it]

2, 7, 8, 3


 72%|███████▏  | 276/385 [06:12<02:04,  1.14s/it]

6, 8, 7, 3


 72%|███████▏  | 277/385 [06:13<02:07,  1.18s/it]

3, 9, 5, 2


 72%|███████▏  | 278/385 [06:15<02:16,  1.28s/it]

3, 2, 4, 2


 72%|███████▏  | 279/385 [06:16<02:21,  1.34s/it]

7, 8, 7, 6


 73%|███████▎  | 280/385 [06:17<02:24,  1.37s/it]

5, 4, 6, 7


 73%|███████▎  | 281/385 [06:19<02:11,  1.27s/it]

6, 4, 7, 3


 73%|███████▎  | 282/385 [06:20<02:18,  1.34s/it]

3, 7, 8, 5


 74%|███████▎  | 283/385 [06:21<02:06,  1.24s/it]

7, 8, 9, 6


 74%|███████▍  | 284/385 [06:22<02:00,  1.20s/it]

2, 7, 2, 1


 74%|███████▍  | 285/385 [06:24<02:05,  1.26s/it]

7, 8, 7, 5


 74%|███████▍  | 286/385 [06:25<02:11,  1.33s/it]

6, 8, 5, 3


 75%|███████▍  | 287/385 [06:26<02:04,  1.27s/it]

3, 8, 5, 2


 75%|███████▍  | 288/385 [06:27<01:57,  1.21s/it]

7, 6, 5, 4


 75%|███████▌  | 289/385 [06:28<01:52,  1.17s/it]

3, 7, 4, 2


 75%|███████▌  | 290/385 [06:30<01:58,  1.25s/it]

4, 6, 5, 2


 76%|███████▌  | 291/385 [06:31<01:53,  1.21s/it]

3, 7, 8, 2


 76%|███████▌  | 292/385 [06:32<01:47,  1.16s/it]

3, 8, 7, 2


 76%|███████▌  | 293/385 [06:33<01:46,  1.16s/it]

7, 6, 5, 4


 76%|███████▋  | 294/385 [06:34<01:49,  1.21s/it]

3, 4, 6, 2


 77%|███████▋  | 295/385 [06:36<01:47,  1.19s/it]

4, 5, 2, 3


 77%|███████▋  | 296/385 [06:37<01:46,  1.20s/it]

5, 4, 3, 6


 77%|███████▋  | 297/385 [06:38<01:40,  1.15s/it]

3, 4, 5, 2


 77%|███████▋  | 298/385 [06:39<01:47,  1.24s/it]

3, 8, 2, 5


 78%|███████▊  | 299/385 [06:40<01:45,  1.23s/it]

6, 5, 4, 6


 78%|███████▊  | 300/385 [06:42<01:42,  1.20s/it]

7, 6, 8, 7


 78%|███████▊  | 301/385 [06:43<01:44,  1.24s/it]

5, 6, 5, 3


 78%|███████▊  | 302/385 [06:46<02:32,  1.84s/it]

5, 6, 4, 3


 79%|███████▊  | 303/385 [06:47<02:12,  1.62s/it]

4, 6, 5, 3


 79%|███████▉  | 304/385 [06:48<02:01,  1.51s/it]

3, 2, 4, 1


 79%|███████▉  | 305/385 [06:50<02:04,  1.56s/it]

7, 6, 5, 8


 79%|███████▉  | 306/385 [06:52<02:01,  1.54s/it]

7, 8, 8, 6


 80%|███████▉  | 307/385 [06:53<01:48,  1.39s/it]

7, 4, 8, 6


 80%|████████  | 308/385 [06:54<01:38,  1.28s/it]

3, 5, 7, 2


 80%|████████  | 309/385 [06:55<01:31,  1.20s/it]

7, 6, 8, 5


 81%|████████  | 310/385 [06:56<01:29,  1.19s/it]

7, 4, 6, 3


 81%|████████  | 311/385 [06:57<01:34,  1.28s/it]

6, 5, 4, 5


 81%|████████  | 312/385 [06:58<01:28,  1.21s/it]

6, 5, 3, 4


 81%|████████▏ | 313/385 [07:00<01:27,  1.22s/it]

4, 7, 8, 2


 82%|████████▏ | 314/385 [07:01<01:28,  1.25s/it]

4, 5, 6, 3


 82%|████████▏ | 315/385 [07:03<01:36,  1.38s/it]

6, 4, 2, 3


 82%|████████▏ | 316/385 [07:04<01:38,  1.43s/it]

6, 7, 3, 2


 82%|████████▏ | 317/385 [07:06<01:37,  1.44s/it]

3, 5, 2, 1


 83%|████████▎ | 318/385 [07:07<01:42,  1.54s/it]

6, 5, 4, 4


 83%|████████▎ | 319/385 [07:09<01:32,  1.40s/it]

6, 7, 5, 3


 83%|████████▎ | 320/385 [07:10<01:26,  1.34s/it]

7, 8, 7, 4


 83%|████████▎ | 321/385 [07:11<01:18,  1.23s/it]

7, 8, 7, 6


 84%|████████▎ | 322/385 [07:12<01:13,  1.16s/it]

7, 6, 7, 8


 84%|████████▍ | 323/385 [07:13<01:17,  1.25s/it]

5, 4, 7, 2


 84%|████████▍ | 324/385 [07:15<01:21,  1.34s/it]

5, 6, 4, 3


 84%|████████▍ | 325/385 [07:16<01:20,  1.34s/it]

4, 7, 6, 2


 85%|████████▍ | 326/385 [07:18<01:23,  1.41s/it]

3, 4, 2, 2


 85%|████████▍ | 327/385 [07:23<02:32,  2.64s/it]

6, 5, 4, 6


 85%|████████▌ | 328/385 [07:25<02:11,  2.30s/it]

3, 5, 6, 4


 85%|████████▌ | 329/385 [07:26<01:48,  1.94s/it]

3, 2, 4, 2


 86%|████████▌ | 330/385 [07:27<01:32,  1.69s/it]

2, 4, 6, 1


 86%|████████▌ | 331/385 [07:28<01:19,  1.48s/it]

6, 5, 8, 3


 86%|████████▌ | 332/385 [07:29<01:20,  1.52s/it]

7, 8, 5, 6


 86%|████████▋ | 333/385 [07:31<01:15,  1.46s/it]

7, 8, 6, 5


 87%|████████▋ | 334/385 [07:32<01:14,  1.46s/it]

5, 6, 4, 4


 87%|████████▋ | 335/385 [07:33<01:06,  1.32s/it]

7, 8, 5, 6


 87%|████████▋ | 336/385 [07:34<01:00,  1.24s/it]

7, 6, 8, 3


 88%|████████▊ | 337/385 [07:35<00:56,  1.18s/it]

3, 2, 4, 1


 88%|████████▊ | 338/385 [07:36<00:55,  1.18s/it]

7, 8, 7, 6


 88%|████████▊ | 339/385 [07:38<00:58,  1.26s/it]

6, 5, 4, 7


 88%|████████▊ | 340/385 [07:40<01:04,  1.44s/it]

7, 8, 8, 2


 89%|████████▊ | 341/385 [07:41<01:05,  1.48s/it]

7, 6, 5, 4


 89%|████████▉ | 342/385 [07:43<01:01,  1.43s/it]

6, 7, 8, 5


 89%|████████▉ | 343/385 [07:44<00:57,  1.37s/it]

2, 3, 1, 2


 89%|████████▉ | 344/385 [07:45<00:54,  1.33s/it]

5, 4, 6, 3


 90%|████████▉ | 345/385 [07:47<00:58,  1.47s/it]

7, 6, 7, 5


 90%|████████▉ | 346/385 [07:48<00:51,  1.32s/it]

6, 5, 8, 7


 90%|█████████ | 347/385 [07:49<00:51,  1.35s/it]

4, 5, 6, 3


 90%|█████████ | 348/385 [07:51<00:49,  1.35s/it]

6, 5, 7, 4


 91%|█████████ | 349/385 [07:52<00:47,  1.32s/it]

6, 4, 7, 5


 91%|█████████ | 350/385 [07:53<00:46,  1.34s/it]

6, 7, 8, 5


 91%|█████████ | 351/385 [07:54<00:43,  1.29s/it]

7, 5, 6, 6


 91%|█████████▏| 352/385 [07:56<00:47,  1.44s/it]

6, 5, 6, 2


 92%|█████████▏| 353/385 [07:58<00:45,  1.41s/it]

7, 7, 8, 6


 92%|█████████▏| 354/385 [07:59<00:46,  1.49s/it]

6, 7, 8, 5


 92%|█████████▏| 355/385 [08:01<00:43,  1.46s/it]

6, 7, 8, 3


 92%|█████████▏| 356/385 [08:02<00:38,  1.33s/it]

6, 7, 8, 4


 93%|█████████▎| 357/385 [08:03<00:36,  1.29s/it]

7, 6, 4, 5


 93%|█████████▎| 358/385 [08:04<00:35,  1.30s/it]

4, 5, 6, 3


 93%|█████████▎| 359/385 [08:05<00:32,  1.25s/it]

3, 5, 2, 1


 94%|█████████▎| 360/385 [08:06<00:30,  1.20s/it]

3, 7, 4, 2


 94%|█████████▍| 361/385 [08:08<00:29,  1.24s/it]

7, 8, 6, 3


 94%|█████████▍| 362/385 [08:09<00:30,  1.34s/it]

6, 7, 8, 4


 94%|█████████▍| 363/385 [08:11<00:29,  1.36s/it]

3, 5, 4, 8


 95%|█████████▍| 364/385 [08:12<00:27,  1.30s/it]

5, 4, 3, 2


 95%|█████████▍| 365/385 [08:14<00:28,  1.42s/it]

6, 5, 5, 2


 95%|█████████▌| 366/385 [08:15<00:25,  1.33s/it]

7, 6, 4, 8


 95%|█████████▌| 367/385 [08:16<00:22,  1.25s/it]

4, 7, 4, 3


 96%|█████████▌| 368/385 [08:17<00:20,  1.21s/it]

3, 2, 5, 1


 96%|█████████▌| 369/385 [08:18<00:20,  1.25s/it]

7, 6, 8, 4


 96%|█████████▌| 370/385 [08:19<00:18,  1.22s/it]

6, 8, 5, 4


 96%|█████████▋| 371/385 [08:20<00:16,  1.16s/it]

6, 7, 8, 7


 97%|█████████▋| 372/385 [08:21<00:14,  1.13s/it]

7, 7, 6, 8


 97%|█████████▋| 373/385 [08:23<00:13,  1.12s/it]

6, 7, 7, 4


 97%|█████████▋| 374/385 [08:24<00:12,  1.14s/it]

6, 7, 6, 8


 97%|█████████▋| 375/385 [08:25<00:12,  1.22s/it]

6, 8, 9, 3


 98%|█████████▊| 376/385 [08:27<00:11,  1.27s/it]

5, 4, 6, 2


 98%|█████████▊| 377/385 [08:28<00:09,  1.23s/it]

7, 8, 6, 5


 98%|█████████▊| 378/385 [08:29<00:08,  1.22s/it]

5, 6, 4, 3


 98%|█████████▊| 379/385 [08:30<00:07,  1.18s/it]

7, 6, 5, 6


 99%|█████████▊| 380/385 [08:31<00:06,  1.25s/it]

7, 6, 5, 4


 99%|█████████▉| 381/385 [08:33<00:04,  1.22s/it]

6, 5, 4, 3


 99%|█████████▉| 382/385 [08:34<00:03,  1.20s/it]

5, 6, 7, 3


 99%|█████████▉| 383/385 [08:35<00:02,  1.36s/it]

2, 3, 2, 1


100%|█████████▉| 384/385 [08:37<00:01,  1.35s/it]

4, 6, 8, 2


100%|██████████| 385/385 [08:38<00:00,  1.35s/it]

7, 6, 5, 4





In [40]:
def _is_score_line(entry, expected_fields=4):
    """Return True if `entry` is a str of `expected_fields` comma-separated integers.

    Counting the comma-separated fields alone is not enough: a string such as
    '6, 5, 4,' splits into four fields, yet its empty last field makes a later
    `int()` conversion raise ValueError. Each field is therefore test-parsed.

    Non-string entries (e.g. None) are rejected instead of raising
    AttributeError on `.split`.
    """
    if not isinstance(entry, str):
        return False
    fields = entry.split(',')
    if len(fields) != expected_fields:
        return False
    try:
        for field in fields:
            int(field)  # int() tolerates surrounding whitespace, e.g. ' 7'
    except ValueError:
        return False
    return True


# Keep only the entries of `evaluations` that parse into exactly four integers,
# so the int() conversion applied to this list afterwards cannot fail.
filtered_evaluations = [entry for entry in evaluations if _is_score_line(entry)]
filtered_evaluations

['6, 7, 5, 4',
 '5, 7, 4, 6',
 '3, 8, 2, 1',
 '7, 8, 6, 5',
 '4, 5, 6, 5',
 '6, 5, 7, 4',
 '7, 6, 8, 7',
 '5, 6, 7, 3',
 '6, 7, 8, 5',
 '6, 7, 8, 5',
 '7, 9, 5, 3',
 '7, 8, 5, 6',
 '5, 6, 7, 3',
 '7, 6, 3, 5',
 '6, 5, 7, 3',
 '7, 3, 5, 4',
 '7, 6, 5, 4',
 '3, 7, 6, 2',
 '6, 7, 5, 2',
 '3, 6, 2, 7',
 '6, 4, 8, 2',
 '4, 6, 8, 2',
 '5, 7, 3, 2',
 '6, 7, 3, 8',
 '5, 4, 6, 5',
 '6, 6, 7, 4',
 '4, 2, 5, 3',
 '3, 4, 8, 5',
 '7, 8, 6, 7',
 '4, 8, 9, 3',
 '6, 5, 7, 8',
 '7, 6, 5, 4',
 '3, 2, 3, 1',
 '4, 2, 3, 3',
 '5, 6, 4, 3',
 '4, 6, 5, 3',
 '6, 8, 7, 4',
 '3, 7, 5, 2',
 '6, 7, 7, 2',
 '7, 8, 6, 3',
 '6, 5, 4, 3',
 '6, 8, 5, 4',
 '6, 8, 5, 6',
 '4, 6, 2, 1',
 '7, 8, 6, 5',
 '6, 5, 7, 5',
 '7, 8, 7, 5',
 '3, 4, 5, 2',
 '7, 6, 5, 7',
 '5, 7, 8, 3',
 '5, 6, 3, 4',
 '7, 6, 5, 4',
 '6, 7, 7, 4',
 '4, 5, 6, 2',
 '4, 6, 7, 2',
 '6, 8, 7, 5',
 '7, 5, 6, 4',
 '2, 3, 4, 1',
 '7, 6, 5, 4',
 '6, 7, 5, 6',
 '4, 5, 7, 3',
 '6, 7, 6, 5',
 '6, 4, 5, 3',
 '7, 6, 5, 4',
 '6, 5, 4, 2',
 '3, 6, 2, 4',
 '6, 5, 4,

In [41]:
# Sanity check: filtered count first, raw count second; equal numbers mean
# the filter dropped nothing.
print(len(filtered_evaluations), len(evaluations), sep='\n')

385
385


In [43]:
# Earlier six-score variant, kept for reference:
# R4,R5,R3,R1,R2,R6 = map(list, zip(*[map(int, x.split(',')) for x in filtered_evaluations]))

# Transpose the "a, b, c, d" strings into four per-position lists of ints.
# The unpacking order (R3, R2, R1, AC) follows the field order inside each string.
R3, R2, R1, AC = (
    list(column)
    for column in zip(
        *([int(token) for token in line.split(',')] for line in filtered_evaluations)
    )
)

In [44]:
# Columns for the RAG4 / RAG5 scores are currently disabled:
# data['RAG4_results'] = R4
# data['RAG5_results'] = R5

# Attach the parsed score lists to `data` in place; targets are assigned
# left to right, so the column order is RAG3, RAG1, RAG2, AC.
(
    data['RAG3_results'],
    data['RAG1_results'],
    data['RAG2_results'],
    data['AC'],
) = R3, R1, R2, AC

In [None]:
# data.to_csv('llm_as_judge_results/results_dataframe-new_RQ2_baseline_and_ac.csv',index=False)

In [None]:
# data.to_csv('llm_as_judge_results/results_dataframe-2_0.9.csv',index=False)

# Persist `data` as CSV without the index column; the file name carries the
# 0.1 suffix and has to be edited by hand for other runs.
data.to_csv(
    'llm_as_judge_results/results_dataframe_0.1.csv',
    index=False,
)
# This data is the RAG1-RAG5 basic results
# data.to_csv('llm_as_judge_results/results_dataframe_unseen_combination.csv',index=False)

In [485]:
# This data holds the RAG1-RAG5 variation results, added after discussion for additional experiments
# data.to_csv('llm_as_judge_results/all_rag_approach_variation_results.csv',index=False)

In [486]:
# This data contains RAG6 and does not contain the baseline
# data.to_csv('llm_as_judge_results/all_rag_approach_variation_results_with_RAG6.csv',index=False)