In [1]:
import requests
import pandas as pd

## Preprocessing Functions

In [2]:
from typing import Dict

def json_to_df(df: Dict[str, list]) -> pd.DataFrame:
    """
    Convert a dictionary containing rows data to a pandas DataFrame.

    Parameters
    ----------
    df : Dict[str, list]
        A dictionary containing rows data in the format {'rows': [row1, row2, ...]}.
        Each row should be a dictionary with keys representing columns.

    Returns
    -------
    pd.DataFrame
        A DataFrame constructed from the rows data.

    Example
    -------
    Consider a dictionary `data` in the format:
    data = {'rows': [{'col1': val1, 'col2': val2}, {'col1': val3, 'col2': val4}]}
    df = json_to_df(data)
    """
    rows_data = [row['row'] for row in df['rows']]
    df = pd.DataFrame(rows_data)
    return df

In [3]:
def fetch_rows(url: str, total_rows: int) -> pd.DataFrame:
    """
    Fetch rows of data from an API endpoint and compile them into a pandas DataFrame.

    Parameters
    ----------
    url : str
        The URL of the API endpoint to fetch data.
    total_rows : int
        The total number of rows to retrieve.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the compiled rows of data fetched from the API.

    Notes
    -----
    This function fetches rows of data from the provided API endpoint using the given URL. It iteratively
    retrieves data in chunks defined by 'rows_per_request' until 'total_rows' are fetched or an error occurs.
    The 'json_to_df' function is utilized to convert fetched JSON data to a DataFrame.
    """
    offset = 0
    rows_per_request = 100
    full_dataset = pd.DataFrame()

    while offset < total_rows:
        length = min(rows_per_request, total_rows - offset)
        api_url = f"{url}&offset={offset}&length={length}"
        response = requests.get(api_url)

        if response.status_code == 200:
            data = response.json()
            df_data = json_to_df(data)
            full_dataset = pd.concat([full_dataset, df_data], ignore_index=True)
            offset += length
        else:
            print(f"Failed to fetch data from {api_url}. Status code: {response.status_code}")
            break

    return full_dataset

## Fetching the first dataset (openhermes)

In [4]:
url_openhermes = 'https://datasets-server.huggingface.co/rows?dataset=teknium%2Fopenhermes&config=default&split=train&'

df_openhermes = pd.DataFrame()

if df_openhermes.empty:
  df_openhermes = fetch_rows(url_openhermes, 200)

df_openhermes

Unnamed: 0,output,input,instruction
0,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...,,Write a Perl script that processes a log file ...
1,The letter 'M'.,,"What can be seen once in a minute, twice in a ..."
2,1. Thomas Edison: One of his most significant ...,,Famous inventors and their inventions: Identif...
3,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...,,Generate a list of 12 words that start with 'qu'.
4,Marie Curie; Physics,,"Who was the first woman to win a Nobel Prize, ..."
...,...,...,...
195,"First, let's calculate the number of hours eac...",,A software development company is working on a...
196,"Let G be a group of order p^2, where p is prim...",,"Prove that if G is a group of order p^2, where..."
197,"To handle errors in Haskell, we can use the `E...",,Develop a Haskell function that takes a list o...
198,Here's a Haskell function that achieves the de...,,Develop a Haskell function that takes a list o...


In [5]:
df_openhermes = df_openhermes.drop('input', axis=1)
df_openhermes

Unnamed: 0,output,instruction
0,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...,Write a Perl script that processes a log file ...
1,The letter 'M'.,"What can be seen once in a minute, twice in a ..."
2,1. Thomas Edison: One of his most significant ...,Famous inventors and their inventions: Identif...
3,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...,Generate a list of 12 words that start with 'qu'.
4,Marie Curie; Physics,"Who was the first woman to win a Nobel Prize, ..."
...,...,...
195,"First, let's calculate the number of hours eac...",A software development company is working on a...
196,"Let G be a group of order p^2, where p is prim...","Prove that if G is a group of order p^2, where..."
197,"To handle errors in Haskell, we can use the `E...",Develop a Haskell function that takes a list o...
198,Here's a Haskell function that achieves the de...,Develop a Haskell function that takes a list o...


## Fetching the second dataset (slimOrca)

In [6]:
url_slimOrca = 'https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FSlimOrca&config=default&split=train&'
df_slimOrca = pd.DataFrame()

if df_slimOrca.empty:
  df_slimOrca = fetch_rows(url_slimOrca, 200)

df_slimOrca

Unnamed: 0,conversations
0,"[{'from': 'system', 'value': 'You are an AI as..."
1,"[{'from': 'system', 'value': 'You are an AI as..."
2,"[{'from': 'system', 'value': 'You are an AI as..."
3,"[{'from': 'system', 'value': 'You are a helpfu..."
4,"[{'from': 'system', 'value': 'You are an AI as..."
...,...
195,"[{'from': 'system', 'value': 'You are an AI as..."
196,"[{'from': 'system', 'value': 'You are a helpfu..."
197,"[{'from': 'system', 'value': 'You are an AI as..."
198,"[{'from': 'system', 'value': 'You are an AI as..."


In [7]:
from typing import List, Dict, Any

def df_slimOrca_preproc(df: pd.DataFrame) -> pd.DataFrame:
    """
    Preprocess a DataFrame containing 'conversations' data in a specific format.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing 'conversations' data.

    Returns
    -------
    pd.DataFrame
        A DataFrame with extracted 'question' and 'answer' values from the 'conversations' data.

    Notes
    -----
    This function preprocesses a DataFrame assumed to have a column 'conversations' containing a list of
    dictionaries. It extracts 'question' and 'answer' values from these dictionaries and creates a new
    DataFrame based on the extracted values.
    """
    extracted_values = []

    for index, row in df.iterrows():
        conversations = row['conversations']
        row_values = []

        for conv in conversations:
            if 'value' in conv:
                row_values.append(conv['value'])

        if len(row_values) >= 3:
            extracted_values.append({'question': row_values[1], 'answer': row_values[2]})
        else:
            extracted_values.append({'question': None, 'answer': None})

    return pd.DataFrame(extracted_values)


In [8]:
df_slimOrca_clean = df_slimOrca_preproc(df_slimOrca)
df_slimOrca_clean

Unnamed: 0,question,answer
0,"Write an article based on this ""A man has been...",Title: Tragedy Strikes in Sydney: Victims Stab...
1,Answer the following question: - number is 54 ...,The information provided seems to refer to Ria...
2,Produce a long descriptive sentence that uses ...,"Stretching across a vast areaOfLand, totaling ..."
3,Write a title for this article:\n\nArbitration...,"""The Sneaky Clauses Taking Away Your Day in Co..."
4,"Definition: In this task, you are given a hate...",geopolitical\n\nStep 1: Understand the text\nI...
...,...,...
195,"Q:Found the following article online, use it t...",The full name of the location where Richard II...
196,Please answer a question about the following a...,"In August of 1961, the pro-independence FLN ki..."
197,Context: Jesse entered Sasha's room without as...,"According to the context provided, Sasha will ..."
198,Please answer the following question: Title: C...,This review depicts the product in an unflatte...


In [9]:
df_openhermes = df_openhermes.iloc[:, ::-1]
df_openhermes = df_openhermes.rename(columns={'instruction': 'question', 'output': 'answer'})
df_openhermes

Unnamed: 0,question,answer
0,Write a Perl script that processes a log file ...,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...
1,"What can be seen once in a minute, twice in a ...",The letter 'M'.
2,Famous inventors and their inventions: Identif...,1. Thomas Edison: One of his most significant ...
3,Generate a list of 12 words that start with 'qu'.,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...
4,"Who was the first woman to win a Nobel Prize, ...",Marie Curie; Physics
...,...,...
195,A software development company is working on a...,"First, let's calculate the number of hours eac..."
196,"Prove that if G is a group of order p^2, where...","Let G be a group of order p^2, where p is prim..."
197,Develop a Haskell function that takes a list o...,"To handle errors in Haskell, we can use the `E..."
198,Develop a Haskell function that takes a list o...,Here's a Haskell function that achieves the de...


## Merging the two cleaned datasets

In [10]:
df = pd.concat([df_openhermes, df_slimOrca_clean])
df

Unnamed: 0,question,answer
0,Write a Perl script that processes a log file ...,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...
1,"What can be seen once in a minute, twice in a ...",The letter 'M'.
2,Famous inventors and their inventions: Identif...,1. Thomas Edison: One of his most significant ...
3,Generate a list of 12 words that start with 'qu'.,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...
4,"Who was the first woman to win a Nobel Prize, ...",Marie Curie; Physics
...,...,...
195,"Q:Found the following article online, use it t...",The full name of the location where Richard II...
196,Please answer a question about the following a...,"In August of 1961, the pro-independence FLN ki..."
197,Context: Jesse entered Sasha's room without as...,"According to the context provided, Sasha will ..."
198,Please answer the following question: Title: C...,This review depicts the product in an unflatte...


In [11]:
df = df.assign(language="en", accuracy=-1, acc_explanation="")
df

Unnamed: 0,question,answer,language,accuracy,acc_explanation
0,Write a Perl script that processes a log file ...,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...,en,-1,
1,"What can be seen once in a minute, twice in a ...",The letter 'M'.,en,-1,
2,Famous inventors and their inventions: Identif...,1. Thomas Edison: One of his most significant ...,en,-1,
3,Generate a list of 12 words that start with 'qu'.,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...,en,-1,
4,"Who was the first woman to win a Nobel Prize, ...",Marie Curie; Physics,en,-1,
...,...,...,...,...,...
195,"Q:Found the following article online, use it t...",The full name of the location where Richard II...,en,-1,
196,Please answer a question about the following a...,"In August of 1961, the pro-independence FLN ki...",en,-1,
197,Context: Jesse entered Sasha's room without as...,"According to the context provided, Sasha will ...",en,-1,
198,Please answer the following question: Title: C...,This review depicts the product in an unflatte...,en,-1,


In [12]:
# df.to_pickle('merged_dataset.pkl')

## Splitting the dataset

In [13]:
import numpy as np

# Split the DataFrame into two parts with 50% of the shuffled rows
df_split = np.array_split(df.sample(frac=1, axis=0, random_state=42).sample(frac=1).reset_index(drop=True), 2)
df_split[0]

  return bound(*args, **kwds)


Unnamed: 0,question,answer,language,accuracy,acc_explanation
0,"An anagram of ""partie"", this word refers to a ...",pirate,en,-1,
1,"An anagram of ""lime"", this word is a unit of d...",mile,en,-1,
2,Remove the spaces from the following sentence:...,Itpreventsuserstosuspectthattherearesomehidden...,en,-1,
3,"Together with other industrial sectors, ARBURG...",Step 1: Identify the text to be transformed to...,en,-1,
4,"Given the task definition and input, reply wit...",This task requires you to analyze a given revi...,en,-1,
...,...,...,...,...,...
195,Develop a concept for a new board game that pr...,Game Title: Mind Maze\n\nConcept:\nMind Maze i...,en,-1,
196,Develop a list of 7 words terminating in 'ship',1. Friendship\n2. Relationship\n3. Leadership\...,en,-1,
197,,,en,-1,
198,A city is planning to upgrade its public trans...,1. Calculate the current total passenger capac...,en,-1,


### Translating the Split

In [14]:
!pip install deep-translator



In [15]:
import time
from deep_translator import GoogleTranslator
from datetime import date

languages = ['es', 'fr', 'it']
df_translated = df_split[1][:11]

# Calculate the number of rows that represent 10% of the total
ten_percent_rows = len(df_translated) // 2

# Initialize a counter for the number of processed rows
processed_rows = 0

# Iterate over each row of the dataframe
for index, row in df_translated.iterrows():
  # skip the row if there is a none value
  if row["question"] is None or row["answer"] is None:
    print("[INFO] Skipping row {} beacuse it has a None value".format(index))
    continue

  if row["language"] != "en":
    print("[INFO] Skipping row {} beacuse it is not in english".format(index))
    continue

  # choose the language by % 3
  language = languages[index % 3]
  
  # check if the answer contains a code block
  if row["answer"] and row["question"] and "```" not in row["answer"]:
    row["question"] = GoogleTranslator(source='en', target=language).translate(row["question"])
    row["answer"] = GoogleTranslator(source='en', target=language).translate(row["answer"])
    
      # print the question and answer
    print("[LOG] Index: {}".format(index))
    print(" - Question: {}".format(row["question"][:25]))
    print(" - Answer: {}".format(row["answer"][:25]))
    print(" From {} -> {}".format(row["language"], language))
  else:
    print("[INFO] Skipping row {} beacuse it does contain a code block".format(index))

  row["language"] = language
  
  # Increment the counter for the number of processed rows
  processed_rows += 1

  # If the number of processed rows is a multiple of ten_percent_rows, save the DataFrame
  if processed_rows % ten_percent_rows == 0:
    # Get today's date in YYYYMMDD format
    today = date.today().strftime("%m%d")
    df_translated.to_pickle(f"{today}_{processed_rows}_translated.pkl")
    print("[INFO] Saved DataFrame to {}.pkl".format(today))

  time.sleep(3)

[LOG] Index: 200
 - Question: Individua tre esempi di f
 - Answer: 1. Batteri: i batteri son
 From en -> it
[LOG] Index: 201
 - Question: Describir el proceso de e
 - Answer: La escultura en arcilla e
 From en -> es
[LOG] Index: 202
 - Question: Créez un dialogue entre d
 - Answer: Personne A : Hé, j'ai vra
 From en -> fr
[LOG] Index: 203
 - Question: L'Empire Icon Award è un 
 - Answer: SÌ.
 From en -> it
[LOG] Index: 204
 - Question: INICIO ENTRADA
COMENZARCO
 - Answer: Algunas ventajas de usar 
 From en -> es
[INFO] Saved DataFrame to 0121.pkl
[LOG] Index: 205
 - Question: Choisissez votre réponse 
 - Answer: 4). neutrons.
 From en -> fr
[LOG] Index: 206
 - Question: Utilizza le informazioni 
 - Answer: Se Jimbo aumenta la lungh
 From en -> it
[INFO] Skipping row 207 beacuse it does contain a code block
[LOG] Index: 208
 - Question: Nauset Regional High Scho
 - Answer: Non, nous ne pouvons pas 
 From en -> fr
[LOG] Index: 209
 - Question: il gestore normativo ti o
 - Answer: Il Regu

## Paper implementation

### Evaluator LLM Setup

In [16]:
import os
os.environ['API_KEY'] = 'AIzaSyAqQRlPN9fXqmAKoZH--hSe72pL8irdJ6o'

In [32]:
import os
import requests
from typing import Dict, Any
import random

proxies_list = [
    ('162.243.184.16', '10002'),
    ('195.154.184.80', '8080'),
    ('135.181.221.83', '3128'),
    ('20.205.61.143', '8123'),
    ('20.206.106.192', '80'),
    ('20.24.43.214', '80')
]

# Randomly select a proxy
proxy_ip, proxy_port = random.choice(proxies_list)

proxies = {
  'http': f'socks5://{proxy_ip}:{proxy_port}',
  'https': f'socks5://{proxy_ip}:{proxy_port}',
}

def generate_content(prompt: str) -> Dict[str, Any]:
    """
    Generates content using the generative language API.

    Parameters
    ----------
    prompt (str): The text to use as a prompt for the content generation.

    Returns
    -------
    response_json (Dict[str, Any]): The JSON response from the API containing the generated content.
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={}".format(os.environ.get("API_KEY"))

    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "contents": [{
            "parts": [{
                "text": prompt
            }]
        }],
        "generationConfig": {
          "temperature": 0
        }
    }

    response = requests.post(url, headers=headers, json=data, proxies=proxies)
    response_json = response.json()
    return response_json


In [18]:
import json
from typing import List, Dict, Any

def get_text_from_response(response_dict: dict) -> List[str]:
    """
    Extracts the generated text from the JSON response of the generative language API.

    Parameters
    ----------
    response_dict (dict): The JSON response as a dict.

    Returns
    -------
    texts (List[str]): A list of generated texts from the response.
    """
    candidates = response_dict.get("candidates", [])

    # Initialize an empty list to store the texts
    texts = []

    for candidate in candidates:

        content = candidate.get("content", {})

        parts = content.get("parts", [])
        # Get the first part from the list (assuming there is only one part)
        part = parts[0] if parts else {}

        text = part.get("text", "")
        texts.append(text)

    # Return the list of texts
    return texts

### Evaluator Prompt Setup

In [19]:
# Import re library
import re

# Import typing library
from typing import Dict

# Define a function to extract score and description
def extract_score_and_description(string: str) -> Dict[int, str]:
  """
  Extracts the score and description from a given string using regex.

  Parameters
  ----------
  string (str): The string to be processed.

  Returns
  -------
  A dictionary with keys 'score' and 'description' and their corresponding values.
  """
  # Split the string by newline characters
  lines = string.split("\n")

  # Use regex to match the score and description patterns
  score_pattern = r"(\d+)"

  # Find the score and description in the string
  score = re.search(score_pattern, string)
  description = "\n".join(lines[1:])

  # Return the score and description as a dictionary
  return {'score': int(score.group()), 'description': description}


In [20]:
def parse_score_and_description(string: str) -> Dict[str, Any]:
    """
    Parses a string that contains a score and a description and returns a dict with the score as an int and the description as a str.

    Parameters
    ----------
    string (str): The string to parse.

    Returns
    -------
    result (Dict[str, Any]): A dict with two keys: "score" and "description". The value of "score" is an int that represents the score on the first line of the string. The value of "description" is a str that contains the remaining lines of the string.
    """
    # Split the string by newline characters
    lines = string.split("\n")

    # Initialize an empty dict to store the result
    result = {}

    # Get the first line of the string and extract the score as an int
    first_line = lines[0]
    score = int(first_line.split(":")[1].strip())
    result["score"] = score

    # Get the remaining lines of the string and join them as a description
    description = "\n".join(lines[1:])
    result["description"] = description

    # Return the result dict
    return result

In [21]:
pd.concat([df_split[0][:10], df_translated])

Unnamed: 0,question,answer,language,accuracy,acc_explanation
0,"An anagram of ""partie"", this word refers to a ...",pirate,en,-1,
1,"An anagram of ""lime"", this word is a unit of d...",mile,en,-1,
2,Remove the spaces from the following sentence:...,Itpreventsuserstosuspectthattherearesomehidden...,en,-1,
3,"Together with other industrial sectors, ARBURG...",Step 1: Identify the text to be transformed to...,en,-1,
4,"Given the task definition and input, reply wit...",This task requires you to analyze a given revi...,en,-1,
5,An online retailer wants to optimize its wareh...,1. Analyze current warehouse layout and operat...,en,-1,
6,Detailed Instructions: In this task you are gi...,Step 1: Identify the main verb in the sentence...,en,-1,
7,BEGININPUT\nBEGINCONTEXT\n\nENDCONTEXT\nIn a r...,The main purpose of the Zaltran-4 mission was ...,en,-1,
8,Compose a love letter between two characters f...,"My Dearest Elizabeth,\n\nAs I sit beneath the ...",en,-1,
9,,,en,-1,


In [22]:
%pip install requests[socks]

Note: you may need to restart the kernel to use updated packages.


In [33]:
import time

dimension = ['accuracy']
input = ''

# TODO Remove sta merda
df_to_evaluate = pd.concat([df_split[0][:10], df_translated])

# Iterate over each row of the dataframe
for index, row in df_to_evaluate.iterrows():
  if row["accuracy"] != -1:
    print("Skipping row {}".format(index))
    continue

  # Assign the values of the columns to the variables
  instruction = row["question"]
  response = row["answer"]

  eval_prompt = f"""System Prompt:
    We would like to request your feedback on the performance of AI assistant in response to the instruction
    and the given input displayed following.
    Instruction: {instruction}
    Input: {input}
    Response: {response}
    User Prompt:
    Please rate according to the {dimension[0]} of the response to the instruction and the input. Each assistant
    receives a score on a scale of 0 to 5, where a higher score indicates higher level of the {dimension[0]}. Please
    first output a single line containing the value indicating the scores. In the subsequent line, please provide a
    comprehensive explanation of your evaluation, avoiding any potential bias."""

  response_json = generate_content(eval_prompt)
  
  print(response_json)
  
  texts = get_text_from_response(response_json)

  print(texts)

  result = extract_score_and_description(texts[0])

  # Assign the score and description to the dataframe
  df_to_evaluate.at[index, 'accuracy'] = result['score']
  df_to_evaluate.at[index, 'acc_explanation'] = result['description']

  time.sleep(3)

ConnectionError: SOCKSHTTPSConnectionPool(host='generativelanguage.googleapis.com', port=443): Max retries exceeded with url: /v1beta/models/gemini-pro:generateContent?key=AIzaSyAqQRlPN9fXqmAKoZH--hSe72pL8irdJ6o (Caused by NewConnectionError('<urllib3.contrib.socks.SOCKSHTTPSConnection object at 0x7fd692b7d5d0>: Failed to establish a new connection: Connection closed unexpectedly'))

In [None]:
test_df

Unnamed: 0,question,answer,accuracy,acc_explanation
0,Write a Perl script that processes a log file ...,```perl\n#!/usr/bin/perl\n\nuse strict;\nuse w...,5,\nExplanation:\nThe response accurately addres...
1,"What can be seen once in a minute, twice in a ...",The letter 'M'.,5,"\nExplanation:\nThe response ""The letter 'M'"" ..."
2,Famous inventors and their inventions: Identif...,1. Thomas Edison: One of his most significant ...,5,\nExplanation:\nThe response accurately addres...
3,Generate a list of 12 words that start with 'qu'.,1. Quail\n2. Quarry\n3. Quasar\n4. Quench\n5. ...,5,\nExplanation: The AI assistant accurately gen...
4,"Who was the first woman to win a Nobel Prize, ...",Marie Curie; Physics,5,\nExplanation:\nThe response is highly accurat...
5,A hotel chain wants to optimize its pricing st...,To develop a dynamic pricing model for the hot...,5,\nExplanation:\nThe response accurately addres...
6,"BEGININPUT\nBEGINCONTEXT\ndate: September 15, ...",Two significant compositions by Elvin Decker a...,4,\nThe response accurately addresses both parts...
7,Design a marketing campaign for a fictional lu...,"Campaign Name: ""Captivate Your Senses""\n\nScen...",4,\nExplanation:\n\nThe response to the instruct...
8,"Timmy starts with 12 stickers, gives away 5, a...","This problem does not mention any balloons, so...",5,\nExplanation:\nThe AI assistant accurately re...
9,A zoo wants to expand its facilities by adding...,Step 1: Calculate the total cost of expansion ...,5,\nExplanation:\nThe AI assistant provided a co...
