In [1]:
import os
import json
import csv
import statistics
import re
from pathlib import Path

In [3]:
def extract_numbers_with_min_digits(data, min_digits=1, excluded_values=(50.0,)):
    """
    Recursively collect the numbers mentioned in the strings of a nested JSON structure.

    Dicts (values only) and lists are walked recursively; only ``str`` leaves are
    scanned. Every other leaf type (int, float, bool, None, ...) contributes nothing.

    A number is recognised when it

    * starts at the beginning of the string or directly after whitespace,
      optionally prefixed by a single ``€``. Numbers glued to any other
      preceding character are skipped (``$100``, ``(100``), whereas a trailing
      symbol does not prevent a match (``100$`` yields 100);
    * is written as plain digits or with comma thousands separators, with an
      optional decimal part (``600``, ``1,250``, ``1,250.50``);
    * ends at a word boundary (``100abc`` is not a number).

    Args:
        data: Parsed JSON value (dict, list, str or any other leaf).
        min_digits: Minimum number of digits required left of the decimal point
            (thousands separators are not counted).
        excluded_values: Numeric values that are dropped from the result.
            Defaults to ``(50.0,)`` (presumably the fixed apartment size, which
            is not a price; confirm against the experiment setup). The comparison
            is numeric, so every spelling of an excluded value (``50``, ``50.0``,
            ``50.0000``) is dropped.

    Returns:
        list[float]: The matching numbers in order of appearance.
    """
    numbers = []

    if isinstance(data, dict):
        for value in data.values():
            numbers.extend(extract_numbers_with_min_digits(value, min_digits, excluded_values))

    elif isinstance(data, list):
        for item in data:
            numbers.extend(extract_numbers_with_min_digits(item, min_digits, excluded_values))

    elif isinstance(data, str):
        # The optional € sits outside the capturing group, so the captured text
        # only ever contains digits, commas and at most one decimal point.
        pattern = r'(?<!\S)(?:€)?(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+(?:\.\d+)?)\b'
        for token in re.findall(pattern, data):
            cleaned = token.replace(',', '')
            # For tokens without a decimal point the integer part is the whole token.
            integer_part = cleaned.split('.', 1)[0]
            if len(integer_part) < min_digits:
                continue
            value = float(cleaned)
            # Compare numerically so that e.g. "50.0000" is excluded just like "50".
            if value in excluded_values:
                continue
            numbers.append(value)

    return numbers



def process_json_file(json_file_path, folder_name):
    """
    Process a single conversation JSON file and return one summary row per speaker.

    The file is expected to contain a list of messages, each a dict with at least
    the keys ``'name'`` and ``'content'``. For every speaker two number lists are
    accumulated over all of their messages:

    * ``original``: every number extracted from the message contents;
    * ``modified``: the same, except that for each message containing more than
      one number the first number of that message is dropped.

    Args:
        json_file_path: Path of the conversation JSON file.
        folder_name: Label stored in the 'Folder Name' column of every row.

    Returns:
        list[list]: One row per speaker, in order of first appearance:
        ``[50, name, min, max, mean, last, folder_name, original, modified]``.
        The statistics are computed on the ``modified`` list and are 0 when that
        list is empty.

    Raises:
        KeyError: If a message lacks the ``'name'`` or ``'content'`` key.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding (e.g. cp1252 on Windows), which breaks on non-ASCII names.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        conversation = json.load(json_file)

    # The file is fully parsed at this point, so it no longer needs to stay open.
    persons_data = {}
    for message in conversation:
        name = message['name']
        content = message['content']
        person = persons_data.setdefault(name, {'original': [], 'modified': []})

        content_numbers = extract_numbers_with_min_digits(content)
        person['original'].extend(content_numbers)

        # Drop the first number of a message only when the message has several.
        if len(content_numbers) > 1:
            person['modified'].extend(content_numbers[1:])
        else:
            person['modified'].extend(content_numbers)

    # Compile one CSV row per speaker.
    csv_data = []
    for name, numbers in persons_data.items():
        original_numbers = numbers['original']
        modified_numbers = numbers['modified']

        if modified_numbers:
            min_price = min(modified_numbers)
            max_price = max(modified_numbers)
            avg_price = statistics.mean(modified_numbers)
            last_price = modified_numbers[-1]
        else:
            min_price = max_price = avg_price = last_price = 0

        # The leading 50 fills the 'Apartment Size' column.
        csv_data.append([50, name, min_price, max_price, avg_price, last_price,
                         folder_name, original_numbers, modified_numbers])

    return csv_data

def write_to_csv(csv_file_path, csv_data):
    """
    Write the summary rows to a CSV file, preceded by a fixed header row.

    The last two cells of every row are iterables of prices; they are written as
    a single ``', '``-joined string each. The rows passed in are left untouched,
    so the same ``csv_data`` can safely be written more than once.

    Args:
        csv_file_path: Destination path; an existing file is overwritten.
        csv_data: Iterable of rows whose last two cells are the original and the
            modified price list.

    Raises:
        IndexError: If a row has fewer than two cells.
    """
    header = ['Apartment Size', 'Name', 'Min Price', 'Max Price', 'Avg Price', 'Last Price', 'Folder Name', 'Original Price List', 'Modified Price List']
    with open(csv_file_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for row in csv_data:
            # Build a new row instead of overwriting row[-2] / row[-1]: mutating
            # the caller's data would corrupt the price lists on a second call.
            original_prices = ', '.join(map(str, row[-2]))
            modified_prices = ', '.join(map(str, row[-1]))
            writer.writerow(list(row[:-2]) + [original_prices, modified_prices])

# process_folder handles the folder structure of the new cross-agent experiments: <base_path>/<subfolder>/*.json
def process_folder(base_path, output_directory):
    """
    Collect the rows of every JSON file located one level below ``base_path``
    and write them to a single CSV file.

    Expected layout: ``base_path/<subfolder>/<file>.json``. Each JSON file is
    processed with ``process_json_file``, using the subfolder name as its
    folder label. Entries of ``base_path`` that are not directories are ignored.

    The CSV file is named after the last path component of ``base_path``
    (``<name>.csv``) and is written into ``output_directory``, which is not
    created here.

    Args:
        base_path: Folder whose subfolders contain the JSON files.
        output_directory: Folder that receives the CSV file.
    """
    all_csv_data = []

    # normpath strips a trailing separator so that basename is never empty.
    parent_folder_name = os.path.basename(os.path.normpath(base_path))

    # os.listdir returns entries in arbitrary order; sort both levels so the
    # row order of the resulting CSV is reproducible across runs and platforms.
    for json_folder in sorted(os.listdir(base_path)):
        json_folder_path = os.path.join(base_path, json_folder)
        if not os.path.isdir(json_folder_path):
            continue

        for file in sorted(os.listdir(json_folder_path)):
            json_file_path = os.path.join(json_folder_path, file)
            # Skip directories that merely happen to be named "*.json".
            if file.endswith('.json') and os.path.isfile(json_file_path):
                all_csv_data.extend(process_json_file(json_file_path, json_folder))

    # Write all the data to a single CSV file named after the parent folder.
    csv_file_path = os.path.join(output_directory, f"{parent_folder_name}.csv")
    write_to_csv(csv_file_path, all_csv_data)




In [4]:
# All paths are resolved relative to the current working directory. This is the
# notebook's own folder only when the kernel was started there.
current_directory = os.getcwd()

# Set the base path and output directory accordingly
base_path = os.path.abspath(os.path.join(current_directory, '..', '..', '..', 'hubsim', 'single-factor-experiments', 'bagel-dpo-34b-v0.2-bagel-dpo-34b-v0.2'))
output_directory = os.path.abspath(os.path.join(current_directory, '..', '..', 'parsing', 'output-22.02.2024', 'Bagel-Bagel'))

# Create the output directory if it doesn't exist
# (exist_ok avoids the race between checking for the directory and creating it)
os.makedirs(output_directory, exist_ok=True)

# Get a list of all subfolders in the "bagel-dpo-34b-v0.2-bagel-dpo-34b-v0.2" directory
subfolders = [f.path for f in os.scandir(base_path) if f.is_dir()]

# Process each folder; every folder produces its own CSV file in output_directory
for folder in subfolders:
    process_folder(folder, output_directory)


In [5]:
# Set the base path and output directory accordingly
# (current_directory is defined in an earlier cell)
base_path = os.path.abspath(os.path.join(current_directory, '..', '..', '..', 'hubsim', 'single-factor-experiments', 'bagel-dpo-34b-v0.2-Yi-34B-Chat'))
output_directory = os.path.abspath(os.path.join(current_directory, '..', '..', 'parsing', 'output-22.02.2024', 'Bagel-Yi'))

# Create the output directory if it doesn't exist
# (exist_ok avoids the race between checking for the directory and creating it)
os.makedirs(output_directory, exist_ok=True)

# Get a list of all subfolders in the "bagel-dpo-34b-v0.2-Yi-34B-Chat" directory
subfolders = [f.path for f in os.scandir(base_path) if f.is_dir()]

# Process each folder; every folder produces its own CSV file in output_directory
for folder in subfolders:
    process_folder(folder, output_directory)


In [6]:
# Set the base path and output directory accordingly
# (current_directory is defined in an earlier cell)
base_path = os.path.abspath(os.path.join(current_directory, '..', '..', '..', 'hubsim', 'single-factor-experiments', 'Yi-34B-Chat-Yi-34B-Chat'))
output_directory = os.path.abspath(os.path.join(current_directory, '..', '..', 'parsing', 'output-22.02.2024', 'Yi-Yi'))

# Create the output directory if it doesn't exist
# (exist_ok avoids the race between checking for the directory and creating it)
os.makedirs(output_directory, exist_ok=True)

# Get a list of all subfolders in the "Yi-34B-Chat-Yi-34B-Chat" directory
subfolders = [f.path for f in os.scandir(base_path) if f.is_dir()]

# Process each folder; every folder produces its own CSV file in output_directory
for folder in subfolders:
    process_folder(folder, output_directory)


In [6]:
# Debug helper: print the raw content of one conversation file.
# NOTE: file_path is a machine-specific absolute path; adjust it to the local
# checkout before running. The commented-out line is an alternative location.
#file_path = r'f:\HubSim-22.02.2024\hubsim\single-factor-experiments\bagel-dpo-34b-v0.2-bagel-dpo-34b-v0.2\landlord-bagel-dpo-34b-v0.2-Emilia Müller from Germany-bagel-dpo-34b-v0.2-Duisburg-20240216\22963780-cd3b-453f-afb4-b8d6abdd09ea\conversation.json'
file_path = r'F:\MasterTeamProject\Hubsim-21.02.2024\hubsim\single-factor-experiments\bagel-dpo-34b-v0.2-bagel-dpo-34b-v0.2\landlord-bagel-dpo-34b-v0.2-Emilia Müller from Germany-bagel-dpo-34b-v0.2-Duisburg-20240216\22963780-cd3b-453f-afb4-b8d6abdd09ea\conversation.json'
try:
    # JSON files are UTF-8; be explicit rather than relying on the platform
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
        print(content)
except FileNotFoundError:
    print("File not found.")
except PermissionError:
    print("Permission denied.")
except Exception as e:
    # Deliberate catch-all: this cell only reports the problem and moves on.
    print("An error occurred:", e)

[{"content": "Hello Mister Schmidt, my name is Emilia M\u00fcller from Germany. Thanks for inviting me to see the apartment in Duisburg. Let's talk about the rental price.", "role": "user", "name": "Emilia M\u00fcller from Germany"}, {"content": "Hi Emilia, nice to meet you. Let's talk about the rental price of the apartment. What is the highest rental price you are willing to pay for this apartment?", "role": "user", "name": "Peter Schmidt from Germany"}, {"content": "Let's start with 600 Euros per month.", "role": "user", "name": "Emilia M\u00fcller from Germany"}, {"content": "That's too low for me, Emilia. I'm looking for a higher price. How about 800 Euros per month?", "role": "user", "name": "Peter Schmidt from Germany"}, {"content": " That's too high for me. Let's meet in the middle. How about 700 Euros per month?", "role": "user", "name": "Emilia M\u00fcller from Germany"}, {"content": "Sorry, Emilia. 700 Euros is still too low for me. Let's try something higher. How about 750 