In [60]:
import os

import pandas as pd
import json
from dotenv import load_dotenv
import time
import argparse
from openai import OpenAI

import anthropic
from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
from anthropic.types.messages.batch_create_params import Request
import importlib
import dom_chunker

importlib.reload(dom_chunker)

from dom_chunker import HTMLChunker

from utils import (
    LLMS,
    PROMPTS,
    TASKS,
    batch_format_prompt,
    format_prompt,
    batch_parse_json,
    parse_html,
    completion,
    setup_logger,
    formatted_audits,
    estimate_tokens,
    is_past_5pm_edt
)

logger = setup_logger(__name__)

In [6]:
def is_valid_audit(audit):
  if((audit['scoreDisplayMode'] == 'notApplicable') or
    (audit['scoreDisplayMode'] == 'binary' and audit['score'] == 1) or
    (audit['scoreDisplayMode'] == 'informative') or
    (audit['scoreDisplayMode'] == 'manual') or
    (audit['scoreDisplayMode'] == 'error') or
    (audit['scoreDisplayMode'] == 'metricSavings' and audit['score'] == 1) or
    (audit['scoreDisplayMode'] == 'numeric' and audit['score'] == 1)):
    return False

  return True

def get_audits(dom_name: str, with_location = False):
    """
    Get the audits for a specific domain.

    Args:
        dom_name (str): The name of the domain.
        with_location (bool): Whether to include location in the audits.

    Returns:
        list: A list of audit data.
    """
    dom_path = os.path.join("./../dataset/lh-original-reports", f"{dom_name}.json")

    with open(dom_path, 'r') as file:
        audits = json.load(file)

    audits = [audit for key, audit in audits['audits'].items() if is_valid_audit(audit)]

    print(type(audits))

    return audits

def get_chunks_audits(dom_name: str, prompt_name: str = "eval-html"):
    audits = get_audits(dom_name=dom_name)

    audit_text = formatted_audits(audits)

    # logger.info("Loaded %s audits for inference on task.", len(audits))

    dom_path = os.path.join("./../dataset/chunks", f"{dom_name}.json") 

    chunks = []
    with open(dom_path, 'r') as file:
        chunks = json.load(file)

    chunks_df = pd.DataFrame(chunks)
    # chunks_df = chunks_df[~chunks_df['id'].isin(["script_store", "style_store"])]

    for ix, chunk in chunks_df.iterrows():
        chunks_df.loc[ix, 'no_of_issues'] = len(audits)
        chunks_df.loc[ix, 'audit_issues'] = audit_text
        chunks_df.loc[ix, 'start_time'] = time.time()
        chunk_dict = chunks_df.loc[ix].to_dict()
        messages = format_prompt(PROMPTS[prompt_name], chunk_dict)
        chunks_df.loc[ix, 'prompt'] = json.dumps(messages)
        chunks_df.loc[ix, 'message_tokens'] = estimate_tokens(messages)

    return chunks_df, audits, audit_text

In [66]:
MODEL = "qwen2.5-32b-instruct"

path_to_model_evaluations = os.path.join("./../results/evaluations", f"{MODEL}")

html_pages = [
    "airbnb",
    "ebay", 
    "github", 
    "medium", 
    "netflix", 
    "pinterest", 
    "quora", 
    "reddit", 
    "twitch", 
    "walmart", 
    "youtube",
    "facebook", 
    "twitter", 
    "linkedin", 
    "aliexpress"
]

for html_page in html_pages:
    try:
        path_to_original_html_page = os.path.join("./../dataset/original", f"{html_page}.html")
        if not os.path.exists(path_to_original_html_page):
            print(f"Path to original HTML page does not exist: {path_to_original_html_page}")
            continue

        with open(path_to_original_html_page, 'r') as file:
            original_html = file.read()

        path_to_modified_chunks = os.path.join(path_to_model_evaluations, f"{html_page}.json")
        path_to_modified_chunks_jsonl = os.path.join(path_to_model_evaluations, f"{html_page}.jsonl")
        if os.path.exists(path_to_modified_chunks):
            with open(path_to_modified_chunks, 'r') as file:
                chunks = json.load(file)
        elif os.path.exists(path_to_modified_chunks_jsonl):
            chunks = []
            with open(path_to_modified_chunks_jsonl, 'r') as file:
                for line in file:
                    chunks.append(json.loads(line))
        else:
            print(f"Path to chunks does not exist: {path_to_modified_chunks}")
            continue

        modified_chunks_df = pd.DataFrame(chunks)
        original_chunks_df = get_chunks_audits(dom_name=html_page)[0]
        logger.info(f"Loaded {len(modified_chunks_df)} modified chunks and {len(original_chunks_df)} original chunks for {html_page}.")

      
        # create a new chunk in modified_chunks_df with the original style_store and script_store
        modified_chunks_df = modified_chunks_df[~modified_chunks_df['id'].isin(["script_store", "style_store"])]
        modified_chunks_df = pd.concat([modified_chunks_df, original_chunks_df[original_chunks_df['id'].isin(["script_store", "style_store"])]])
        modified_chunks_df = modified_chunks_df.reset_index(drop=True)

        # get list of modified chunks
        chunker = HTMLChunker(
            html_content=original_html,
        )
        reassembled_page = chunker.reassemble_html(
            chunks=modified_chunks_df.to_dict(orient='records'),
            content_field_name="completion",
        )
        clean_reassembled_page = chunker.reassemble_html(
            chunks=modified_chunks_df.to_dict(orient='records'),
            content_field_name="completion",
            cleanup=True,
        )

        path_to_reassembled_page = os.path.join(f"./../results/reassembled/{MODEL}")
        if not os.path.exists(path_to_reassembled_page):
            os.makedirs(path_to_reassembled_page)
        path_to_clean_reassembled_page = os.path.join(f"./../results/reassembled-clean/{MODEL}")
        if not os.path.exists(path_to_clean_reassembled_page):
            os.makedirs(path_to_clean_reassembled_page)

        with open(f"{path_to_reassembled_page}/{html_page}.html", 'w') as file:
            file.write(reassembled_page)
        with open(f"{path_to_clean_reassembled_page}/{html_page}.html", 'w') as file:
            file.write(clean_reassembled_page)
        logger.info(f"Reassembled page saved to {path_to_reassembled_page}/{html_page}.html")
    except Exception as e:
        logger.error(f"Error loading modified chunks: {e}")
        continue

<class 'list'>


__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 o

<class 'list'>


__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.

<class 'list'>


__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-instruct/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/qwen2.5-32b-i

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks

<class 'list'>


__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for quora.
__main__ - I

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 o

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for facebook.
__main__ - INFO - Loaded 4 modified chunks and 4 ori

<class 'list'>
<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 m

In [85]:
MODEL = 'gpt-4.1'
path_to_model_evaluations = os.path.join("./../results/batches", f"{MODEL}/output")

for html_page in html_pages:
    try:
        path_to_original_html_page = os.path.join("./../dataset/original", f"{html_page}.html")
        if not os.path.exists(path_to_original_html_page):
            print(f"Path to original HTML page does not exist: {path_to_original_html_page}")
            continue

        with open(path_to_original_html_page, 'r') as file:
            original_html = file.read()

        original_chunks_df = get_chunks_audits(dom_name=html_page)[0]

        path_to_modified_chunks = os.path.join(path_to_model_evaluations, f"{html_page}.jsonl")
        if not os.path.exists(path_to_modified_chunks):
            print(f"Path to chunks does not exist: {path_to_modified_chunks}")
            continue
        with open(path_to_modified_chunks, 'r') as file:
            lines = file.readlines()
        chunks = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            chunk = json.loads(line)
            # # 'custom_id': 'airbnb_2a44c327-e28e-4100-b0c6-dd39a527b1b3'
            # print({
            #     'id': chunk['custom_id'].split('_')[1],
            #     'completion': parse_html(chunk['response']['body']['choices'][0]['message']['content']),
            # })
            chunk_id = chunk['custom_id'].split('_')[1]
            completion = parse_html(chunk['response']['body']['choices'][0]['message']['content'])
            chunks.append({
                'id': chunk_id,
                'completion': completion,
            })
        # make copies of original chunks into modified_chunks_df

        modified_chunks_df = original_chunks_df.copy()
        for ix, chunk in modified_chunks_df.iterrows():
            chunk_completion = next((c for c in chunks if c['id'] == chunk['id']), None)
            modified_chunks_df.loc[ix, 'completion'] = chunk_completion['completion'] if chunk_completion else ""
            

        logger.info(f"Loaded {len(modified_chunks_df)} modified chunks and {len(original_chunks_df)} original chunks for {html_page}.")

        # create a new chunk in modified_chunks_df with the original style_store and script_store
        modified_chunks_df = modified_chunks_df[~modified_chunks_df['id'].isin(["script_store", "style_store"])]
        modified_chunks_df = pd.concat([modified_chunks_df, original_chunks_df[original_chunks_df['id'].isin(["script_store", "style_store"])]])
        modified_chunks_df = modified_chunks_df.reset_index(drop=True)

        # get list of modified chunks
        chunker = HTMLChunker(
            html_content=original_html,
        )
        reassembled_page = chunker.reassemble_html(
            chunks=modified_chunks_df.to_dict(orient='records'),
            content_field_name="completion",
        )
        clean_reassembled_page = chunker.reassemble_html(
            chunks=modified_chunks_df.to_dict(orient='records'),
            content_field_name="completion",
            cleanup=True,
        )

        path_to_reassembled_page = os.path.join(f"./../results/reassembled/{MODEL}")
        if not os.path.exists(path_to_reassembled_page):
            os.makedirs(path_to_reassembled_page)
        path_to_clean_reassembled_page = os.path.join(f"./../results/reassembled-clean/{MODEL}")
        if not os.path.exists(path_to_clean_reassembled_page):
            os.makedirs(path_to_clean_reassembled_page)

        with open(f"{path_to_reassembled_page}/{html_page}.html", 'w') as file:
            file.write(reassembled_page)
        with open(f"{path_to_clean_reassembled_page}/{html_page}.html", 'w') as file:
            file.write(clean_reassembled_page)
        logger.info(f"Reassembled page saved to {path_to_reassembled_page}/{html_page}.html")
    except Exception as e:
        logger.error(f"Error loading modified chunks: {e}")
        # print tracce
        import traceback
        traceback.print_exc()
        continue

<class 'list'>


__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for airbnb.
__main__ - INFO - Loaded 10 modified chunks and 10 o

<class 'list'>


__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.
__main__ - INFO - Loaded 16 modified chunks and 16 original chunks for ebay.

<class 'list'>


__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/github.html
__main__ - INFO - Rea

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks for netflix.
__main__ - INFO - Loaded 4 modified chunks and 4 original chunks

<class 'list'>


__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pinterest.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/pin

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 original chunks for reddit.
__main__ - INFO - Loaded 10 modified chunks and 10 o

<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks for walmart.
__main__ - INFO - Loaded 5 modified chunks and 5 original chunks

<class 'list'>


__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ - INFO - Reassembled page saved to ./../results/reassembled/gpt-4.1/youtube.html
__main__ -

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>


__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 modified chunks and 8 original chunks for aliexpress.
__main__ - INFO - Loaded 8 m