# **DATA EXTRACTION FROM ZIP + EXTENSION SORTING**

In [2]:
import shutil
import os

# Directory to delete
EXTRACT_DIR = 'test'

# Check if the directory exists and delete it
if os.path.isdir(EXTRACT_DIR):
    shutil.rmtree(EXTRACT_DIR)
    print(f"Directory '{EXTRACT_DIR}' has been deleted.")
else:
    print(f"Directory '{EXTRACT_DIR}' does not exist.")

Directory 'test' has been deleted.


## ZIP Extraction

In [3]:
import zipfile
import shutil
import os
import pandas as pd

CPU cores:  144


In [4]:
def extract_zip(zip_path):

    if not zipfile.is_zipfile(zip_path):
        raise ValueError(f"The file at {zip_path} is not a valid ZIP archive.")
    
    # Determine the output directory name from the zip file name
    base_dir = os.path.dirname(zip_path)
    zip_filename = os.path.basename(zip_path)
    folder_name = os.path.splitext(zip_filename)[0]
    extract_to = os.path.join(base_dir, folder_name)

    # Create the directory if it doesn't exist
    if not os.path.exists(extract_to):
        os.makedirs(extract_to)

    # Extract the ZIP file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(path=extract_to)

    return extract_to


if __name__ == "__main__":
    zip_file_path = '0.zip' 
    extracted_dir = extract_zip(zip_file_path)
    print(f"Extracted to: {extracted_dir}")

Extracted to: 0


## Data Filtering By Extension

In [5]:
leak_directory = "0"

# Path to the __MACOSX folder
macosx_folder = os.path.join(leak_directory, "__MACOSX")

# Check if __MACOSX exists and remove it - creates issues when analyzing the data, and its not needed, made automatically by MacOS
if os.path.exists(macosx_folder) and os.path.isdir(macosx_folder):
    shutil.rmtree(macosx_folder)
    print(f"Deleted - {macosx_folder}")
else:
    print(f"Folder not found - {macosx_folder}")

# Organize files by extension into subfolders
for root, dirs, files in os.walk(leak_directory):
    for file in files:
        # Skip hidden files and __MACOSX if any reappear
        if file.startswith('.') or '__MACOSX' in root:
            continue

        # Get the file extension (in lowercase, without the dot)
        file_extension = os.path.splitext(file)[1].lower().lstrip('.')
        if not file_extension:
            file_extension = "no_extension"

        # Define the new subfolder path
        subfolder_path = os.path.join(leak_directory, file_extension)

        # Create the subfolder if it doesn't exist
        os.makedirs(subfolder_path, exist_ok=True)

        # Define source and destination paths
        source_path = os.path.join(root, file)
        destination_path = os.path.join(subfolder_path, file)

        # Move the file if source and destination are not the same
        if os.path.abspath(source_path) != os.path.abspath(destination_path):
            shutil.move(source_path, destination_path)

# Remove any empty folders within the parent directory
for dirpath, dirnames, filenames in os.walk(leak_directory, topdown=False):
    if not dirnames and not filenames:
        try:
            os.rmdir(dirpath)
            print(f"Removed empty folder: {dirpath}")
        except OSError:
            pass  

Deleted - 0/__MACOSX


In [6]:
# Delete Duplicate of 0 which is empty

# Directory to delete
EXTRACT_DIR = '0/0'

# Check if the directory exists and delete it
if os.path.isdir(EXTRACT_DIR):
    shutil.rmtree(EXTRACT_DIR)
    print(f"Directory '{EXTRACT_DIR}'deleted")
else:
    print(f"Directory '{EXTRACT_DIR}' does not exist")

Directory '0/0'deleted


## Dataframe with all file location

In [7]:
def get_folder_file_dataframe(root_dir):
    folders = []
    for file_name in os.listdir(root_dir):
        full_path = os.path.join(root_dir, file_name)
        if os.path.isdir(full_path):
            folders.append(file_name)

    series_list = []
    for folder in folders:
        folder_path = os.path.join(root_dir, folder)
        files = [
            os.path.join(folder_path, file)
            for file in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, file)) and not file.startswith('.')
        ]
        s = pd.Series(files, name=folder)
        series_list.append(s)

    df = pd.concat(series_list, axis=1)
    return df

In [8]:
df = get_folder_file_dataframe(leak_directory)
df.head(30)       

Unnamed: 0,md,png,log,txt
0,0/md/dbc9c90e-a3e6-4d71-bb93-5fb8394095ac.md,0/png/64bba692-d430-440c-9f1e-2575f45770af_6.png,0/log/77010155050.log,0/txt/IDNET.txt
1,0/md/28.md,0/png/12756724-394c-4576-b373-7c53f1abbd94_0.png,0/log/77753527617.log,0/txt/IDTV.txt
2,0/md/5.md,0/png/f179eb06-0c53-44df-a13f-570be23355bb_1.png,0/log/tele2-lbs.log,0/txt/beeline-77774042222.txt
3,0/md/38.md,0/png/5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_24.png,0/log/tele2-cdr.log,0/txt/beeline-77051056626.txt
4,0/md/9d7bc879-3250-4013-ac04-5ff9bd6dff40.md,0/png/5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_30.png,0/log/tele2-crm.log,0/txt/beeline-crm.txt
5,0/md/18.md,0/png/5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_18.png,0/log/77783030133.log,0/txt/UBSCRIBER.txt
6,0/md/9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b.md,0/png/0-adaf869e-920a-4a17-91bd-e2ef3125c10e.png,,0/txt/beeline-cdr.txt
7,0/md/3348953d-66e9-4cac-8675-65bb5f2ef929.md,0/png/5387a301-0af8-4e24-a197-20189f87b9ef_8.png,,0/txt/CRM.txt
8,0/md/1.md,0/png/0-32eb7662-f212-4811-a7c1-1cfeb121cd99.png,,0/txt/LAC.txt
9,0/md/19.md,0/png/912204cb-8ab7-48b8-9abf-d803f3804d08_11.png,,0/txt/beeline-lbs.txt


# **DATA PARSING**

## LLM Classification of MD files

In [9]:
import os
import concurrent.futures
import pandas as pd
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from tqdm import tqdm

In [10]:
llm = Ollama(model="llama3.1:8b")

prompt_template = PromptTemplate(
    input_variables=["content"],
    template="""
You are analyzing the content of a file.

File content:
\"\"\"
{content}
\"\"\"

1. Classify the content into one of the following categories ONLY: chats, images, other.
2. State your confidence in the classification as one of: high, medium, or low.
3. I will have to make a csv, please give me a list of headers based on the content. E.g. "[<header_name>, <heaer_name2>, etc.]
Respond in the following format:
Category: <chats|images|other>
Confidence: <high|medium|low>
"""
)

chain = LLMChain(llm=llm, prompt=prompt_template)

# Preprocess the files to the first 20 lines to shorten analysis times
def preprocess_first_20_lines(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = [line.strip() for _, line in zip(range(20), f)]
            content = " ".join(lines)
        return file_path, content
    except Exception:
        return file_path, ""

# Use only the first column of the DataFrame
first_column = df.columns[0]
file_paths = df[first_column].dropna().unique().tolist()

# Preprocess the file for faster classification
with concurrent.futures.ThreadPoolExecutor() as executor:
    file_data = list(executor.map(preprocess_first_20_lines, file_paths))

# Classify and collect by category
valid_categories = {"chats", "images", "other"}
categorized_files = {cat: [] for cat in valid_categories}

for file_path, content in tqdm(file_data, desc="Classifying files"):
    if not content:
        continue

    try:
        response = chain.run(content=content).strip().lower()
        lines = response.splitlines()

        category = ""
        for line in lines:
            if line.startswith("category:"):
                category = line.replace("category:", "").strip()
                break
        if category not in valid_categories:
            category = "other"

        categorized_files[category].append(file_path)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Convert to DataFrame (columns = categories)
# Determine the maximum list length among all categories
max_len = 0
for file_list in categorized_files.values():
    if len(file_list) > max_len:
        max_len = len(file_list)

# Pad each list with None to match max length
padded = {}
for category, file_list in categorized_files.items():
    padding_needed = max_len - len(file_list)
    padded[category] = file_list + [None] * padding_needed
result_df = pd.DataFrame(padded)
result_df.to_csv("classified_by_category.csv", index=False)
result_df.head()

  chain = LLMChain(llm=llm, prompt=prompt_template)
  response = chain.run(content=content).strip().lower()
Classifying files: 100%|██████████| 70/70 [03:58<00:00,  3.40s/it] 


Unnamed: 0,other,images,chats
0,0/md/9d7bc879-3250-4013-ac04-5ff9bd6dff40.md,0/md/dbc9c90e-a3e6-4d71-bb93-5fb8394095ac.md,0/md/28.md
1,,0/md/9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b.md,0/md/5.md
2,,0/md/3348953d-66e9-4cac-8675-65bb5f2ef929.md,0/md/38.md
3,,0/md/07f179c5-5705-4dbd-94a7-66eed1e066b0.md,0/md/18.md
4,,0/md/01cdc26f-e773-4ad7-8808-d04abf16aae7.md,0/md/1.md


## Cross Referenced Files

In [12]:
import pandas as pd
import os
import re
from collections import defaultdict

In [13]:
# Extract all unique file paths from the DataFrame 
file_paths = df.stack().dropna().unique().tolist()

# Read file content
def read_file(path):
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except:
        return "Failed to open file"

# Read file contents into a dictionary 
file_contents = {path: read_file(path) for path in file_paths}

# Build a reference map (which files mention which) 
reference_map = defaultdict(list)

for source_path, content in file_contents.items():
    if content is None:
        continue  # skip if content is None

    for target_path in file_paths:
        if target_path == source_path:
            continue  # skip comparing file to itself

        target_filename = os.path.basename(target_path)
        pattern = re.escape(target_filename)

        if re.search(rf'\b{pattern}\b', content):
            reference_map[source_path].append(target_path)

# Convert reference map to a DataFrame
ref_df = pd.DataFrame([
    {"source_file": src, "mentions": tgt}
    for src, tgts in reference_map.items()
    for tgt in tgts
])

# Group mentions into lists per source_file
ref_summary = ref_df.groupby("source_file")["mentions"].apply(list).reset_index()

# Some source_file names are also in mentions, therefore they aren't a source file anymore
source_files = set(ref_summary["source_file"])
mentioned_files = set(file for mention_list in ref_summary["mentions"] for file in mention_list)
common_files = source_files.intersection(mentioned_files)

# Filter out common files from the DataFrame
filtered_ref_summary = ref_summary[~ref_summary["source_file"].isin(common_files)].reset_index(drop=True)

filtered_ref_summary.to_csv("file_reference_map.csv", index=False)
print(filtered_ref_summary.head(10))
print("Unique file paths in original DataFrame:", df.stack().nunique())

                                    source_file  \
0  0/md/01cdc26f-e773-4ad7-8808-d04abf16aae7.md   
1                                    0/md/10.md   
2                                    0/md/13.md   
3                                    0/md/15.md   
4  0/md/178e3898-903d-47cf-bfbe-061e7dc18895.md   
5                                    0/md/19.md   
6                                     0/md/2.md   
7                                    0/md/20.md   
8                                    0/md/21.md   
9                                    0/md/22.md   

                                            mentions  
0  [0/png/01cdc26f-e773-4ad7-8808-d04abf16aae7_1_...  
1  [0/png/0-6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b....  
2  [0/png/0-adaf869e-920a-4a17-91bd-e2ef3125c10e....  
3  [0/png/0-b8cea3b1-4dde-4438-9b1a-6faf690bbad0....  
4  [0/png/178e3898-903d-47cf-bfbe-061e7dc18895_8....  
5  [0/png/6848748d-2881-4c26-b153-fcd5373d2f1c.pn...  
6  [0/md/07f179c5-5705-4dbd-94a7-66eed1e066b0.md,... 

In [14]:
import pandas as pd
import os
import re
from ast import literal_eval
import ast

In [15]:
# Check for more linkages in the files + add the missing source files from the chats

def is_readable_text_file(path):
    return any(path.endswith(ext) for ext in ['txt', 'md', 'rst', 'tex', 'nfo', 'readme', 'rtf', 'doc', 'docx',
    'cfg', 'conf', 'config', 'ini', 'json', 'yaml', 'yml', 'toml',
    'log', 'lst', 'cnf', 'properties', 'prefs',
    'csv', 'tsv', 'dat', 'db', 'dbf', 'sql', 'xml',
    'ssv', 'psv', 'jsonl', 'parquet', 'orc',
    'html', 'htm', 'xhtml', 'xht', 'css', 'js',
    'jsx', 'ts', 'tsx', 'vue', 'erb', 'ejs', 'jsp',
    'liquid', 'handlebars', 'hbs', 'mustache',
    'py', 'pyw', 'ipynb', 'java', 'c', 'cpp', 'h', 'hpp', 'cs',
    'sh', 'bash', 'zsh', 'ksh', 'bat', 'cmd', 'ps1',
    'r', 'jl', 'pl', 'pm', 'rb', 'go', 'lua', 'php',
    'swift', 'scala', 'dart', 'asm', 'groovy',
    'rmd', 'sage', 'nb',
    'env', 'gradle', 'makefile', 'mak', 'mk',
    'dockerfile', 'gitignore', 'gitattributes', 'gitmodules',
    'cmake', 'make', 'ninja', 'build',
    'manifest', 'manifest.json', 'vtt', 'srt', 'resx', 'strings',
    'lang', 'po', 'mo', 'pot', 'msg', 'textbundle',
    'rego', 'tf', 'tfvars', 'cue', 'bzl', 'bazel', 'nix', 'dhall',
    'adoc', 'asciidoc', 'creole', 'mediawiki', 'wiki', 'org',
    'eml', 'msg', 'mbox', 'mail', 'ics', 'vcf'])

mentions2_list = []

# Iterate over each row of the summary
for _, row in filtered_ref_summary.iterrows():
    mention_dict = {}

    # For each mentioned file read file contents and create a dictionary key: location - values: file contents
    for mentioned_file in row["mentions"]:
        if not is_readable_text_file(mentioned_file):
            continue  # Skip unreadable formats

        # Read file content
        content = read_file(mentioned_file)
        if content in [None, "Failed to open file"]:
            continue

        # Search for any file-type references using regex
        # Match anything with a name and known file extension
        found_files = re.findall(r'[\w\-/]*\.\w+', content)

        # Filter duplicates and add to the mention_dict
        if found_files:
            mention_dict[mentioned_file] = list(set(found_files))

    mentions2_list.append(mention_dict)

# Add this as a new column
filtered_ref_summary["mentions2"] = mentions2_list

# Get the list of chat files from result_df - LLM clasification
chat_files = result_df['chats'].dropna().unique()

# Get the list of source files already in filtered_ref_summary
source_files = filtered_ref_summary['source_file'].dropna().unique()

# Find missing chat files and collect chat files that are not found in source files
existing_source_files = set(source_files)
missing_chat_files = []
for chat_file in chat_files:
    if chat_file not in existing_source_files:
        missing_chat_files.append(chat_file)

# Create a new DataFrame with empty mentions and linkage columns
new_rows = pd.DataFrame({
    'source_file': missing_chat_files,
})

# Append the new rows to ref_summary
filtered_ref_summary = pd.concat([filtered_ref_summary, new_rows], ignore_index=True)
        
# Save the updated DataFrame to CSV
filtered_ref_summary.to_csv("file_reference_map_extended.csv", index=False)
print(filtered_ref_summary.columns)

Index(['source_file', 'mentions', 'mentions2'], dtype='object')


## MD -> CSV CONVERSATIONS

In [16]:
import os
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup

In [17]:
# Create a folder and dumb all csvs
output_dir = "csvs"
os.makedirs(output_dir, exist_ok=True)

# Get all MD files from the DataFrame
all_file_paths = pd.unique(result_df.values.ravel('K'))
md_files = []
for file_path in all_file_paths:
    if isinstance(file_path, str) and file_path.endswith(".md") and os.path.exists(file_path):
        md_files.append(file_path)
        
# Use the LLM's clasification of MD files
chat_files = set(result_df['chats'].dropna().astype(str))

# Extract visible text + ANY file reference 
def extract_text_and_files(td):
    text = td.get_text(strip=True)
    links = []
    for tag in td.find_all(['a', 'img']):
        link = tag.get('href') or tag.get('src')
        if link:
            fname = os.path.basename(link.strip("'\""))
            links.append(fname)
    return text + (" " + " ".join(links) if links else "")

# This method extracts chat table from HTML
def process_chat_md(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        soup = BeautifulSoup(content, "lxml")
        rows = soup.find_all("tr")[1:]
        data = []
        for row in rows:
            cols = row.find_all("td")
            if len(cols) == 4:
                data.append({
                    "Time": cols[0].get_text(strip=True),
                    "From": cols[1].get_text(strip=True),
                    "To": cols[2].get_text(strip=True),
                    "Message": extract_text_and_files(cols[3])
                })
        return pd.DataFrame(data) if data else None
    except Exception as e:
        print(f"Error processing chat file {file_path}: {e}")
        return None

# Extract the contents of the other file types that the LLM classified
def process_reference_md(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        soup = BeautifulSoup(content, "lxml")
        matches = []
        for tag in soup.find_all(['a', 'img']):
            link = tag.get('href') or tag.get('src')
            if link:
                fname = os.path.basename(link.strip("'\""))
                if not fname.startswith("0-"):
                    matches.append(fname)
        matches = list(set(matches))
        return pd.DataFrame([{"File_Name": f} for f in matches]) if matches else None
    except Exception as e:
        print(f"Error processing reference file {file_path}: {e}")
        return None

# Loop through all MD filesand call the above methods for the afferent file type
for file_path in md_files:
    csv_name = os.path.basename(file_path).replace(".md", ".csv")
    csv_path = os.path.join(output_dir, csv_name)

    if not os.path.exists(csv_path):
        if file_path in chat_files:
            df = process_chat_md(file_path)
        else:
            df = process_reference_md(file_path)

        if df is not None:
            df.to_csv(csv_path, index=False)
        else:
            print(f"Couldn't save to CSV: {file_path}")

In [18]:
import os
import ast
from pathlib import Path

In [19]:
# This cell changes the MD files in the file mapping to CSV

csv_dir = "csvs"

# Replace MD with CSV if the corresponding file exists
def replace_md_with_csv(item):
    if isinstance(item, str) and item.endswith(".md"):
        base_name = Path(item).stem  # .stem extracts the filename with no extension
        # CSV search
        csv_path = os.path.join(csv_dir, f"{base_name}.csv")
        if os.path.exists(csv_path):
            return csv_path
        else:
            print(f"CSV {item} not found")
    return item

# Recursively handle strings, lists, and dictionaries
def process_column_cell(cell):
    if isinstance(cell, str):
        return replace_md_with_csv(cell)
    
    elif isinstance(cell, list):
        new_list = []
        for item in cell:
            new_item = process_column_cell(item)
            new_list.append(new_item)
        return new_list

    elif isinstance(cell, dict):
        new_dict = {}
        for key, value in cell.items():
            new_key = replace_md_with_csv(key)
            new_value = process_column_cell(value)
            new_dict[new_key] = new_value
        return new_dict

    else:
        return cell
    
for col in filtered_ref_summary.columns:
    filtered_ref_summary[col] = filtered_ref_summary[col].apply(process_column_cell)

filtered_ref_summary.to_csv("results/filtered_ref_summary_csv_replaced.csv", index=False)
filtered_ref_summary.head()        

Unnamed: 0,source_file,mentions,mentions2
0,csvs/01cdc26f-e773-4ad7-8808-d04abf16aae7.csv,[0/png/01cdc26f-e773-4ad7-8808-d04abf16aae7_1_...,{}
1,csvs/10.csv,[0/png/0-6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b....,{'csvs/12756724-394c-4576-b373-7c53f1abbd94.cs...
2,csvs/13.csv,[0/png/0-adaf869e-920a-4a17-91bd-e2ef3125c10e....,{'csvs/585875ff-f8c5-4a02-acd7-fef37dc9ff11.cs...
3,csvs/15.csv,[0/png/0-b8cea3b1-4dde-4438-9b1a-6faf690bbad0....,{}
4,csvs/178e3898-903d-47cf-bfbe-061e7dc18895.csv,[0/png/178e3898-903d-47cf-bfbe-061e7dc18895_8....,{}


In [None]:
import os
import pandas as pd

In [22]:
# Use the csvs folder and the LLM classification of MD files to update the df created by the LLM to a new df that stores the csvs

csv_folder = "csvs"

csv_mapping = {}
# Loop through each file in the CSV folder
for filename in os.listdir(csv_folder):
    # Check for csv
    if filename.endswith('.csv'):
        # Replace the ".csv" extension with ".md" to create the key
        md_filename = filename.replace('.csv', '.md')
        
        # Set the value to the full path to the CSV file 
        csv_full_path = os.path.join(csv_folder, filename)
        
        # Add the key-value pair to the mapping
        csv_mapping[md_filename] = csv_full_path

# Step 2: Define function to replace .md path with .csv path
def replace_md_with_csv(val):
    if isinstance(val, str):
        base = os.path.basename(val)
        return csv_mapping.get(base, val)
    return val

df_csvs_llm = result_df.applymap(replace_md_with_csv)
df_csvs_llm.to_csv("results/updated_with_csv_paths.csv", index=False)
print(df_csvs_llm.head())

                                           other  \
0  csvs/9d7bc879-3250-4013-ac04-5ff9bd6dff40.csv   
1                                           None   
2                                           None   
3                                           None   
4                                           None   

                                          images        chats  
0  csvs/dbc9c90e-a3e6-4d71-bb93-5fb8394095ac.csv  csvs/28.csv  
1  csvs/9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b.csv   csvs/5.csv  
2  csvs/3348953d-66e9-4cac-8675-65bb5f2ef929.csv  csvs/38.csv  
3  csvs/07f179c5-5705-4dbd-94a7-66eed1e066b0.csv  csvs/18.csv  
4  csvs/01cdc26f-e773-4ad7-8808-d04abf16aae7.csv   csvs/1.csv  


## TXT & LOG -> CSV

In [None]:
import os
import csv

In [2]:
SOURCE_FOLDER_1 = "0/log"
SOURCE_FOLDER_2 = "0/txt"
OUTPUT_FOLDER = "logtxt_to_csv"

def detect_format(lines):
    if len(lines) >= 2 and all(c == '-' or c == ' ' for c in lines[1]):
        return 'fixed-width'
    elif ',' in lines[0]:
        return 'csv'
    elif '\t' in lines[0]:
        return 'tsv'
    return 'unknown'

def get_fixed_positions(divider_line):
    positions = []
    in_field = False
    for i, char in enumerate(divider_line):
        if char == '-' and not in_field:
            start = i
            in_field = True
        elif char != '-' and in_field:
            end = i
            positions.append((start, end))
            in_field = False
    if in_field:
        positions.append((start, len(divider_line)))
    return positions

def process_fixed_width(lines, output_path):
    header_line = lines[0]
    divider_line = lines[1]
    data_lines = lines[2:]
    positions = get_fixed_positions(divider_line)
    headers = [header_line[start:end].strip() for start, end in positions]

    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(headers)
        for line in data_lines:
            row = [line[start:end].strip() for start, end in positions]
            if any(row):
                writer.writerow(row)

def process_delimited(lines, output_path, delimiter):
    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        for line in lines:
            row = [field.strip().replace(',', '.') if delimiter == '\t' and field.replace(',', '').replace('.', '').isdigit() else field.strip() for field in line.strip().split(delimiter)]
            if any(row):
                writer.writerow(row)

def convert_all_files(source_folders, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for input_dir in source_folders:
        for file in os.listdir(input_dir):
            if not file.lower().endswith(('.txt', '.log')):
                continue

            file_path = os.path.join(input_dir, file)
            base_name = os.path.basename(file)
            output_path = os.path.join(output_folder, os.path.splitext(base_name)[0] + '.csv')

            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.readlines()

            if not lines:
                continue

            fmt = detect_format(lines)

            try:
                if fmt == 'fixed-width':
                    process_fixed_width(lines, output_path)
                elif fmt == 'tsv':
                    process_delimited(lines, output_path, delimiter='\t')
                elif fmt == 'csv':
                    process_delimited(lines, output_path, delimiter=',')
                else:
                    print(f"Skipped unknown format: {file}")
                    continue

                print(f"Converted: {file} → {os.path.basename(output_path)}")
            except Exception as e:
                print(f"Failed to convert {file}: {e}")

if __name__ == "__main__":
    convert_all_files([SOURCE_FOLDER_1, SOURCE_FOLDER_2], OUTPUT_FOLDER)

Skipped unknown format: 77010155050.log
Skipped unknown format: 77753527617.log
Converted: tele2-lbs.log → tele2-lbs.csv
Converted: tele2-cdr.log → tele2-cdr.csv
Converted: tele2-crm.log → tele2-crm.csv
Skipped unknown format: 77783030133.log
Converted: IDNET.txt → IDNET.csv
Converted: IDTV.txt → IDTV.csv
Converted: beeline-77774042222.txt → beeline-77774042222.csv
Converted: beeline-77051056626.txt → beeline-77051056626.csv
Converted: beeline-crm.txt → beeline-crm.csv
Skipped unknown format: UBSCRIBER.txt
Converted: beeline-cdr.txt → beeline-cdr.csv
Converted: CRM.txt → CRM.csv
Converted: LAC.txt → LAC.csv
Converted: beeline-lbs.txt → beeline-lbs.csv
Converted: Φ»¥σìò.txt → Φ»¥σìò.csv


## PNG -> CSV

In [None]:
# MacOS Script 

# **DATA TRANSLATION**

## *CONVERSATIONS TRANSLATION – GEMMA3:27B*

In [23]:
import pandas as pd
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Chats - CSV loop
for csv_path in df_csvs_llm['chats']:
    print(f"Processing: {csv_path}")
    
    # Load the individual CSV file
    df = pd.read_csv(csv_path)
    df['message_translation'] = ""

    # Function to get context around the target message
    def get_context(df, index, window=5):
        start = max(0, index - window)
        end = min(len(df), index + window + 1)
        return "\n".join(df['Message'].iloc[start:end].dropna())

    def query_ollama(prompt, model="gemma3:27b"):
        url = "http://localhost:11434/api/generate"
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }
        try:
            response = requests.post(url, json=payload, timeout=90)
            response.raise_for_status()
            return response.json()["response"].strip()
        except Exception as e:
            print(f"Ollama error: {e}")
            return "[Translation Error]"

    # Function to process one row
    def translate_row(index):
        target = df.at[index, 'Message']
        if pd.isna(target):
            return index, ""
        context = get_context(df, index)
        prompt = (
            "You are translating Chinese messages to English. Below is a series of related messages. "
            "Use the full context to understand the meaning, but only translate the specific message provided.\n\n"
            f"Context:\n{context}\n\n"
            f"Message to translate:\n{target}\n\n"
            "ONLY RETURN the English translation of the message, ANYTHING ELSE IS FORBIDDEN! "
            "If you encounter a filename or file reference, preserve it exactly as it appears, DON'T add anything!"
        )
        translation = query_ollama(prompt)
        return index, translation

    # Run translations in parallel
    futures = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for index in df.index:
            futures.append(executor.submit(translate_row, index))

        for future in tqdm(as_completed(futures), total=len(futures), desc=f"Translating: {csv_path}"):
            idx, result = future.result()
            df.at[idx, 'message_translation'] = result

    # Save updated DataFrame back to the original file
    df.to_csv(csv_path, index=False)
    print(f"Finished and saved: {csv_path}\n")

Processing: csvs/28.csv


Translating: csvs/28.csv: 100%|██████████| 46/46 [02:55<00:00,  3.81s/it]


Finished and saved: csvs/28.csv

Processing: csvs/5.csv


Translating: csvs/5.csv: 100%|██████████| 17/17 [00:24<00:00,  1.46s/it]


Finished and saved: csvs/5.csv

Processing: csvs/38.csv


Translating: csvs/38.csv: 100%|██████████| 21/21 [01:18<00:00,  3.75s/it]


Finished and saved: csvs/38.csv

Processing: csvs/18.csv


Translating: csvs/18.csv: 100%|██████████| 84/84 [02:10<00:00,  1.55s/it]


Finished and saved: csvs/18.csv

Processing: csvs/1.csv


Translating: csvs/1.csv:   7%|▋         | 550/8290 [20:25<13:53:57,  6.46s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv:  68%|██████▊   | 5642/8290 [3:25:59<6:53:56,  9.38s/it] 

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv:  69%|██████▉   | 5711/8290 [3:30:46<5:57:09,  8.31s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv:  77%|███████▋  | 6419/8290 [4:01:12<50:27,  1.62s/it]  

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv:  87%|████████▋ | 7230/8290 [4:35:32<2:34:48,  8.76s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv:  88%|████████▊ | 7268/8290 [4:37:49<2:13:42,  7.85s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/1.csv: 100%|██████████| 8290/8290 [5:20:43<00:00,  2.32s/it]  


Finished and saved: csvs/1.csv

Processing: csvs/19.csv


Translating: csvs/19.csv: 100%|██████████| 21/21 [01:04<00:00,  3.06s/it]


Finished and saved: csvs/19.csv

Processing: csvs/29.csv


Translating: csvs/29.csv: 100%|██████████| 80/80 [03:05<00:00,  2.31s/it]


Finished and saved: csvs/29.csv

Processing: csvs/4.csv


Translating: csvs/4.csv: 100%|██████████| 513/513 [17:16<00:00,  2.02s/it]


Finished and saved: csvs/4.csv

Processing: csvs/39.csv


Translating: csvs/39.csv: 100%|██████████| 821/821 [27:27<00:00,  2.01s/it]


Finished and saved: csvs/39.csv

Processing: csvs/16.csv


Translating: csvs/16.csv: 100%|██████████| 149/149 [04:35<00:00,  1.85s/it]


Finished and saved: csvs/16.csv

Processing: csvs/22.csv


Translating: csvs/22.csv: 100%|██████████| 91/91 [03:16<00:00,  2.16s/it]


Finished and saved: csvs/22.csv

Processing: csvs/32.csv


Translating: csvs/32.csv: 100%|██████████| 86/86 [02:35<00:00,  1.81s/it]


Finished and saved: csvs/32.csv

Processing: csvs/26.csv


Translating: csvs/26.csv: 100%|██████████| 6/6 [00:17<00:00,  2.85s/it]


Finished and saved: csvs/26.csv

Processing: csvs/12.csv


Translating: csvs/12.csv: 100%|██████████| 392/392 [13:52<00:00,  2.12s/it]


Finished and saved: csvs/12.csv

Processing: csvs/36.csv


Translating: csvs/36.csv: 100%|██████████| 199/199 [05:28<00:00,  1.65s/it]


Finished and saved: csvs/36.csv

Processing: csvs/27.csv


Translating: csvs/27.csv: 100%|██████████| 29/29 [02:44<00:00,  5.67s/it]


Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
Finished and saved: csvs/27.csv

Processing: csvs/13.csv


Translating: csvs/13.csv: 100%|██████████| 244/244 [08:18<00:00,  2.04s/it]


Finished and saved: csvs/13.csv

Processing: csvs/37.csv


Translating: csvs/37.csv:  10%|▉         | 32/329 [02:47<52:10, 10.54s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/37.csv:  16%|█▋        | 54/329 [04:46<21:03,  4.59s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/37.csv:  27%|██▋       | 90/329 [07:55<16:51,  4.23s/it]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)


Translating: csvs/37.csv: 100%|██████████| 329/329 [16:39<00:00,  3.04s/it]


Finished and saved: csvs/37.csv

Processing: csvs/17.csv


Translating: csvs/17.csv: 100%|██████████| 55/55 [01:42<00:00,  1.86s/it]


Finished and saved: csvs/17.csv

Processing: csvs/23.csv


Translating: csvs/23.csv: 100%|██████████| 24/24 [00:45<00:00,  1.90s/it]


Finished and saved: csvs/23.csv

Processing: csvs/33.csv


Translating: csvs/33.csv: 100%|██████████| 31/31 [00:52<00:00,  1.70s/it]


Finished and saved: csvs/33.csv

Processing: csvs/24.csv


Translating: csvs/24.csv: 100%|██████████| 55/55 [01:57<00:00,  2.13s/it]


Finished and saved: csvs/24.csv

Processing: csvs/41.csv


Translating: csvs/41.csv: 100%|██████████| 322/322 [10:44<00:00,  2.00s/it]


Finished and saved: csvs/41.csv

Processing: csvs/10.csv


Translating: csvs/10.csv: 100%|██████████| 149/149 [06:13<00:00,  2.51s/it]


Finished and saved: csvs/10.csv

Processing: csvs/9.csv


Translating: csvs/9.csv: 100%|██████████| 235/235 [08:11<00:00,  2.09s/it]


Finished and saved: csvs/9.csv

Processing: csvs/34.csv


Translating: csvs/34.csv: 100%|██████████| 426/426 [13:25<00:00,  1.89s/it]


Finished and saved: csvs/34.csv

Processing: csvs/14.csv


Translating: csvs/14.csv: 100%|██████████| 44/44 [01:18<00:00,  1.78s/it]


Finished and saved: csvs/14.csv

Processing: csvs/20.csv


Translating: csvs/20.csv: 100%|██████████| 68/68 [02:06<00:00,  1.86s/it]


Finished and saved: csvs/20.csv

Processing: csvs/30.csv


Translating: csvs/30.csv: 100%|██████████| 322/322 [08:36<00:00,  1.61s/it]


Finished and saved: csvs/30.csv

Processing: csvs/15.csv


Translating: csvs/15.csv: 100%|██████████| 199/199 [06:06<00:00,  1.84s/it]


Finished and saved: csvs/15.csv

Processing: csvs/21.csv


Translating: csvs/21.csv: 100%|██████████| 1046/1046 [24:47<00:00,  1.42s/it]


Finished and saved: csvs/21.csv

Processing: csvs/31.csv


Translating: csvs/31.csv: 100%|██████████| 44/44 [01:06<00:00,  1.51s/it]


Finished and saved: csvs/31.csv

Processing: csvs/40.csv


Translating: csvs/40.csv: 100%|██████████| 129/129 [04:50<00:00,  2.25s/it]


Finished and saved: csvs/40.csv

Processing: csvs/11.csv


Translating: csvs/11.csv: 100%|██████████| 125/125 [04:12<00:00,  2.02s/it]


Finished and saved: csvs/11.csv

Processing: csvs/35.csv


Translating: csvs/35.csv: 100%|██████████| 196/196 [06:40<00:00,  2.04s/it]


Finished and saved: csvs/35.csv

Processing: csvs/3.csv


Translating: csvs/3.csv: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it]


Finished and saved: csvs/3.csv

Processing: csvs/7.csv


Translating: csvs/7.csv: 100%|██████████| 196/196 [05:34<00:00,  1.71s/it]


Finished and saved: csvs/7.csv

Processing: csvs/6.csv


Translating: csvs/6.csv: 100%|██████████| 136/136 [03:08<00:00,  1.38s/it]


Finished and saved: csvs/6.csv

Processing: csvs/2.csv


Translating: csvs/2.csv: 100%|██████████| 500/500 [16:03<00:00,  1.93s/it]

Finished and saved: csvs/2.csv






## *OCR TRANSLATION - GEMMA3:27b*

In [7]:
import os
import pandas as pd
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

ocr_folder_path = 'ocr_to_csv/'

def query_ollama(prompt, model="gemma3:27b"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    try:
        response = requests.post(url, json=payload, timeout=90)
        response.raise_for_status()
        return response.json()["response"].strip()
    except Exception as e:
        print(f"Ollama error: {e}")
        return "[Translation Error]"

def translate_row(index, row_text):
    if pd.isna(row_text) or not row_text.strip():
        return index, ""
    prompt = (
        "You are translating Chinese text into fluent English. Translate the following message:\n\n"
        f"{row_text}\n\n"
        "ONLY RETURN the English translation. DO NOT add commentary or extra formatting."
    )
    return index, query_ollama(prompt)

ocr_files = sorted([f for f in os.listdir(ocr_folder_path) if f.endswith('.csv')])

for file_idx, file in enumerate(tqdm(ocr_files, desc="Translating OCR Files", unit="file")):
    print(f"\nProcessing file {file_idx + 1} of {len(ocr_files)}: {file}")
    path = os.path.join(ocr_folder_path, file)
    df = pd.read_csv(path)

    if 'ocr extraction' not in df.columns:
        print(f" Skipping {file} (missing 'ocr extraction' column)")
        continue

    df['ocr translation'] = ""

    futures = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        for idx, row in df.iterrows():
            futures.append(executor.submit(translate_row, idx, row['ocr extraction']))

        for future in as_completed(futures):
            idx, translated = future.result()
            df.at[idx, 'ocr translation'] = translated

    df.to_csv(path, index=False)
    print(f" Saved translated file: {file}")

Translating OCR Files:   0%|          | 0/490 [00:00<?, ?file/s]


Processing file 1 of 490: 0-08a6bcd3-6477-4252-8f35-4f8f80d114f9.csv


Translating OCR Files:   0%|          | 1/490 [01:08<9:17:52, 68.45s/file]

 Saved translated file: 0-08a6bcd3-6477-4252-8f35-4f8f80d114f9.csv

Processing file 2 of 490: 0-0b54af64-c2cd-4acb-9864-73a584aa6ebc.csv


Translating OCR Files:   0%|          | 2/490 [01:24<5:06:57, 37.74s/file]

 Saved translated file: 0-0b54af64-c2cd-4acb-9864-73a584aa6ebc.csv

Processing file 3 of 490: 0-0baba509-5e81-4b88-b509-843822d09e21.csv


Translating OCR Files:   1%|          | 3/490 [01:37<3:34:25, 26.42s/file]

 Saved translated file: 0-0baba509-5e81-4b88-b509-843822d09e21.csv

Processing file 4 of 490: 0-0f319bf6-e667-4bac-a974-dfda1142e9ff.csv


Translating OCR Files:   1%|          | 4/490 [01:46<2:39:12, 19.66s/file]

 Saved translated file: 0-0f319bf6-e667-4bac-a974-dfda1142e9ff.csv

Processing file 5 of 490: 0-129ac70f-8942-4ca7-b1f2-ddeaa3d984b5.csv


Translating OCR Files:   1%|          | 5/490 [01:55<2:06:04, 15.60s/file]

 Saved translated file: 0-129ac70f-8942-4ca7-b1f2-ddeaa3d984b5.csv

Processing file 6 of 490: 0-1a20ded1-50fc-4153-9a95-e158eeb7199e.csv


Translating OCR Files:   1%|          | 6/490 [02:02<1:42:47, 12.74s/file]

 Saved translated file: 0-1a20ded1-50fc-4153-9a95-e158eeb7199e.csv

Processing file 7 of 490: 0-1afcf93d-50f1-4f1e-896d-87b0da7519f7.csv


Translating OCR Files:   1%|▏         | 7/490 [02:09<1:27:10, 10.83s/file]

 Saved translated file: 0-1afcf93d-50f1-4f1e-896d-87b0da7519f7.csv

Processing file 8 of 490: 0-1b0dc208-d2bb-43ea-b744-534f3b759394.csv


Translating OCR Files:   2%|▏         | 8/490 [02:13<1:10:35,  8.79s/file]

 Saved translated file: 0-1b0dc208-d2bb-43ea-b744-534f3b759394.csv

Processing file 9 of 490: 0-1cc570d8-cddb-401e-8c37-ef10c0e4841f.csv


Translating OCR Files:   2%|▏         | 9/490 [02:23<1:12:55,  9.10s/file]

 Saved translated file: 0-1cc570d8-cddb-401e-8c37-ef10c0e4841f.csv

Processing file 10 of 490: 0-300450bf-221e-4eeb-bdda-dc1115c947ea.csv


Translating OCR Files:   2%|▏         | 10/490 [02:34<1:16:15,  9.53s/file]

 Saved translated file: 0-300450bf-221e-4eeb-bdda-dc1115c947ea.csv

Processing file 11 of 490: 0-32eb7662-f212-4811-a7c1-1cfeb121cd99.csv


Translating OCR Files:   2%|▏         | 11/490 [03:03<2:05:40, 15.74s/file]

 Saved translated file: 0-32eb7662-f212-4811-a7c1-1cfeb121cd99.csv

Processing file 12 of 490: 0-330f554f-a3e6-4bd3-8b1b-d5949e1f30e8.csv


Translating OCR Files:   2%|▏         | 12/490 [03:06<1:33:23, 11.72s/file]

 Saved translated file: 0-330f554f-a3e6-4bd3-8b1b-d5949e1f30e8.csv

Processing file 13 of 490: 0-3556e54c-d418-447d-bb2a-43ac0408cc7a.csv


Translating OCR Files:   3%|▎         | 13/490 [03:10<1:15:22,  9.48s/file]

 Saved translated file: 0-3556e54c-d418-447d-bb2a-43ac0408cc7a.csv

Processing file 14 of 490: 0-383d824e-7588-4a92-84b7-fd953dd91cba.csv


Translating OCR Files:   3%|▎         | 14/490 [03:22<1:20:43, 10.18s/file]

 Saved translated file: 0-383d824e-7588-4a92-84b7-fd953dd91cba.csv

Processing file 15 of 490: 0-493542fc-495f-4756-8451-c4ed084d8bf7.csv


Translating OCR Files:   3%|▎         | 15/490 [03:34<1:24:07, 10.63s/file]

 Saved translated file: 0-493542fc-495f-4756-8451-c4ed084d8bf7.csv

Processing file 16 of 490: 0-4ae9bf34-c16c-4684-aa92-fec65a151275.csv


Translating OCR Files:   3%|▎         | 16/490 [04:06<2:14:46, 17.06s/file]

 Saved translated file: 0-4ae9bf34-c16c-4684-aa92-fec65a151275.csv

Processing file 17 of 490: 0-4c74b697-0681-4223-9982-5ffaf4e98ed0.csv


Translating OCR Files:   3%|▎         | 17/490 [04:07<1:36:57, 12.30s/file]

 Saved translated file: 0-4c74b697-0681-4223-9982-5ffaf4e98ed0.csv

Processing file 18 of 490: 0-4ea07c23-a1a6-411b-bcfb-552d095b66c9.csv


Translating OCR Files:   4%|▎         | 18/490 [04:19<1:35:16, 12.11s/file]

 Saved translated file: 0-4ea07c23-a1a6-411b-bcfb-552d095b66c9.csv

Processing file 19 of 490: 0-5a84cde3-7175-4044-8c88-d4c883a8fd38.csv


Translating OCR Files:   4%|▍         | 19/490 [04:35<1:43:56, 13.24s/file]

 Saved translated file: 0-5a84cde3-7175-4044-8c88-d4c883a8fd38.csv

Processing file 20 of 490: 0-5ae9bdca-fdf9-4948-8c11-a9e400b331aa.csv


Translating OCR Files:   4%|▍         | 20/490 [04:43<1:31:36, 11.70s/file]

 Saved translated file: 0-5ae9bdca-fdf9-4948-8c11-a9e400b331aa.csv

Processing file 21 of 490: 0-5d4e3e02-1dfc-469e-8af9-8dbe2b9f1564.csv


Translating OCR Files:   4%|▍         | 21/490 [04:44<1:06:53,  8.56s/file]

 Saved translated file: 0-5d4e3e02-1dfc-469e-8af9-8dbe2b9f1564.csv

Processing file 22 of 490: 0-5ef1d666-e19d-4570-b800-6693a4f680ee.csv


Translating OCR Files:   4%|▍         | 22/490 [05:24<2:21:19, 18.12s/file]

 Saved translated file: 0-5ef1d666-e19d-4570-b800-6693a4f680ee.csv

Processing file 23 of 490: 0-62583414-9e32-4d09-8989-b5fa32a98a81.csv


Translating OCR Files:   5%|▍         | 23/490 [05:30<1:51:22, 14.31s/file]

 Saved translated file: 0-62583414-9e32-4d09-8989-b5fa32a98a81.csv

Processing file 24 of 490: 0-62ff30cf-de5f-4388-82aa-b69b0fd0f07c.csv


Translating OCR Files:   5%|▍         | 24/490 [05:40<1:42:18, 13.17s/file]

 Saved translated file: 0-62ff30cf-de5f-4388-82aa-b69b0fd0f07c.csv

Processing file 25 of 490: 0-645dfc97-3268-4e1d-920d-4138545456fa.csv


Translating OCR Files:   5%|▌         | 25/490 [05:42<1:15:01,  9.68s/file]

 Saved translated file: 0-645dfc97-3268-4e1d-920d-4138545456fa.csv

Processing file 26 of 490: 0-6848748d-2881-4c26-b153-fcd5373d2f1c.csv


Translating OCR Files:   5%|▌         | 26/490 [06:07<1:51:53, 14.47s/file]

 Saved translated file: 0-6848748d-2881-4c26-b153-fcd5373d2f1c.csv

Processing file 27 of 490: 0-6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b.csv


Translating OCR Files:   6%|▌         | 27/490 [06:10<1:24:14, 10.92s/file]

 Saved translated file: 0-6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b.csv

Processing file 28 of 490: 0-6cbb3eeb-17e9-4af6-8da1-36eb6437f7bc.csv


Translating OCR Files:   6%|▌         | 28/490 [06:27<1:38:43, 12.82s/file]

 Saved translated file: 0-6cbb3eeb-17e9-4af6-8da1-36eb6437f7bc.csv

Processing file 29 of 490: 0-6e9aced1-df28-4e57-b7c8-641609ff4450.csv


Translating OCR Files:   6%|▌         | 29/490 [06:39<1:36:22, 12.54s/file]

 Saved translated file: 0-6e9aced1-df28-4e57-b7c8-641609ff4450.csv

Processing file 30 of 490: 0-70c63791-2797-4bf0-a778-ea08819aa9de.csv


Translating OCR Files:   6%|▌         | 30/490 [06:40<1:10:00,  9.13s/file]

 Saved translated file: 0-70c63791-2797-4bf0-a778-ea08819aa9de.csv

Processing file 31 of 490: 0-7150f512-e7a2-4f2c-86bc-58b671b25ba9.csv


Translating OCR Files:   6%|▋         | 31/490 [06:46<1:02:51,  8.22s/file]

 Saved translated file: 0-7150f512-e7a2-4f2c-86bc-58b671b25ba9.csv

Processing file 32 of 490: 0-785cc8c9-1225-4f93-b633-349bc5113512.csv


Translating OCR Files:   7%|▋         | 32/490 [06:53<57:48,  7.57s/file]  

 Saved translated file: 0-785cc8c9-1225-4f93-b633-349bc5113512.csv

Processing file 33 of 490: 0-79d9b7f2-cfe4-4615-9b75-8fea33fc0c9d.csv


Translating OCR Files:   7%|▋         | 33/490 [06:56<48:09,  6.32s/file]

 Saved translated file: 0-79d9b7f2-cfe4-4615-9b75-8fea33fc0c9d.csv

Processing file 34 of 490: 0-94b16e53-f035-4aa9-a76e-80bc6e936d10.csv


Translating OCR Files:   7%|▋         | 34/490 [06:59<41:13,  5.43s/file]

 Saved translated file: 0-94b16e53-f035-4aa9-a76e-80bc6e936d10.csv

Processing file 35 of 490: 0-96af60b3-299c-4e26-bca3-d9eb3e113b94.csv


Translating OCR Files:   7%|▋         | 35/490 [07:19<1:13:42,  9.72s/file]

 Saved translated file: 0-96af60b3-299c-4e26-bca3-d9eb3e113b94.csv

Processing file 36 of 490: 0-987ba39a-cc1c-4367-8d6d-f5a49a940198.csv


Translating OCR Files:   7%|▋         | 36/490 [07:25<1:05:22,  8.64s/file]

 Saved translated file: 0-987ba39a-cc1c-4367-8d6d-f5a49a940198.csv

Processing file 37 of 490: 0-9a8077f5-ac41-491f-b192-6b4609324bda.csv


Translating OCR Files:   8%|▊         | 37/490 [07:27<50:14,  6.65s/file]  

 Saved translated file: 0-9a8077f5-ac41-491f-b192-6b4609324bda.csv

Processing file 38 of 490: 0-9c8c9989-2293-4e68-9ffe-6f7a5f14562f.csv


Translating OCR Files:   8%|▊         | 38/490 [07:30<41:34,  5.52s/file]

 Saved translated file: 0-9c8c9989-2293-4e68-9ffe-6f7a5f14562f.csv

Processing file 39 of 490: 0-aa99f763-6849-4f6b-adf2-58f0cc2ed545.csv


Translating OCR Files:   8%|▊         | 39/490 [07:38<47:09,  6.27s/file]

 Saved translated file: 0-aa99f763-6849-4f6b-adf2-58f0cc2ed545.csv

Processing file 40 of 490: 0-adaf869e-920a-4a17-91bd-e2ef3125c10e.csv


Translating OCR Files:   8%|▊         | 40/490 [07:48<55:26,  7.39s/file]

 Saved translated file: 0-adaf869e-920a-4a17-91bd-e2ef3125c10e.csv

Processing file 41 of 490: 0-af93eff8-2973-4746-9041-b2223016b117.csv


Translating OCR Files:   8%|▊         | 41/490 [07:56<56:33,  7.56s/file]

 Saved translated file: 0-af93eff8-2973-4746-9041-b2223016b117.csv

Processing file 42 of 490: 0-b0a4acaa-d768-4f6d-8e54-6d20f271bb7c.csv


Translating OCR Files:   9%|▊         | 42/490 [07:58<44:26,  5.95s/file]

 Saved translated file: 0-b0a4acaa-d768-4f6d-8e54-6d20f271bb7c.csv

Processing file 43 of 490: 0-b3ce4d51-6024-4b43-b0d2-d3faaf3c2879.csv


Translating OCR Files:   9%|▉         | 43/490 [08:06<49:21,  6.63s/file]

 Saved translated file: 0-b3ce4d51-6024-4b43-b0d2-d3faaf3c2879.csv

Processing file 44 of 490: 0-b6eb1b15-cf99-475c-921f-f06e5c1019d4.csv


Translating OCR Files:   9%|▉         | 44/490 [08:08<37:23,  5.03s/file]

 Saved translated file: 0-b6eb1b15-cf99-475c-921f-f06e5c1019d4.csv

Processing file 45 of 490: 0-b8b76b6d-a50e-4246-82ee-3c8a5dcd523e.csv


Translating OCR Files:   9%|▉         | 45/490 [08:14<41:11,  5.55s/file]

 Saved translated file: 0-b8b76b6d-a50e-4246-82ee-3c8a5dcd523e.csv

Processing file 46 of 490: 0-b8cea3b1-4dde-4438-9b1a-6faf690bbad0.csv


Translating OCR Files:   9%|▉         | 46/490 [08:17<34:58,  4.73s/file]

 Saved translated file: 0-b8cea3b1-4dde-4438-9b1a-6faf690bbad0.csv

Processing file 47 of 490: 0-b9d9c584-5e21-4a49-952b-ffecca4eb91e.csv


Translating OCR Files:  10%|▉         | 47/490 [08:33<59:42,  8.09s/file]

 Saved translated file: 0-b9d9c584-5e21-4a49-952b-ffecca4eb91e.csv

Processing file 48 of 490: 0-bcad4fdf-3771-4873-92fa-23240654118a.csv


Translating OCR Files:  10%|▉         | 48/490 [08:50<1:18:41, 10.68s/file]

 Saved translated file: 0-bcad4fdf-3771-4873-92fa-23240654118a.csv

Processing file 49 of 490: 0-c5f1d959-39d1-4176-9cb1-1fb6e8baedc3.csv


Translating OCR Files:  10%|█         | 49/490 [09:10<1:39:59, 13.60s/file]

 Saved translated file: 0-c5f1d959-39d1-4176-9cb1-1fb6e8baedc3.csv

Processing file 50 of 490: 0-dd5b6a38-dc17-4122-a242-32006b381b3a.csv


Translating OCR Files:  10%|█         | 50/490 [09:12<1:12:26,  9.88s/file]

 Saved translated file: 0-dd5b6a38-dc17-4122-a242-32006b381b3a.csv

Processing file 51 of 490: 0-de359f8d-0745-4a93-959a-d1a6c361e326.csv


Translating OCR Files:  10%|█         | 51/490 [09:22<1:13:51, 10.10s/file]

 Saved translated file: 0-de359f8d-0745-4a93-959a-d1a6c361e326.csv

Processing file 52 of 490: 0-e07a9457-86f1-4f0f-86d7-8ea816b8d8d3.csv


Translating OCR Files:  11%|█         | 52/490 [09:31<1:11:20,  9.77s/file]

 Saved translated file: 0-e07a9457-86f1-4f0f-86d7-8ea816b8d8d3.csv

Processing file 53 of 490: 0-e705d192-90ee-4fd1-9dcd-061958d1817f.csv


Translating OCR Files:  11%|█         | 53/490 [09:34<55:45,  7.66s/file]  

 Saved translated file: 0-e705d192-90ee-4fd1-9dcd-061958d1817f.csv

Processing file 54 of 490: 0-ee47dfea-2626-4107-8ab3-4663167e0493.csv


Translating OCR Files:  11%|█         | 54/490 [09:45<1:02:25,  8.59s/file]

 Saved translated file: 0-ee47dfea-2626-4107-8ab3-4663167e0493.csv

Processing file 55 of 490: 0-f0ce8a7b-909d-4fc5-ba13-ea66b2dc6448.csv


Translating OCR Files:  11%|█         | 55/490 [09:47<48:21,  6.67s/file]  

 Saved translated file: 0-f0ce8a7b-909d-4fc5-ba13-ea66b2dc6448.csv

Processing file 56 of 490: 0-f313f521-80a1-4db5-a8a7-53d29ee09890.csv


Translating OCR Files:  11%|█▏        | 56/490 [10:04<1:11:18,  9.86s/file]

 Saved translated file: 0-f313f521-80a1-4db5-a8a7-53d29ee09890.csv

Processing file 57 of 490: 0-f41b7574-57b4-4c9f-907c-2a3c48a56157.csv


Translating OCR Files:  12%|█▏        | 57/490 [10:21<1:26:56, 12.05s/file]

 Saved translated file: 0-f41b7574-57b4-4c9f-907c-2a3c48a56157.csv

Processing file 58 of 490: 0-fc27ce32-9c96-416c-9c38-84977255e0ba.csv


Translating OCR Files:  12%|█▏        | 58/490 [10:36<1:31:36, 12.72s/file]

 Saved translated file: 0-fc27ce32-9c96-416c-9c38-84977255e0ba.csv

Processing file 59 of 490: 0-fcf90a92-794c-40c6-aa4f-8ea82f8bed51.csv


Translating OCR Files:  12%|█▏        | 59/490 [10:53<1:40:50, 14.04s/file]

 Saved translated file: 0-fcf90a92-794c-40c6-aa4f-8ea82f8bed51.csv

Processing file 60 of 490: 0-fe221e78-67e4-4d88-b73d-e58a9943a036.csv


Translating OCR Files:  12%|█▏        | 60/490 [11:01<1:28:08, 12.30s/file]

 Saved translated file: 0-fe221e78-67e4-4d88-b73d-e58a9943a036.csv

Processing file 61 of 490: 01cdc26f-e773-4ad7-8808-d04abf16aae7_1_0.csv


Translating OCR Files:  12%|█▏        | 61/490 [11:55<2:58:08, 24.91s/file]

 Saved translated file: 01cdc26f-e773-4ad7-8808-d04abf16aae7_1_0.csv

Processing file 62 of 490: 01cdc26f-e773-4ad7-8808-d04abf16aae7_2_0.csv


Translating OCR Files:  13%|█▎        | 62/490 [12:02<2:18:43, 19.45s/file]

 Saved translated file: 01cdc26f-e773-4ad7-8808-d04abf16aae7_2_0.csv

Processing file 63 of 490: 07f179c5-5705-4dbd-94a7-66eed1e066b0_0.csv


Translating OCR Files:  13%|█▎        | 63/490 [13:32<4:49:19, 40.66s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: 07f179c5-5705-4dbd-94a7-66eed1e066b0_0.csv

Processing file 64 of 490: 07f179c5-5705-4dbd-94a7-66eed1e066b0_1.csv


Translating OCR Files:  13%|█▎        | 64/490 [14:07<4:35:25, 38.79s/file]

 Saved translated file: 07f179c5-5705-4dbd-94a7-66eed1e066b0_1.csv

Processing file 65 of 490: 07f179c5-5705-4dbd-94a7-66eed1e066b0_2.csv


Translating OCR Files:  13%|█▎        | 65/490 [15:32<6:14:42, 52.90s/file]

 Saved translated file: 07f179c5-5705-4dbd-94a7-66eed1e066b0_2.csv

Processing file 66 of 490: 08a6bcd3-6477-4252-8f35-4f8f80d114f9.csv


Translating OCR Files:  13%|█▎        | 66/490 [16:03<5:26:47, 46.24s/file]

 Saved translated file: 08a6bcd3-6477-4252-8f35-4f8f80d114f9.csv

Processing file 67 of 490: 0b54af64-c2cd-4acb-9864-73a584aa6ebc.csv


Translating OCR Files:  14%|█▎        | 67/490 [16:23<4:30:15, 38.33s/file]

 Saved translated file: 0b54af64-c2cd-4acb-9864-73a584aa6ebc.csv

Processing file 68 of 490: 0baba509-5e81-4b88-b509-843822d09e21.csv


Translating OCR Files:  14%|█▍        | 68/490 [16:43<3:52:02, 32.99s/file]

 Saved translated file: 0baba509-5e81-4b88-b509-843822d09e21.csv

Processing file 69 of 490: 0f319bf6-e667-4bac-a974-dfda1142e9ff.csv


Translating OCR Files:  14%|█▍        | 69/490 [17:07<3:32:09, 30.24s/file]

 Saved translated file: 0f319bf6-e667-4bac-a974-dfda1142e9ff.csv

Processing file 70 of 490: 12756724-394c-4576-b373-7c53f1abbd94_0.csv


Translating OCR Files:  14%|█▍        | 70/490 [17:13<2:41:02, 23.01s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_0.csv

Processing file 71 of 490: 12756724-394c-4576-b373-7c53f1abbd94_1.csv


Translating OCR Files:  14%|█▍        | 71/490 [18:02<3:34:42, 30.75s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_1.csv

Processing file 72 of 490: 12756724-394c-4576-b373-7c53f1abbd94_10.csv


Translating OCR Files:  15%|█▍        | 72/490 [18:47<4:03:43, 34.98s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_10.csv

Processing file 73 of 490: 12756724-394c-4576-b373-7c53f1abbd94_11.csv


Translating OCR Files:  15%|█▍        | 73/490 [19:30<4:19:08, 37.29s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_11.csv

Processing file 74 of 490: 12756724-394c-4576-b373-7c53f1abbd94_12.csv


Translating OCR Files:  15%|█▌        | 74/490 [20:04<4:11:11, 36.23s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_12.csv

Processing file 75 of 490: 12756724-394c-4576-b373-7c53f1abbd94_13.csv


Translating OCR Files:  15%|█▌        | 75/490 [21:21<5:35:24, 48.49s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_13.csv

Processing file 76 of 490: 12756724-394c-4576-b373-7c53f1abbd94_14.csv


Translating OCR Files:  16%|█▌        | 76/490 [22:09<5:34:35, 48.49s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_14.csv

Processing file 77 of 490: 12756724-394c-4576-b373-7c53f1abbd94_15.csv


Translating OCR Files:  16%|█▌        | 77/490 [22:35<4:47:47, 41.81s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_15.csv

Processing file 78 of 490: 12756724-394c-4576-b373-7c53f1abbd94_16.csv


Translating OCR Files:  16%|█▌        | 78/490 [22:58<4:07:19, 36.02s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_16.csv

Processing file 79 of 490: 12756724-394c-4576-b373-7c53f1abbd94_17.csv


Translating OCR Files:  16%|█▌        | 79/490 [23:27<3:52:33, 33.95s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_17.csv

Processing file 80 of 490: 12756724-394c-4576-b373-7c53f1abbd94_18.csv


Translating OCR Files:  16%|█▋        | 80/490 [24:01<3:52:35, 34.04s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_18.csv

Processing file 81 of 490: 12756724-394c-4576-b373-7c53f1abbd94_19.csv


Translating OCR Files:  17%|█▋        | 81/490 [24:52<4:26:45, 39.13s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_19.csv

Processing file 82 of 490: 12756724-394c-4576-b373-7c53f1abbd94_2.csv


Translating OCR Files:  17%|█▋        | 82/490 [25:38<4:39:52, 41.16s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_2.csv

Processing file 83 of 490: 12756724-394c-4576-b373-7c53f1abbd94_20.csv


Translating OCR Files:  17%|█▋        | 83/490 [26:41<5:22:42, 47.57s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_20.csv

Processing file 84 of 490: 12756724-394c-4576-b373-7c53f1abbd94_21.csv


Translating OCR Files:  17%|█▋        | 84/490 [26:51<4:05:59, 36.35s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_21.csv

Processing file 85 of 490: 12756724-394c-4576-b373-7c53f1abbd94_22.csv


Translating OCR Files:  17%|█▋        | 85/490 [27:00<3:10:06, 28.16s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_22.csv

Processing file 86 of 490: 12756724-394c-4576-b373-7c53f1abbd94_23.csv


Translating OCR Files:  18%|█▊        | 86/490 [27:47<3:48:52, 33.99s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_23.csv

Processing file 87 of 490: 12756724-394c-4576-b373-7c53f1abbd94_24.csv


Translating OCR Files:  18%|█▊        | 87/490 [28:15<3:34:32, 31.94s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_24.csv

Processing file 88 of 490: 12756724-394c-4576-b373-7c53f1abbd94_25.csv


Translating OCR Files:  18%|█▊        | 88/490 [28:50<3:41:24, 33.04s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_25.csv

Processing file 89 of 490: 12756724-394c-4576-b373-7c53f1abbd94_26.csv


Translating OCR Files:  18%|█▊        | 89/490 [30:02<4:57:56, 44.58s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_26.csv

Processing file 90 of 490: 12756724-394c-4576-b373-7c53f1abbd94_27.csv


Translating OCR Files:  18%|█▊        | 90/490 [31:14<5:53:10, 52.98s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_27.csv

Processing file 91 of 490: 12756724-394c-4576-b373-7c53f1abbd94_28.csv


Translating OCR Files:  19%|█▊        | 91/490 [31:50<5:17:30, 47.74s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_28.csv

Processing file 92 of 490: 12756724-394c-4576-b373-7c53f1abbd94_29.csv


Translating OCR Files:  19%|█▉        | 92/490 [33:13<6:26:48, 58.31s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_29.csv

Processing file 93 of 490: 12756724-394c-4576-b373-7c53f1abbd94_3.csv


Translating OCR Files:  19%|█▉        | 93/490 [34:11<6:24:35, 58.12s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_3.csv

Processing file 94 of 490: 12756724-394c-4576-b373-7c53f1abbd94_30.csv


Translating OCR Files:  19%|█▉        | 94/490 [35:22<6:50:23, 62.18s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_30.csv

Processing file 95 of 490: 12756724-394c-4576-b373-7c53f1abbd94_31.csv


Translating OCR Files:  19%|█▉        | 95/490 [36:06<6:13:50, 56.79s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_31.csv

Processing file 96 of 490: 12756724-394c-4576-b373-7c53f1abbd94_32.csv


Translating OCR Files:  20%|█▉        | 96/490 [36:10<4:28:50, 40.94s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_32.csv

Processing file 97 of 490: 12756724-394c-4576-b373-7c53f1abbd94_33.csv


Translating OCR Files:  20%|█▉        | 97/490 [36:18<3:22:19, 30.89s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_33.csv

Processing file 98 of 490: 12756724-394c-4576-b373-7c53f1abbd94_34.csv


Translating OCR Files:  20%|██        | 98/490 [37:01<3:46:01, 34.60s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_34.csv

Processing file 99 of 490: 12756724-394c-4576-b373-7c53f1abbd94_35.csv


Translating OCR Files:  20%|██        | 99/490 [37:14<3:02:58, 28.08s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_35.csv

Processing file 100 of 490: 12756724-394c-4576-b373-7c53f1abbd94_36.csv


Translating OCR Files:  20%|██        | 100/490 [37:21<2:22:14, 21.88s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_36.csv

Processing file 101 of 490: 12756724-394c-4576-b373-7c53f1abbd94_37.csv


Translating OCR Files:  21%|██        | 101/490 [38:38<4:08:59, 38.40s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_37.csv

Processing file 102 of 490: 12756724-394c-4576-b373-7c53f1abbd94_38.csv


Translating OCR Files:  21%|██        | 102/490 [39:57<5:26:11, 50.44s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_38.csv

Processing file 103 of 490: 12756724-394c-4576-b373-7c53f1abbd94_39.csv


Translating OCR Files:  21%|██        | 103/490 [40:53<5:36:41, 52.20s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_39.csv

Processing file 104 of 490: 12756724-394c-4576-b373-7c53f1abbd94_4.csv


Translating OCR Files:  21%|██        | 104/490 [41:45<5:34:35, 52.01s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_4.csv

Processing file 105 of 490: 12756724-394c-4576-b373-7c53f1abbd94_40.csv


Translating OCR Files:  21%|██▏       | 105/490 [42:31<5:22:50, 50.31s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_40.csv

Processing file 106 of 490: 12756724-394c-4576-b373-7c53f1abbd94_41.csv


Translating OCR Files:  22%|██▏       | 106/490 [42:40<4:02:59, 37.97s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_41.csv

Processing file 107 of 490: 12756724-394c-4576-b373-7c53f1abbd94_42.csv


Translating OCR Files:  22%|██▏       | 107/490 [43:19<4:04:21, 38.28s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_42.csv

Processing file 108 of 490: 12756724-394c-4576-b373-7c53f1abbd94_43.csv


Translating OCR Files:  22%|██▏       | 108/490 [44:08<4:23:43, 41.42s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_43.csv

Processing file 109 of 490: 12756724-394c-4576-b373-7c53f1abbd94_44.csv


Translating OCR Files:  22%|██▏       | 109/490 [44:45<4:14:50, 40.13s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_44.csv

Processing file 110 of 490: 12756724-394c-4576-b373-7c53f1abbd94_45.csv


Translating OCR Files:  22%|██▏       | 110/490 [45:29<4:20:29, 41.13s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_45.csv

Processing file 111 of 490: 12756724-394c-4576-b373-7c53f1abbd94_46.csv


Translating OCR Files:  23%|██▎       | 111/490 [45:32<3:08:33, 29.85s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_46.csv

Processing file 112 of 490: 12756724-394c-4576-b373-7c53f1abbd94_47.csv


Translating OCR Files:  23%|██▎       | 112/490 [46:17<3:36:46, 34.41s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_47.csv

Processing file 113 of 490: 12756724-394c-4576-b373-7c53f1abbd94_48.csv


Translating OCR Files:  23%|██▎       | 113/490 [47:07<4:05:09, 39.02s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_48.csv

Processing file 114 of 490: 12756724-394c-4576-b373-7c53f1abbd94_49.csv


Translating OCR Files:  23%|██▎       | 114/490 [47:42<3:57:29, 37.90s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_49.csv

Processing file 115 of 490: 12756724-394c-4576-b373-7c53f1abbd94_5.csv


Translating OCR Files:  23%|██▎       | 115/490 [47:51<3:02:44, 29.24s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_5.csv

Processing file 116 of 490: 12756724-394c-4576-b373-7c53f1abbd94_6.csv


Translating OCR Files:  24%|██▎       | 116/490 [48:47<3:52:36, 37.32s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_6.csv

Processing file 117 of 490: 12756724-394c-4576-b373-7c53f1abbd94_7.csv


Translating OCR Files:  24%|██▍       | 117/490 [50:10<5:15:39, 50.78s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_7.csv

Processing file 118 of 490: 12756724-394c-4576-b373-7c53f1abbd94_8.csv


Translating OCR Files:  24%|██▍       | 118/490 [51:15<5:41:14, 55.04s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_8.csv

Processing file 119 of 490: 12756724-394c-4576-b373-7c53f1abbd94_9.csv


Translating OCR Files:  24%|██▍       | 119/490 [52:07<5:34:42, 54.13s/file]

 Saved translated file: 12756724-394c-4576-b373-7c53f1abbd94_9.csv

Processing file 120 of 490: 129ac70f-8942-4ca7-b1f2-ddeaa3d984b5.csv


Translating OCR Files:  24%|██▍       | 120/490 [53:10<5:51:20, 56.97s/file]

 Saved translated file: 129ac70f-8942-4ca7-b1f2-ddeaa3d984b5.csv

Processing file 121 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_0.csv


Translating OCR Files:  25%|██▍       | 121/490 [53:17<4:17:30, 41.87s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_0.csv

Processing file 122 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_1.csv


Translating OCR Files:  25%|██▍       | 122/490 [54:03<4:25:24, 43.27s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_1.csv

Processing file 123 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_10.csv


Translating OCR Files:  25%|██▌       | 123/490 [54:28<3:50:36, 37.70s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_10.csv

Processing file 124 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_2.csv


Translating OCR Files:  25%|██▌       | 124/490 [55:44<4:59:45, 49.14s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_2.csv

Processing file 125 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_3.csv


Translating OCR Files:  26%|██▌       | 125/490 [56:46<5:21:56, 52.92s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_3.csv

Processing file 126 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_4.csv


Translating OCR Files:  26%|██▌       | 126/490 [57:28<5:01:33, 49.71s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_4.csv

Processing file 127 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_5.csv


Translating OCR Files:  26%|██▌       | 127/490 [58:15<4:56:41, 49.04s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_5.csv

Processing file 128 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_6.csv


Translating OCR Files:  26%|██▌       | 128/490 [58:44<4:19:46, 43.06s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_6.csv

Processing file 129 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_7.csv


Translating OCR Files:  26%|██▋       | 129/490 [59:16<3:58:29, 39.64s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_7.csv

Processing file 130 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_8.csv


Translating OCR Files:  27%|██▋       | 130/490 [1:00:03<4:10:46, 41.80s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_8.csv

Processing file 131 of 490: 178e3898-903d-47cf-bfbe-061e7dc18895_9.csv


Translating OCR Files:  27%|██▋       | 131/490 [1:00:44<4:08:42, 41.57s/file]

 Saved translated file: 178e3898-903d-47cf-bfbe-061e7dc18895_9.csv

Processing file 132 of 490: 1a20ded1-50fc-4153-9a95-e158eeb7199e.csv


Translating OCR Files:  27%|██▋       | 132/490 [1:00:51<3:06:30, 31.26s/file]

 Saved translated file: 1a20ded1-50fc-4153-9a95-e158eeb7199e.csv

Processing file 133 of 490: 1afcf93d-50f1-4f1e-896d-87b0da7519f7.csv


Translating OCR Files:  27%|██▋       | 133/490 [1:00:59<2:24:52, 24.35s/file]

 Saved translated file: 1afcf93d-50f1-4f1e-896d-87b0da7519f7.csv

Processing file 134 of 490: 1b0dc208-d2bb-43ea-b744-534f3b759394.csv


Translating OCR Files:  27%|██▋       | 134/490 [1:02:04<3:36:07, 36.43s/file]

 Saved translated file: 1b0dc208-d2bb-43ea-b744-534f3b759394.csv

Processing file 135 of 490: 1cc570d8-cddb-401e-8c37-ef10c0e4841f.csv


Translating OCR Files:  28%|██▊       | 135/490 [1:02:20<3:00:09, 30.45s/file]

 Saved translated file: 1cc570d8-cddb-401e-8c37-ef10c0e4841f.csv

Processing file 136 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_1_0.csv


Translating OCR Files:  28%|██▊       | 136/490 [1:02:47<2:52:07, 29.17s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_1_0.csv

Processing file 137 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_2_0.csv


Translating OCR Files:  28%|██▊       | 137/490 [1:02:58<2:20:07, 23.82s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_2_0.csv

Processing file 138 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_2_1.csv


Translating OCR Files:  28%|██▊       | 138/490 [1:03:00<1:40:51, 17.19s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_2_1.csv

Processing file 139 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_3_0.csv


Translating OCR Files:  28%|██▊       | 139/490 [1:04:03<3:02:10, 31.14s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_3_0.csv

Processing file 140 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_4_0.csv


Translating OCR Files:  29%|██▊       | 140/490 [1:04:08<2:14:56, 23.13s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_4_0.csv

Processing file 141 of 490: 2db27de1-d5c5-4f89-8572-da697a6329e4_5_0.csv


Translating OCR Files:  29%|██▉       | 141/490 [1:04:09<1:35:31, 16.42s/file]

 Saved translated file: 2db27de1-d5c5-4f89-8572-da697a6329e4_5_0.csv

Processing file 142 of 490: 300450bf-221e-4eeb-bdda-dc1115c947ea.csv


Translating OCR Files:  29%|██▉       | 142/490 [1:04:20<1:26:15, 14.87s/file]

 Saved translated file: 300450bf-221e-4eeb-bdda-dc1115c947ea.csv

Processing file 143 of 490: 32eb7662-f212-4811-a7c1-1cfeb121cd99.csv


Translating OCR Files:  29%|██▉       | 143/490 [1:05:04<2:16:34, 23.62s/file]

 Saved translated file: 32eb7662-f212-4811-a7c1-1cfeb121cd99.csv

Processing file 144 of 490: 330f554f-a3e6-4bd3-8b1b-d5949e1f30e8.csv


Translating OCR Files:  29%|██▉       | 144/490 [1:05:06<1:39:46, 17.30s/file]

 Saved translated file: 330f554f-a3e6-4bd3-8b1b-d5949e1f30e8.csv

Processing file 145 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_1_0.csv


Translating OCR Files:  30%|██▉       | 145/490 [1:05:35<1:58:54, 20.68s/file]

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_1_0.csv

Processing file 146 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_2_0.csv


Translating OCR Files:  30%|██▉       | 146/490 [1:05:52<1:52:26, 19.61s/file]

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_2_0.csv

Processing file 147 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_2_1.csv


Translating OCR Files:  30%|███       | 147/490 [1:05:55<1:23:29, 14.61s/file]

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_2_1.csv

Processing file 148 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_3_0.csv


Translating OCR Files:  30%|███       | 148/490 [1:05:56<1:00:39, 10.64s/file]

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_3_0.csv

Processing file 149 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_4_0.csv


Translating OCR Files:  30%|███       | 149/490 [1:05:58<44:57,  7.91s/file]  

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_4_0.csv

Processing file 150 of 490: 3348953d-66e9-4cac-8675-65bb5f2ef929_5_0.csv


Translating OCR Files:  31%|███       | 150/490 [1:05:59<33:10,  5.85s/file]

 Saved translated file: 3348953d-66e9-4cac-8675-65bb5f2ef929_5_0.csv

Processing file 151 of 490: 3556e54c-d418-447d-bb2a-43ac0408cc7a.csv


Translating OCR Files:  31%|███       | 151/490 [1:06:04<31:02,  5.50s/file]

 Saved translated file: 3556e54c-d418-447d-bb2a-43ac0408cc7a.csv

Processing file 152 of 490: 383d824e-7588-4a92-84b7-fd953dd91cba.csv


Translating OCR Files:  31%|███       | 152/490 [1:06:16<42:01,  7.46s/file]

 Saved translated file: 383d824e-7588-4a92-84b7-fd953dd91cba.csv

Processing file 153 of 490: 3f451a52-d210-48d9-b56e-d28b9570bdc4_0.csv


Translating OCR Files:  31%|███       | 153/490 [1:07:08<1:56:45, 20.79s/file]

 Saved translated file: 3f451a52-d210-48d9-b56e-d28b9570bdc4_0.csv

Processing file 154 of 490: 48fd4c79-41ca-459e-a5a5-a3738e7a4af3_0.csv


Translating OCR Files:  31%|███▏      | 154/490 [1:08:11<3:08:35, 33.68s/file]

 Saved translated file: 48fd4c79-41ca-459e-a5a5-a3738e7a4af3_0.csv

Processing file 155 of 490: 493542fc-495f-4756-8451-c4ed084d8bf7.csv


Translating OCR Files:  32%|███▏      | 155/490 [1:08:28<2:39:59, 28.66s/file]

 Saved translated file: 493542fc-495f-4756-8451-c4ed084d8bf7.csv

Processing file 156 of 490: 4ae9bf34-c16c-4684-aa92-fec65a151275.csv


Translating OCR Files:  32%|███▏      | 156/490 [1:09:02<2:48:27, 30.26s/file]

 Saved translated file: 4ae9bf34-c16c-4684-aa92-fec65a151275.csv

Processing file 157 of 490: 4c74b697-0681-4223-9982-5ffaf4e98ed0.csv


Translating OCR Files:  32%|███▏      | 157/490 [1:09:28<2:40:43, 28.96s/file]

 Saved translated file: 4c74b697-0681-4223-9982-5ffaf4e98ed0.csv

Processing file 158 of 490: 4ea07c23-a1a6-411b-bcfb-552d095b66c9.csv


Translating OCR Files:  32%|███▏      | 158/490 [1:09:42<2:14:58, 24.39s/file]

 Saved translated file: 4ea07c23-a1a6-411b-bcfb-552d095b66c9.csv

Processing file 159 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_0.csv


Translating OCR Files:  32%|███▏      | 159/490 [1:09:48<1:44:39, 18.97s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_0.csv

Processing file 160 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_1.csv


Translating OCR Files:  33%|███▎      | 160/490 [1:10:18<2:02:01, 22.19s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_1.csv

Processing file 161 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_10.csv


Translating OCR Files:  33%|███▎      | 161/490 [1:11:06<2:43:38, 29.84s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_10.csv

Processing file 162 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_11.csv


Translating OCR Files:  33%|███▎      | 162/490 [1:12:36<4:22:00, 47.93s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_11.csv

Processing file 163 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_2.csv


Translating OCR Files:  33%|███▎      | 163/490 [1:14:06<5:30:12, 60.59s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_2.csv

Processing file 164 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_3.csv


Translating OCR Files:  33%|███▎      | 164/490 [1:15:22<5:54:44, 65.29s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_3.csv

Processing file 165 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_4.csv


Translating OCR Files:  34%|███▎      | 165/490 [1:16:18<5:37:38, 62.33s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_4.csv

Processing file 166 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_5.csv


Translating OCR Files:  34%|███▍      | 166/490 [1:17:01<5:06:20, 56.73s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_5.csv

Processing file 167 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_6.csv


Translating OCR Files:  34%|███▍      | 167/490 [1:17:33<4:24:31, 49.14s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_6.csv

Processing file 168 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_7.csv


Translating OCR Files:  34%|███▍      | 168/490 [1:18:13<4:09:10, 46.43s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_7.csv

Processing file 169 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_8.csv


Translating OCR Files:  34%|███▍      | 169/490 [1:19:04<4:16:29, 47.94s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_8.csv

Processing file 170 of 490: 5387a301-0af8-4e24-a197-20189f87b9ef_9.csv


Translating OCR Files:  35%|███▍      | 170/490 [1:19:15<3:16:11, 36.79s/file]

 Saved translated file: 5387a301-0af8-4e24-a197-20189f87b9ef_9.csv

Processing file 171 of 490: 547aba02-6757-49c1-acb5-6df217cebfc7_0.csv


Translating OCR Files:  35%|███▍      | 171/490 [1:19:52<3:15:11, 36.71s/file]

 Saved translated file: 547aba02-6757-49c1-acb5-6df217cebfc7_0.csv

Processing file 172 of 490: 547aba02-6757-49c1-acb5-6df217cebfc7_1.csv


Translating OCR Files:  35%|███▌      | 172/490 [1:20:22<3:03:52, 34.69s/file]

 Saved translated file: 547aba02-6757-49c1-acb5-6df217cebfc7_1.csv

Processing file 173 of 490: 547aba02-6757-49c1-acb5-6df217cebfc7_2.csv


Translating OCR Files:  35%|███▌      | 173/490 [1:20:40<2:37:20, 29.78s/file]

 Saved translated file: 547aba02-6757-49c1-acb5-6df217cebfc7_2.csv

Processing file 174 of 490: 547aba02-6757-49c1-acb5-6df217cebfc7_3.csv


Translating OCR Files:  36%|███▌      | 174/490 [1:21:10<2:36:44, 29.76s/file]

 Saved translated file: 547aba02-6757-49c1-acb5-6df217cebfc7_3.csv

Processing file 175 of 490: 54990932-71af-48dd-9a7a-2617b1407c54_0.csv


Translating OCR Files:  36%|███▌      | 175/490 [1:22:14<3:31:10, 40.22s/file]

 Saved translated file: 54990932-71af-48dd-9a7a-2617b1407c54_0.csv

Processing file 176 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_0.csv


Translating OCR Files:  36%|███▌      | 176/490 [1:22:33<2:56:35, 33.74s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_0.csv

Processing file 177 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_1.csv


Translating OCR Files:  36%|███▌      | 177/490 [1:22:50<2:29:21, 28.63s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_1.csv

Processing file 178 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_10.csv


Translating OCR Files:  36%|███▋      | 178/490 [1:23:14<2:21:57, 27.30s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_10.csv

Processing file 179 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_11.csv


Translating OCR Files:  37%|███▋      | 179/490 [1:23:31<2:06:22, 24.38s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_11.csv

Processing file 180 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_12.csv


Translating OCR Files:  37%|███▋      | 180/490 [1:24:17<2:38:36, 30.70s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_12.csv

Processing file 181 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_13.csv


Translating OCR Files:  37%|███▋      | 181/490 [1:24:45<2:34:54, 30.08s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_13.csv

Processing file 182 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_14.csv


Translating OCR Files:  37%|███▋      | 182/490 [1:24:50<1:54:35, 22.32s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_14.csv

Processing file 183 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_15.csv


Translating OCR Files:  37%|███▋      | 183/490 [1:25:52<2:55:55, 34.38s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_15.csv

Processing file 184 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_16.csv


Translating OCR Files:  38%|███▊      | 184/490 [1:26:37<3:10:53, 37.43s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_16.csv

Processing file 185 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_17.csv


Translating OCR Files:  38%|███▊      | 185/490 [1:26:49<2:31:42, 29.84s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_17.csv

Processing file 186 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_18.csv


Translating OCR Files:  38%|███▊      | 186/490 [1:26:56<1:57:27, 23.18s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_18.csv

Processing file 187 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_19.csv


Translating OCR Files:  38%|███▊      | 187/490 [1:27:25<2:05:01, 24.76s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_19.csv

Processing file 188 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_2.csv


Translating OCR Files:  38%|███▊      | 188/490 [1:27:26<1:29:32, 17.79s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_2.csv

Processing file 189 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_3.csv


Translating OCR Files:  39%|███▊      | 189/490 [1:27:36<1:17:08, 15.38s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_3.csv

Processing file 190 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_4.csv


Translating OCR Files:  39%|███▉      | 190/490 [1:27:38<56:00, 11.20s/file]  

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_4.csv

Processing file 191 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_5.csv


Translating OCR Files:  39%|███▉      | 191/490 [1:27:45<50:29, 10.13s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_5.csv

Processing file 192 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_6.csv


Translating OCR Files:  39%|███▉      | 192/490 [1:28:07<1:07:47, 13.65s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_6.csv

Processing file 193 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_7.csv


Translating OCR Files:  39%|███▉      | 193/490 [1:28:31<1:22:10, 16.60s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_7.csv

Processing file 194 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_8.csv


Translating OCR Files:  40%|███▉      | 194/490 [1:29:02<1:43:10, 20.91s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_8.csv

Processing file 195 of 490: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_9.csv


Translating OCR Files:  40%|███▉      | 195/490 [1:29:24<1:45:39, 21.49s/file]

 Saved translated file: 585875ff-f8c5-4a02-acd7-fef37dc9ff11_9.csv

Processing file 196 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_0.csv


Translating OCR Files:  40%|████      | 196/490 [1:29:28<1:19:34, 16.24s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_0.csv

Processing file 197 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_1.csv


Translating OCR Files:  40%|████      | 197/490 [1:29:35<1:05:39, 13.45s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_1.csv

Processing file 198 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_10.csv


Translating OCR Files:  40%|████      | 198/490 [1:29:52<1:09:31, 14.29s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_10.csv

Processing file 199 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_11.csv


Translating OCR Files:  41%|████      | 199/490 [1:30:10<1:15:02, 15.47s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_11.csv

Processing file 200 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_12.csv


Translating OCR Files:  41%|████      | 200/490 [1:30:29<1:20:06, 16.57s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_12.csv

Processing file 201 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_13.csv


Translating OCR Files:  41%|████      | 201/490 [1:30:59<1:39:20, 20.62s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_13.csv

Processing file 202 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_14.csv


Translating OCR Files:  41%|████      | 202/490 [1:32:05<2:44:14, 34.22s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_14.csv

Processing file 203 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_15.csv


Translating OCR Files:  41%|████▏     | 203/490 [1:32:46<2:53:31, 36.28s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_15.csv

Processing file 204 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_16.csv


Translating OCR Files:  42%|████▏     | 204/490 [1:32:50<2:06:04, 26.45s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_16.csv

Processing file 205 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_17.csv


Translating OCR Files:  42%|████▏     | 205/490 [1:33:54<2:59:03, 37.70s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_17.csv

Processing file 206 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_18.csv


Translating OCR Files:  42%|████▏     | 206/490 [1:34:36<3:04:32, 38.99s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_18.csv

Processing file 207 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_19.csv


Translating OCR Files:  42%|████▏     | 207/490 [1:34:51<2:31:13, 32.06s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_19.csv

Processing file 208 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_2.csv


Translating OCR Files:  42%|████▏     | 208/490 [1:35:12<2:14:48, 28.68s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_2.csv

Processing file 209 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_20.csv


Translating OCR Files:  43%|████▎     | 209/490 [1:36:14<3:01:13, 38.70s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_20.csv

Processing file 210 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_21.csv


Translating OCR Files:  43%|████▎     | 210/490 [1:37:43<4:10:04, 53.59s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_21.csv

Processing file 211 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_22.csv


Translating OCR Files:  43%|████▎     | 211/490 [1:38:29<3:59:09, 51.43s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_22.csv

Processing file 212 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_23.csv


Translating OCR Files:  43%|████▎     | 212/490 [1:39:23<4:01:47, 52.19s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_23.csv

Processing file 213 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_24.csv


Translating OCR Files:  43%|████▎     | 213/490 [1:39:43<3:16:18, 42.52s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_24.csv

Processing file 214 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_25.csv


Translating OCR Files:  44%|████▎     | 214/490 [1:40:22<3:10:58, 41.52s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_25.csv

Processing file 215 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_26.csv


Translating OCR Files:  44%|████▍     | 215/490 [1:40:31<2:25:24, 31.73s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_26.csv

Processing file 216 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_27.csv


Translating OCR Files:  44%|████▍     | 216/490 [1:40:38<1:51:39, 24.45s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_27.csv

Processing file 217 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_28.csv


Translating OCR Files:  44%|████▍     | 217/490 [1:41:39<2:40:05, 35.19s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_28.csv

Processing file 218 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_29.csv


Translating OCR Files:  44%|████▍     | 218/490 [1:42:55<3:34:46, 47.38s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_29.csv

Processing file 219 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_3.csv


Translating OCR Files:  45%|████▍     | 219/490 [1:44:15<4:18:28, 57.23s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_3.csv

Processing file 220 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_30.csv


Translating OCR Files:  45%|████▍     | 220/490 [1:45:00<4:01:26, 53.65s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_30.csv

Processing file 221 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_31.csv


Translating OCR Files:  45%|████▌     | 221/490 [1:45:57<4:04:20, 54.50s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_31.csv

Processing file 222 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_32.csv


Translating OCR Files:  45%|████▌     | 222/490 [1:46:15<3:14:59, 43.66s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_32.csv

Processing file 223 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_33.csv


Translating OCR Files:  46%|████▌     | 223/490 [1:47:24<3:47:36, 51.15s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_33.csv

Processing file 224 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_34.csv


Translating OCR Files:  46%|████▌     | 224/490 [1:48:39<4:19:22, 58.50s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_34.csv

Processing file 225 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_35.csv


Translating OCR Files:  46%|████▌     | 225/490 [1:49:57<4:43:18, 64.15s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_35.csv

Processing file 226 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_36.csv


Translating OCR Files:  46%|████▌     | 226/490 [1:51:27<5:16:33, 71.94s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_36.csv

Processing file 227 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_37.csv


Translating OCR Files:  46%|████▋     | 227/490 [1:52:16<4:45:14, 65.08s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_37.csv

Processing file 228 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_38.csv


Translating OCR Files:  47%|████▋     | 228/490 [1:53:14<4:35:54, 63.18s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_38.csv

Processing file 229 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_39.csv


Translating OCR Files:  47%|████▋     | 229/490 [1:54:19<4:36:03, 63.46s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_39.csv

Processing file 230 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_4.csv


Translating OCR Files:  47%|████▋     | 230/490 [1:55:36<4:53:43, 67.78s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_4.csv

Processing file 231 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_40.csv


Translating OCR Files:  47%|████▋     | 231/490 [1:56:27<4:30:46, 62.73s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_40.csv

Processing file 232 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_41.csv


Translating OCR Files:  47%|████▋     | 232/490 [1:57:26<4:24:50, 61.59s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_41.csv

Processing file 233 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_42.csv


Translating OCR Files:  48%|████▊     | 233/490 [1:58:11<4:02:13, 56.55s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_42.csv

Processing file 234 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_43.csv


Translating OCR Files:  48%|████▊     | 234/490 [1:59:26<4:25:04, 62.13s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_43.csv

Processing file 235 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_5.csv


Translating OCR Files:  48%|████▊     | 235/490 [2:00:18<4:10:20, 58.90s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_5.csv

Processing file 236 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_6.csv


Translating OCR Files:  48%|████▊     | 236/490 [2:01:22<4:15:46, 60.42s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_6.csv

Processing file 237 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_7.csv


Translating OCR Files:  48%|████▊     | 237/490 [2:02:00<3:46:20, 53.68s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_7.csv

Processing file 238 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_8.csv


Translating OCR Files:  49%|████▊     | 238/490 [2:02:24<3:08:43, 44.94s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_8.csv

Processing file 239 of 490: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_9.csv


Translating OCR Files:  49%|████▉     | 239/490 [2:02:46<2:39:06, 38.03s/file]

 Saved translated file: 5a6b122c-39c1-4581-8c1f-2d6f36a9f8a0_9.csv

Processing file 240 of 490: 5a84cde3-7175-4044-8c88-d4c883a8fd38.csv


Translating OCR Files:  49%|████▉     | 240/490 [2:03:01<2:09:46, 31.15s/file]

 Saved translated file: 5a84cde3-7175-4044-8c88-d4c883a8fd38.csv

Processing file 241 of 490: 5ae9bdca-fdf9-4948-8c11-a9e400b331aa.csv


Translating OCR Files:  49%|████▉     | 241/490 [2:03:08<1:39:31, 23.98s/file]

 Saved translated file: 5ae9bdca-fdf9-4948-8c11-a9e400b331aa.csv

Processing file 242 of 490: 5d4e3e02-1dfc-469e-8af9-8dbe2b9f1564.csv


Translating OCR Files:  49%|████▉     | 242/490 [2:04:22<2:40:14, 38.77s/file]

 Saved translated file: 5d4e3e02-1dfc-469e-8af9-8dbe2b9f1564.csv

Processing file 243 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_0.csv


Translating OCR Files:  50%|████▉     | 243/490 [2:04:25<1:56:27, 28.29s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_0.csv

Processing file 244 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_1.csv


Translating OCR Files:  50%|████▉     | 244/490 [2:04:27<1:23:03, 20.26s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_1.csv

Processing file 245 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_10.csv


Translating OCR Files:  50%|█████     | 245/490 [2:05:01<1:39:03, 24.26s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_10.csv

Processing file 246 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_11.csv


Translating OCR Files:  50%|█████     | 246/490 [2:05:19<1:31:20, 22.46s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_11.csv

Processing file 247 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_12.csv


Translating OCR Files:  50%|█████     | 247/490 [2:05:35<1:22:48, 20.45s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_12.csv

Processing file 248 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_2.csv


Translating OCR Files:  51%|█████     | 248/490 [2:06:01<1:29:44, 22.25s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_2.csv

Processing file 249 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_3.csv


Translating OCR Files:  51%|█████     | 249/490 [2:07:09<2:24:07, 35.88s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_3.csv

Processing file 250 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_4.csv


Translating OCR Files:  51%|█████     | 250/490 [2:07:35<2:12:23, 33.10s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_4.csv

Processing file 251 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_5.csv


Translating OCR Files:  51%|█████     | 251/490 [2:07:58<1:59:01, 29.88s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_5.csv

Processing file 252 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_6.csv


Translating OCR Files:  51%|█████▏    | 252/490 [2:08:47<2:21:56, 35.78s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_6.csv

Processing file 253 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_7.csv


Translating OCR Files:  52%|█████▏    | 253/490 [2:09:13<2:09:31, 32.79s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_7.csv

Processing file 254 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_8.csv


Translating OCR Files:  52%|█████▏    | 254/490 [2:09:44<2:06:13, 32.09s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_8.csv

Processing file 255 of 490: 5e5bd90e-60c5-402f-b488-750456a81a13_9.csv


Translating OCR Files:  52%|█████▏    | 255/490 [2:10:27<2:19:00, 35.49s/file]

 Saved translated file: 5e5bd90e-60c5-402f-b488-750456a81a13_9.csv

Processing file 256 of 490: 5ef1d666-e19d-4570-b800-6693a4f680ee.csv


Translating OCR Files:  52%|█████▏    | 256/490 [2:11:00<2:16:02, 34.88s/file]

 Saved translated file: 5ef1d666-e19d-4570-b800-6693a4f680ee.csv

Processing file 257 of 490: 62583414-9e32-4d09-8989-b5fa32a98a81.csv


Translating OCR Files:  52%|█████▏    | 257/490 [2:11:06<1:41:06, 26.04s/file]

 Saved translated file: 62583414-9e32-4d09-8989-b5fa32a98a81.csv

Processing file 258 of 490: 62ff30cf-de5f-4388-82aa-b69b0fd0f07c.csv


Translating OCR Files:  53%|█████▎    | 258/490 [2:11:14<1:20:30, 20.82s/file]

 Saved translated file: 62ff30cf-de5f-4388-82aa-b69b0fd0f07c.csv

Processing file 259 of 490: 645dfc97-3268-4e1d-920d-4138545456fa.csv


Translating OCR Files:  53%|█████▎    | 259/490 [2:11:39<1:24:55, 22.06s/file]

 Saved translated file: 645dfc97-3268-4e1d-920d-4138545456fa.csv

Processing file 260 of 490: 64bba692-d430-440c-9f1e-2575f45770af_0.csv


Translating OCR Files:  53%|█████▎    | 260/490 [2:11:44<1:04:44, 16.89s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_0.csv

Processing file 261 of 490: 64bba692-d430-440c-9f1e-2575f45770af_1.csv


Translating OCR Files:  53%|█████▎    | 261/490 [2:12:02<1:05:38, 17.20s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_1.csv

Processing file 262 of 490: 64bba692-d430-440c-9f1e-2575f45770af_10.csv


Translating OCR Files:  53%|█████▎    | 262/490 [2:12:18<1:04:16, 16.92s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_10.csv

Processing file 263 of 490: 64bba692-d430-440c-9f1e-2575f45770af_11.csv


Translating OCR Files:  54%|█████▎    | 263/490 [2:12:48<1:18:11, 20.67s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_11.csv

Processing file 264 of 490: 64bba692-d430-440c-9f1e-2575f45770af_12.csv


Translating OCR Files:  54%|█████▍    | 264/490 [2:13:13<1:22:58, 22.03s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_12.csv

Processing file 265 of 490: 64bba692-d430-440c-9f1e-2575f45770af_2.csv


Translating OCR Files:  54%|█████▍    | 265/490 [2:13:59<1:49:22, 29.17s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_2.csv

Processing file 266 of 490: 64bba692-d430-440c-9f1e-2575f45770af_3.csv


Translating OCR Files:  54%|█████▍    | 266/490 [2:14:21<1:41:01, 27.06s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_3.csv

Processing file 267 of 490: 64bba692-d430-440c-9f1e-2575f45770af_4.csv


Translating OCR Files:  54%|█████▍    | 267/490 [2:14:35<1:25:39, 23.05s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_4.csv

Processing file 268 of 490: 64bba692-d430-440c-9f1e-2575f45770af_5.csv


Translating OCR Files:  55%|█████▍    | 268/490 [2:14:44<1:10:21, 19.02s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_5.csv

Processing file 269 of 490: 64bba692-d430-440c-9f1e-2575f45770af_6.csv


Translating OCR Files:  55%|█████▍    | 269/490 [2:15:11<1:18:19, 21.26s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_6.csv

Processing file 270 of 490: 64bba692-d430-440c-9f1e-2575f45770af_7.csv


Translating OCR Files:  55%|█████▌    | 270/490 [2:15:36<1:21:53, 22.33s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_7.csv

Processing file 271 of 490: 64bba692-d430-440c-9f1e-2575f45770af_8.csv


Translating OCR Files:  55%|█████▌    | 271/490 [2:15:54<1:17:27, 21.22s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_8.csv

Processing file 272 of 490: 64bba692-d430-440c-9f1e-2575f45770af_9.csv


Translating OCR Files:  56%|█████▌    | 272/490 [2:16:08<1:09:02, 19.00s/file]

 Saved translated file: 64bba692-d430-440c-9f1e-2575f45770af_9.csv

Processing file 273 of 490: 6848748d-2881-4c26-b153-fcd5373d2f1c.csv


Translating OCR Files:  56%|█████▌    | 273/490 [2:16:37<1:20:00, 22.12s/file]

 Saved translated file: 6848748d-2881-4c26-b153-fcd5373d2f1c.csv

Processing file 274 of 490: 6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b.csv


Translating OCR Files:  56%|█████▌    | 274/490 [2:16:40<58:09, 16.15s/file]  

 Saved translated file: 6bcc0131-e4ad-421e-bb1f-d8ebe5eeec7b.csv

Processing file 275 of 490: 6cbb3eeb-17e9-4af6-8da1-36eb6437f7bc.csv


Translating OCR Files:  56%|█████▌    | 275/490 [2:16:59<1:01:02, 17.03s/file]

 Saved translated file: 6cbb3eeb-17e9-4af6-8da1-36eb6437f7bc.csv

Processing file 276 of 490: 6d7fc7b3-c892-4cb5-bd4b-a5713c089d88_0.csv


Translating OCR Files:  56%|█████▋    | 276/490 [2:17:54<1:41:59, 28.60s/file]

 Saved translated file: 6d7fc7b3-c892-4cb5-bd4b-a5713c089d88_0.csv

Processing file 277 of 490: 6e9aced1-df28-4e57-b7c8-641609ff4450.csv


Translating OCR Files:  57%|█████▋    | 277/490 [2:18:29<1:48:18, 30.51s/file]

 Saved translated file: 6e9aced1-df28-4e57-b7c8-641609ff4450.csv

Processing file 278 of 490: 70c63791-2797-4bf0-a778-ea08819aa9de.csv


Translating OCR Files:  57%|█████▋    | 278/490 [2:18:50<1:36:53, 27.42s/file]

 Saved translated file: 70c63791-2797-4bf0-a778-ea08819aa9de.csv

Processing file 279 of 490: 7150f512-e7a2-4f2c-86bc-58b671b25ba9.csv


Translating OCR Files:  57%|█████▋    | 279/490 [2:18:56<1:14:06, 21.07s/file]

 Saved translated file: 7150f512-e7a2-4f2c-86bc-58b671b25ba9.csv

Processing file 280 of 490: 785cc8c9-1225-4f93-b633-349bc5113512.csv


Translating OCR Files:  57%|█████▋    | 280/490 [2:19:03<59:09, 16.90s/file]  

 Saved translated file: 785cc8c9-1225-4f93-b633-349bc5113512.csv

Processing file 281 of 490: 79d9b7f2-cfe4-4615-9b75-8fea33fc0c9d.csv


Translating OCR Files:  57%|█████▋    | 281/490 [2:19:08<46:08, 13.25s/file]

 Saved translated file: 79d9b7f2-cfe4-4615-9b75-8fea33fc0c9d.csv

Processing file 282 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_0.csv


Translating OCR Files:  58%|█████▊    | 282/490 [2:19:12<37:00, 10.67s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_0.csv

Processing file 283 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_1.csv


Translating OCR Files:  58%|█████▊    | 283/490 [2:19:17<31:02,  9.00s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_1.csv

Processing file 284 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_10.csv


Translating OCR Files:  58%|█████▊    | 284/490 [2:19:48<52:37, 15.33s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_10.csv

Processing file 285 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_11.csv


Translating OCR Files:  58%|█████▊    | 285/490 [2:20:32<1:22:27, 24.14s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_11.csv

Processing file 286 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_12.csv


Translating OCR Files:  58%|█████▊    | 286/490 [2:21:46<2:13:08, 39.16s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_12.csv

Processing file 287 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_2.csv


Translating OCR Files:  59%|█████▊    | 287/490 [2:23:12<2:59:20, 53.01s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_2.csv

Processing file 288 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_3.csv


Translating OCR Files:  59%|█████▉    | 288/490 [2:24:12<3:05:28, 55.09s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_3.csv

Processing file 289 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_4.csv


Translating OCR Files:  59%|█████▉    | 289/490 [2:24:29<2:26:28, 43.72s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_4.csv

Processing file 290 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_5.csv


Translating OCR Files:  59%|█████▉    | 290/490 [2:24:56<2:08:46, 38.63s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_5.csv

Processing file 291 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_6.csv


Translating OCR Files:  59%|█████▉    | 291/490 [2:25:37<2:10:51, 39.46s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_6.csv

Processing file 292 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_7.csv


Translating OCR Files:  60%|█████▉    | 292/490 [2:25:47<1:41:22, 30.72s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_7.csv

Processing file 293 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_8.csv


Translating OCR Files:  60%|█████▉    | 293/490 [2:25:58<1:20:56, 24.65s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_8.csv

Processing file 294 of 490: 912204cb-8ab7-48b8-9abf-d803f3804d08_9.csv


Translating OCR Files:  60%|██████    | 294/490 [2:26:12<1:10:35, 21.61s/file]

 Saved translated file: 912204cb-8ab7-48b8-9abf-d803f3804d08_9.csv

Processing file 295 of 490: 94b16e53-f035-4aa9-a76e-80bc6e936d10.csv


Translating OCR Files:  60%|██████    | 295/490 [2:26:16<53:07, 16.34s/file]  

 Saved translated file: 94b16e53-f035-4aa9-a76e-80bc6e936d10.csv

Processing file 296 of 490: 96af60b3-299c-4e26-bca3-d9eb3e113b94.csv


Translating OCR Files:  60%|██████    | 296/490 [2:26:42<1:01:19, 18.97s/file]

 Saved translated file: 96af60b3-299c-4e26-bca3-d9eb3e113b94.csv

Processing file 297 of 490: 987ba39a-cc1c-4367-8d6d-f5a49a940198.csv


Translating OCR Files:  61%|██████    | 297/490 [2:26:47<48:22, 15.04s/file]  

 Saved translated file: 987ba39a-cc1c-4367-8d6d-f5a49a940198.csv

Processing file 298 of 490: 9a8077f5-ac41-491f-b192-6b4609324bda.csv


Translating OCR Files:  61%|██████    | 298/490 [2:26:50<35:59, 11.25s/file]

 Saved translated file: 9a8077f5-ac41-491f-b192-6b4609324bda.csv

Processing file 299 of 490: 9c8c9989-2293-4e68-9ffe-6f7a5f14562f.csv


Translating OCR Files:  61%|██████    | 299/490 [2:26:53<28:19,  8.90s/file]

 Saved translated file: 9c8c9989-2293-4e68-9ffe-6f7a5f14562f.csv

Processing file 300 of 490: 9d7bc879-3250-4013-ac04-5ff9bd6dff40_0.csv


Translating OCR Files:  61%|██████    | 300/490 [2:27:48<1:11:49, 22.68s/file]

 Saved translated file: 9d7bc879-3250-4013-ac04-5ff9bd6dff40_0.csv

Processing file 301 of 490: 9d7bc879-3250-4013-ac04-5ff9bd6dff40_1.csv


Translating OCR Files:  61%|██████▏   | 301/490 [2:28:02<1:03:23, 20.13s/file]

 Saved translated file: 9d7bc879-3250-4013-ac04-5ff9bd6dff40_1.csv

Processing file 302 of 490: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_0.csv


Translating OCR Files:  62%|██████▏   | 302/490 [2:28:22<1:02:58, 20.10s/file]

 Saved translated file: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_0.csv

Processing file 303 of 490: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_1.csv


Translating OCR Files:  62%|██████▏   | 303/490 [2:29:19<1:36:42, 31.03s/file]

 Saved translated file: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_1.csv

Processing file 304 of 490: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_2.csv


Translating OCR Files:  62%|██████▏   | 304/490 [2:29:28<1:15:45, 24.44s/file]

 Saved translated file: 9fd06037-11f1-4ad5-9a7d-cbfb3fa4193b_2.csv

Processing file 305 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_0.csv


Translating OCR Files:  62%|██████▏   | 305/490 [2:29:32<57:01, 18.49s/file]  

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_0.csv

Processing file 306 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_1.csv


Translating OCR Files:  62%|██████▏   | 306/490 [2:29:45<50:47, 16.56s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_1.csv

Processing file 307 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_10.csv


Translating OCR Files:  63%|██████▎   | 307/490 [2:29:59<48:24, 15.87s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_10.csv

Processing file 308 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_11.csv


Translating OCR Files:  63%|██████▎   | 308/490 [2:30:23<55:49, 18.40s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_11.csv

Processing file 309 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_12.csv


Translating OCR Files:  63%|██████▎   | 309/490 [2:30:39<53:19, 17.68s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_12.csv

Processing file 310 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_13.csv


Translating OCR Files:  63%|██████▎   | 310/490 [2:30:57<52:55, 17.64s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_13.csv

Processing file 311 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_14.csv


Translating OCR Files:  63%|██████▎   | 311/490 [2:31:25<1:02:01, 20.79s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_14.csv

Processing file 312 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_15.csv


Translating OCR Files:  64%|██████▎   | 312/490 [2:31:49<1:04:49, 21.85s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_15.csv

Processing file 313 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_16.csv


Translating OCR Files:  64%|██████▍   | 313/490 [2:32:37<1:27:42, 29.73s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_16.csv

Processing file 314 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_17.csv


Translating OCR Files:  64%|██████▍   | 314/490 [2:34:07<2:20:22, 47.85s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_17.csv

Processing file 315 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_18.csv


Translating OCR Files:  64%|██████▍   | 315/490 [2:34:30<1:57:04, 40.14s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_18.csv

Processing file 316 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_2.csv


Translating OCR Files:  64%|██████▍   | 316/490 [2:34:45<1:34:41, 32.65s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_2.csv

Processing file 317 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_3.csv


Translating OCR Files:  65%|██████▍   | 317/490 [2:36:04<2:14:29, 46.64s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_3.csv

Processing file 318 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_4.csv


Translating OCR Files:  65%|██████▍   | 318/490 [2:36:22<1:49:21, 38.15s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_4.csv

Processing file 319 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_5.csv


Translating OCR Files:  65%|██████▌   | 319/490 [2:37:14<2:00:07, 42.15s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_5.csv

Processing file 320 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_6.csv


Translating OCR Files:  65%|██████▌   | 320/490 [2:37:36<1:42:41, 36.24s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_6.csv

Processing file 321 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_7.csv


Translating OCR Files:  66%|██████▌   | 321/490 [2:38:09<1:38:57, 35.13s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_7.csv

Processing file 322 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_8.csv


Translating OCR Files:  66%|██████▌   | 322/490 [2:38:30<1:26:59, 31.07s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_8.csv

Processing file 323 of 490: 9fe6b262-9944-417d-a0c4-9f2de1de2994_9.csv


Translating OCR Files:  66%|██████▌   | 323/490 [2:39:04<1:28:59, 31.97s/file]

 Saved translated file: 9fe6b262-9944-417d-a0c4-9f2de1de2994_9.csv

Processing file 324 of 490: a1ba4d8b-f382-44c4-ac3f-746a44746bb4_0.csv


Translating OCR Files:  66%|██████▌   | 324/490 [2:40:34<2:16:04, 49.18s/file]

 Saved translated file: a1ba4d8b-f382-44c4-ac3f-746a44746bb4_0.csv

Processing file 325 of 490: a1ba4d8b-f382-44c4-ac3f-746a44746bb4_1.csv


Translating OCR Files:  66%|██████▋   | 325/490 [2:40:40<1:40:04, 36.39s/file]

 Saved translated file: a1ba4d8b-f382-44c4-ac3f-746a44746bb4_1.csv

Processing file 326 of 490: aa99f763-6849-4f6b-adf2-58f0cc2ed545.csv


Translating OCR Files:  67%|██████▋   | 326/490 [2:40:50<1:17:40, 28.42s/file]

 Saved translated file: aa99f763-6849-4f6b-adf2-58f0cc2ed545.csv

Processing file 327 of 490: adaf869e-920a-4a17-91bd-e2ef3125c10e.csv


Translating OCR Files:  67%|██████▋   | 327/490 [2:41:01<1:03:15, 23.29s/file]

 Saved translated file: adaf869e-920a-4a17-91bd-e2ef3125c10e.csv

Processing file 328 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_1_0.csv


Translating OCR Files:  67%|██████▋   | 328/490 [2:41:10<51:15, 18.98s/file]  

 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_1_0.csv

Processing file 329 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_2_0.csv


Translating OCR Files:  67%|██████▋   | 329/490 [2:42:17<1:29:37, 33.40s/file]

 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_2_0.csv

Processing file 330 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_2_1.csv


Translating OCR Files:  67%|██████▋   | 330/490 [2:42:19<1:03:29, 23.81s/file]

 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_2_1.csv

Processing file 331 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_3_0.csv


Translating OCR Files:  68%|██████▊   | 331/490 [2:43:11<1:25:31, 32.27s/file]

 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_3_0.csv

Processing file 332 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_4_0.csv


Translating OCR Files:  68%|██████▊   | 332/490 [2:44:41<2:10:41, 49.63s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_4_0.csv

Processing file 333 of 490: aedc6a39-7862-4bbc-99e7-780ab3980282_4_1.csv


Translating OCR Files:  68%|██████▊   | 333/490 [2:45:00<1:45:54, 40.48s/file]

 Saved translated file: aedc6a39-7862-4bbc-99e7-780ab3980282_4_1.csv

Processing file 334 of 490: af93eff8-2973-4746-9041-b2223016b117.csv


Translating OCR Files:  68%|██████▊   | 334/490 [2:45:12<1:22:34, 31.76s/file]

 Saved translated file: af93eff8-2973-4746-9041-b2223016b117.csv

Processing file 335 of 490: b0a4acaa-d768-4f6d-8e54-6d20f271bb7c.csv


Translating OCR Files:  68%|██████▊   | 335/490 [2:45:14<59:12, 22.92s/file]  

 Saved translated file: b0a4acaa-d768-4f6d-8e54-6d20f271bb7c.csv

Processing file 336 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_0.csv


Translating OCR Files:  69%|██████▊   | 336/490 [2:45:19<44:45, 17.44s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_0.csv

Processing file 337 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_1.csv


Translating OCR Files:  69%|██████▉   | 337/490 [2:45:32<41:09, 16.14s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_1.csv

Processing file 338 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_10.csv


Translating OCR Files:  69%|██████▉   | 338/490 [2:46:22<1:06:41, 26.33s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_10.csv

Processing file 339 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_11.csv


Translating OCR Files:  69%|██████▉   | 339/490 [2:47:12<1:24:38, 33.63s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_11.csv

Processing file 340 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_12.csv


Translating OCR Files:  69%|██████▉   | 340/490 [2:47:47<1:25:01, 34.01s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_12.csv

Processing file 341 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_13.csv


Translating OCR Files:  70%|██████▉   | 341/490 [2:48:34<1:33:51, 37.80s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_13.csv

Processing file 342 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_14.csv


Translating OCR Files:  70%|██████▉   | 342/490 [2:49:27<1:44:42, 42.45s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_14.csv

Processing file 343 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_15.csv


Translating OCR Files:  70%|███████   | 343/490 [2:50:01<1:37:31, 39.81s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_15.csv

Processing file 344 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_16.csv


Translating OCR Files:  70%|███████   | 344/490 [2:50:26<1:26:23, 35.50s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_16.csv

Processing file 345 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_17.csv


Translating OCR Files:  70%|███████   | 345/490 [2:50:57<1:22:15, 34.04s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_17.csv

Processing file 346 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_18.csv


Translating OCR Files:  71%|███████   | 346/490 [2:52:16<1:54:08, 47.56s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_18.csv

Processing file 347 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_19.csv


Translating OCR Files:  71%|███████   | 347/490 [2:52:34<1:32:18, 38.73s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_19.csv

Processing file 348 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_2.csv


Translating OCR Files:  71%|███████   | 348/490 [2:52:46<1:12:53, 30.80s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_2.csv

Processing file 349 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_20.csv


Translating OCR Files:  71%|███████   | 349/490 [2:53:30<1:21:07, 34.52s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_20.csv

Processing file 350 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_21.csv


Translating OCR Files:  71%|███████▏  | 350/490 [2:54:35<1:42:13, 43.81s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_21.csv

Processing file 351 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_22.csv


Translating OCR Files:  72%|███████▏  | 351/490 [2:55:28<1:48:01, 46.63s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_22.csv

Processing file 352 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_23.csv


Translating OCR Files:  72%|███████▏  | 352/490 [2:56:59<2:17:16, 59.68s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_23.csv

Processing file 353 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_24.csv


Translating OCR Files:  72%|███████▏  | 353/490 [2:57:43<2:06:01, 55.19s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_24.csv

Processing file 354 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_3.csv


Translating OCR Files:  72%|███████▏  | 354/490 [2:58:56<2:17:00, 60.45s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_3.csv

Processing file 355 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_4.csv


Translating OCR Files:  72%|███████▏  | 355/490 [3:00:12<2:26:18, 65.03s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_4.csv

Processing file 356 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_5.csv


Translating OCR Files:  73%|███████▎  | 356/490 [3:00:54<2:10:15, 58.32s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_5.csv

Processing file 357 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_6.csv


Translating OCR Files:  73%|███████▎  | 357/490 [3:02:24<2:30:04, 67.70s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_6.csv

Processing file 358 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_7.csv


Translating OCR Files:  73%|███████▎  | 358/490 [3:03:54<2:43:44, 74.43s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_7.csv

Processing file 359 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_8.csv


Translating OCR Files:  73%|███████▎  | 359/490 [3:05:06<2:40:46, 73.64s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_8.csv

Processing file 360 of 490: b3031e66-40b6-45e8-9bcd-891dc1a280da_9.csv


Translating OCR Files:  73%|███████▎  | 360/490 [3:06:17<2:37:42, 72.79s/file]

 Saved translated file: b3031e66-40b6-45e8-9bcd-891dc1a280da_9.csv

Processing file 361 of 490: b3ce4d51-6024-4b43-b0d2-d3faaf3c2879.csv


Translating OCR Files:  74%|███████▎  | 361/490 [3:06:43<2:06:24, 58.80s/file]

 Saved translated file: b3ce4d51-6024-4b43-b0d2-d3faaf3c2879.csv

Processing file 362 of 490: b6eb1b15-cf99-475c-921f-f06e5c1019d4.csv


Translating OCR Files:  74%|███████▍  | 362/490 [3:07:10<1:45:24, 49.41s/file]

 Saved translated file: b6eb1b15-cf99-475c-921f-f06e5c1019d4.csv

Processing file 363 of 490: b8b76b6d-a50e-4246-82ee-3c8a5dcd523e.csv


Translating OCR Files:  74%|███████▍  | 363/490 [3:07:21<1:20:11, 37.88s/file]

 Saved translated file: b8b76b6d-a50e-4246-82ee-3c8a5dcd523e.csv

Processing file 364 of 490: b8cea3b1-4dde-4438-9b1a-6faf690bbad0.csv


Translating OCR Files:  74%|███████▍  | 364/490 [3:07:25<58:12, 27.72s/file]  

 Saved translated file: b8cea3b1-4dde-4438-9b1a-6faf690bbad0.csv

Processing file 365 of 490: b9d9c584-5e21-4a49-952b-ffecca4eb91e.csv


Translating OCR Files:  74%|███████▍  | 365/490 [3:08:12<1:09:18, 33.27s/file]

 Saved translated file: b9d9c584-5e21-4a49-952b-ffecca4eb91e.csv

Processing file 366 of 490: bcad4fdf-3771-4873-92fa-23240654118a.csv


Translating OCR Files:  75%|███████▍  | 366/490 [3:08:28<58:02, 28.08s/file]  

 Saved translated file: bcad4fdf-3771-4873-92fa-23240654118a.csv

Processing file 367 of 490: c5f1d959-39d1-4176-9cb1-1fb6e8baedc3.csv


Translating OCR Files:  75%|███████▍  | 367/490 [3:08:49<53:42, 26.20s/file]

 Saved translated file: c5f1d959-39d1-4176-9cb1-1fb6e8baedc3.csv

Processing file 368 of 490: d410e4aa-fb52-4ed4-9078-4483267a02b3_0.csv


Translating OCR Files:  75%|███████▌  | 368/490 [3:10:08<1:25:14, 41.92s/file]

 Saved translated file: d410e4aa-fb52-4ed4-9078-4483267a02b3_0.csv

Processing file 369 of 490: d410e4aa-fb52-4ed4-9078-4483267a02b3_1.csv


Translating OCR Files:  75%|███████▌  | 369/490 [3:11:38<1:53:38, 56.35s/file]

 Saved translated file: d410e4aa-fb52-4ed4-9078-4483267a02b3_1.csv

Processing file 370 of 490: d410e4aa-fb52-4ed4-9078-4483267a02b3_2.csv


Translating OCR Files:  76%|███████▌  | 370/490 [3:13:02<2:09:34, 64.79s/file]

 Saved translated file: d410e4aa-fb52-4ed4-9078-4483267a02b3_2.csv

Processing file 371 of 490: d410e4aa-fb52-4ed4-9078-4483267a02b3_3.csv


Translating OCR Files:  76%|███████▌  | 371/490 [3:14:10<2:10:20, 65.72s/file]

 Saved translated file: d410e4aa-fb52-4ed4-9078-4483267a02b3_3.csv

Processing file 372 of 490: d410e4aa-fb52-4ed4-9078-4483267a02b3_4.csv


Translating OCR Files:  76%|███████▌  | 372/490 [3:14:31<1:42:58, 52.36s/file]

 Saved translated file: d410e4aa-fb52-4ed4-9078-4483267a02b3_4.csv

Processing file 373 of 490: d5ff8b65-db15-418a-b33e-169498d79110_0.csv


Translating OCR Files:  76%|███████▌  | 373/490 [3:14:36<1:13:51, 37.88s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_0.csv

Processing file 374 of 490: d5ff8b65-db15-418a-b33e-169498d79110_1.csv


Translating OCR Files:  76%|███████▋  | 374/490 [3:14:46<57:19, 29.65s/file]  

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_1.csv

Processing file 375 of 490: d5ff8b65-db15-418a-b33e-169498d79110_10.csv


Translating OCR Files:  77%|███████▋  | 375/490 [3:15:00<47:40, 24.87s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_10.csv

Processing file 376 of 490: d5ff8b65-db15-418a-b33e-169498d79110_11.csv


Translating OCR Files:  77%|███████▋  | 376/490 [3:15:21<44:56, 23.65s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_11.csv

Processing file 377 of 490: d5ff8b65-db15-418a-b33e-169498d79110_12.csv


Translating OCR Files:  77%|███████▋  | 377/490 [3:15:41<42:34, 22.60s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_12.csv

Processing file 378 of 490: d5ff8b65-db15-418a-b33e-169498d79110_13.csv


Translating OCR Files:  77%|███████▋  | 378/490 [3:15:53<36:31, 19.57s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_13.csv

Processing file 379 of 490: d5ff8b65-db15-418a-b33e-169498d79110_14.csv


Translating OCR Files:  77%|███████▋  | 379/490 [3:16:07<33:09, 17.92s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_14.csv

Processing file 380 of 490: d5ff8b65-db15-418a-b33e-169498d79110_15.csv


Translating OCR Files:  78%|███████▊  | 380/490 [3:16:22<30:49, 16.81s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_15.csv

Processing file 381 of 490: d5ff8b65-db15-418a-b33e-169498d79110_16.csv


Translating OCR Files:  78%|███████▊  | 381/490 [3:16:33<27:49, 15.32s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_16.csv

Processing file 382 of 490: d5ff8b65-db15-418a-b33e-169498d79110_17.csv


Translating OCR Files:  78%|███████▊  | 382/490 [3:16:45<25:44, 14.30s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_17.csv

Processing file 383 of 490: d5ff8b65-db15-418a-b33e-169498d79110_18.csv


Translating OCR Files:  78%|███████▊  | 383/490 [3:17:00<25:40, 14.40s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_18.csv

Processing file 384 of 490: d5ff8b65-db15-418a-b33e-169498d79110_19.csv


Translating OCR Files:  78%|███████▊  | 384/490 [3:17:30<33:43, 19.09s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_19.csv

Processing file 385 of 490: d5ff8b65-db15-418a-b33e-169498d79110_2.csv


Translating OCR Files:  79%|███████▊  | 385/490 [3:18:11<45:09, 25.81s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_2.csv

Processing file 386 of 490: d5ff8b65-db15-418a-b33e-169498d79110_20.csv


Translating OCR Files:  79%|███████▉  | 386/490 [3:18:57<54:57, 31.71s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_20.csv

Processing file 387 of 490: d5ff8b65-db15-418a-b33e-169498d79110_21.csv


Translating OCR Files:  79%|███████▉  | 387/490 [3:19:38<59:31, 34.68s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_21.csv

Processing file 388 of 490: d5ff8b65-db15-418a-b33e-169498d79110_3.csv


Translating OCR Files:  79%|███████▉  | 388/490 [3:20:59<1:22:28, 48.52s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_3.csv

Processing file 389 of 490: d5ff8b65-db15-418a-b33e-169498d79110_4.csv


Translating OCR Files:  79%|███████▉  | 389/490 [3:22:13<1:34:12, 55.96s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_4.csv

Processing file 390 of 490: d5ff8b65-db15-418a-b33e-169498d79110_5.csv


Translating OCR Files:  80%|███████▉  | 390/490 [3:22:46<1:21:47, 49.07s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_5.csv

Processing file 391 of 490: d5ff8b65-db15-418a-b33e-169498d79110_6.csv


Translating OCR Files:  80%|███████▉  | 391/490 [3:23:16<1:11:37, 43.41s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_6.csv

Processing file 392 of 490: d5ff8b65-db15-418a-b33e-169498d79110_7.csv


Translating OCR Files:  80%|████████  | 392/490 [3:23:40<1:01:16, 37.52s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_7.csv

Processing file 393 of 490: d5ff8b65-db15-418a-b33e-169498d79110_8.csv


Translating OCR Files:  80%|████████  | 393/490 [3:24:04<54:07, 33.48s/file]  

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_8.csv

Processing file 394 of 490: d5ff8b65-db15-418a-b33e-169498d79110_9.csv


Translating OCR Files:  80%|████████  | 394/490 [3:24:22<46:15, 28.91s/file]

 Saved translated file: d5ff8b65-db15-418a-b33e-169498d79110_9.csv

Processing file 395 of 490: dbc9c90e-a3e6-4d71-bb93-5fb8394095ac_0.csv


Translating OCR Files:  81%|████████  | 395/490 [3:24:34<37:58, 23.99s/file]

 Saved translated file: dbc9c90e-a3e6-4d71-bb93-5fb8394095ac_0.csv

Processing file 396 of 490: dd5b6a38-dc17-4122-a242-32006b381b3a.csv


Translating OCR Files:  81%|████████  | 396/490 [3:25:01<38:47, 24.76s/file]

 Saved translated file: dd5b6a38-dc17-4122-a242-32006b381b3a.csv

Processing file 397 of 490: de359f8d-0745-4a93-959a-d1a6c361e326.csv


Translating OCR Files:  81%|████████  | 397/490 [3:25:13<32:40, 21.08s/file]

 Saved translated file: de359f8d-0745-4a93-959a-d1a6c361e326.csv

Processing file 398 of 490: e07a9457-86f1-4f0f-86d7-8ea816b8d8d3.csv


Translating OCR Files:  81%|████████  | 398/490 [3:25:23<26:53, 17.53s/file]

 Saved translated file: e07a9457-86f1-4f0f-86d7-8ea816b8d8d3.csv

Processing file 399 of 490: e182d867-dc18-43fd-a418-26dcf784242f_1_0.csv


Translating OCR Files:  81%|████████▏ | 399/490 [3:26:38<52:40, 34.73s/file]

 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_1_0.csv

Processing file 400 of 490: e182d867-dc18-43fd-a418-26dcf784242f_1_1.csv


Translating OCR Files:  82%|████████▏ | 400/490 [3:27:39<1:04:10, 42.78s/file]

 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_1_1.csv

Processing file 401 of 490: e182d867-dc18-43fd-a418-26dcf784242f_1_2.csv


Translating OCR Files:  82%|████████▏ | 401/490 [3:29:09<1:24:31, 56.98s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_1_2.csv

Processing file 402 of 490: e182d867-dc18-43fd-a418-26dcf784242f_1_3.csv


Translating OCR Files:  82%|████████▏ | 402/490 [3:30:29<1:33:28, 63.74s/file]

 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_1_3.csv

Processing file 403 of 490: e182d867-dc18-43fd-a418-26dcf784242f_1_4.csv


Translating OCR Files:  82%|████████▏ | 403/490 [3:31:59<1:43:53, 71.65s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_1_4.csv

Processing file 404 of 490: e182d867-dc18-43fd-a418-26dcf784242f_2_0.csv


Translating OCR Files:  82%|████████▏ | 404/490 [3:33:29<1:50:38, 77.20s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_2_0.csv

Processing file 405 of 490: e182d867-dc18-43fd-a418-26dcf784242f_2_1.csv


Translating OCR Files:  83%|████████▎ | 405/490 [3:34:59<1:54:51, 81.08s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_2_1.csv

Processing file 406 of 490: e182d867-dc18-43fd-a418-26dcf784242f_2_2.csv


Translating OCR Files:  83%|████████▎ | 406/490 [3:36:29<1:57:18, 83.79s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_2_2.csv

Processing file 407 of 490: e182d867-dc18-43fd-a418-26dcf784242f_2_3.csv


Translating OCR Files:  83%|████████▎ | 407/490 [3:36:31<1:22:00, 59.29s/file]

 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_2_3.csv

Processing file 408 of 490: e182d867-dc18-43fd-a418-26dcf784242f_3_0.csv


Translating OCR Files:  83%|████████▎ | 408/490 [3:37:26<1:19:05, 57.87s/file]

 Saved translated file: e182d867-dc18-43fd-a418-26dcf784242f_3_0.csv

Processing file 409 of 490: e705d192-90ee-4fd1-9dcd-061958d1817f.csv


Translating OCR Files:  83%|████████▎ | 409/490 [3:37:29<55:46, 41.32s/file]  

 Saved translated file: e705d192-90ee-4fd1-9dcd-061958d1817f.csv

Processing file 410 of 490: eda5b003-9250-4913-b724-74cca86240af_0.csv


Translating OCR Files:  84%|████████▎ | 410/490 [3:37:36<41:36, 31.21s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_0.csv

Processing file 411 of 490: eda5b003-9250-4913-b724-74cca86240af_1.csv


Translating OCR Files:  84%|████████▍ | 411/490 [3:37:39<29:54, 22.72s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_1.csv

Processing file 412 of 490: eda5b003-9250-4913-b724-74cca86240af_10.csv


Translating OCR Files:  84%|████████▍ | 412/490 [3:38:26<38:46, 29.83s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_10.csv

Processing file 413 of 490: eda5b003-9250-4913-b724-74cca86240af_11.csv


Translating OCR Files:  84%|████████▍ | 413/490 [3:39:04<41:31, 32.35s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_11.csv

Processing file 414 of 490: eda5b003-9250-4913-b724-74cca86240af_12.csv


Translating OCR Files:  84%|████████▍ | 414/490 [3:40:27<1:00:13, 47.54s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_12.csv

Processing file 415 of 490: eda5b003-9250-4913-b724-74cca86240af_13.csv


Translating OCR Files:  85%|████████▍ | 415/490 [3:41:57<1:15:24, 60.32s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_13.csv

Processing file 416 of 490: eda5b003-9250-4913-b724-74cca86240af_14.csv


Translating OCR Files:  85%|████████▍ | 416/490 [3:42:41<1:08:13, 55.32s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_14.csv

Processing file 417 of 490: eda5b003-9250-4913-b724-74cca86240af_2.csv


Translating OCR Files:  85%|████████▌ | 417/490 [3:43:20<1:01:33, 50.60s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_2.csv

Processing file 418 of 490: eda5b003-9250-4913-b724-74cca86240af_3.csv


Translating OCR Files:  85%|████████▌ | 418/490 [3:44:14<1:01:42, 51.42s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_3.csv

Processing file 419 of 490: eda5b003-9250-4913-b724-74cca86240af_4.csv


Translating OCR Files:  86%|████████▌ | 419/490 [3:45:09<1:02:20, 52.68s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_4.csv

Processing file 420 of 490: eda5b003-9250-4913-b724-74cca86240af_5.csv


Translating OCR Files:  86%|████████▌ | 420/490 [3:45:42<54:30, 46.73s/file]  

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_5.csv

Processing file 421 of 490: eda5b003-9250-4913-b724-74cca86240af_6.csv


Translating OCR Files:  86%|████████▌ | 421/490 [3:46:21<51:12, 44.53s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_6.csv

Processing file 422 of 490: eda5b003-9250-4913-b724-74cca86240af_7.csv


Translating OCR Files:  86%|████████▌ | 422/490 [3:47:01<48:57, 43.19s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_7.csv

Processing file 423 of 490: eda5b003-9250-4913-b724-74cca86240af_8.csv


Translating OCR Files:  86%|████████▋ | 423/490 [3:47:36<45:15, 40.53s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_8.csv

Processing file 424 of 490: eda5b003-9250-4913-b724-74cca86240af_9.csv


Translating OCR Files:  87%|████████▋ | 424/490 [3:48:41<52:34, 47.80s/file]

 Saved translated file: eda5b003-9250-4913-b724-74cca86240af_9.csv

Processing file 425 of 490: ee47dfea-2626-4107-8ab3-4663167e0493.csv


Translating OCR Files:  87%|████████▋ | 425/490 [3:49:21<49:19, 45.52s/file]

 Saved translated file: ee47dfea-2626-4107-8ab3-4663167e0493.csv

Processing file 426 of 490: f0ce8a7b-909d-4fc5-ba13-ea66b2dc6448.csv


Translating OCR Files:  87%|████████▋ | 426/490 [3:49:23<34:47, 32.62s/file]

 Saved translated file: f0ce8a7b-909d-4fc5-ba13-ea66b2dc6448.csv

Processing file 427 of 490: f179eb06-0c53-44df-a13f-570be23355bb_0.csv


Translating OCR Files:  87%|████████▋ | 427/490 [3:49:28<25:28, 24.26s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_0.csv

Processing file 428 of 490: f179eb06-0c53-44df-a13f-570be23355bb_1.csv


Translating OCR Files:  87%|████████▋ | 428/490 [3:49:29<17:59, 17.42s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_1.csv

Processing file 429 of 490: f179eb06-0c53-44df-a13f-570be23355bb_10.csv


Translating OCR Files:  88%|████████▊ | 429/490 [3:49:56<20:32, 20.21s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_10.csv

Processing file 430 of 490: f179eb06-0c53-44df-a13f-570be23355bb_11.csv


Translating OCR Files:  88%|████████▊ | 430/490 [3:50:27<23:14, 23.25s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_11.csv

Processing file 431 of 490: f179eb06-0c53-44df-a13f-570be23355bb_12.csv


Translating OCR Files:  88%|████████▊ | 431/490 [3:51:01<26:11, 26.64s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_12.csv

Processing file 432 of 490: f179eb06-0c53-44df-a13f-570be23355bb_13.csv


Translating OCR Files:  88%|████████▊ | 432/490 [3:51:22<24:04, 24.90s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_13.csv

Processing file 433 of 490: f179eb06-0c53-44df-a13f-570be23355bb_14.csv


Translating OCR Files:  88%|████████▊ | 433/490 [3:51:43<22:40, 23.87s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_14.csv

Processing file 434 of 490: f179eb06-0c53-44df-a13f-570be23355bb_15.csv


Translating OCR Files:  89%|████████▊ | 434/490 [3:52:33<29:21, 31.45s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_15.csv

Processing file 435 of 490: f179eb06-0c53-44df-a13f-570be23355bb_16.csv


Translating OCR Files:  89%|████████▉ | 435/490 [3:52:56<26:45, 29.19s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_16.csv

Processing file 436 of 490: f179eb06-0c53-44df-a13f-570be23355bb_17.csv


Translating OCR Files:  89%|████████▉ | 436/490 [3:53:24<25:56, 28.83s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_17.csv

Processing file 437 of 490: f179eb06-0c53-44df-a13f-570be23355bb_18.csv


Translating OCR Files:  89%|████████▉ | 437/490 [3:54:00<27:09, 30.74s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_18.csv

Processing file 438 of 490: f179eb06-0c53-44df-a13f-570be23355bb_19.csv


Translating OCR Files:  89%|████████▉ | 438/490 [3:55:30<42:05, 48.56s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_19.csv

Processing file 439 of 490: f179eb06-0c53-44df-a13f-570be23355bb_2.csv


Translating OCR Files:  90%|████████▉ | 439/490 [3:55:50<34:07, 40.15s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_2.csv

Processing file 440 of 490: f179eb06-0c53-44df-a13f-570be23355bb_20.csv


Translating OCR Files:  90%|████████▉ | 440/490 [3:56:25<32:04, 38.49s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_20.csv

Processing file 441 of 490: f179eb06-0c53-44df-a13f-570be23355bb_3.csv


Translating OCR Files:  90%|█████████ | 441/490 [3:56:33<24:03, 29.46s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_3.csv

Processing file 442 of 490: f179eb06-0c53-44df-a13f-570be23355bb_4.csv


Translating OCR Files:  90%|█████████ | 442/490 [3:58:03<38:08, 47.67s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_4.csv

Processing file 443 of 490: f179eb06-0c53-44df-a13f-570be23355bb_5.csv


Translating OCR Files:  90%|█████████ | 443/490 [3:59:05<40:32, 51.76s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_5.csv

Processing file 444 of 490: f179eb06-0c53-44df-a13f-570be23355bb_6.csv


Translating OCR Files:  91%|█████████ | 444/490 [3:59:53<38:47, 50.59s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_6.csv

Processing file 445 of 490: f179eb06-0c53-44df-a13f-570be23355bb_7.csv


Translating OCR Files:  91%|█████████ | 445/490 [4:00:33<35:42, 47.62s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_7.csv

Processing file 446 of 490: f179eb06-0c53-44df-a13f-570be23355bb_8.csv


Translating OCR Files:  91%|█████████ | 446/490 [4:01:11<32:47, 44.71s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_8.csv

Processing file 447 of 490: f179eb06-0c53-44df-a13f-570be23355bb_9.csv


Translating OCR Files:  91%|█████████ | 447/490 [4:01:30<26:34, 37.08s/file]

 Saved translated file: f179eb06-0c53-44df-a13f-570be23355bb_9.csv

Processing file 448 of 490: f313f521-80a1-4db5-a8a7-53d29ee09890.csv


Translating OCR Files:  91%|█████████▏| 448/490 [4:01:57<23:48, 34.02s/file]

 Saved translated file: f313f521-80a1-4db5-a8a7-53d29ee09890.csv

Processing file 449 of 490: f41b7574-57b4-4c9f-907c-2a3c48a56157.csv


Translating OCR Files:  92%|█████████▏| 449/490 [4:02:18<20:30, 30.01s/file]

 Saved translated file: f41b7574-57b4-4c9f-907c-2a3c48a56157.csv

Processing file 450 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_0.csv


Translating OCR Files:  92%|█████████▏| 450/490 [4:02:23<14:55, 22.38s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_0.csv

Processing file 451 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_1.csv


Translating OCR Files:  92%|█████████▏| 451/490 [4:02:35<12:31, 19.28s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_1.csv

Processing file 452 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_10.csv


Translating OCR Files:  92%|█████████▏| 452/490 [4:02:47<10:58, 17.33s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_10.csv

Processing file 453 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_11.csv


Translating OCR Files:  92%|█████████▏| 453/490 [4:03:08<11:19, 18.37s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_11.csv

Processing file 454 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_12.csv


Translating OCR Files:  93%|█████████▎| 454/490 [4:03:55<16:11, 27.00s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_12.csv

Processing file 455 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_13.csv


Translating OCR Files:  93%|█████████▎| 455/490 [4:04:27<16:38, 28.54s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_13.csv

Processing file 456 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_14.csv


Translating OCR Files:  93%|█████████▎| 456/490 [4:04:30<11:43, 20.69s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_14.csv

Processing file 457 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_15.csv


Translating OCR Files:  93%|█████████▎| 457/490 [4:05:22<16:29, 29.98s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_15.csv

Processing file 458 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_16.csv


Translating OCR Files:  93%|█████████▎| 458/490 [4:05:25<11:41, 21.92s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_16.csv

Processing file 459 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_17.csv


Translating OCR Files:  94%|█████████▎| 459/490 [4:05:57<12:56, 25.06s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_17.csv

Processing file 460 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_2.csv


Translating OCR Files:  94%|█████████▍| 460/490 [4:06:11<10:50, 21.69s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_2.csv

Processing file 461 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_3.csv


Translating OCR Files:  94%|█████████▍| 461/490 [4:07:41<20:24, 42.24s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_3.csv

Processing file 462 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_4.csv


Translating OCR Files:  94%|█████████▍| 462/490 [4:09:11<26:25, 56.61s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_4.csv

Processing file 463 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_5.csv


Translating OCR Files:  94%|█████████▍| 463/490 [4:09:49<22:58, 51.05s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_5.csv

Processing file 464 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_6.csv


Translating OCR Files:  95%|█████████▍| 464/490 [4:10:22<19:43, 45.52s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_6.csv

Processing file 465 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_7.csv


Translating OCR Files:  95%|█████████▍| 465/490 [4:11:08<19:01, 45.67s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_7.csv

Processing file 466 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_8.csv


Translating OCR Files:  95%|█████████▌| 466/490 [4:11:41<16:45, 41.91s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_8.csv

Processing file 467 of 490: f7205881-3904-42ec-ab2c-04f36fa24785_9.csv


Translating OCR Files:  95%|█████████▌| 467/490 [4:12:07<14:13, 37.12s/file]

 Saved translated file: f7205881-3904-42ec-ab2c-04f36fa24785_9.csv

Processing file 468 of 490: fc27ce32-9c96-416c-9c38-84977255e0ba.csv


Translating OCR Files:  96%|█████████▌| 468/490 [4:12:26<11:37, 31.70s/file]

 Saved translated file: fc27ce32-9c96-416c-9c38-84977255e0ba.csv

Processing file 469 of 490: fcf90a92-794c-40c6-aa4f-8ea82f8bed51.csv


Translating OCR Files:  96%|█████████▌| 469/490 [4:12:49<10:12, 29.15s/file]

 Saved translated file: fcf90a92-794c-40c6-aa4f-8ea82f8bed51.csv

Processing file 470 of 490: fe221e78-67e4-4d88-b73d-e58a9943a036.csv


Translating OCR Files:  96%|█████████▌| 470/490 [4:13:03<08:09, 24.47s/file]

 Saved translated file: fe221e78-67e4-4d88-b73d-e58a9943a036.csv

Processing file 471 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_0.csv


Translating OCR Files:  96%|█████████▌| 471/490 [4:13:09<05:59, 18.92s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_0.csv

Processing file 472 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_1.csv


Translating OCR Files:  96%|█████████▋| 472/490 [4:13:47<07:25, 24.77s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_1.csv

Processing file 473 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_10.csv


Translating OCR Files:  97%|█████████▋| 473/490 [4:14:21<07:46, 27.45s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_10.csv

Processing file 474 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_11.csv


Translating OCR Files:  97%|█████████▋| 474/490 [4:14:41<06:45, 25.33s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_11.csv

Processing file 475 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_12.csv


Translating OCR Files:  97%|█████████▋| 475/490 [4:15:05<06:12, 24.86s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_12.csv

Processing file 476 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_13.csv


Translating OCR Files:  97%|█████████▋| 476/490 [4:15:23<05:18, 22.75s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_13.csv

Processing file 477 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_14.csv


Translating OCR Files:  97%|█████████▋| 477/490 [4:15:56<05:38, 26.02s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_14.csv

Processing file 478 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_15.csv


Translating OCR Files:  98%|█████████▊| 478/490 [4:16:23<05:14, 26.23s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_15.csv

Processing file 479 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_16.csv


Translating OCR Files:  98%|█████████▊| 479/490 [4:17:14<06:10, 33.66s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_16.csv

Processing file 480 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_17.csv


Translating OCR Files:  98%|█████████▊| 480/490 [4:17:34<04:55, 29.55s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_17.csv

Processing file 481 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_18.csv


Translating OCR Files:  98%|█████████▊| 481/490 [4:19:04<07:09, 47.72s/file]

Ollama error: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=90)
 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_18.csv

Processing file 482 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_19.csv


Translating OCR Files:  98%|█████████▊| 482/490 [4:19:24<05:14, 39.33s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_19.csv

Processing file 483 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_2.csv


Translating OCR Files:  99%|█████████▊| 483/490 [4:19:58<04:23, 37.59s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_2.csv

Processing file 484 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_3.csv


Translating OCR Files:  99%|█████████▉| 484/490 [4:21:20<05:06, 51.08s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_3.csv

Processing file 485 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_4.csv


Translating OCR Files:  99%|█████████▉| 485/490 [4:22:32<04:46, 57.28s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_4.csv

Processing file 486 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_5.csv


Translating OCR Files:  99%|█████████▉| 486/490 [4:23:11<03:26, 51.70s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_5.csv

Processing file 487 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_6.csv


Translating OCR Files:  99%|█████████▉| 487/490 [4:23:47<02:20, 46.98s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_6.csv

Processing file 488 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_7.csv


Translating OCR Files: 100%|█████████▉| 488/490 [4:24:08<01:18, 39.30s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_7.csv

Processing file 489 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_8.csv


Translating OCR Files: 100%|█████████▉| 489/490 [4:24:45<00:38, 38.50s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_8.csv

Processing file 490 of 490: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_9.csv


Translating OCR Files: 100%|██████████| 490/490 [4:25:20<00:00, 32.49s/file]

 Saved translated file: fe245192-1f9c-4f28-9b32-046fb7ce7e1e_9.csv





In [None]:
import pandas as pd
import os
import re
import ast

In [10]:
# Changing the files in the chats with the translated OCR output

mapping_csv_path = 'results/filtered_ref_summary_csv_replaced.csv'
chat_reference_csv_path = 'results/updated_with_csv_paths.csv'  
ocr_folder_path = 'ocr_to_csv'

# Use the csv file stroing all the paths to filtered chats by the LLM
chat_paths_df = pd.read_csv(chat_reference_csv_path)

mapping_df = pd.read_csv(mapping_csv_path)

# Link md files and the pngs inside based on uuid
md_uuid_to_pngs = {}

for index, row in mapping_df.iterrows():
    mentions2_text = row.get('mentions2', '{}')
    try:
        mentions2 = ast.literal_eval(mentions2_text)
        for md_path, png_list in mentions2.items():
            md_uuid = md_path.split('/')[-1].replace('.csv', '')
            png_names = [os.path.basename(p) for p in png_list]
            md_uuid_to_pngs[md_uuid] = png_names
    except:
        continue

# Function to fetch OCR translation from OCR folder
def get_ocr_translation(png_filename):
    csv_filename = png_filename.replace('.png', '.csv')
    csv_path = os.path.join(ocr_folder_path, csv_filename)

    if os.path.exists(csv_path):
        try:
            df = pd.read_csv(csv_path)
            if 'ocr translation' in df.columns:
                non_empty_translations = df['ocr translation'].dropna().astype(str)
                for text in non_empty_translations:
                    if text.strip():
                        return text
        except Exception as e:
            print(f"Error reading {csv_path}: {e}")
            return ''
    return ''

# Function to replace .png/.md in message with OCR translation
def replace_with_ocr_translation(message):
    if not isinstance(message, str):
        return message

    png_files = re.findall(r'(\d+-[a-f0-9\-]+(?:_[0-9]+)?\.png)', message)
    for png in png_files:
        ocr_text = get_ocr_translation(png)
        if ocr_text:
            message = message.replace(png, f"OCR translation:\n{ocr_text}")

    md_uuids = re.findall(r'([a-f0-9\-]{36})\.md', message)
    for md_uuid in md_uuids:
        png_list = md_uuid_to_pngs.get(md_uuid, [])
        ocr_texts = []
        for png in png_list:
            ocr_text = get_ocr_translation(png)
            if ocr_text:
                ocr_texts.append(f"OCR for {png}:\n{ocr_text}")
        if ocr_texts:
            combined = "\n\n".join(ocr_texts)
            full_md_match = re.search(r'\S*' + re.escape(md_uuid) + r'\.md', message)
            if full_md_match:
                message = message.replace(full_md_match.group(0), combined)

    return message

# Loop over each chat CSV listed in the 'chats' column
for idx, row in chat_paths_df.iterrows():
    chat_csv_path = row['chats']
    if not isinstance(chat_csv_path, str) or not os.path.exists(chat_csv_path):
        print(f"Skipping missing or invalid path: {chat_csv_path}")
        continue

    print(f"Processing: {chat_csv_path}")
    try:
        chat_df = pd.read_csv(chat_csv_path)
        if 'message_translation' in chat_df.columns:
            chat_df['message_translation'] = chat_df['message_translation'].apply(replace_with_ocr_translation)
            chat_df.to_csv(chat_csv_path, index=False)
        else:
            print(f"Column 'message_translation' not found in {chat_csv_path}")
    except Exception as e:
        print(f"Error processing {chat_csv_path}: {e}")

Processing: csvs/28.csv
Processing: csvs/5.csv
Processing: csvs/38.csv
Processing: csvs/18.csv
Processing: csvs/1.csv
Processing: csvs/19.csv
Processing: csvs/29.csv
Processing: csvs/4.csv
Processing: csvs/39.csv
Processing: csvs/16.csv
Processing: csvs/22.csv
Processing: csvs/32.csv
Processing: csvs/26.csv
Processing: csvs/12.csv
Processing: csvs/36.csv
Processing: csvs/27.csv
Processing: csvs/13.csv
Processing: csvs/37.csv
Processing: csvs/17.csv
Processing: csvs/23.csv
Processing: csvs/33.csv
Processing: csvs/24.csv
Processing: csvs/41.csv
Processing: csvs/10.csv
Processing: csvs/9.csv
Processing: csvs/34.csv
Processing: csvs/14.csv
Processing: csvs/20.csv
Processing: csvs/30.csv
Processing: csvs/15.csv
Processing: csvs/21.csv
Processing: csvs/31.csv
Processing: csvs/40.csv
Processing: csvs/11.csv
Processing: csvs/35.csv
Processing: csvs/3.csv
Processing: csvs/7.csv
Processing: csvs/6.csv
Processing: csvs/2.csv


# **USER ANALYSIS**

## *Username association with mentioned names*

In [None]:
import pandas as pd
import os
import re
import requests
from collections import defaultdict, Counter

In [77]:
def query_ollama(prompt, model="gemma3:27b"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    try:
        response = requests.post(url, json=payload, timeout=90)
        response.raise_for_status()
        return response.json()["response"].strip().lower()
    except Exception as e:
        return f"[error: {e}]"

# Function to build the LLM prompt for analyzing honorific usage
def build_prompt(sender, receiver, message_text, honorific):
    return (
        f"There is a conversation between two users:\n"
        f"User A: {sender}\n"
        f"User B: {receiver}\n\n"
        f"The message is as follows:\n"
        f"User A → User B: {message_text}\n\n"
        f"Question: In the sentence '{message_text}', who is {honorific} — "
        f"the sender, the receiver, or a third party?\n\n"
        f"Output format: {honorific} - give the username of the person or "
        f"if they are a thirdparty, no explanation needed"
    )

# List of honorifics to look for in messages
honorific_keywords = [
    r'Mr\.', r'Ms\.', r'Mrs\.', r'Miss', r'Brother',
    r'Sister', r'Mister', r'Miss', r'Comrade', r'Colleague'
]

# Regex pattern to detect honorifics followed by a name (e.g., Mr. Smith)
honorific_pattern = re.compile(
    r'\b(?:' + '|'.join(honorific_keywords) + r')\s+(?:[A-Z]\b|[A-Z][a-z]+)\b',
    re.IGNORECASE
)

# Regex pattern to check if the message directly addresses someone
direct_address_pattern = re.compile(
    r'^(hi|hello|dear)?\s*(?:' + '|'.join(honorific_keywords) + r')\s+(?:[A-Z]\b|[A-Z][a-z]+)\b[\s,:]',
    re.IGNORECASE
)

# Load the CSV containing paths to individual chat logs
df_paths = pd.read_csv("results/updated_with_csv_paths.csv")

# Dictionaries to store results for each user
title_counts = defaultdict(Counter)        # Tracks how often each honorific is used for a user
title_examples = defaultdict(list)         # Stores example messages where the honorific was used

# Loop through each chat file
for chat_file in df_paths['chats'].dropna().unique():
    if not (isinstance(chat_file, str) and chat_file.endswith('.csv') and os.path.exists(chat_file)):
        continue

    try:
        df_chat = pd.read_csv(chat_file)
    except Exception as e:
        print(f"Error reading {chat_file}: {e}")
        continue

    # Standardize column names for easier handling
    df_chat.columns = [col.lower() for col in df_chat.columns]
    message_columns = [col for col in df_chat.columns if 'message_translation' in col]
    sender_column = df_chat.columns[1]
    receiver_column = df_chat.columns[2]

    # Process each message in the chat file
    for index, row in df_chat.iterrows():
        for message_col in message_columns:
            if pd.isna(row.get(message_col)):
                continue

            message = str(row[message_col]).strip()
            sender = row.get(sender_column)
            receiver = row.get(receiver_column)

            if not receiver or receiver == sender:
                continue

            matches = honorific_pattern.findall(message)
            if len(matches) != 1:
                continue  # Skip messages with no or multiple honorifics

            honorific = matches[0].strip()

            # Check if the honorific is used in a direct address
            if not re.match(rf"^(hi|hello|dear)?\s*{re.escape(honorific)}[\s,:]", message, re.IGNORECASE):
                continue

            # Ask the LLM who the honorific refers to
            prompt = build_prompt(sender, receiver, message, honorific)
            result = query_ollama(prompt)

            if "thirdparty" in result:
                continue  # If the model says it refers to a third party we skip

            # Count and store the result if valid
            title_counts[receiver][honorific] += 1
            title_examples[receiver].append((honorific, os.path.basename(chat_file), index, message))

summary_rows = []
for user_id, counter in title_counts.items():
    if not counter:
        continue

    top_honorific, count = counter.most_common(1)[0]

    # Grab up to 3 example messages
    examples = [
        f"{file}:{line} - \"{msg}\""
        for t, file, line, msg in title_examples[user_id] if t == top_honorific
    ][:3]

    summary_rows.append({
        "username": user_id,
        "real_name": top_honorific,
        "count": count,
        "examples": " | ".join(examples)
    })

df_summary = pd.DataFrame(summary_rows, columns=["username", "real_name", "count", "examples"])
print(df_summary)
df_summary.to_csv("honorific_usernames_cleaned.csv", index=False)

              username     real_name  count  \
0             Shutd0wn        Mr. Wu      1   
1               adpw90     Mr. Zheng      1   
2   wxid_5390224027312      Mr. Wang      2   
3  wxid_70w3p1jin84k22   Sister Qian      1   
4         SWEET5683yao  Brother Qing      2   
5               lengmo         Mr. C      4   
6  wxid_7p054rmzkhqf21        Mr. Lu      2   
7            wei592628       Mr. Wei      4   
8             nullroot      Mr. Zhou      2   
9           gzp1991101      Mr. Gong      7   

                                            examples  
0  28.csv:0 - "Mr. Wu, are you available now? I'd...  
1  4.csv:21 - "Mr. Zheng, help me see where this ...  
2  39.csv:311 - "Mr. Wang, there's nothing I can ...  
3  16.csv:104 - "Sister Qian, don't settle for le...  
4  22.csv:6 - "Brother Qing, are you interested?"...  
5  26.csv:1 - "Mr. C, this is a summary of the sa...  
6  24.csv:27 - "Mr. Lu, regarding what Ying menti...  
7  10.csv:1 - "Mr. Wei, roughly what aspec

In [3]:
import pandas as pd
import os
import re

In [4]:
# This code extracts all the messages where each real name is mentioned while keeping in mind receiver and sender

df_users = pd.read_csv("honorific_usernames_cleaned.csv") 
df_paths = pd.read_csv("results/updated_with_csv_paths.csv")
chat_paths = df_paths['chats'].dropna().unique()

all_matched = []

# Search messages for real_name
for _, user_row in df_users.iterrows():
    username = user_row['username']
    real_name = user_row['real_name']
    name_key = real_name.split()[-1].lower()

    name_pattern = re.compile(
        rf"\b(?:mr\.|ms\.|mrs\.|miss|dr\.|prof\.|brother|sister)?\s*\b{name_key}\b(?:\s+\w+)?",
        re.IGNORECASE
    )

    for csv_path in chat_paths:
        if not isinstance(csv_path, str) or not csv_path.endswith('.csv') or not os.path.exists(csv_path):
            continue

        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"Error reading {csv_path}: {e}")
            continue

        df.columns = [col.lower() for col in df.columns]
        text_cols = [col for col in df.columns if 'message_translation' in col]
        sender_col = df.columns[1]
        receiver_col = df.columns[2]

        for idx, row in df.iterrows():
            for col in text_cols:
                message = str(row.get(col))
                if pd.isna(message):
                    continue

                if name_pattern.search(message.lower()):
                    all_matched.append({
                        "username": username,
                        "real_name": real_name,
                        "file": os.path.basename(csv_path),
                        "line": idx,
                        "sender": row.get(sender_col, "unknown"),
                        "receiver": row.get(receiver_col, "unknown"),
                        "message": message.strip()
                    })

df_all = pd.DataFrame(all_matched)
df_all.to_csv("all_user_name_mentions.csv", index=False)
print(df_all.head())

   username real_name    file  line               sender  receiver  \
0  Shutd0wn    Mr. Wu  28.csv     0  wxid_xusilpfkh31g21  Shutd0wn   
1  Shutd0wn    Mr. Wu  28.csv    26  wxid_xusilpfkh31g21  Shutd0wn   
2  Shutd0wn    Mr. Wu  38.csv     8  wxid_zbytkn4qjl3r22    lengmo   
3  Shutd0wn    Mr. Wu   1.csv   546             Shutd0wn    lengmo   
4  Shutd0wn    Mr. Wu   1.csv  1662             Shutd0wn    lengmo   

                                             message  
0  Mr. Wu, are you available now? I'd like to hav...  
1    Mr. Wu, I'd like to request 3 days off [smiles]  
2  I spoke with Mr. Wu, he thinks it’s necessary,...  
3  Wu Haibo invites you to a Tencent Meeting\nMee...  
4                              Zhao Wu was reported.  


In [1]:
import pandas as pd
import requests

ERROR! Session/line number was not unique in database. History logging moved to new session 82


In [2]:
# LLM try out for a quick identification of companies, real names and position. This will be later used to find what company the leak is about.

df_all = pd.read_csv("all_user_name_mentions.csv")

# Group by username
grouped = df_all.groupby("username")

profiles = []

# Prompt builder
def build_profile_prompt(df_user, username, real_name):
    lines = [
        f"You are analyzing chat logs mentioning a person referred to as '{real_name}'.",
        f"The known system username for this person is: {username}.",
        "Each message includes sender, receiver, and content. Look for clues in the messages names and position mentioned.",
        "Use these messages to infer their real identity and professional role.",
        "Check if the real_name is an initial for a name. Example: Mr A might be Andrew Anthony.",
        "Instructions:",
        "- Provide exactly one value per field.",
        "- Include a confidence score for each.",
        "- NO alternatives, NO explanation anywhere."
        "-If unsure, just provide waht is know and leave other things empty"
        "NEVER BREAK THE FORMAT"
    ]

    for i, row in df_user.iterrows():
        lines.append(
            f"{i+1}. Sender: {row['sender']} | Receiver: {row['receiver']} | Message: \"{row['message']}\""
        )

    lines.append(
        "\nOutput format:\n"
        f"Username: {username}\n"
        "Real Name: <...> (confidence: <...>)\n"
        "Company: <...> (confidence: <...>)\n"
        "Position: <...> (confidence: <...>)"
    )

    return "\n".join(lines)

def query_ollama(prompt, model="gemma3:27b"):
    try:
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": model, "prompt": prompt, "stream": False},
            timeout=1000
        )
        res.raise_for_status()
        return res.json()["response"].strip()
    except Exception as e:
        return f"[error: {e}]"

# Run for each user and collect results
for username, df_user in grouped:
    real_name = df_user["real_name"].iloc[0]
    if len(df_user) == 0:
        continue

    df_user = df_user.head(100)  # limit to 100 messages per user for performance info
    prompt = build_profile_prompt(df_user, username, real_name)
    result = query_ollama(prompt)
    profiles.append({
        "username": username,
        "real_name": real_name,
        "llm_profile": result
    })
    print(f"\nProfile for {real_name} ({username}):\n{result}\n")

df_profiles = pd.DataFrame(profiles)
df_profiles.to_csv("llm_user_profiles.csv", index=False)
print("Saved to llm_user_profiles.csv")


Profile for Brother Qing (SWEET5683yao):
Username: SWEET5683yao
Real Name: Chen Qing (confidence: 0.95)
Company: Haici Industrial and Information Technology Leadership Academy (confidence: 0.85)
Position: Dean (confidence: 0.85)


Profile for Mr. Wu (Shutd0wn):
Username: Shutd0wn
Real Name: Wu Haibo (confidence: 0.95)
Company: Qi An Xin (confidence: 0.7)
Position: President (confidence: 0.8)


Profile for Mr. Zheng (adpw90):
Username: adpw90
Real Name: Zheng Song (confidence: 0.95)
Company: Public Security Bureau/Criminal Investigation (confidence: 0.85)
Position: Team Lead/Manager (confidence: 0.75)


Profile for Mr. Gong (gzp1991101):
Username: gzp1991101
Real Name: Gong Zheping (confidence: 0.7)
Company: Unknown (confidence: 0.1)
Position: General Manager/President/Director (confidence: 0.8)


Profile for Mr. C (lengmo):
Okay, here's an analysis of the provided text and an attempt to extract the requested information.  It's a *very* challenging task as the data is conversational an

## *Company Identification to enrich user profiles*

In [None]:
import pandas as pd
import re
import os
from collections import defaultdict

In [73]:
# The cell above identified a couple of companies that were associated with some people, this cell filters and extracts the company names, searches for them in all CHATS and OCR extractions
# and has a function to identify any URLs so that an LLM can later match company name with URL

# Function to extract URLs
def find_urls(text):
    url_pattern = re.compile(
        r'\b(www\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,})\b'
    )
    return url_pattern.findall(text)

# Collect all mentions in a unified structure
all_mentions = []

# --- CHAT SEARCH SECTION ---
for path in chat_paths:
    if not os.path.isfile(path):
        continue
    try:
        df_chat = pd.read_csv(path)
    except:
        continue
    df_chat.columns = [col.lower() for col in df_chat.columns]
    text_cols = [col for col in df_chat.columns if "message_translation" in col]
    sender_col = df_chat.columns[1]
    receiver_col = df_chat.columns[2]

    for idx, row in df_chat.iterrows():
        for col in text_cols:
            msg = str(row.get(col))
            if pd.isna(msg):
                continue
            for company in companies:
                if company.lower() in msg.lower():
                    all_mentions.append({
                        "source": "chat",
                        "type": "company",
                        "value": company,
                        "message": msg.strip(),
                        "sender": row.get(sender_col),
                        "receiver": row.get(receiver_col),
                        "file": os.path.basename(path),
                        "line": idx
                    })
            urls = find_urls(msg)
            for url in urls:
                all_mentions.append({
                    "source": "chat",
                    "type": "url",
                    "value": url,
                    "message": msg.strip(),
                    "sender": row.get(sender_col),
                    "receiver": row.get(receiver_col),
                    "file": os.path.basename(path),
                    "line": idx
                })

# --- OCR SEARCH SECTION ---
ocr_folder = "ocr_to_csv"
if os.path.exists(ocr_folder):
    for file in os.listdir(ocr_folder):
        if file.endswith(".csv"):
            ocr_path = os.path.join(ocr_folder, file)
            try:
                df_ocr = pd.read_csv(ocr_path)
            except:
                continue
            for idx, row in df_ocr.iterrows():
                msg = str(row.get("ocr translation", ""))
                if pd.isna(msg):
                    continue
                for company in companies:
                    if company.lower() in msg.lower():
                        all_mentions.append({
                            "source": "ocr",
                            "type": "company",
                            "value": company,
                            "message": msg.strip(),
                            "sender": None,
                            "receiver": None,
                            "file": file,
                            "line": idx
                        })
                urls = find_urls(msg)
                for url in urls:
                    all_mentions.append({
                        "source": "ocr",
                        "type": "url",
                        "value": url,
                        "message": msg.strip(),
                        "sender": None,
                        "receiver": None,
                        "file": file,
                        "line": idx
                    })

# Save combined results
df_combined = pd.DataFrame(all_mentions)
df_combined.to_csv("company_websites_mentions.csv", index=False)
print("Saved: company_websites_mentions.csv")


Saved: company_websites_mentions.csv


In [61]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [65]:
# Based on the output of the cell above, we feed those extracted messages and link to an LLM, the LLM analyzes and concludes by offering a company profile

df = pd.read_csv("company_websites_mentions.csv")
df = df.dropna(subset=["message"])

def build_company_inference_prompt(df_subset):
    lines = [
        "You are analyzing a set of messages extracted from chat logs and OCR documents.",
        "Your task is to infer which company or organization these messages are associated with.",
        "Use all available clues — names, terminology, technology, people, structure, or relationships — to reach a clear conclusion.",
        "Respond with one company name only, and a short justification, and JUST a link to their website (Check for documents, and only valid https, if in doubt try to match it so that it can be investiagted further).",
        "\nMessages:\n"
    ]

    for i, row in df_subset.iterrows():
        lines.append(
            f"{i+1}. Source: {row['source']} | File: {row['file']} | Line: {row['line']}\n   Message: \"{row['message']}\""
        )

    lines.append(
        "\nBased on the above, respond strictly in this format:\n"
        "Company Name: <...>\n"
        "Reasoning: <concise summary of why this company fits>\n"
        "Website: <ONLY ONE LINK, no explanation or JUSTIFICATION, Extract it 1 to 1 from the document>\n"
        "Confidence: <0.0–1.0>\n"
    )

    return "\n".join(lines)

def query_ollama(prompt, model="gemma3:27b"):
    try:
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": model, "prompt": prompt, "stream": False},
            timeout=1000
        )
        res.raise_for_status()
        return res.json()["response"].strip()
    except Exception as e:
        return f"[error: {e}]"

prompt = build_company_inference_prompt(df)
response = query_ollama(prompt)


lines = response.splitlines()
parsed = {"Company Name": "", "Reasoning": "", "Website": "", "Confidence": ""}
for line in lines:
    if line.lower().startswith("company name:"):
        parsed["Company Name"] = line.split(":", 1)[1].strip()
    elif line.lower().startswith("reasoning:"):
        parsed["Reasoning"] = line.split(":", 1)[1].strip()
    elif line.lower().startswith("website:"):
        parsed["Website"] = line.split(":", 1)[1].strip()
    elif line.lower().startswith("confidence:"):
        parsed["Confidence"] = line.split(":", 1)[1].strip()

df_out = pd.DataFrame([parsed])
df_out.to_csv("results/identified_company_info.csv", index=False)


print("Saved:  'identified_company_info.csv'")

Saved:  'identified_company_info.csv'


In [75]:
# Domain to look up
domain = "i-soon.net"

# Perform WHOIS lookup
try:
    domain_info = whois.whois(domain)

    # Display WHOIS information
    print("\nWHOIS Information for:", domain)
    print("Domain Name:", domain_info.domain_name)
    print("Registrar:", domain_info.registrar)
    print("Creation Date:", domain_info.creation_date)
    print("Expiration Date:", domain_info.expiration_date)
    print("Updated Date:", domain_info.updated_date)
    print("Name Servers:", domain_info.name_servers)
    print("Emails:", domain_info.emails)
    print("Registrant Name:", domain_info.get('name'))
    print("Registrant Organization:", domain_info.get('org'))

except Exception as e:
    print("WHOIS lookup failed:", e)



WHOIS Information for: i-soon.net
Domain Name: I-SOON.NET
Registrar: GoDaddy.com, LLC
Creation Date: [datetime.datetime(2010, 9, 7, 8, 18, 32), datetime.datetime(2010, 9, 7, 3, 18, 32)]
Expiration Date: [datetime.datetime(2028, 9, 7, 8, 18, 32), datetime.datetime(2028, 9, 7, 3, 18, 32)]
Updated Date: [datetime.datetime(2025, 3, 5, 14, 35, 38), datetime.datetime(2023, 7, 13, 21, 20, 50)]
Name Servers: ['NS1.FBI.SEIZED.GOV', 'NS2.FBI.SEIZED.GOV', 'NS15.DOMAINCONTROL.COM', 'NS16.DOMAINCONTROL.COM']
Emails: abuse@godaddy.com
Registrant Name: Registration Private
Registrant Organization: Domains By Proxy, LLC


## *Create User Profile based on the company, number of messages sent, and who each individual talked to*

In [6]:
import os
import pandas as pd
import requests
import re
from collections import Counter, defaultdict

In [15]:
df_mentions = pd.read_csv("all_user_name_mentions.csv")
df_csvs = pd.read_csv("results/updated_with_csv_paths.csv")
df_company_map = pd.read_csv("results/identified_company_info.csv")

# Extract known company info
known_company_name = df_company_map["Company Name"].iloc[0].strip()
known_company_website = df_company_map["Website"].iloc[0].strip()

# Count messages and communication partners
def count_messages_and_interactions(csv_path):
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        print("Error reading file:", csv_path, "-", str(e))
        return Counter(), Counter(), defaultdict(set)

    df.columns = [col.lower() for col in df.columns]
    sender_column = next((c for c in ['from', 'sender', 'user'] if c in df.columns), None)
    recipient_column = next((c for c in ['to', 'receiver', 'recipient'] if c in df.columns), None)

    if not sender_column:
        return Counter(), Counter(), defaultdict(set)

    df[sender_column] = df[sender_column].fillna('').astype(str).str.strip()
    if recipient_column:
        df[recipient_column] = df[recipient_column].fillna('').astype(str).str.strip()

    message_counts = Counter()
    interaction_counts = Counter()
    user_partners = defaultdict(set)

    for _, row in df.iterrows():
        sender = row[sender_column]
        message_counts[sender] += 1

        if recipient_column:
            recipient = row[recipient_column]
            if sender and recipient:
                pair = tuple(sorted((sender, recipient)))
                interaction_counts[pair] += 1
                user_partners[sender].add(recipient)
                user_partners[recipient].add(sender)

    return message_counts, interaction_counts, user_partners

# Aggregate totals
total_message_counts = Counter()
total_user_partners = defaultdict(set)

for csv_path in df_csvs['chats'].dropna().unique():
    if isinstance(csv_path, str) and csv_path.endswith('.csv') and os.path.exists(csv_path):
        msg_counts, _, partners = count_messages_and_interactions(csv_path)
        total_message_counts.update(msg_counts)
        for user, pset in partners.items():
            total_user_partners[user].update(pset)

# Group user messages
grouped_users = df_mentions.groupby("username")
all_profiles = []

# Prompt with improved context
def create_prompt(user_messages, username, real_name, company_name, message_count, conversation_partners):
    lines = [
        f"You are analyzing chat logs mentioning a person named '{real_name}'.",
        f"The username is: {username}",
        f"The person works at: {company_name}",
        f"Total messages sent: {message_count}",
        f"Conversation partners: {', '.join(sorted(conversation_partners)) if conversation_partners else 'None'}",
        "",
        "Use the number of messages and the diversity of conversation partners as signals.",
        "Higher volume and broader communication may imply leadership or coordination roles.",
        "",
        "Your job is to guess their full real name and job position.",
        "Follow these rules:",
        "- Give exactly ONE real name and ONE position.",
        "- Add a confidence score (between 0.0 and 1.0).",
        "- Do NOT explain anything.",
        "- Do NOT write extra information.",
        "",
        "Use this format exactly:",
        f"Username: {username}",
        "Real Name: <REAL NAME> (confidence: <CONFIDENCE_SCORE>)",
        f"Company: {company_name} - Website: {known_company_website}",
        "Position: <POSITION> (confidence: <CONFIDENCE_SCORE>)",
        "",
        "Messages:"
    ]

    for idx, row in user_messages.iterrows():
        sender = str(row.get('sender', '')).strip()
        receiver = str(row.get('receiver', '')).strip()
        message = str(row.get('message', '')).strip()
        lines.append(f"{idx+1}. Sender: {sender} | Receiver: {receiver} | Message: \"{message}\"")

    return "\n".join(lines)

# Call local LLM via HTTP
def ask_llm(prompt_text, model_name="gemma3:27b"):
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": model_name, "prompt": prompt_text, "stream": False},
            timeout=1000
        )
        response.raise_for_status()
        return response.json()["response"].strip()
    except Exception as e:
        return f"[error: {e}]"

# Parse LLM output
def parse_response(text, username):
    try:
        name_match = re.search(r"Real Name:\s*(.+?)\s*\(confidence:\s*(\d\.\d+)\)", text)
        company_match = re.search(r"Company:\s*(.+?)\s*-\s*Website:\s*(.+)", text)
        position_match = re.search(r"Position:\s*(.+?)\s*\(confidence:\s*(\d\.\d+)\)", text)

        if name_match and company_match and position_match:
            return {
                "username": username,
                "name": real_name,
                "real_name": name_match.group(1).strip(),
                "real_name_confidence": float(name_match.group(2)),
                "company": company_match.group(1).strip(),
                "website": company_match.group(2).strip(),
                "position": position_match.group(1).strip(),
                "position_confidence": float(position_match.group(2))
            }
    except Exception as e:
        print(f"Error parsing response for {username}: {e}")

    return {
        "username": username,
        "name": real_name,
        "real_name": "[parse error]",
        "real_name_confidence": 0.0,
        "company": known_company_name,
        "website": known_company_website,
        "position": "[parse error]",
        "position_confidence": 0.0
    }

# LLM enrichment loop
for username, user_messages in grouped_users:
    if user_messages.empty:
        continue

    real_name = user_messages["real_name"].iloc[0]
    message_count = total_message_counts.get(username, 0)
    partners = total_user_partners.get(username, set())

    prompt = create_prompt(user_messages, username, real_name, known_company_name, message_count, partners)
    result_text = ask_llm(prompt)
    profile = parse_response(result_text, username)
    profile["message_count"] = message_count
    profile["conversation_partners"] = ", ".join(sorted(partners))
    all_profiles.append(profile)

    print(f"\nDone: {username}")
    print("Name:", real_name)
    print("Full Name:", profile["real_name"])
    print("Company:", profile["company"])
    print("Website:", profile["website"])
    print("Position:", profile["position"])
    print("Messages Sent:", profile["message_count"])
    print("Talked to:", profile["conversation_partners"])

df_output = pd.DataFrame(all_profiles)
df_output.to_csv("results/llm_user_profiles_enriched.csv", index=False)
print("\nAll profiles saved to llm_user_profiles_enriched.csv")


Done: SWEET5683yao
Name: Brother Qing
Full Name: Chen Qing
Company: Anxun Information Technology Co., Ltd.
Website: www.I-soon.net
Position: Project Manager
Messages Sent: 35
Talked to: wxid_7p054rmzkhqf21

Done: Shutd0wn
Name: Mr. Wu
Full Name: Wu Haibo
Company: Anxun Information Technology Co., Ltd.
Website: www.I-soon.net
Position: General Manager
Messages Sent: 3675
Talked to: lengmo, wxid_xusilpfkh31g21

Done: adpw90
Name: Mr. Zheng
Full Name: Zheng Wei
Company: Anxun Information Technology Co., Ltd.
Website: www.I-soon.net
Position: Team Lead
Messages Sent: 235
Talked to: wxid_7p054rmzkhqf21

Done: gzp1991101
Name: Mr. Gong
Full Name: Gong Tao
Company: Anxun Information Technology Co., Ltd.
Website: www.I-soon.net
Position: General Manager
Messages Sent: 603
Talked to: wxid_7p054rmzkhqf21, wxid_mgh25nentc4u22, yanzi542766277

Done: lengmo
Name: Mr. C
Full Name: [parse error]
Company: Anxun Information Technology Co., Ltd.
Website: www.I-soon.net
Position: [parse error]
Messages 

# **Financial Infrastructures Analysis**

## *RAG Implementation TEST - not so feasable*

In [2]:
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
import pandas as pd
import os

In [3]:
import os
import pandas as pd
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings
from langchain.schema import Document

# Load a single CSV file
csv_file = "csvs/10.csv"
df = pd.read_csv(csv_file)

# Initialize the embedding model
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# Setup vector DB path
db_location = "./chroma_financial_chat_db"
add_documents = not os.path.exists(db_location)

# Only add documents if the database doesn't already exist
if add_documents:
    documents = []
    ids = []
    for i, row in df.iterrows():
        document = Document(
            page_content=row["message_translation"],
            metadata={
                "sender": row["From"],
                "receiver": row["To"],
                "timestamp": row["Time"],
                "source_file": os.path.basename(csv_file),   # Track source file
                "line_number": i + 1                         # Track line number
            },
            id=f"{os.path.basename(csv_file)}_{i}"            # Ensure unique ID
        )
        documents.append(document)
        ids.append(f"{os.path.basename(csv_file)}_{i}")

# Create or load the Chroma vector store
vector_store = Chroma(
    collection_name="financial_chats",
    persist_directory=db_location,
    embedding_function=embeddings
)

if add_documents:
    vector_store.add_documents(documents=documents, ids=ids)

# Setup retriever for use in RAG
retriever = vector_store.as_retriever(search_kwargs={"k": 15})

  vector_store = Chroma(


In [4]:
model = OllamaLLM(model="gemma3:27b") 

template = """
You are a cybersecurity analyst reviewing translated chat messages for financial intelligence.

Your task is to extract ONLY the messages that contain **financially relevant information**, such as:

- Payments or money transfers
- Amounts, currencies, or sums of money
- Payment methods (cash, crypto, bank, wallets)
- Account numbers, codes, or financial identifiers
- Mentions of financial roles (payer, payee, client, broker)
- References to suspicious or recurring financial activity

From the messages below:
{chats}

Return ONLY the messages that meet any of the criteria above.

For each relevant message, output:
- The **timestamp** (if provided in the metadata or inferred)
- The **translated content** of the message

Do not summarize. Do not explain. Do not include non-financial content.  
**DO NOT create or infer messages. Use only the exact text from the messages provided. If no messages match, return nothing.**

Output format:
[Time] Message
"""

prompt = ChatPromptTemplate.from_template(template)
chain = prompt | model

In [5]:
question = "Extract only the chat messages that refer to money, payments, currencies, accounts, financial identifiers, or financial discussions"

# Retrieve relevant chats
retrieved_docs = retriever.invoke(question)

# Combine messages with metadata for context
chat_texts = "\n\n".join([
    f"[{doc.metadata['source_file']} - line {doc.metadata['line_number']}] {doc.page_content}"
    for doc in retrieved_docs
])

# Run through the LLM
response = chain.invoke({"chats": chat_texts})

# Output the result
print(response)

[10.csv - line 21] Account TGtadie, phone: 18510867099, name: Wang Ning.






## *Financial Structures Identification via list of terms*

In [3]:
# GPT generated list of financial terms that could be relevant to a dataleak

financial_terms = [
    # Core Financial Infrastructure
    "SWIFT", "IBAN", "ACH", "SEPA", "RTGS", "CHIPS",

    # Payment Systems & Platforms
    "PayPal", "Stripe", "Visa", "Mastercard", "Zelle", "Alipay", "WeChat Pay", "UnionPay",

    # Cryptocurrency & Blockchain
    "Bitcoin", "BTC", "Ethereum", "ETH", "USDT", "Stablecoin", "Token", "Wallet", 
    "Cold wallet", "Hot wallet", "Private key", "Seed phrase", "TXID", "Crypto", 
    "Exchange", "Binance", "Coinbase", "Metamask", "Smart contract", "Airdrop",

    # Salaries, Transfers, and Payroll
    "Salary", "Payroll", "Paycheck", "Wage", "Bonus", "Commission", "Remittance", 
    "Transfer", "Wire transfer", "Bank transfer", "Stipend", "Payout", "Income", 
    "CTC", "Take-home", "Compensation", "Monthly package",

    # General Financial Terms
    "Invoice", "Transaction", "Payment", "Tax", "Loan", "Interest", "Repayment", 
    "Bill", "Balance", "Ledger", "Receipt", "Clearing", "Settlement", "Audit",

    # China-Specific Financial Infrastructure
    "e-CNY", "Digital RMB", "CIPS", "Zhima Credit", "PBOC", "Tenpay", "MYBank",

    # Informal/Shadow Finance
    "Cash out", "Red packet", "Top-up", "Recharge", "Referral bonus", "Middleman"
]


In [8]:
import pandas as pd
import os
import re

In [13]:
terms_lower = [term.lower() for term in financial_terms]
index_df = pd.read_csv('results/updated_with_csv_paths.csv')

matched_rows = []

for chat_path in index_df['chats']:
    if os.path.exists(chat_path):
        try:
            chat_df = pd.read_csv(chat_path)
            chat_df.columns = [col.strip().lower() for col in chat_df.columns]  # Normalize headers

            if 'message_translation' not in chat_df.columns:
                continue

            for i, row in chat_df.iterrows():
                message = str(row.get('message_translation', '')).lower()

                if any(re.search(r'\b' + re.escape(term) + r'\b', message) for term in terms_lower):
                    matched_rows.append({
                        'file': chat_path,
                        'row_index': i,
                        'from': row.get('from', ''),
                        'to': row.get('to', ''),
                        'message_translation': row.get('message_translation', ''),
                    })

        except Exception as e:
            print(f"Failed to read {chat_path}: {e}")
    else:
        print(f"File not found: {chat_path}")

matched_df = pd.DataFrame(matched_rows)
matched_df.to_csv('financial_data_chats.csv', index=False)

## *Salary Analysis*

In [None]:
import pandas as pd
import re

In [24]:
df = pd.read_csv("financial_data_chats.csv")

# Salary-related terms
salary_keywords = [
    "salary", "pay", "wage", "bonus", "compensation", "income", "payroll",
    "stipend", "take-home", "ctc", "payment", "commissions", "remuneration"
]

pattern_string = r'\b(' + '|'.join(map(re.escape, salary_keywords)) + r')\b'
salary_df = df[df['message_translation'].str.contains(pattern_string, na=False, case=False, regex=True)]

print(f"Extracted {len(salary_df)} salary-related messages.")
salary_df.head()

Extracted 207 salary-related messages.


  salary_df = df[df['message_translation'].str.contains(pattern_string, na=False, case=False, regex=True)]


Unnamed: 0,file,row_index,from,to,message_translation
1,csvs/28.csv,19,wxid_xusilpfkh31g21,Shutd0wn,"Last time, C total from Yangzhou said that the..."
2,csvs/28.csv,28,wxid_xusilpfkh31g21,Shutd0wn,"The 500k loan from China Bank, they're asking ..."
5,csvs/1.csv,5,lengmo,Shutd0wn,Regarding the salary adjustments for the heads...
7,csvs/1.csv,200,lengmo,Shutd0wn,Haven't we been tight on funds recently? Many ...
8,csvs/1.csv,237,lengmo,Shutd0wn,It's expensive too... now how many calls a day...


In [28]:
import math
import requests

In [29]:
# Use an LLM to extract relevant info from the df above that holds salary information

def make_prompt(messages):
    message_block = "\n".join([
        f"- Message: {row['message_translation']} (From: {row['from']} → To: {row['to']})"
        for _, row in messages.iterrows()
    ])
    return f"""
You are an forensics analyst auditing internal employee communications for salary-related issues.

Below is a batch of real messages. Analyze only what is written — do not assume anything not stated.

For each bullet point:
- Begin with the sender ID (from 'From')
- Include exact numbers or amounts (if any)
- Summarize the key message in terms of compensation or payment
- Flag complaints, disputes, delays, or bonus mentions

Messages:
{message_block}

Give a structured list of bullet points with clear attribution and data-based content only.
"""

def query_ollama(prompt, model="gemma3:27b"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    try:
        response = requests.post(url, json=payload, timeout=600)
        response.raise_for_status()
        return response.json()["response"].strip()
    except Exception as e:
        return f"[ERROR] {e}"

# Split into batches and process 
batch_size = 30  
n_batches = math.ceil(len(salary_df) / batch_size)

print(f"Processing {len(salary_df)} messages in {n_batches} batches...\n")

full_report = []

for i in range(n_batches):
    batch_df = salary_df.iloc[i * batch_size : (i + 1) * batch_size]
    prompt = make_prompt(batch_df)
    summary = query_ollama(prompt)
    
    print(f"\nBatch {i + 1}/{n_batches} Summary:\n")
    print(summary)
    full_report.append(f"### Batch {i + 1}\n{summary}")

with open("results/full_salary_analysis.txt", "w") as f:
    f.write("\n\n".join(full_report))

Processing 207 messages in 7 batches...


📦 Batch 1/7 Summary:

Here's a structured analysis of the messages, focusing on compensation/payment-related information and flagging issues, as requested:

* **wxid_xusilpfkh31g21:** Mentions a client requiring a 6-month trial before payment.
* **wxid_xusilpfkh31g21:** References a 500k loan from China Bank and owing 22.5 (currency unspecified) from projects. Plans to pay 12.5 first.
* **lengmo:** Asks for input on salary adjustments for Group 2 and Group 3 heads, suggesting referencing Wang Yan’s ratio.
* **lengmo:** Notes tight funds and held-up payment requests.
* **lengmo:** References numerous demands for payment.
* **Shutd0wn:** States no payment received and advance payment isn’t feasible.
* **lengmo:** Discusses a contract signed *after* delivery regarding payment terms.
* **lengmo:** Suggests canceling a contract due to concerns about not receiving the first payment.
* **Shutd0wn:** Expresses intent to request payment.
* **lengmo:** H