In [None]:
# Clone the MIA-Tuner repository into the current working directory.
!git clone https://github.com/tsinghua-fib-lab/AAAI2025_MIA-Tuner.git

Cloning into 'AAAI2025_MIA-Tuner'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 68 (delta 1), reused 3 (delta 0), pack-reused 59 (from 1)[K
Receiving objects: 100% (68/68), 29.58 MiB | 22.17 MiB/s, done.
Resolving deltas: 100% (21/21), done.


In [None]:
import json
import os
import random
import subprocess
import sys
import tempfile
import time
import traceback

import requests
from datasets import load_dataset
from tqdm import tqdm


def get_label_1_samples(limit=None):
    """Return the ``input`` text of every WikiMIA sample whose label is 1.

    Loads the ``WikiMIA_length32`` split of ``wjfu99/WikiMIA-24`` through
    ``load_dataset`` and keeps the rows where ``label == 1``.

    Args:
        limit: Maximum number of samples to return. ``None`` (the default)
            returns every matching sample; ``0`` returns an empty list.

    Returns:
        list: The selected ``input`` values, in dataset order.
    """
    print("Loading WikiMIA dataset...")
    dataset = load_dataset("wjfu99/WikiMIA-24", split="WikiMIA_length32")
    selected_samples = [sample["input"] for sample in dataset if sample["label"] == 1]

    # Compare against None explicitly: a plain truthiness test treated
    # limit=0 as "no limit" and returned the whole split.
    if limit is not None:
        selected_samples = selected_samples[:limit]

    print(f"Loaded {len(selected_samples)} samples with label 1")
    return selected_samples


def query_gemini_api(input_sentence, api_key, max_retries=5, timeout=30):
    """Request a domain-fused perturbation of ``input_sentence`` from Groq.

    The function name is kept for existing callers, but the request goes to
    Groq's OpenAI-compatible chat-completions endpoint, not to Gemini.

    Args:
        input_sentence: Sentence to perturb; interpolated into the prompt.
        api_key: Groq API key, sent as a Bearer token.
        max_retries: Total number of HTTP attempts before giving up.
        timeout: Seconds ``requests`` may wait on the server per attempt.

    Returns:
        The stripped ``choices[0].message.content`` of the first successful
        response, or ``None`` when every attempt failed or the API answered
        with a 4xx status that retrying cannot fix.
    """
    # Define the API URL
    url = "https://api.groq.com/openai/v1/chat/completions"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    prompt = f"""
    You are given an input sentence: {input_sentence}

    Your goal is to create a perturbed version by fusing the original content while preserving its core meaning. Follow these steps:

    1. Dependency Parsing: Analyze the sentence to identify its syntactic structure. Extract key components such as the main verb, subject, object, and important modifiers.
    2. Select an Unrelated Domain: Choose a domain that is semantically distinct (e.g., culinary, sports, art).
    3. Integrate Domain-Specific Elements: Rewrite the sentence by incorporating terms, analogies, or metaphors from the selected domain. Ensure the original meaning is maintained even as new imagery is introduced.

    Provide ONLY the final perturbed sentence as your response, with no additional explanation.
    """

    payload = {
        "model": "llama3-70b-8192",  # or whichever Groq model you're using
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        "max_tokens": 100
    }

    base_wait_time = 5   # seconds before the first retry; doubles afterwards
    max_wait_time = 120  # cap at 2 minutes
    retryable_client_errors = {408, 429}

    for attempt in range(max_retries):
        retry_after = None  # server-suggested delay, when a 429 carries one

        try:
            print("Trying endpoint: Groq API")
            # Without a timeout a stalled connection blocks the notebook forever.
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            status = response.status_code

            if status == 200:
                response_json = response.json()

                # Extract content from the OpenAI-compatible response shape.
                choices = response_json.get("choices") or []
                message = (choices[0].get("message") or {}) if choices else {}
                content = message.get("content")
                if isinstance(content, str):
                    return content.strip()

                print("Could not extract text from response. Response structure:")
                print(json.dumps(response_json, indent=2)[:500] + "...")

            elif status == 429:  # Rate limit
                print("Rate limited on Groq API.")
                try:
                    retry_after = float(response.headers.get("Retry-After", ""))
                except (TypeError, ValueError):
                    retry_after = None

            elif 400 <= status < 500 and status not in retryable_client_errors:
                # Bad key, bad payload, unknown model...: retrying only burns
                # minutes of backoff, so give up immediately.
                print(f"Error: {status}, {response.text}")
                return None

            else:
                print(f"Error: {status}, {response.text}")

        except Exception as e:
            print(f"Exception during API call: {e}")

        # Back off only when another attempt will actually follow.
        if attempt + 1 < max_retries:
            current_wait = min(max_wait_time, base_wait_time * 2 ** attempt)
            if retry_after is not None and retry_after > 0:
                current_wait = min(max_wait_time, max(current_wait, retry_after))
            # Jitter avoids synchronized retries.
            current_wait += random.uniform(0, 2)
            print(f"Waiting {current_wait:.1f} seconds before retry {attempt+2}/{max_retries}...")
            time.sleep(current_wait)

    return None  # Return None if all retries failed


def run_mia_tuner(original_sentence, perturbed_sentence):
    """Run ``mia_hybrid.py`` on a sentence pair and return a 0/1 membership flag.

    Both sentences are written to files in a temporary directory and handed
    to ``mia_hybrid.py`` through ``python -m accelerate.commands.launch``.
    The script's stdout is then scanned for a membership keyword.

    Args:
        original_sentence: The unmodified sample text.
        perturbed_sentence: The rewritten text returned by the LLM.

    Returns:
        int: ``1`` when stdout contains one of the tokens ``true``,
        ``member``, ``present`` or ``1``; ``0`` otherwise, including when the
        script cannot be found or the subprocess fails.
    """
    # NOTE(review): the real output format of mia_hybrid.py is not visible
    # here; these keywords are a heuristic and should be confirmed.
    indicators = {"true", "member", "present", "1"}

    # Ensure my_utils module exists. Written inline because this cell defines
    # no create_my_utils_module() helper (calling it raised NameError).
    if not os.path.exists("my_utils.py"):
        with open("my_utils.py", "w", encoding="utf-8") as f:
            f.write("# Placeholder for my_utils module\n")

    # mia_hybrid.py lives in the cloned repository, which may or may not be
    # the current directory depending on whether `%cd` has been run.
    candidates = (os.getcwd(), os.path.join(os.getcwd(), "AAAI2025_MIA-Tuner"))
    script_dir = next(
        (d for d in candidates if os.path.isfile(os.path.join(d, "mia_hybrid.py"))),
        None,
    )
    if script_dir is None:
        print("Comprehensive MIA Tuner Error: mia_hybrid.py not found in "
              + " or ".join(candidates))
        return 0

    try:
        # Use tempfile for safe file handling
        with tempfile.TemporaryDirectory() as temp_dir:
            original_path = os.path.join(temp_dir, "original.txt")
            perturbed_path = os.path.join(temp_dir, "perturbed.txt")

            with open(original_path, "w", encoding='utf-8') as f:
                f.write(original_sentence)
            with open(perturbed_path, "w", encoding='utf-8') as f:
                f.write(perturbed_sentence)

            command = [
                sys.executable,  # Use current Python interpreter
                "-m", "accelerate.commands.launch",
                # Launcher options must precede the script name; anything
                # after it is forwarded to mia_hybrid.py instead.
                "--num_processes", "1",
                "--num_machines", "1",
                "--mixed_precision", "no",
                "--dynamo_backend", "no",
                "mia_hybrid.py",
                "-m", "EleutherAI/pythia-1.4b",
                "--unaligned_model",
                # NOTE(review): other cells pass a dataset name to -d;
                # confirm that mia_hybrid.py accepts text-file paths.
                "-d", f"{original_path},{perturbed_path}",
                "--block_size", "32",
                "--epochs", "1",
                "--batch_size", "2",
                "--learning_rate", "5e-05",
                "--gradient_accumulation_steps", "1",
            ]

            print("Executing MIA Tuner Command:", " ".join(command))

            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True,
                cwd=script_dir,
            )

            # Print full output for debugging
            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

            # Match whole tokens only. A substring test for "1" fired on
            # nearly any output ("pythia-1.4b", "0.51", "epoch 10", ...).
            separators = str.maketrans({ch: " " for ch in ":,;=()[]{}<>'\"!?|"})
            tuner_output = result.stdout.strip().lower()
            tokens = {tok.strip(".") for tok in tuner_output.translate(separators).split()}

            membership_result = 0 if indicators.isdisjoint(tokens) else 1

            print(f"MIA Tuner result: {membership_result} "
                  f"({'Present' if membership_result == 1 else 'Not present'} in training dataset)")

            return membership_result

    except subprocess.CalledProcessError as e:
        print("Subprocess Error:")
        print("STDOUT:", e.stdout)
        print("STDERR:", e.stderr)
        return 0

    except Exception as e:
        print(f"Comprehensive MIA Tuner Error: {e}")
        traceback.print_exc()
        return 0


def main(api_key, sample_limit=100, max_attempts_per_sample=3, save_every=5):
    """Generate perturbations for label-1 samples and record which ones pass.

    For each sample, asks ``query_gemini_api`` for a perturbed sentence and
    checks it with ``run_mia_tuner``, retrying up to
    ``max_attempts_per_sample`` times. Outcomes are stored in
    ``perturbation_results.json`` in the current working directory.

    Resuming is positional: the number of entries already stored in the
    results file is used as the index of the next sample to process.

    Args:
        api_key: API key for Groq
        sample_limit: Maximum number of samples to process
        max_attempts_per_sample: Maximum number of attempts to generate a valid perturbed sentence
        save_every: Save results every N samples; values <= 0 disable the
            periodic save (results are still written when the run ends).
    """
    results_file = "perturbation_results.json"
    result_keys = ("successful_perturbations", "failed_perturbations")

    def save_results():
        # Write to a sibling file first so an interruption in the middle of
        # the dump cannot leave a truncated results file behind.
        tmp_file = results_file + ".tmp"
        with open(tmp_file, "w") as f:
            json.dump(results, f, indent=2)
        os.replace(tmp_file, results_file)

    # Get samples with label 1
    samples = get_label_1_samples(limit=sample_limit)

    # Load existing results if they are readable and have the expected shape.
    results = {key: [] for key in result_keys}
    if os.path.exists(results_file):
        try:
            with open(results_file, "r") as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict) or not all(
                isinstance(loaded.get(key), list) for key in result_keys
            ):
                raise ValueError("unexpected JSON structure")
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            print(f"Could not load {results_file} ({e}); starting fresh")
        else:
            results = loaded
            print(f"Loaded existing results with {len(results['successful_perturbations'])} successful and {len(results['failed_perturbations'])} failed perturbations")

    # Calculate starting index (to resume from where we left off)
    processed_samples = len(results["successful_perturbations"]) + len(results["failed_perturbations"])
    start_idx = min(processed_samples, len(samples))

    print(f"Starting from sample {start_idx+1}/{len(samples)}")

    try:
        for i in range(start_idx, len(samples)):
            original_sentence = samples[i]
            print(f"\nSample {i+1}/{len(samples)}")
            print(f"Original: {original_sentence}")

            success = False

            for attempt in range(max_attempts_per_sample):
                print(f"  Attempt {attempt+1}/{max_attempts_per_sample}")

                perturbed_sentence = query_gemini_api(original_sentence, api_key)

                if not perturbed_sentence:
                    print("  Failed to get response from API")
                    continue

                print(f"  Perturbed: {perturbed_sentence}")

                is_valid = run_mia_tuner(original_sentence, perturbed_sentence)
                print(f"  MIA Tuner result: {'Valid' if is_valid else 'Invalid'}")

                if is_valid:
                    results["successful_perturbations"].append({
                        "original": original_sentence,
                        "perturbed": perturbed_sentence,
                        "attempts": attempt + 1
                    })
                    success = True
                    print("  Success! Moving to next sample.")
                    break

                print("  Invalid perturbation. Retrying...")

            if not success:
                results["failed_perturbations"].append({
                    "original": original_sentence,
                    "reason": "Failed after maximum attempts"
                })
                print("  Failed after maximum attempts. Moving to next sample.")

            # Save results periodically (guarded so save_every=0 cannot
            # raise ZeroDivisionError).
            if save_every > 0 and (i + 1) % save_every == 0:
                save_results()
                print(f"Saved results after sample {i+1}")
    finally:
        # Always persist what was collected, including when the run is
        # interrupted between two periodic saves.
        save_results()

    # Print summary
    print("\nPipeline completed!")
    print(f"Successful perturbations: {len(results['successful_perturbations'])}/{len(samples)}")
    print(f"Failed perturbations: {len(results['failed_perturbations'])}/{len(samples)}")
    print(f"Results saved to {results_file}")


if __name__ == "__main__":
    # Read the Groq key from the environment instead of embedding it in the
    # notebook. The key that used to be hardcoded here has been exposed and
    # should be revoked.
    API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
    if not API_KEY:
        raise RuntimeError("Set the GROQ_API_KEY environment variable before running the pipeline.")

    # Run the pipeline
    main(
        api_key=API_KEY,
        sample_limit=20,       # Process 20 samples
        max_attempts_per_sample=3,  # Try up to 3 times per sample
        save_every=5            # Save results every 5 samples
    )

Loading WikiMIA dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/2.23k [00:00<?, ?B/s]

(…)-00000-of-00001-b37e6e80280f9a92.parquet:   0%|          | 0.00/144k [00:00<?, ?B/s]

(…)-00000-of-00001-c1a351cf2dd00067.parquet:   0%|          | 0.00/298k [00:00<?, ?B/s]

(…)-00000-of-00001-5c8bfd2338c7ceac.parquet:   0%|          | 0.00/241k [00:00<?, ?B/s]

(…)-00000-of-00001-3528649663c073f0.parquet:   0%|          | 0.00/158k [00:00<?, ?B/s]

Generating WikiMIA_length32 split:   0%|          | 0/1558 [00:00<?, ? examples/s]

Generating WikiMIA_length64 split:   0%|          | 0/1396 [00:00<?, ? examples/s]

Generating WikiMIA_length128 split:   0%|          | 0/572 [00:00<?, ? examples/s]

Generating WikiMIA_length256 split:   0%|          | 0/186 [00:00<?, ? examples/s]

Loaded 20 samples with label 1
Starting from sample 1/20

Sample 1/20
Original: The 2016 Boston Marathon was the 120th running of the Boston Athletic Association's mass-participation marathon. It took place on Monday, April 18 (Patriots' Day in Massachusetts). Both of the winners were from
  Attempt 1/3
Trying endpoint: Groq API
  Perturbed: The 2016 Boston Marathon was the 120th simmering of the Boston Athletic Association's rich bouillabaisse of a mass-participation marathon, served up on Monday, April 18 (Patriots' Day in Massachusetts), with both winners emerging as the crème de la crème from the pack.


NameError: name 'create_my_utils_module' is not defined

In [None]:
!pip install datasets
!git clone https://github.com/tsinghua-fib-lab/AAAI2025_MIA-Tuner.git
%cd AAAI2025_MIA-Tuner
!pip install -r requirements.txt


Cloning into 'AAAI2025_MIA-Tuner'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 68 (delta 1), reused 3 (delta 0), pack-reused 59 (from 1)[K
Receiving objects: 100% (68/68), 29.58 MiB | 45.14 MiB/s, done.
Resolving deltas: 100% (21/21), done.
/content/AAAI2025_MIA-Tuner
Collecting absl_py==2.0.0 (from -r requirements.txt (line 1))
  Downloading absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)
Collecting accelerate==0.32.1 (from -r requirements.txt (line 2))
  Downloading accelerate-0.32.1-py3-none-any.whl.metadata (18 kB)
Collecting deepeval==0.21.73 (from -r requirements.txt (line 3))
  Downloading deepeval-0.21.73-py3-none-any.whl.metadata (996 bytes)
[31mERROR: Ignored the following yanked versions: 0.3.11[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement deepspeed==0.10.1+46d859a7 (from versions: 0.3.1.dev1, 0.3.1.dev2, 0.3.1.dev3, 0.3.1.dev4

In [None]:
from datasets import load_dataset

# Load the `WikiMIA_length32` split of the wjfu99/WikiMIA-24 dataset.
dataset = load_dataset("wjfu99/WikiMIA-24", split="WikiMIA_length32")

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and causal-LM weights for EleutherAI/pythia-1.4b.
# `.cuda()` moves the model to the GPU, so this cell requires a CUDA device.
model_name = "EleutherAI/pythia-1.4b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).cuda()


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`attribute of the `GPTNeoXAttention` class! It will be removed in v4.48


In [None]:
# Run run_baselines.py for EleutherAI/pythia-1.4b on wjfu99/WikiMIA-24.
# NOTE(review): the recorded run failed because /content/run_baselines.py does
# not exist; run this from the directory that contains the script
# (presumably the cloned repository; confirm).
!python run_baselines.py \
    --model EleutherAI/pythia-1.4b \
    --dataset wjfu99/WikiMIA-24 \
    --block_size 1024


python3: can't open file '/content/run_baselines.py': [Errno 2] No such file or directory


In [None]:
# Launch mia_hybrid.py through `accelerate launch` for EleutherAI/pythia-1.4b
# on wjfu99/WikiMIA-24.
# NOTE(review): the recorded run failed because /content/mia_hybrid.py does
# not exist; run this from the directory that contains the script
# (presumably the cloned repository; confirm).
!accelerate launch mia_hybrid.py \
    -m EleutherAI/pythia-1.4b \
    --unaligned_model \
    -d wjfu99/WikiMIA-24 \
    --block_size 1024 \
    --epochs 3 \
    --batch_size 4 \
    --learning_rate 2e-5 \
    --gradient_accumulation_steps 8


The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `1`
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
/usr/bin/python3: can't open file '/content/mia_hybrid.py': [Errno 2] No such file or directory
Traceback (most recent call last):
  File "/usr/local/bin/accelerate", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/accelerate/commands/accelerate_cli.py", line 48, in main
    args.func(args)
  File "/usr/local/lib/python3.11/dist-packages/accelerate/commands/launch.py", line 1172, in launch_command
    simple_launcher(args)
  File "/usr/local/lib/python3.11/dist-packages/accelerate/commands/launch.py", line 762, in simple_launcher
    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
subprocess.CalledProcessError: Command '['/

In [None]:
import requests
from datasets import load_dataset
import subprocess
import time
import json
from tqdm import tqdm
import os
import random
import sys
import traceback
import tempfile
import logging

# Configure logging to both console and file.
# force=True replaces any handlers the notebook runtime has already attached
# to the root logger. Without it basicConfig() is a silent no-op in that
# situation: the recorded output shows the default "ERROR:root:" format and
# no INFO records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("pipeline_debug.log", mode="w")
    ],
    force=True,
)


def create_my_utils_module():
    """Write a placeholder ``my_utils.py`` into the current working directory.

    The file holds a single comment line, and an existing ``my_utils.py`` is
    overwritten. An INFO record is logged once the file has been written.
    """
    placeholder_text = "# Placeholder for my_utils module\n"
    with open("my_utils.py", "w") as module_file:
        module_file.write(placeholder_text)
    logging.info("Created my_utils.py module.")


def get_label_1_samples(limit=None):
    """Return the ``input`` text of every WikiMIA sample whose label is 1.

    Loads the ``WikiMIA_length32`` split of ``wjfu99/WikiMIA-24`` through
    ``load_dataset`` and keeps the rows where ``label == 1``.

    Args:
        limit: Maximum number of samples to return. ``None`` (the default)
            returns every matching sample; ``0`` returns an empty list.

    Returns:
        list: The selected ``input`` values, in dataset order.
    """
    logging.info("Loading WikiMIA dataset...")
    dataset = load_dataset("wjfu99/WikiMIA-24", split="WikiMIA_length32")
    selected_samples = [sample["input"] for sample in dataset if sample["label"] == 1]

    # Compare against None explicitly: a plain truthiness test treated
    # limit=0 as "no limit" and returned the whole split.
    if limit is not None:
        selected_samples = selected_samples[:limit]

    logging.info(f"Loaded {len(selected_samples)} samples with label 1")
    return selected_samples


def query_gemini_api(input_sentence, api_key, max_retries=5, timeout=30):
    """Request a domain-fused perturbation of ``input_sentence`` from Groq.

    The function name is kept for existing callers, but the request goes to
    Groq's OpenAI-compatible chat-completions endpoint, not to Gemini.

    Args:
        input_sentence: Sentence to perturb; interpolated into the prompt.
        api_key: Groq API key, sent as a Bearer token.
        max_retries: Total number of HTTP attempts before giving up.
        timeout: Seconds ``requests`` may wait on the server per attempt.

    Returns:
        The stripped ``choices[0].message.content`` of the first successful
        response, or ``None`` when every attempt failed or the API answered
        with a 4xx status that retrying cannot fix.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    prompt = f"""
    You are given an input sentence: {input_sentence}

    Your goal is to create a perturbed version by fusing the original content while preserving its core meaning. Follow these steps:

    1. Dependency Parsing: Analyze the sentence to identify its syntactic structure. Extract key components such as the main verb, subject, object, and important modifiers.
    2. Select an Unrelated Domain: Choose a domain that is semantically distinct (e.g., culinary, sports, art).
    3. Integrate Domain-Specific Elements: Rewrite the sentence by incorporating terms, analogies, or metaphors from the selected domain. Ensure the original meaning is maintained even as new imagery is introduced.

    Provide ONLY the final perturbed sentence as your response, with no additional explanation.
    """

    payload = {
        "model": "llama3-70b-8192",  # Use your intended model for the perturbation step.
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        "max_tokens": 100
    }

    base_wait_time = 5   # seconds before the first retry; doubles afterwards
    max_wait_time = 120  # exponential backoff cap
    retryable_client_errors = {408, 429}

    for attempt in range(max_retries):
        retry_after = None  # server-suggested delay, when a 429 carries one

        try:
            logging.info("Trying endpoint: Groq API")
            # Without a timeout a stalled connection blocks the notebook forever.
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            status = response.status_code

            if status == 200:
                response_json = response.json()
                choices = response_json.get("choices") or []
                message = (choices[0].get("message") or {}) if choices else {}
                content = message.get("content")
                if isinstance(content, str):
                    return content.strip()

                logging.error("Could not extract text from response. Response structure: %s", json.dumps(response_json, indent=2)[:500])

            elif status == 429:
                logging.warning("Rate limited on Groq API.")
                try:
                    retry_after = float(response.headers.get("Retry-After", ""))
                except (TypeError, ValueError):
                    retry_after = None

            elif 400 <= status < 500 and status not in retryable_client_errors:
                # Bad key, bad payload, unknown model...: retrying only burns
                # minutes of backoff, so give up immediately.
                logging.error("Error: %s, %s", status, response.text)
                return None

            else:
                logging.error("Error: %s, %s", status, response.text)

        except Exception:
            logging.exception("Exception during API call:")

        # Back off only when another attempt will actually follow.
        if attempt + 1 < max_retries:
            current_wait = min(max_wait_time, base_wait_time * 2 ** attempt)
            if retry_after is not None and retry_after > 0:
                current_wait = min(max_wait_time, max(current_wait, retry_after))
            # Jitter avoids synchronized retries.
            current_wait += random.uniform(0, 2)
            logging.info(f"Waiting {current_wait:.1f} seconds before retry {attempt+2}/{max_retries}...")
            time.sleep(current_wait)

    return None  # All attempts failed


def run_mia_tuner(original_sentence, perturbed_sentence):
    """Run ``mia_hybrid.py`` on a sentence pair and return a 0/1 membership flag.

    Both sentences are written to files in a temporary directory and handed
    to ``mia_hybrid.py`` through ``python -m accelerate.commands.launch``,
    using the ``EleutherAI/pythia-1.4b`` model. The script's stdout is then
    scanned for a membership keyword.

    Args:
        original_sentence: The unmodified sample text.
        perturbed_sentence: The rewritten text returned by the LLM.

    Returns:
        int: ``1`` when stdout contains one of the tokens ``true``,
        ``member``, ``present`` or ``1``; ``0`` otherwise, including when the
        script cannot be found or the subprocess fails.
    """
    # NOTE(review): the real output format of mia_hybrid.py is not visible
    # here; these keywords are a heuristic and should be confirmed.
    indicators = {"true", "member", "present", "1"}

    if not os.path.exists("my_utils.py"):
        create_my_utils_module()

    # mia_hybrid.py lives in the cloned repository. Accept both layouts: the
    # repo as a subfolder, or the repo as the current directory (after `%cd`),
    # where a relative cwd="AAAI2025_MIA-Tuner" would not exist.
    candidates = (os.path.join(os.getcwd(), "AAAI2025_MIA-Tuner"), os.getcwd())
    script_dir = next(
        (d for d in candidates if os.path.isfile(os.path.join(d, "mia_hybrid.py"))),
        None,
    )
    if script_dir is None:
        logging.error("mia_hybrid.py not found in %s", " or ".join(candidates))
        return 0

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            original_path = os.path.join(temp_dir, "original.txt")
            perturbed_path = os.path.join(temp_dir, "perturbed.txt")

            with open(original_path, "w", encoding='utf-8') as f:
                f.write(original_sentence)
            with open(perturbed_path, "w", encoding='utf-8') as f:
                f.write(perturbed_sentence)

            # Build command without the extra unrecognized parameters.
            command = [
                sys.executable,
                "-m", "accelerate.commands.launch",
                "mia_hybrid.py",
                # Same model id as every other cell in this notebook;
                # "llama1/pythia-1.4b" was inconsistent with them.
                "-m", "EleutherAI/pythia-1.4b",
                "--unaligned_model",
                # NOTE(review): other cells pass a dataset name to -d;
                # confirm that mia_hybrid.py accepts text-file paths.
                "-d", f"{original_path},{perturbed_path}",
                "--block_size", "32",
                "--epochs", "1",
                "--batch_size", "2",
                "--learning_rate", "5e-05",
                "--gradient_accumulation_steps", "1"
            ]

            logging.info("Executing MIA Tuner Command: %s", " ".join(command))
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True,
                cwd=script_dir
            )

            logging.info("MIA Tuner STDOUT: %s", result.stdout)
            logging.info("MIA Tuner STDERR: %s", result.stderr)

            # Match whole tokens only. A substring test for "1" fired on
            # nearly any output ("pythia-1.4b", "0.51", "epoch 10", ...).
            separators = str.maketrans({ch: " " for ch in ":,;=()[]{}<>'\"!?|"})
            tuner_output = result.stdout.strip().lower()
            tokens = {tok.strip(".") for tok in tuner_output.translate(separators).split()}
            membership_result = 0 if indicators.isdisjoint(tokens) else 1

            logging.info("MIA Tuner result: %d (%s in training dataset)",
                         membership_result, 'Present' if membership_result == 1 else 'Not present')
            return membership_result

    except subprocess.CalledProcessError as e:
        logging.error("Subprocess Error. STDOUT: %s, STDERR: %s", e.stdout, e.stderr)
        return 0
    except Exception:
        logging.exception("Comprehensive MIA Tuner Error:")
        return 0


def main(api_key, sample_limit=100, max_attempts_per_sample=3, save_every=5):
    """Generate perturbations for label-1 samples and record which ones pass.

    For each sample, asks ``query_gemini_api`` for a perturbed sentence and
    checks it with ``run_mia_tuner``, retrying up to
    ``max_attempts_per_sample`` times. Outcomes are stored in
    ``perturbation_results.json`` in the current working directory.

    Resuming is positional: the number of entries already stored in the
    results file is used as the index of the next sample to process.

    Args:
        api_key: API key for Groq
        sample_limit: Maximum number of samples to process
        max_attempts_per_sample: Maximum attempts for a valid perturbed sentence
        save_every: Frequency of saving intermediate results; values <= 0
            disable the periodic save (results are still written when the
            run ends).
    """
    results_file = "perturbation_results.json"
    result_keys = ("successful_perturbations", "failed_perturbations")

    def save_results():
        # Write to a sibling file first so an interruption in the middle of
        # the dump cannot leave a truncated results file behind.
        tmp_file = results_file + ".tmp"
        with open(tmp_file, "w") as f:
            json.dump(results, f, indent=2)
        os.replace(tmp_file, results_file)

    samples = get_label_1_samples(limit=sample_limit)

    # Only reuse stored results that have the expected shape; previously a
    # file missing either key passed loading and then raised KeyError below.
    results = {key: [] for key in result_keys}
    if os.path.exists(results_file):
        try:
            with open(results_file, "r") as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict) or not all(
                isinstance(loaded.get(key), list) for key in result_keys
            ):
                raise ValueError("unexpected JSON structure")
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            logging.error("Error loading existing results, starting fresh. %s", e)
        else:
            results = loaded
            logging.info("Loaded existing results with %d successful and %d failed perturbations",
                         len(results["successful_perturbations"]),
                         len(results["failed_perturbations"]))

    processed_samples = len(results["successful_perturbations"]) + len(results["failed_perturbations"])
    start_idx = min(processed_samples, len(samples))
    logging.info("Starting from sample %d/%d", start_idx + 1, len(samples))

    try:
        for i in range(start_idx, len(samples)):
            original_sentence = samples[i]
            logging.info("Sample %d/%d", i + 1, len(samples))
            logging.info("Original: %s", original_sentence)

            success = False

            for attempt in range(max_attempts_per_sample):
                logging.info("  Attempt %d/%d", attempt + 1, max_attempts_per_sample)
                perturbed_sentence = query_gemini_api(original_sentence, api_key)

                if not perturbed_sentence:
                    logging.warning("  Failed to get response from API")
                    continue

                logging.info("  Perturbed: %s", perturbed_sentence)
                is_valid = run_mia_tuner(original_sentence, perturbed_sentence)
                logging.info("  MIA Tuner result: %s", "Valid" if is_valid else "Invalid")

                if is_valid:
                    results["successful_perturbations"].append({
                        "original": original_sentence,
                        "perturbed": perturbed_sentence,
                        "attempts": attempt + 1
                    })
                    success = True
                    logging.info("  Success! Moving to next sample.")
                    break

                logging.info("  Invalid perturbation. Retrying...")

            if not success:
                results["failed_perturbations"].append({
                    "original": original_sentence,
                    "reason": "Failed after maximum attempts"
                })
                logging.info("  Failed after maximum attempts. Moving to next sample.")

            # Guarded so save_every=0 cannot raise ZeroDivisionError.
            if save_every > 0 and (i + 1) % save_every == 0:
                save_results()
                logging.info("Saved results after sample %d", i + 1)
    finally:
        # Always persist what was collected, including when the run is
        # interrupted between two periodic saves.
        save_results()

    logging.info("Pipeline completed!")
    logging.info("Successful perturbations: %d/%d", len(results['successful_perturbations']), len(samples))
    logging.info("Failed perturbations: %d/%d", len(results['failed_perturbations']), len(samples))
    logging.info("Results saved to %s", results_file)


if __name__ == "__main__":
    # Read the Groq key from the environment instead of embedding it in the
    # notebook. The key that used to be hardcoded here has been exposed and
    # should be revoked.
    API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
    if not API_KEY:
        raise RuntimeError("Set the GROQ_API_KEY environment variable before running the pipeline.")

    main(
        api_key=API_KEY,
        sample_limit=20,
        max_attempts_per_sample=3,
        save_every=5
    )


ERROR:root:Subprocess Error. STDOUT: , STDERR: The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `1`
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
2025-03-14 12:11:29.433483: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741954289.466677    4188 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741954289.477088    4188 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connection.py", line

KeyboardInterrupt: 