Step 1: 
- Import df
- df = 'url', 'name', 'price', 'color', 'images', 'image link'

In [166]:
import pandas as pd

# CAREFUL WHEN REDUCING:
#       Need to delete faiss_index.bin 
# downloaded = 500
num_samples = 100

# Load dataset
df = pd.read_csv("products.csv").head(num_samples)

# Keep only relevant columns
df = df[['url', 'name', 'price', 'color', 'images', 'image link']]

# Display first few rows
print(df.head())

                                                 url  \
0  https://www.asos.com/new-look/new-look-trench-...   
1  https://www.asos.com/stradivarius/stradivarius...   
2  https://www.asos.com/jdy/jdy-oversized-trench-...   
3  https://www.asos.com/nike-running/nike-running...   
4  https://www.asos.com/asos-curve/asos-design-cu...   

                                                name  price    color  \
0                      New Look trench coat in camel  49.99  Neutral   
1     Stradivarius double breasted wool coat in grey  59.99     GREY   
2                 JDY oversized trench coat in stone  45.00    STONE   
3                 Nike Running hooded jacket in pink  84.95     Pink   
4  ASOS DESIGN Tall linen mix trench coat in natural  75.00  Natural   

                                              images  \
0  ['https://images.asos-media.com/products/new-l...   
1  ['https://images.asos-media.com/products/strad...   
2  ['https://images.asos-media.com/products/jdy-o...   
3  ['h

Step 2: (takes time)
- Download image
- Extract description
- Generate description
- Apply LLaVa
- df = 'url', 'name', 'price', 'color', 'images', 'image link' + description, image path

In [167]:
import os
import subprocess
import requests
import shlex 
import re

# Define LLaVA paths (modify these based on your system)
LLAVA_CLI_PATH = "/Users/glennsuristio/Documents/Projects/dressAI/llama.cpp/llama-llava-cli"
LLAVA_MODEL_PATH = "/Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf"
MM_PROJ_PATH = "/Users/glennsuristio/Documents/Projects/dressAI/llama.cpp/vit/mmproj-model-f16.gguf"
IMAGES_DATA = "./images_data"
DESCRIPTIONS_DATA = "./descriptions_data"

# Ensure temp directory exists
os.makedirs(IMAGES_DATA, exist_ok=True)
os.makedirs(DESCRIPTIONS_DATA, exist_ok=True)

def download_image(image_url, filename):
    """Downloads an image from a URL if it doesn't already exist."""
    if os.path.exists(filename):
        print(f"Image already exists: {filename}")
        return filename
    
    """Downloads an image from a URL and saves it locally."""
    response = requests.get(image_url, stream=True)
    if response.status_code == 200:
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)

        print(f"Downloaded: {filename}")
        return filename
    else:
        print(f"Failed to download image: {image_url}")
        return None

def extract_description(output_file):
    """Extracts only the relevant product description from LLaVA output."""
    with open(output_file, "r") as f:
        lines = f.readlines()

    # Find the starting point of the description
    start_index = None
    for i, line in enumerate(lines):
        if "encode_image_with_clip: image encoded" in line:
            start_index = i + 1  # Description starts on the next line
            break

    # Extract everything after the start_index
    if start_index is not None and start_index < len(lines):
        return " ".join(lines[start_index:]).strip()
    else:
        return "Description not found"

def sanitize_filename(name):
    """Replaces special characters that are invalid in filenames."""
    name = name.replace(' ', '_')  # Replace spaces with underscores
    return re.sub(r'[\\/:"*?<>|]', '_', name)  # Replace `/ \ : " * ? < > |` with `_`

def generate_description(image_url, name, color):
    """Generates a textual description using LLaVA for a given fashion product image."""
    safe_name = sanitize_filename(name)
    image_path = os.path.join(IMAGES_DATA, f"{safe_name}.jpg")
    description_path = os.path.join(DESCRIPTIONS_DATA, f"{safe_name}.txt")

    # If description file already exists, read from it
    if os.path.exists(description_path):
        with open(description_path, "r") as file:
            existing_description = file.read().strip()

        if existing_description and existing_description != "Description not found":
            print(f"✅ Using existing description for {name}")
            return existing_description
        else:
            print(f"🔄 Regenerating description for {name} (previously invalid)")

        # print(f"Reading existing description for {name}")
        # return open(description_path, "r").read()

    downloaded_image = download_image(image_url, image_path)
    if not downloaded_image:
        return "Image not available"
    
    output_file = "llava_output.txt"
    
#     prompt = f"""{image_path}
# USER:
# Describe the {color} {name} in this image in detail.
# - Focus on its fabric, style, and patterns.
# - Ignore other clothing items other than {color} {name} in the image.
# - Do NOT add any extra information other than the description.
# - Write the response as a single detailed paragraph. Do not use bullet points.
# - Avoid listing features separately; instead, describe the product naturally in a flowing sentence.

# ASSISTANT:
# """
    
    prompt = f"""
USER:
Describe the {color} {name} in this image in detail.
- Focus on its **fabric, style, patterns, and overall aesthetic**.
- Mention the **fit** (e.g., loose, tight, relaxed), **comfort level**, and **mobility**.
- Describe the **material and texture** (e.g., soft cotton, thick wool, waterproof fabric).
- Indicate whether it is **suitable for certain weather conditions** (e.g., breathable for summer, ideal for rainy days).
- Suggest occasions it is best suited for (e.g., casual, formal, date night, outdoor wear, business attire).
- Optionally mention what it might **pair well with** (e.g., jeans, sneakers, high heels, trench coat).
- Ignore other clothing items in the image and focus only on the {color} {name}.
- Do NOT add any extra information outside the description.
- Write the response as a **single paragraph** with **natural, flowing sentences**.

ASSISTANT:
"""


    command = f'{LLAVA_CLI_PATH} -m {LLAVA_MODEL_PATH} --mmproj {MM_PROJ_PATH} --image {shlex.quote(image_path)} -c 4096 -p "{prompt}" > {output_file}'
    print(f"Running Command: {command}")  # Debugging
    
    process = subprocess.run(command, shell=True, capture_output=True, text=True)
    
    print("STDOUT:", process.stdout)  # Debugging
    print("STDERR:", process.stderr)  # Debugging
    
    # Extract clean description
    description = extract_description(output_file)

    # Save the description to a file
    with open(description_path, "w") as desc_file:
        desc_file.write(description)
        
    print(f"Generated Description for {name}: {description}")
    return description

# Apply LLaVA on limited products
df['description'] = df.apply(lambda row: generate_description(row['image link'], row['name'], row['color']), axis=1)
df['image_path'] = df.apply(lambda row: os.path.join(IMAGES_DATA, f"{row['name'].replace(' ', '_')}.jpg"), axis=1)

print("Descriptions generated and saved!")


Image already exists: ./images_data/New_Look_trench_coat_in_camel.jpg
Running Command: /Users/glennsuristio/Documents/Projects/dressAI/llama.cpp/llama-llava-cli -m /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf --mmproj /Users/glennsuristio/Documents/Projects/dressAI/llama.cpp/vit/mmproj-model-f16.gguf --image ./images_data/New_Look_trench_coat_in_camel.jpg -c 4096 -p "
USER:
Describe the Neutral New Look trench coat in camel in this image in detail.
- Focus on its **fabric, style, patterns, and overall aesthetic**.
- Mention the **fit** (e.g., loose, tight, relaxed), **comfort level**, and **mobility**.
- Describe the **material and texture** (e.g., soft cotton, thick wool, waterproof fabric).
- Indicate whether it is **suitable for certain weather conditions** (e.g., breathable for summer, ideal for rainy days).
- Suggest occasions it is best suited for (e.g., casual, formal, date night, outdoor wear, business attire).
-

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


STDOUT: 
STDERR: build: 3943 (cda0e4b6) with Apple clang version 14.0.3 (clang-1403.0.22.14.1) for arm64-apple-darwin22.4.0
llama_load_model_from_file: using device Metal (Apple M2 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 32 key-value pairs and 291 tensors from /Users/glennsuristio/Documents/Projects/dressAI/llava-v1.6-mistral-7b/Mistral-7B-Instruct-v0.2-F32-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral 7B Instruct v0.2
llama_model_loader: - kv   2:                            general.version str              = v0.2
llama_model_loader: - kv   3:                       general.organization str              = Mistralai
llama_model_loader: - kv   4:                           general.f

Step 3: (takes time)
- Generate embedding with MiniLM-L6-v2
- Convert embeddings into FAISS index

In [None]:
import os
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Directory for FAISS index
FAISS_DIR = "faiss_data"
os.makedirs(FAISS_DIR, exist_ok=True)

# File path for FAISS index
FAISS_INDEX_FILE = os.path.join(FAISS_DIR, "faiss_index.bin")

# Load embedding model
embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Step 1: Load FAISS index if it exists
if os.path.exists(FAISS_INDEX_FILE):
    print("Loading existing FAISS index...")
    index = faiss.read_index(FAISS_INDEX_FILE)
    num_existing = index.ntotal  # Number of stored products in FAISS

    # Retrieve stored embeddings only if FAISS has indexed products
    if num_existing > 0:
        existing_embeddings = np.zeros((num_existing, index.d), dtype=np.float32)
        index.reconstruct_batch(np.arange(num_existing), existing_embeddings)  # Batch retrieval

        # Assign stored embeddings back to df
        df.loc[df.index[:num_existing], "embedding"] = pd.Series(list(existing_embeddings))

    # Identify new products that don't have an index yet
    new_products = df[df["embedding"].isna()]

else:
    print("Creating new FAISS index...")
    index = None  # Placeholder for FAISS index
    new_products = df  # All products are new

# Step 2: Generate embeddings ONLY for new products
if not new_products.empty:
    print(f"Found {len(new_products)} new products. Updating FAISS index...")

    # 🚀 **Batch Encode New Descriptions**
    new_embeddings = embedding_model.encode(
        new_products["description"].tolist(),
        batch_size=32,
        convert_to_numpy=True
    ).astype(np.float32)  # Ensure correct dtype

    new_embeddings = np.array(new_embeddings, dtype=np.float32)

    # Store embeddings in df using `.loc`
    df.loc[new_products.index, "embedding"] = pd.Series(list(new_embeddings), index=new_products.index)


    # Convert to FAISS format
    new_embeddings_array = np.vstack(df.loc[new_products.index, "embedding"].to_numpy())

    new_embeddings_array = np.array(new_embeddings_array, dtype=np.float32)

    # Add new embeddings to FAISS
    if index is None:
        index = faiss.IndexFlatL2(new_embeddings_array.shape[1])  # Create FAISS index
    index.add(new_embeddings_array)

    # Save updated FAISS index
    faiss.write_index(index, FAISS_INDEX_FILE)
    print("FAISS index updated with new products!")

else:
    print("No new products found. Using existing FAISS index.")

print("FAISS index ready & embeddings stored in df!")


🆕 Creating new FAISS index...
🆕 Found 100 new products. Updating FAISS index...
✅ FAISS index updated with new products!
🚀 FAISS index ready & embeddings stored in df!


Step 4:
- Retrieve top 3 products with cosine similarity

In [192]:
import numpy as np
import faiss
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

def retrieve_relevant_products(query, top_k=3):
    """
    Hybrid retrieval: Combines FAISS (dense) and TF-IDF (sparse) for better search.
    """
    #Convert query to FAISS embedding
    query_embedding = embedding_model.encode([query], convert_to_numpy=True).reshape(1, -1)

    #Retrieve from FAISS
    top_k_faiss = top_k * 3  # Retrieve more to rerank better
    distances, indices = index.search(query_embedding.astype(np.float32), top_k_faiss)
    retrieved_products = df.iloc[indices[0]].copy()  # Use .copy() to avoid modifying original df

    #Compute cosine similarity for reranking
    product_embeddings = np.vstack(retrieved_products["embedding"].to_numpy())
    similarity_scores = cosine_similarity(query_embedding, product_embeddings)[0]
    retrieved_products["similarity"] = similarity_scores

    #Sparse Retrieval Using TF-IDF
    vectorizer = TfidfVectorizer(stop_words="english")
    tfidf_matrix = vectorizer.fit_transform(df["name"] + " " + df["description"])  # Use both name & description
    query_vector = vectorizer.transform([query])

    #Compute TF-IDF similarity scores for retrieved products only
    sparse_scores = np.array((tfidf_matrix[retrieved_products.index] @ query_vector.T).todense()).flatten()

    #Assign sparse scores only to retrieved products
    retrieved_products["sparse_score"] = sparse_scores

    # Normalize scores and rerank
    retrieved_products["final_score"] = (
        0.7 * retrieved_products["similarity"] +  # Dense search weight
        0.3 * retrieved_products["sparse_score"]  # Sparse search weight
    )
    retrieved_products = retrieved_products.sort_values("final_score", ascending=False)

    return retrieved_products.head(top_k)


Step 5:
- Generate rag text response
- Pass in result details

In [195]:
import gradio as gr
import ollama 

def generate_rag_response(user_query):
    """
    Uses Ollama to generate structured product recommendations with explanations, styling tips, and alternative suggestions.
    """
    retrieved_products = retrieve_relevant_products(user_query, top_k=3)

    if retrieved_products.empty:
        return []

    #Create a formatted product list for AI to process
    product_list = "\n\n".join(
        [
            f"*Name:* {row['name']}\n"
            f"*Description:* {row['description']}\n"
            for i, row in retrieved_products.iterrows()
        ]
    )

    #Construct the AI prompt
    prompt = f"""
User Query: "{user_query}"

The following fashion products were retrieved as the most relevant matches:

{product_list}

Act as a fashion AI assistant. For each product, explain why it was chosen, how it matches the user's request, and provide styling tips.
Respond with a brief and short structured paragraph for each product.
Don't display the name of the product in your response.
Just give your explanations as instructed.
"""

    #Generate response with Ollama
    response = ollama.chat(
        model="mistral", messages=[{"role": "user", "content": prompt}]
    )
    print(response)
    response_text = response["message"]["content"]

    #Split the AI response into separate product explanations
    explanations = response_text.split("\n\n")  # Splitting paragraphs for individual products
    
    #Format structured output for the chat
    structured_recommendations = []
    for (index, row), explanation in zip(retrieved_products.iterrows(), explanations):
        structured_recommendations.append(
            {
                "text": f"**{row['name']}**\n\n**${row['price']}**\n\n{explanation[3:]}\n\n🔗 [View Product]({row['url']})",
                "image": row["image link"],
                "images": row["images"]
            }
        )

    return structured_recommendations  # Return structured list of text + images


Step 6: 
- Chat function for Gradio

In [196]:
import ast

def chat_fashion_assistant(user_input, history):
    """
    Processes user query and returns structured product recommendations with AI-generated explanations.
    """
    #Get AI-generated recommendations
    recommendations = generate_rag_response(user_input)

    if not recommendations:
        return [{"role": "assistant", "content": "No matching products found."}]

    #Create structured response list
    response_list = []
    for rec in recommendations:
        images = ast.literal_eval(rec["images"])
        
        response_list.append({"role": "assistant", "content": rec["text"]})  # AI explanation
        response_list.append({"role": "assistant", "content": gr.Gallery(images[:4], 
                                                                        columns=4, 
                                                                        rows=1, 
                                                                        object_fit="cover", 
                                                                        height="automatic",
                                                                        allow_preview=True)
                                                                        }) 

        response_list.append({"role": "assistant", "content": "\n\n\n\n\n\n"})  # Adds a horizontal divider and spacing

        # response_list.append(gr.Image(rec["image"]))  # Product image

    return response_list  #Returning structured chat messages


Step 7: 
- Gradio UI

In [None]:
chat_ui = gr.ChatInterface(
    fn=chat_fashion_assistant,
    title="🛍️ ShopGPT",
    description="Describe what you're looking for and get personalized recommendations!",
    type="messages",  # Uses Gradio's structured chat format
    theme='allenai/gradio-theme'
)

chat_ui.launch(share=True)


* Running on local URL:  http://127.0.0.1:7877


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


* Running on public URL: https://24c2c98cc94807c43d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


