In [None]:
# Run `ollama list` in a shell (IPython `!` escape) as a first sanity check of the local install.
!ollama list

In [None]:
import requests
# Probe the local Ollama HTTP API and list the models it reports.
try:
    r = requests.get("http://localhost:11434/api/tags", timeout=5)
    # Check the status and parse the body *before* announcing success, so a
    # bad response is never reported as both "reachable" and "NOT reachable".
    r.raise_for_status()
    available_models = [m["name"] for m in r.json().get("models", [])]
except Exception as e:
    print("Ollama NOT reachable ", e)
else:
    print("Ollama reachable")
    print("Available models:", available_models)

In [None]:
import requests
import os
import random
import time
from datetime import datetime

# Pool of short prompts sampled at random by the generation loop.
# Each row below groups related prompts (emotions, objects, verbs, abstract
# nouns, colours/animals/food, programming terms, letters/numbers/punctuation/
# function words, science and philosophy). The pool deliberately contains an
# empty string and a single space as prompts.
input_prompts = [
    "Smile", "Joy", "Sadness", "Anger", "Fear", "Love", "Hope", "Peace", "Calm", "Rage", "Happy", "Excited", "Nervous", "Confident", "Worried", "Grateful", "Lonely", "Pride",
    "Apple", "Car", "House", "Tree", "Ocean", "Mountain", "Book", "Phone", "Computer", "Chair",  "Table", "Window", "Door", "Key", "Lamp", "Mirror", "Painting", "Clock", "Flower", "Stone",
    "Run", "Jump", "Dance", "Sleep", "Think", "Write", "Read", "Listen", "Watch", "Create",  "Build", "Destroy", "Help", "Learn", "Teach", "Play", "Work", "Rest", "Travel", "Explore",
    "Freedom", "Justice", "Truth", "Beauty", "Wisdom", "Courage", "Honor", "Faith", "Trust", "Mystery",  "Future", "Past", "Present", "Infinity", "Nothing", "Everything", "Reality", "Dream", "Memory", "Imagination",
    "Red", "Blue", "Green", "Yellow", "Purple", "Orange", "Black", "White", "Pink", "Brown", "Cat", "Dog", "Bird", "Fish", "Lion", "Tiger", "Elephant", "Horse", "Rabbit", "Snake", "Pizza", "Cake", "Bread", "Milk", "Coffee", "Tea", "Rice", "Pasta", "Soup", "Salad",
    "Algorithm", "Database", "Network", "Security", "Interface", "Protocol", "Framework", "Library",  "Function", "Variable", "Loop", "Array", "Object", "Class", "Method", "API",
    "A", "B", "C", "X", "Y", "Z",  "One", "Two", "Five", "Ten", "Hundred", "Thousand", "Million", "Zero", "", " ", ".", "?", "!", "...", "???", "The", "And", "Or", "But", "If", "When", "Why", "How", "What",
    "Quantum", "Gravity", "Energy", "Matter", "Space", "Time", "Evolution", "DNA", "Atom", "Universe","Existence", "Consciousness", "Soul", "Mind", "Body", "Spirit", "Ethics", "Morality", "Purpose", "Meaning"
]

In [None]:
def save_inference(input_prompt, output_text, file_number):
    """Append one prompt/response record to a numbered text file.

    Records are written to ``inference/inference_data_<file_number:04d>.txt``.
    Each record consists of a timestamp line, an ``Input:`` line, an
    ``Output:`` line (the output itself may span several lines) and a line of
    80 dashes that closes the record.

    Args:
        input_prompt: Prompt text that was sent to the model.
        output_text: Text returned for that prompt.
        file_number: Integer used to build the zero-padded file name.
    """
    filename = f"inference/inference_data_{file_number:04d}.txt"

    # Create the target folder on demand so the function does not depend on
    # another cell having created "inference/" first.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(filename, "a", encoding="utf-8") as file:
        file.write(f"Timestamp: {timestamp}\n")
        file.write(f"Input: {input_prompt}\n")
        file.write(f"Output: {output_text}\n")
        file.write("-" * 80 + "\n")

This make_inference function sends a text prompt to a local Ollama AI model (gpt-oss:20b) via HTTP POST request and returns the generated response text.
It includes retry logic with exponential backoff to handle connection failures, attempting up to 3 times before giving up and returning an error message.

In [None]:
def make_inference(selected_prompt):
    """Send one prompt to the local Ollama server and return the generated text.

    The request goes to ``/api/generate`` with model ``gpt-oss:20b``,
    ``stream`` disabled and ``raw`` enabled. Any failure (connection problem,
    timeout, HTTP error status, undecodable body) is retried; the waits between
    attempts are 5 s and 10 s.

    Args:
        selected_prompt: Prompt text to send.

    Returns:
        The ``response`` field of the reply; ``"ERROR: No response"`` if a
        successful reply has no such field; or a string starting with
        ``"ERROR after 3 attempts:"`` once every attempt has failed.
    """
    url = "http://localhost:11434/api/generate"
    data = {
        "model": "gpt-oss:20b",
        "prompt": selected_prompt,
        "stream": False,
        "raw": True,
        "options": {
            "temperature": 0.7,
            "num_predict": 4000,
            "num_ctx": 2100,
            "top_p": 0.9
        }
    }
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.post(url, json=data, timeout=180)
            # Turn HTTP 4xx/5xx into an exception so server-side failures go
            # through the retry path instead of being read as an empty answer.
            response.raise_for_status()
            result = response.json()
            return result.get("response", "ERROR: No response")
        except Exception as e:
            if attempt < max_retries - 1:
                wait_time = (2 ** attempt) * 5  # exponential backoff: 5 s, 10 s
                print(f"Attempt {attempt + 1} failed, retrying in {wait_time}s...")
                time.sleep(wait_time)
            else:
                return f"ERROR after {max_retries} attempts: {str(e)}"

In [None]:
# Run configuration: overall inference budget and how many records go in each file.
total_inferences = 1_000_000
inferences_per_file = 1000

# Folder that receives the numbered inference files.
os.makedirs("inference", exist_ok=True)

# One print call, one line per argument.
print(
    f"Starting {total_inferences:,} inferences...",
    f"Saving {inferences_per_file} inferences per file",
    f"Total prompts available: {len(input_prompts)}",
    sep="\n",
)

This code below runs a loop that makes 1 million AI inferences by randomly selecting prompts from a predefined list, sending each to the GPT model, and saving both the input prompt and output response to organized text files.
It processes the inferences with a 5-second delay between each call, saves 1000 inferences per file, and provides progress updates every 100 completions.

In [None]:
# Main generation loop: pick a random prompt, query the model, append the
# result to the current numbered file. Whatever make_inference returns is
# stored as-is, including its "ERROR ..." strings.
for i in range(total_inferences):
    selected_prompt = random.choice(input_prompts)
    file_number = (i // inferences_per_file) + 1  # files are numbered from 1

    print(f"Inference {i+1:,}/{total_inferences:,} - File {file_number} - Prompt: {repr(selected_prompt)}")
    output = make_inference(selected_prompt)

    save_inference(selected_prompt, output, file_number)

    if (i + 1) % 100 == 0:
        print(f"Completed {i+1:,} inferences ({((i+1)/total_inferences)*100:.2f}%)")

    # Pause between calls, but do not wait after the very last one.
    if i + 1 < total_inferences:
        time.sleep(5.0)

print(f"Completed all {total_inferences:,} inferences!")
# Ceiling division: a partially filled last file still counts as a file.
files_written = (total_inferences + inferences_per_file - 1) // inferences_per_file
print(f"Data saved across {files_written} files")

Up to the previous cell, we made 1 million inferences using gpt-oss:20b, without the harmony format.
From this cell onward, we create embeddings of each input query and of the inference the model produced for it.

Embeddings

In [None]:
import os, re, numpy as np
from dotenv import load_dotenv
import openai

# Load variables from a .env file (if any), then hand the key to the OpenAI client.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Print NumPy arrays in full instead of summarising long ones.
np.set_printoptions(threshold=np.inf)

# Text file with the saved inference records that will be embedded.
infrence_file = "infrences.txt"

# Output files, all inside embeddings/.
os.makedirs("embeddings", exist_ok=True)
input_emb_file, chunks_text_file, output_emb_file = (
    "embeddings/input_embeddings.txt",
    "embeddings/output_chunks.txt",
    "embeddings/output_embeddings.txt",
)



helpers

In [None]:
def get_openai_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` and return it as a NumPy array.

    Args:
        text: Text to embed.
        model: Name of the embedding model to request.

    Returns:
        ``np.ndarray`` built from the first item of the response.
    """
    response = openai.embeddings.create(model=model, input=text)
    first_item = response.data[0]
    return np.array(first_item.embedding)

def chunk_text(long_sentence, chunk_size=400, overlap=100):
    """Cut text into overlapping windows of whitespace-separated words.

    Args:
        long_sentence: Text to split.
        chunk_size: Maximum number of words per window.
        overlap: Number of words shared by consecutive windows. The window
            advances by ``chunk_size - overlap`` words, but never by less
            than one.

    Returns:
        List of windows, each re-joined with single spaces. Empty input gives
        an empty list; the last window ends exactly at the last word.
    """
    tokens = long_sentence.split()
    total = len(tokens)
    stride = max(chunk_size - overlap, 1)

    pieces = []
    for begin in range(0, total, stride):
        stop = min(begin + chunk_size, total)
        pieces.append(" ".join(tokens[begin:stop]))
        if stop >= total:
            # The window already reaches the final word; no further windows.
            break
    return pieces


parsing

In [None]:
# Parse the saved inference log into a list of {"input", "output"} records.
MAX_RECORDS = 10               # number of leading records to keep
RECORD_SEPARATOR = "-" * 80    # line that closes every saved record

with open(infrence_file, "r", encoding="utf-8", errors="ignore") as f:
    content = f.read()

# split the file into blocks starting with 'Timestamp:'
blocks = re.split(r'(?=^Timestamp:\s)', content, flags=re.MULTILINE)
blocks = [b.strip() for b in blocks if b.strip()]

records = []
for b in blocks:
    # [ \t]* instead of \s*: with \s* an empty prompt lets the pattern run
    # past the newline and capture the following "Output: ..." line as input.
    m_in = re.search(r'^Input:[ \t]*(.*)$', b, flags=re.MULTILINE)
    m_out = re.search(r'^Output:[ \t]*', b, flags=re.MULTILINE)
    if not (m_in and m_out):
        continue

    input_text = m_in.group(1).strip()

    # Everything after "Output:" is model output (it may span many lines),
    # except the closing separator line, which is bookkeeping and must not
    # end up in the text that gets chunked and embedded.
    raw_output = b[m_out.end():]
    body, _, last_line = raw_output.rpartition("\n")
    if last_line.strip() == RECORD_SEPARATOR:
        raw_output = body
    output_text = raw_output.strip()

    records.append({"input": input_text, "output": output_text})
    if len(records) == MAX_RECORDS:
        break

print(f"Parsed {len(records)} inference(s).")


embed inputs, chunk outputs, save chunks text, embed chunks

In [None]:
# Embed every query, chunk every output, store the chunk text, embed the chunks.
#
# The three result files are opened once, in write mode. Record numbers
# restart at 1 on every run, so appending would leave several "Inference N"
# entries in the same file and later lookups by number could read stale ones.
with open(input_emb_file, "w", encoding="utf-8") as f_inputs, \
     open(chunks_text_file, "w", encoding="utf-8") as f_chunks, \
     open(output_emb_file, "w", encoding="utf-8") as f_outputs:
    for i, rec in enumerate(records, 1):
        query = rec["input"]
        output_joined = " ".join(rec["output"].split())  # flatten whitespace

        # 1) input embedding
        q_emb = get_openai_embedding(query)
        f_inputs.write(f"Inference {i}\n")
        f_inputs.write(f"Query: {query}\n")
        f_inputs.write(f"Embedding: {q_emb.tolist()}\n")
        f_inputs.write("-" * 80 + "\n")

        # 2) output -> overlapping word chunks (labelled "Sentence" in the files)
        chunks = chunk_text(output_joined, chunk_size=400, overlap=100)

        # save the chunk text
        f_chunks.write(f"=== Inference {i} ===\n")
        f_chunks.write(f"Query: {query}\n")
        for j, ch in enumerate(chunks, 1):
            f_chunks.write(f"Sentence {j}: {ch}\n")
        f_chunks.write("\n")

        # 3) one embedding per chunk
        for j, ch in enumerate(chunks, 1):
            emb = get_openai_embedding(ch)
            f_outputs.write(f"Inference {i} | Sentence {j}: {emb.tolist()}\n")


In [None]:
# Quick check: number of elements in the most recently computed chunk embedding
# (`emb` is the variable left behind by the embedding loop).
emb.size

Now that we have created the embeddings, we move on to computing the cosine distance between
the query embedding and the sentence embeddings of the corresponding inference (output).

In [None]:
import os, re, ast, numpy as np
from scipy.spatial.distance import cosine as cosine_distance


# Folder holding the embedding files and receiving the distance files.
output_dir = "embeddings"
# Embedding files read by the extraction helpers (used as their default paths).
input_emb_file   = os.path.join(output_dir, "input_embeddings.txt")
output_emb_file  = os.path.join(output_dir, "output_embeddings.txt")
# Path of a cosine-distance results file; note the name hard-codes inference 1.
cos_distance     = os.path.join(output_dir, "inference1_cosine_distances.txt")

The function below searches the input-embeddings file for a specific inference number and extracts the numerical embedding vector stored for it.

It parses the file line by line, locates the section for the requested inference, finds the "Embedding:" line, and converts the string representation back into a NumPy array of floating-point numbers.

In [None]:
def extract_query_embedding(inference_index=1, path=input_emb_file) -> np.ndarray:
    """Return the stored query embedding of one inference.

    The file is scanned for a line equal to ``Inference <inference_index>``;
    the first ``Embedding:`` line found after it, before the next line that
    starts with ``Inference ``, is parsed.

    Args:
        inference_index: Number of the inference to look up.
        path: Input-embeddings file to read.

    Returns:
        The embedding as a float32 array.

    Raises:
        ValueError: If no embedding is found for ``inference_index``.
    """
    header = f"Inference {inference_index}"
    inside_record = False

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if inside_record and line.startswith("Embedding:"):
                _, _, payload = line.partition("Embedding:")
                return np.array(ast.literal_eval(payload.strip()), dtype=np.float32)
            # Any line beginning with "Inference " closes the current record ...
            if line.startswith("Inference "):
                inside_record = False
            # ... and the requested header (re)opens one.
            if line.strip() == header:
                inside_record = True

    raise ValueError(f"Query embedding for inference {inference_index} not found.")

The function below extracts all sentence embeddings for a specific inference from the output-embeddings file, using a regex to match lines of the form "Inference X | Sentence Y: [embedding_vector]".

It collects all the sentence embeddings into a dictionary, sorts them by sentence number, and returns them as a list of NumPy arrays in the correct order.

In [None]:
def extract_sentence_embeddings(inference_index=1, path=output_emb_file) -> list[np.ndarray]:
    """Return every stored sentence embedding of one inference, ordered by sentence number.

    Lines of the form ``Inference <n> | Sentence <k>: [..]`` are collected for
    the requested ``n``. If a sentence number occurs more than once, the last
    occurrence in the file wins.

    Args:
        inference_index: Number of the inference to look up.
        path: Output-embeddings file to read.

    Returns:
        List of float32 arrays sorted by sentence number.

    Raises:
        ValueError: If the file has no sentence embedding for ``inference_index``.
    """
    line_re = re.compile(rf"^Inference {inference_index} \| Sentence (\d+):\s*(\[.*\])$")
    by_number = {}

    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            hit = line_re.match(raw.strip())
            if hit is None:
                continue
            number_text, vector_literal = hit.groups()
            number = int(number_text)
            by_number[number] = np.array(ast.literal_eval(vector_literal), dtype=np.float32)

    if not by_number:
        raise ValueError(f"No sentence embeddings for inference {inference_index}.")
    return [by_number[number] for number in sorted(by_number)]

In [None]:
INF = 3  # inference whose query is compared with its own sentence embeddings

q = extract_query_embedding(INF)
sents = extract_sentence_embeddings(INF)

# safety: check dimensions
for k, s in enumerate(sents, start=1):
    if s.shape != q.shape:
        raise ValueError(f"Dim mismatch at sentence {k}: {s.shape} vs {q.shape}")

# Name the results file after INF so the file name always matches its content
# (a fixed "inference1_..." name would mislabel the results of any other INF).
out_path = os.path.join(output_dir, f"inference{INF}_cosine_distances.txt")

# save CSV-like text: sentence_index,cosine_distance
with open(out_path, "w", encoding="utf-8") as out:
    out.write("sentence_index,cosine_distance\n")
    for idx, svec in enumerate(sents, start=1):
        d = cosine_distance(q, svec)
        out.write(f"{idx},{d:.8f}\n")

print("Saved:", out_path)


Computing cosine_distance

In [None]:
def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine distance (1 - cosine similarity) between two vectors.

    If the product of the two norms is zero (at least one vector has zero
    length) the similarity is undefined and 1.0 is returned.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if not norm_product:
        return 1.0
    similarity = float(a @ b / norm_product)
    return 1.0 - similarity

The code below calculates the cosine distance between the query embedding and the sentence embeddings for inferences 1 to 10.

For each inference, it writes the sentence index and corresponding cosine distance to a file, saving the results in a specified output directory.

In [None]:

# Write one "sentence_index,cosine_distance" file per inference (1 through 10).
for inf in range(1, 11):
    q = extract_query_embedding(inf)
    sents = extract_sentence_embeddings(inf)
    out_path = os.path.join(output_dir, f"inference{inf}_cosine_distances.txt")

    with open(out_path, "w", encoding="utf-8") as out:
        rows = ["sentence_index,cosine_distance\n"]
        for idx, svec in enumerate(sents, start=1):
            # Uses whichever cosine_distance is currently bound in the notebook.
            d = float(cosine_distance(q, svec))
            rows.append(f"{idx},{d:.8f}\n")
        out.writelines(rows)

    print("Saved:", out_path)


Now we will use gpt-oss:20b to assign a label to each sentence.

Import Dependencies and Setup

In [None]:
!pip install aiohttp

In [None]:
import asyncio
import aiohttp
import json
import re
import os
from openai import AsyncOpenAI
import time
from datetime import datetime

# Files for the labeling step: chunk text in, labeled chunk text out.
input_file = "embeddings/output_chunks.txt"
output_file = "embeddings/output_chunks_labeled.txt"

# Async OpenAI-style client aimed at the local Ollama endpoint.
# The key is a placeholder ("dummy") value.
client = AsyncOpenAI(api_key="ollama", base_url="http://localhost:11434/v1")

Helper Functions

In [None]:
def parse_chunks_file(file_path):
    """Parse the chunk-text file into a dict keyed by inference number.

    The file is expected to contain sections that start with
    ``=== Inference <n> ===`` followed by a ``Query: ...`` line and any number
    of ``Sentence <k>: <text>`` lines.

    Args:
        file_path: Path of the chunk-text file.

    Returns:
        ``{n: {'query': str, 'sentences': [{'number': int, 'text': str}, ...]}}``.
        Sentence lines with empty text are skipped; a section without a
        ``Query:`` line gets an empty query.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Splitting on a pattern with a capture group yields
    # [preamble, number1, body1, number2, body2, ...].
    sections = re.split(r'=== Inference (\d+) ===', content)

    inferences = {}
    for i in range(1, len(sections), 2):  # step over (number, body) pairs
        inference_num = int(sections[i])
        query = ""
        sentences = []

        for line in sections[i + 1].strip().split('\n'):
            line = line.strip()
            if line.startswith("Query:"):
                # Cut off only the leading tag: str.replace would also delete
                # any "Query:" that occurs inside the query text itself.
                query = line[len("Query:"):].strip()
            elif line.startswith("Sentence"):
                match = re.match(r'Sentence (\d+): (.+)', line)
                if match:
                    sentences.append({
                        'number': int(match.group(1)),
                        'text': match.group(2)
                    })

        inferences[inference_num] = {
            'query': query,
            'sentences': sentences
        }

    return inferences

# Test the parser: load the chunk file into `data` (reused by the labeling
# cells) and print, per inference, its query and how many sentences it has.
data = parse_chunks_file(input_file)
print(f"Parsed {len(data)} inferences")
for inf_num, inf_data in data.items():
    print(f"Inference {inf_num}: Query='{inf_data['query']}', {len(inf_data['sentences'])} sentences")

Async Labeling Function

In [None]:
async def generate_simple_label(sentence_text):
    """Ask the model for a short topic label describing ``sentence_text``.

    Only the first 300 characters of the text are placed in the prompt. The
    reply is reduced to the first three words of its first non-empty line,
    with surrounding quotes, backticks or asterisks removed.

    Args:
        sentence_text: Text to label.

    Returns:
        The cleaned label; ``"general topic"`` if the model returned no text;
        ``"classification error"`` if the request failed (the failure is
        printed so it does not go unnoticed).
    """
    try:
        prompt = f"""You are an expert text analyzer. Your task is to read the following text and create a precise, descriptive label that captures its essence.

TEXT: "{sentence_text[:300]}"

HOW TO ANALYZE:
1. First, identify the MAIN SUBJECT or topic being discussed
2. Second, determine the FIELD or DOMAIN this belongs to
3. Third, consider the SPECIFIC CONTEXT or approach being used
4. Finally, create a label that combines the most important aspects

THINKING PROCESS:
- What is this text primarily about?
- What field of knowledge does this belong to?
- What specific aspect or angle is being discussed?
- How would an expert in this field categorize this?

LABEL REQUIREMENTS:
- Use EXACTLY 2-3 words
- Be specific and descriptive
- Use clear, professional terminology
- Capture the most important essence of the text

EXAMPLES OF GOOD LABELS:
- "quantum mechanics" (not just "physics")
- "data structures" (not just "programming")  
- "financial modeling" (not just "business")
- "cognitive psychology" (not just "psychology")
- "organic synthesis" (not just "chemistry")
- "machine learning" (not just "technology")

CREATE YOUR LABEL (2-3 words): """

        response = await client.chat.completions.create(
            model="gpt-oss:20b",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=500
        )

        # Guard against a missing/None content: treat it as an empty reply
        # rather than letting .strip() raise and be reported as an error.
        raw_label = response.choices[0].message.content or ""

    except Exception as e:
        # Best effort: keep the batch going, but say what went wrong.
        print(f"Label request failed: {e!r}")
        return "classification error"

    # Keep the first non-empty line only, drop wrapping quotes/markdown
    # markers, then cap the label at three words.
    first_line = next((ln.strip() for ln in raw_label.splitlines() if ln.strip()), "")
    words = first_line.strip("\"'`*").split()[:3]
    return " ".join(words) if words else "general topic"

This cell processes all of the parsed text. It is the main labeling block: each sentence is sent to gpt-oss through the async client,
and gpt-oss returns a label for that sentence.

In [None]:
async def process_all_sentences(data):
    """Label every sentence of every inference, one request at a time.

    Args:
        data: ``{inference_number: {'query': ..., 'sentences': [{'number', 'text'}, ...]}}``.

    Returns:
        A dict of the same shape in which each sentence entry also carries a
        ``'label'`` key. A 2-second pause follows every labeled sentence.
    """
    results = {}

    for inference_id, entry in data.items():
        labeled_sentences = []
        results[inference_id] = {
            'query': entry['query'],
            'sentences': labeled_sentences,
        }

        for item in entry['sentences']:
            tag = await generate_simple_label(item['text'])
            labeled_sentences.append({
                'number': item['number'],
                'text': item['text'],
                'label': tag,
            })
            await asyncio.sleep(2)  # fixed pause between requests

    return results

# Run the labeling pass over the parsed chunks. This uses a top-level `await`,
# so it must be executed where that is supported (e.g. a notebook cell).
labeled_data = await process_all_sentences(data)

This block just saves the labeled output.

In [None]:
# Write the labeled sentences to disk, grouped by inference number; inside a
# group the sentences are ordered by their sentence number.
with open("embeddings/labeled_output.txt", "w", encoding="utf-8") as f:
    for inf_num in sorted(labeled_data):
        inf_data = labeled_data[inf_num]
        ordered_sentences = sorted(inf_data['sentences'], key=lambda s: s['number'])

        f.write(f"=== Inference {inf_num} ===\nQuery: {inf_data['query']}\n")
        for sentence in ordered_sentences:
            f.write(f"Sentence {sentence['number']}: {sentence['text']}\nLabel: {sentence['label']}\n")
        f.write("\n")  # blank line between inference groups

print("Done")

The below cells create graphs

In [None]:
!pip install seaborn

In [None]:
# Re-parse the updated labeled_output.txt file
def parse_existing_labeled_data(path="embeddings/labeled_output.txt"):
    """Parse a labeled-output file back into the ``labeled_data`` structure.

    The file is expected to contain sections that start with
    ``=== Inference <n> ===``, followed by a ``Query: ...`` line and
    ``Sentence <k>: <text>`` lines, each optionally followed by a
    ``Label: ...`` line.

    Args:
        path: File to read. Defaults to the file written by the save cell.

    Returns:
        ``{n: {'query': str, 'sentences': [{'number', 'text', 'label'}, ...]}}``.
        A sentence that is not directly followed by a ``Label:`` line gets the
        label ``"Unknown"``; a section without a ``Query:`` line gets ``""``.
    """
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    labeled_data = {}

    # Everything before the first section marker is ignored.
    for section in content.split("=== Inference ")[1:]:
        lines = section.strip().split('\n')
        inf_num = int(lines[0].split(' ===')[0])

        # First "Query:" line of the section; only the leading tag is removed.
        query = next(
            (ln[len("Query:"):].strip() for ln in lines if ln.startswith("Query:")),
            "",
        )

        sentences = []
        for pos, line in enumerate(lines):
            if not line.startswith("Sentence "):
                continue

            sentence_text = line.split(": ", 1)[1] if ": " in line else ""
            sentence_num = int(line.split(":", 1)[0].split(" ")[1])

            # Look at the next line without consuming it: if it is not a
            # label it may be another sentence, which must still be parsed.
            following = lines[pos + 1] if pos + 1 < len(lines) else ""
            if following.startswith("Label:"):
                label = following[len("Label:"):].strip()
            else:
                label = "Unknown"

            sentences.append({
                'number': sentence_num,
                'text': sentence_text,
                'label': label
            })

        labeled_data[inf_num] = {
            'query': query,
            'sentences': sentences
        }

    return labeled_data

# RELOAD the labeled_data with updated labels
# (replaces the in-memory `labeled_data` with what is stored in the file).
labeled_data = parse_existing_labeled_data()
print(f"✅ Reloaded labeled_data with updated labels")

# Verify the new labels: print every sentence label of inferences 1-10.
# Indexing is by fixed number, so all ten inferences must be present.
print("\nNew labels:")
for inf_num in range (1,11):
    print(f"Inference {inf_num}:")
    for sentence in labeled_data[inf_num]['sentences']:
        print(f"  S{sentence['number']}: {sentence['label']}")

In [None]:
# Collect the query string of inferences 1-10 into `query_texts`, echoing each one.
print("=== EXTRACTING QUERY TEXTS ===")
query_texts = {}
for i in range(1, 11):
    query_texts[i] = query_text = labeled_data[i]['query']
    print(f"Q{i}: '{query_text}'")

In [None]:
# Load the per-inference cosine distances into `distances`
# ({inference: {sentence_index: distance}}). This cell and the plotting cell
# read `distances`, but no visible cell builds it, so it is rebuilt here from
# the "sentence_index,cosine_distance" files to make the notebook runnable
# from a fresh kernel.
distances = {}
for i in range(1, 11):
    distances[i] = {}
    with open(f"embeddings/inference{i}_cosine_distances.txt", "r", encoding="utf-8") as f:
        next(f, None)  # skip the header line
        for row in f:
            row = row.strip()
            if row:
                sent_idx, dist_text = row.split(",")
                distances[i][int(sent_idx)] = float(dist_text)

# Check cosine distance ranges for first 10 inferences
print("=== COSINE DISTANCE RANGES ===")
all_distances = []
for i in range(1, 11):
    distances_list = list(distances[i].values())
    all_distances.extend(distances_list)
    min_dist = min(distances_list)
    max_dist = max(distances_list)
    print(f"Q{i} ({labeled_data[i]['query']}): {len(distances_list)} sentences, range: {min_dist:.4f} - {max_dist:.4f}")

overall_min = min(all_distances)
overall_max = max(all_distances)
print(f"\n=== OVERALL RANGE ===")
print(f"Min distance: {overall_min:.4f}")
print(f"Max distance: {overall_max:.4f}")
print(f"Suggested x-axis range: {overall_min-0.05:.2f} to {overall_max+0.05:.2f}")

# Also check sentence order for one example
print(f"\n=== SENTENCE ORDER CHECK (Q2 Example) ===")
q2_sentences = sorted(labeled_data[2]['sentences'], key=lambda x: x['number'])
q2_distances = distances[2]
print("Sentence order (left-to-right in graph):")
for sent in q2_sentences:
    sent_num = sent['number']
    if sent_num in q2_distances:
        dist = q2_distances[sent_num]
        label = sent['label'][:30] + "..." if len(sent['label']) > 30 else sent['label']
        print(f"  S{sent_num}: distance={dist:.4f}, label='{label}'")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def create_graph_with_distribution(inference_ids=range(1, 11), xlim=(0.58, 0.99)):
    """Plot cosine distances per query: a scatter panel plus a box-plot panel.

    Reads the notebook globals ``labeled_data`` (query text per inference) and
    ``distances`` (``{inference: {sentence_number: distance}}``).

    Args:
        inference_ids: Inference numbers to plot, one row each, in the given
            order. Defaults to inferences 1-10.
        xlim: ``(low, high)`` x-axis limits of the scatter panel. Pass ``None``
            to fit the limits to the plotted distances with a 0.05 margin, so
            that no point can fall outside the visible range.
    """
    inference_ids = list(inference_ids)
    n_rows = len(inference_ids)

    # Create figure with two panels
    fig = plt.figure(figsize=(24, 12))
    gs = fig.add_gridspec(1, 3, width_ratios=[3, 1, 0.1])  # Main plot, box plot, spacer

    # Extract actual queries from labeled_data
    queries = {inf_num: labeled_data[inf_num]['query'] for inf_num in inference_ids}
    print("Using actual queries:", queries)

    # One colour per inference number (not per query text): two inferences
    # with the same prompt would otherwise end up sharing one colour.
    colors_list = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#4CAF50',
                   '#9C27B0', '#FF9800', '#607D8B', '#E91E63', '#795548']
    colors = {
        inf_num: colors_list[pos % len(colors_list)]
        for pos, inf_num in enumerate(inference_ids)
    }

    # Panel A: Main scatter plot (one horizontal row per inference)
    ax1 = fig.add_subplot(gs[0, 0])

    for row, inf_num in enumerate(inference_ids, start=1):
        for sent_num in sorted(distances[inf_num].keys()):
            distance = distances[inf_num][sent_num]
            ax1.scatter(distance, row, c=colors[inf_num], s=200, alpha=0.8,
                        edgecolors='black', linewidth=1, zorder=3)

    ax1.set_ylim(0.5, n_rows + 0.5)
    ax1.set_yticks(range(1, n_rows + 1))
    ax1.set_yticklabels(list(queries.values()), fontsize=14)

    if xlim is None:
        plotted = [d for inf_num in inference_ids for d in distances[inf_num].values()]
        if plotted:
            ax1.set_xlim(min(plotted) - 0.05, max(plotted) + 0.05)
    else:
        ax1.set_xlim(*xlim)

    ax1.set_xlabel('Cosine Distance', fontweight='bold', fontsize=18)
    ax1.set_ylabel('Query Type', fontweight='bold', fontsize=18)
    ax1.grid(True, alpha=0.3, zorder=1)
    ax1.set_title('A. Semantic Distance Distribution', fontweight='bold', fontsize=18, loc='left')
    ax1.tick_params(axis='both', which='major', labelsize=12)
    ax1.set_facecolor('#fafafa')

    # Panel B: Distribution Summary (Box Plot)
    ax2 = fig.add_subplot(gs[0, 1])

    box_data = []
    box_labels = []
    box_colors = []

    for inf_num in inference_ids:
        dists = list(distances[inf_num].values())
        box_data.append(dists)
        box_labels.append(f"{queries[inf_num]}\n(n={len(dists)})")
        box_colors.append(colors[inf_num])

    bp = ax2.boxplot(box_data, patch_artist=True)

    # Color the boxes
    for patch, color in zip(bp['boxes'], box_colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    ax2.set_xticklabels(box_labels, rotation=45, ha='right', fontsize=10)
    ax2.set_ylabel('Cosine Distance', fontweight='bold', fontsize=16)
    ax2.set_title('B. Distribution\nSummary', fontweight='bold', fontsize=16, loc='left')
    ax2.grid(True, alpha=0.3, axis='y')
    ax2.tick_params(axis='y', labelsize=12)

    # Overall title
    fig.suptitle('Semantic Distance Analysis',
                 fontsize=20, fontweight='bold', y=0.95)

    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.show()

# Run the graph with actual queries
create_graph_with_distribution()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def create_readable_table(inference_ids=range(1, 11)):
    """Render the sentence labels of each inference as a coloured table figure.

    Reads the notebook global ``labeled_data``. The table has one row per
    inference (query text in the first column) and one column per sentence
    position; rows with fewer sentences are padded with empty cells.

    Args:
        inference_ids: Inference numbers to show, one row each, in the given
            order. Defaults to inferences 1-10.
    """
    import textwrap  # local import: only this helper needs it

    inference_ids = list(inference_ids)

    # Extract actual queries from labeled_data
    queries = {inf_num: labeled_data[inf_num]['query'] for inf_num in inference_ids}

    # One colour per inference number (not per query text): two inferences
    # with the same prompt would otherwise end up sharing one colour.
    colors_list = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#4CAF50',
                   '#9C27B0', '#FF9800', '#607D8B', '#E91E63', '#795548']
    colors = {
        inf_num: colors_list[pos % len(colors_list)]
        for pos, inf_num in enumerate(inference_ids)
    }

    def wrap_label(label, max_chars_per_line=20):
        """Break a long label into lines of at most ``max_chars_per_line`` characters.

        Words are never split, so a single word longer than the limit stays
        on a line of its own.
        """
        if len(label) <= max_chars_per_line:
            return label
        return '\n'.join(
            textwrap.wrap(label, width=max_chars_per_line,
                          break_long_words=False, break_on_hyphens=False)
        )

    # Build table with actual data
    max_sentences = max(
        (len(labeled_data[inf_num]['sentences']) for inf_num in inference_ids),
        default=0,
    )
    headers = ['Query'] + [f'Sentence {i+1}' for i in range(max_sentences)]
    table_data = [headers]

    for inf_num in inference_ids:
        row = [queries[inf_num]]
        sorted_sentences = sorted(labeled_data[inf_num]['sentences'], key=lambda x: x['number'])

        for i in range(max_sentences):
            if i < len(sorted_sentences):
                row.append(wrap_label(sorted_sentences[i]['label']))
            else:
                row.append("")

        table_data.append(row)

    # Create optimized table (width never drops to zero, even without sentences)
    fig, ax = plt.subplots(figsize=(max(max_sentences, 1) * 3, 12))
    ax.axis('off')

    # Create table with proper spacing
    table = ax.table(cellText=table_data, loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(12)  # Readable but not too big
    table.scale(1, 3)  # Good height without too much empty space

    # Style header row
    for i in range(len(headers)):
        table[(0, i)].set_facecolor('#34495e')
        table[(0, i)].set_text_props(weight='bold', color='white', fontsize=14)
        table[(0, i)].set_height(0.08)

    # Style data rows: table row i (1-based) shows inference_ids[i - 1]
    for i in range(1, len(table_data)):
        row_color = colors[inference_ids[i - 1]]
        for j in range(len(headers)):
            if j == 0:  # Query column
                table[(i, j)].set_facecolor(row_color)
                table[(i, j)].set_alpha(0.9)
                table[(i, j)].set_text_props(weight='bold', color='white', fontsize=13)
            else:  # Sentence columns
                if table_data[i][j]:
                    table[(i, j)].set_facecolor(row_color)
                    table[(i, j)].set_alpha(0.2)
                    table[(i, j)].set_text_props(fontsize=11)
                else:
                    table[(i, j)].set_facecolor('#f8f9fa')

            table[(i, j)].set_height(0.06)  # Compact but readable

    plt.title('Sentence Labels: Left-to-Right Order Matches Graph Dots Above',
              fontweight='bold', fontsize=18, pad=30)
    plt.tight_layout()
    plt.show()

# Run the readable table
create_readable_table()