# Notebook: Overview

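- **Summary:** Runs a TokenSHAP analysis of the prompt "Why is the sky blue?" against the `qwen3:4b` model served by a local Ollama instance, and prints the prompt with per-token highlighting.
- **Inputs:** A running Ollama server at `http://127.0.0.1:11434`, the local `src` modules (`core`, `base`, `token_shap`), and the analysis parameter `sampling_ratio=0.0` (essential combinations only).
- **Outputs:** Colored token-importance text printed in the cell output; the cells shown here do not write any files.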

In [1]:
import sys
import os

# Make the repository root and its `src` folder importable from this notebook.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
reasoning_explain = os.path.join(parent_dir, 'src')

# Prepend in this order so that `src` ends up ahead of the repository root on sys.path.
for _extra_path in (parent_dir, reasoning_explain):
    if _extra_path not in sys.path:
        sys.path.insert(0, _extra_path)

from core import Explainer
# NOTE(review): this call runs every time the import cell executes and its return
# value is not used; what it does is not visible from this notebook — confirm it is
# still needed here, or move it into its own cell.
Explainer().explain("demo")

from base import OllamaModel, TfidfTextVectorizer, EmbeddingVectorizer, TransformerVectorizer
from token_shap import StringSplitter, TokenSHAP

In [None]:
# Reload the local modules so edits made on disk since the first import are picked up.
# importlib.reload() does not rebind names created by `from module import ...`, so the
# classes are imported again afterwards; without that, this cell would keep using the
# stale class objects and the reload would have no visible effect.
import importlib

# token_shap is reloaded after base so that anything it pulls from base is fresh as well
# (presumably token_shap depends on base — TODO confirm).
for _module_name in ("base", "token_shap"):
    if _module_name in sys.modules:
        importlib.reload(sys.modules[_module_name])

from base import OllamaModel, TfidfTextVectorizer, EmbeddingVectorizer, TransformerVectorizer
from token_shap import StringSplitter, TokenSHAP

# Single source of truth for the model, so log messages cannot drift from the configuration
MODEL_NAME = "qwen3:4b"
OLLAMA_API_URL = "http://127.0.0.1:11434"

# Model served by the local Ollama instance
qwen3_model = OllamaModel(
    model_name=MODEL_NAME,
    api_url=OLLAMA_API_URL
)

# Vectorizer handed to TokenSHAP below.
# Alternatives imported above: TfidfTextVectorizer(), EmbeddingVectorizer()
transformer_vectorizer = TransformerVectorizer()

# String splitter for word-level analysis
splitter = StringSplitter()

# Create TokenSHAP instance
token_shap_qwen3 = TokenSHAP(
    model=qwen3_model,
    splitter=splitter,
    vectorizer=transformer_vectorizer,
    debug=True
)

print(f"TokenSHAP initialized with {MODEL_NAME}!")

# Smoke-test the connection. This is deliberately best-effort: a failure is reported
# with a hint instead of aborting the cell.
try:
    test_response = qwen3_model.generate("Hello")
    print(f"Connection successful! Test response: {test_response[:50]}...")
except Exception as e:
    print(f"Connection failed: {e}")
    print("Please make sure Ollama is running with: ollama serve")

TokenSHAP initialized with phi4-mini:latest!
Connection successful! Test response: <think>
Okay, the user said "Hello". I need to res...


In [3]:
prompt1 = "Why is the sky blue?"

# The banner names the model actually behind token_shap_qwen3 (qwen3:4b, set in the setup cell)
print(f"Analyzing with qwen3:4b: '{prompt1}'")
print("="*60)

# Perform TokenSHAP analysis
df_qwen3 = token_shap_qwen3.analyze(
    prompt1,
    sampling_ratio=0.0,  # Use only essential combinations for faster execution
    print_highlight_text=True
)

# Display results
print("\nAnalysis Results:")
token_shap_qwen3.print_colored_text()

Analyzing with phi4-reasoning: 'Why is the sky blue?'
Number of samples: 5
Number of essential combinations: 5
Remaining combinations budget after essentials: 995
Number of additional combinations to sample: 0
No additional combinations to sample.
Total combinations to process: 5


Processing combinations:   0%|          | 0/5 [00:00<?, ?it/s]


Processing combination 1/5:
Combination: ['is', 'the', 'sky', 'blue?']
Indexes: (2, 3, 4, 5)
Received response for combination 1

Processing combination 2/5:
Combination: ['Why', 'the', 'sky', 'blue?']
Indexes: (1, 3, 4, 5)
Received response for combination 2

Processing combination 3/5:
Combination: ['Why', 'is', 'sky', 'blue?']
Indexes: (1, 2, 4, 5)
Received response for combination 3

Processing combination 4/5:
Combination: ['Why', 'is', 'the', 'blue?']
Indexes: (1, 2, 3, 5)
Received response for combination 4

Processing combination 5/5:
Combination: ['Why', 'is', 'the', 'sky']
Indexes: (1, 2, 3, 4)
Received response for combination 5


Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
[I 2025-09-21 09:58:33,511] A new study created in memory with name: no-name-dc7e893c-029c-4084-bfa5-6ed33560d541
[I 2025-09-21 09:58:33,539] Trial 0 finished with value: 0.07397544384002686 and parameters: {'learning_rate': 0.28045120564754844, 'max_depth': 6, 'n_estimators': 139, 'subsample': 0.8709922669163673, 'colsample_bytree': 0.934011872395465, 'reg_alpha': 9.435374286566292, 'reg_lambda': 7.104084897476827e-08}. Best is trial 0 with value: 0.07397544384002686.
[I 2025-09-21 09:58:33,594] Trial 1 finished with value: 0.05923914909362793 and parameters: {'learning_rate': 0.1955330826647456, 'max_depth': 9, 'n_estimators': 487, 'subsample': 0.685350580367279, 'colsample_bytree': 0.7358949350440263, 'reg_alpha': 9.61

[48;2;255;255;254mWhy[0m [48;2;255;255;255mis[0m [48;2;255;255;254mthe[0m [48;2;255;255;66msky[0m [48;2;255;255;0mblue?[0m 

Analysis Results:
[38;2;31;31;255mWhy[0m [38;2;0;0;255mis[0m [38;2;30;30;255mthe[0m [38;2;255;49;49msky[0m [38;2;255;0;0mblue?[0m 
