# Notebook: Overview

- **Summary:** Describe purpose and key results.
- **Inputs:** List data sources and parameters.
- **Outputs:** Figures/tables produced and where saved.

In [1]:
import sys
import os

# Make two directories importable: the parent of the current working
# directory and its `src` subdirectory. The local modules imported below
# (`core`, `base`, `token_shap`) are presumably resolved from there.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
reasoning_explain = os.path.join(parent_dir, 'src')

# Each missing entry is pushed to the front of sys.path, parent first and
# `src` second, so `src` ends up with the highest lookup priority.
for _search_dir in (parent_dir, reasoning_explain):
    if _search_dir not in sys.path:
        sys.path.insert(0, _search_dir)

# Local imports; these only work once sys.path has been extended above.
from core import Explainer
Explainer().explain("demo")

from base import OllamaModel, TfidfTextVectorizer
from token_shap import StringSplitter, TokenSHAP

In [2]:
# Pick up on-disk edits to the local modules without restarting the kernel.
import importlib

# Reload `base` first and `token_shap` second, so that any
# `from base import ...` inside token_shap (presumably present -- TODO confirm)
# is re-executed against the freshly reloaded module and both sides agree on
# class identities.
for _module_name in ("base", "token_shap"):
    if _module_name in sys.modules:
        importlib.reload(sys.modules[_module_name])

# importlib.reload() does not rebind names that were brought in with
# `from ... import ...`; re-execute those imports so the classes used below
# are the reloaded ones rather than the stale pre-reload objects.
from base import OllamaModel, TfidfTextVectorizer
from token_shap import StringSplitter, TokenSHAP

# Initialize phi4-reasoning model with Ollama
phi4_model = OllamaModel(
    model_name="phi4-reasoning:latest",
    api_url="http://127.0.0.1:11434"
)

# Use TF-IDF vectorizer (no external API needed)
tfidf_vectorizer = TfidfTextVectorizer()

# String splitter for word-level analysis
splitter = StringSplitter()

# Create TokenSHAP instance
token_shap_phi4 = TokenSHAP(
    model=phi4_model,
    splitter=splitter,
    vectorizer=tfidf_vectorizer,
    debug=True
)

print("TokenSHAP initialized with phi4-reasoning:latest!")

# Test connection. This is a best-effort smoke test: a failure is reported
# with a hint instead of being raised, so the rest of the cell's setup stays
# available for inspection.
try:
    test_response = phi4_model.generate("Hello")
    # Only the first 50 characters are shown to keep the output short.
    print(f"Connection successful! Test response: {test_response[:50]}...")
except Exception as e:
    print(f"Connection failed: {e}")
    print("Please make sure Ollama is running with: ollama serve")

TokenSHAP initialized with phi4-reasoning:latest!
Connection successful! Test response: <think>User says "Hello". We are Phi, a language m...


In [3]:
# Prompt whose tokens are scored by TokenSHAP.
prompt1 = "Why is the sky blue?"

# Banner: the announcement line followed by a 60-character rule.
print(f"Analyzing with phi4-reasoning: '{prompt1}'", "=" * 60, sep="\n")

# Options for the TokenSHAP run:
#   sampling_ratio=0.0        -> only the essential combinations are evaluated
#                                (no extra sampled ones), for faster execution
#   print_highlight_text=True -> presumably prints the highlighted prompt as
#                                part of analyze() -- confirm in token_shap
analysis_options = {
    "sampling_ratio": 0.0,
    "print_highlight_text": True,
}
df_phi4 = token_shap_phi4.analyze(prompt1, **analysis_options)

# Show the per-token result as colored text.
print("\nAnalysis Results:")
token_shap_phi4.print_colored_text()

Analyzing with phi4-reasoning: 'Why is the sky blue?'
Number of samples: 5
Number of essential combinations: 5
Remaining combinations budget after essentials: 995
Number of additional combinations to sample: 0
No additional combinations to sample.
Total combinations to process: 5


Processing combinations:   0%|          | 0/5 [00:00<?, ?it/s]


Processing combination 1/5:
Combination: ['is', 'the', 'sky', 'blue?']
Indexes: (2, 3, 4, 5)
Received response for combination 1

Processing combination 2/5:
Combination: ['Why', 'the', 'sky', 'blue?']
Indexes: (1, 3, 4, 5)
Received response for combination 2

Processing combination 3/5:
Combination: ['Why', 'is', 'sky', 'blue?']
Indexes: (1, 2, 4, 5)
Received response for combination 3

Processing combination 4/5:
Combination: ['Why', 'is', 'the', 'blue?']
Indexes: (1, 2, 3, 5)
Received response for combination 4

Processing combination 5/5:
Combination: ['Why', 'is', 'the', 'sky']
Indexes: (1, 2, 3, 4)
Received response for combination 5


Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
[I 2025-09-20 00:02:50,432] A new study created in memory with name: no-name-45e1b7ba-d772-4039-929b-be466ed34af5
[I 2025-09-20 00:02:09,430] Trial 0 finished with value: 0.1943559305026612 and parameters: {'learning_rate': 0.021499452773253044, 'max_depth': 10, 'n_estimators': 471, 'subsample': 0.9902229331840484, 'colsample_bytree': 0.8493031970411306, 'reg_alpha': 4.879562966450237e-07, 'reg_lambda': 5.214961840969713e-06}. Best is trial 0 with value: 0.1943559305026612.
[I 2025-09-20 00:02:09,570] Trial 1 finished with value: 0.17128350895191047 and parameters: {'learning_rate': 0.011562513624454158, 'max_depth': 4, 'n_estimators': 480, 'subsample': 0.9249328154566878, 'colsample_bytree': 0.7454025045579756, 'reg_alph

[48;2;255;255;0mWhy[0m [48;2;255;255;255mis[0m [48;2;255;255;253mthe[0m [48;2;255;255;254msky[0m [48;2;255;255;247mblue?[0m 

Analysis Results:
[38;2;255;0;0mWhy[0m [38;2;0;0;255mis[0m [38;2;81;81;255mthe[0m [38;2;41;41;255msky[0m [38;2;157;157;255mblue?[0m 
