# Notebook: Overview

- Summary: Describe purpose and key results.
- Inputs: List data sources and parameters.
- Outputs: Figures/tables produced and where saved.

In [4]:
import sys
import os

# Directory one level above the notebook's working directory, plus its src/ folder.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
reasoning_explain = os.path.join(parent_dir, 'src')

# Prepend each directory to the import search path unless it is already listed.
# src/ is handled second, so it ends up ahead of the parent directory.
for search_dir in (parent_dir, reasoning_explain):
    if search_dir not in sys.path:
        sys.path.insert(0, search_dir)

# Project-local imports; they resolve through the sys.path entries added above.
from core import Explainer
Explainer().explain("demo")

from base import OllamaModel, TfidfTextVectorizer
from token_shap import StringSplitter, TokenSHAP

In [5]:
# Force reload the base module to get the latest changes
import importlib
if 'base' in sys.modules:
    importlib.reload(sys.modules['base'])

# reload() does not update names that were bound earlier with
# `from base import ...`; without re-running the import, the classes used
# below would still be the pre-reload versions and the reload would have
# no visible effect in this notebook.
# NOTE(review): token_shap is not reloaded here; if it keeps its own
# references to objects from base, those remain the pre-reload ones.
from base import OllamaModel, TfidfTextVectorizer

# Initialize phi4-reasoning model with Ollama
phi4_model = OllamaModel(
    model_name="phi4-reasoning:latest", 
    api_url="http://127.0.0.1:11434"
)

# Use TF-IDF vectorizer (no external API needed)
tfidf_vectorizer = TfidfTextVectorizer()

# String splitter for word-level analysis
splitter = StringSplitter()

# Create TokenSHAP instance
token_shap_phi4 = TokenSHAP(
    model=phi4_model, 
    splitter=splitter, 
    vectorizer=tfidf_vectorizer,
    debug=True
)

print("TokenSHAP initialized with phi4-reasoning:latest!")

# Test connection: send a short prompt and show the first 50 characters of
# the reply. Any failure is reported instead of raised so the notebook keeps
# running and the reader sees how to start the server.
try:
    test_response = phi4_model.generate("Hello")
    print(f"Connection successful! Test response: {test_response[:50]}...")
except Exception as e:
    print(f"Connection failed: {e}")
    print("Please make sure Ollama is running with: ollama serve")

TokenSHAP initialized with phi4-reasoning:latest!
Connection successful! Test response: <think>User says: "Hello". I'm asked by user messa...


In [6]:
# Prompt whose tokens are scored by TokenSHAP.
prompt1 = "Why is the sky blue?"

# Header: the prompt under analysis, then a 60-character rule.
print(f"Analyzing with phi4-reasoning: '{prompt1}'", "=" * 60, sep="\n")

# Options for the run. sampling_ratio=0.0 keeps it to the essential
# combinations only (faster execution); print_highlight_text=True asks
# analyze() to print the highlighted prompt.
analysis_options = dict(sampling_ratio=0.0, print_highlight_text=True)

# Perform the TokenSHAP analysis and keep whatever analyze() returns.
df_phi4 = token_shap_phi4.analyze(prompt1, **analysis_options)

# Show the results as colored text.
print("\nAnalysis Results:")
token_shap_phi4.print_colored_text()

Analyzing with phi4-reasoning: 'Why is the sky blue?'
Number of samples: 5
Number of essential combinations: 5
Remaining combinations budget after essentials: 995
Number of additional combinations to sample: 0
No additional combinations to sample.
Total combinations to process: 5


Processing combinations:   0%|          | 0/5 [00:00<?, ?it/s]


Processing combination 1/5:
Combination: ['is', 'the', 'sky', 'blue?']
Indexes: (2, 3, 4, 5)
Received response for combination 1

Processing combination 2/5:
Combination: ['Why', 'the', 'sky', 'blue?']
Indexes: (1, 3, 4, 5)
Received response for combination 2

Processing combination 3/5:
Combination: ['Why', 'is', 'sky', 'blue?']
Indexes: (1, 2, 4, 5)
Received response for combination 3

Processing combination 4/5:
Combination: ['Why', 'is', 'the', 'blue?']
Indexes: (1, 2, 3, 5)
Received response for combination 4

Processing combination 5/5:
Combination: ['Why', 'is', 'the', 'sky']
Indexes: (1, 2, 3, 4)
Received response for combination 5


[I 2025-09-20 00:14:18,900] A new study created in memory with name: no-name-a453f889-d06b-4f0c-adf6-309b93c091c9
[I 2025-09-20 00:14:19,633] Trial 0 finished with value: 0.03438332178920034 and parameters: {'learning_rate': 0.24978574507978035, 'max_depth': 10, 'n_estimators': 147, 'subsample': 0.8193829781650699, 'colsample_bytree': 0.8525686407418122, 'reg_alpha': 0.00030668894262650664, 'reg_lambda': 3.0610542615705447e-06}. Best is trial 0 with value: 0.03438332178920034.
[I 2025-09-20 00:14:19,791] Trial 1 finished with value: 0.0701664934429479 and parameters: {'learning_rate': 0.0103446025873483, 'max_depth': 10, 'n_estimators': 411, 'subsample': 0.7274165086615396, 'colsample_bytree': 0.7686351742337799, 'reg_alpha': 7.953904640191629, 'reg_lambda': 1.9676463585966628e-05}. Best is trial 0 with value: 0.03438332178920034.
[I 2025-09-20 00:14:19,968] Trial 2 finished with value: 0.04862126210063222 and parameters: {'learning_rate': 0.07602868445600117, 'max_depth': 10, 'n_estim

[48;2;255;255;252mWhy[0m [48;2;255;255;254mis[0m [48;2;255;255;255mthe[0m [48;2;255;255;0msky[0m [48;2;255;255;245mblue?[0m 

Analysis Results:
[38;2;106;106;255mWhy[0m [38;2;35;35;255mis[0m [38;2;0;0;255mthe[0m [38;2;255;0;0msky[0m [38;2;168;168;255mblue?[0m 
