In [10]:
from feature_shap import  (
    FeatureSHAP, HuggingFaceModel, BlocksSplitter, RemovalModifier, BertScoreComparator, OllamaModel
)

In [11]:
# Build the FeatureSHAP explainer from its pluggable parts: a text-generation
# model, a splitter, a modifier, a comparator, and a custom instruction string.
# Alternative backend that was tried here:
#   HuggingFaceModel("Qwen/Qwen2.5-Coder-0.5B-Instruct")
# NOTE(review): the base_url host is `host.docker.internal`, so this presumably
# expects the notebook to run in a container with Ollama on the host - confirm.
fs = FeatureSHAP(
    model=OllamaModel(
        "gemma3:12b",
        base_url="http://host.docker.internal:11434/v1",
    ),
    splitter=BlocksSplitter(language="python"),
    modifier=RemovalModifier(),
    comparator=BertScoreComparator(),
    # Starts with a newline and refers to "the code above"; presumably appended
    # after the code in the prompt - verify against the feature_shap docs.
    instruction="\nWrite a single sentence summary of the code above.",
)

In [12]:
# Python snippet to be explained, stored as plain text.
# The backslash right after the opening quotes suppresses the leading newline,
# so the text starts directly at `def` and ends with a single trailing newline.
# NOTE(review): `input` shadows the builtin of the same name; the name is kept
# because the analysis cell passes this variable to fs.analyze.
input = """\
def read_files(files):
    content = ""
    for file in files:
        with open(file, 'r') as f:
            content += f.read()
    return content
"""

In [13]:
# Run the attribution on the snippet. The call returns a pair: the per-feature
# Shapley values (a mapping, iterated with .items() further down) and the
# interactions table that is displayed in the last cell.
# NOTE(review): the meaning of sampling_ratio=1.0 is not visible here - see the
# feature_shap documentation.
shapley_values, interactions = fs.analyze(
    input,
    sampling_ratio=1.0,
)

Batch generation: 100%|██████████| 1/1 [00:55<00:00, 55.48s/it]

['This code reads the content of multiple files and concatenates them into a single string.', 'The code reads the content of multiple files specified in the `files` list and concatenates them into a single string.', 'The code defines a function `read_files` that currently initializes an empty string and returns it, effectively doing nothing with the provided list of files.', 'The code reads the content of multiple files and concatenates them into a single string.', "The code defines a function `read_files` that is intended to read the content of a list of files, but it's incomplete as it doesn't actually perform the file reading operation and simply returns a variable named `content` which is not defined.", 'This code reads the content of each file in the `files` list and concatenates it into a single string called `content`.', 'The code reads the content of each file listed in the `files` input and concatenates it into the `content` variable.', 'The code initializes an empty string va


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
import pandas as pd

# Tabulate the Shapley values: one row per feature, with each score coerced to
# a plain Python float. As the cell's last expression, the frame is rendered.
pd.DataFrame(
    data=[(feature, float(score)) for feature, score in shapley_values.items()],
    columns=["Feature", "Shapley Value"],
)

Unnamed: 0,Feature,Shapley Value
0,def read_files(files):\n 1,0.0
1,"content = """"\n 2",0.157755
2,"for file in files:\n with open(file, 'r...",0.760581
3,return content\n4,0.081664


In [15]:
# Display the second value returned by fs.analyze. In the captured output it
# renders as a table with the columns Prompt, Response, Feature_Indexes,
# Similarity and Baseline.
interactions

Unnamed: 0,Prompt,Response,Feature_Indexes,Similarity,Baseline
0,"content = """"\n for file in files:\n ...",This code reads the content of multiple files ...,"(2, 3, 4)",0.99982,The code reads the content of multiple files a...
1,def read,The code reads the content of multiple files s...,"(1, 3, 4)",0.973643,The code reads the content of multiple files a...
2,def read,The code defines a function `read_files` that ...,"(1, 2, 4)",0.886974,The code reads the content of multiple files a...
3,def read,The code reads the content of multiple files a...,"(1, 2, 3)",1.0,The code reads the content of multiple files a...
4,def read,The code defines a function `read_files` that ...,"(1, 4)",0.881781,The code reads the content of multiple files a...
5,"content = """"\n for file in files:\n ...",This code reads the content of each file in th...,"(2, 3)",0.947834,The code reads the content of multiple files a...
6,def read,The code reads the content of each file listed...,"(1, 3)",0.934817,The code reads the content of multiple files a...
7,"content = """"\n",The code initializes an empty string variable ...,"(2,)",0.878782,The code reads the content of multiple files a...
8,def read,The code defines a function called `read_files...,"(1,)",0.883789,The code reads the content of multiple files a...
9,def read,The code initializes an empty string variable ...,"(1, 2)",0.901404,The code reads the content of multiple files a...
