# Generate plots of causal tracing results

In [1]:
# Prompt group 1 (index 1 of `prompt_groups`, shown as "Prompt C1" in the final plot).
# Each template presents the review text (`{}`) first and then asks for the star
# rating, ending right before the rating. c1_0 .. c1_4 are five paraphrases.
c1_0="""As a customer writing a review, I initially composed the following feedback: "{}"
After carefully considering the facts, I selected a star rating from the options of "1", "2", "3", "4", or "5". My final rating was:"""
c1_1="""As a customer sharing my experience, I crafted the following review: "{}"
Taking into account the details of my experience, I chose a star rating from the available options of "1", "2", "3", "4", or "5". My ultimate rating is:"""
c1_2="""As a client providing my opinion, I penned down the subsequent evaluation: "{}"
Upon thorough reflection of my encounter, I picked a star rating among the choices of "1", "2", "3", "4", or "5". My conclusive rating stands at:"""
c1_3="""As a patron expressing my thoughts, I drafted the ensuing commentary: "{}"
After meticulously assessing my experience, I opted for a star rating from the range of "1", "2", "3", "4", or "5". My definitive rating turned out to be:"""
c1_4="""As a consumer conveying my perspective, I authored the following assessment: "{}"
By carefully weighing the aspects of my interaction, I determined a star rating from the possibilities of "1", "2", "3", "4", or "5". My final verdict on the rating is:"""

# Prompt group 2 (index 2 of `prompt_groups`, shown as "Prompt C2" in the final plot).
# Each template states that the star rating was chosen first and that the review
# text (`{}`) explains it, ending right before the rating. c2_0 .. c2_4 are five paraphrases.
c2_0="""As a customer writing a review, I initially selected a star rating from the options "1", "2", "3", "4", and "5", and then provided the following explanations in my review: "{}"
The review clarifies why I gave a rating of"""
c2_1="""As a customer sharing my experience, I first chose a star rating from the available choices of "1", "2", "3", "4", and "5", and subsequently elaborated on my decision with the following statement: "{}"
The review elucidates the reasoning behind my assigned rating of"""
c2_2="""As a client providing my opinion, I initially picked a star rating from the range of "1" to "5", and then proceeded to justify my selection with the following commentary: "{}"
The review sheds light on the rationale for my given rating of"""
c2_3="""As a patron expressing my thoughts, I started by selecting a star rating from the scale of "1" to "5", and then offered an explanation for my choice in the following review text: "{}"
The review expounds on the basis for my designated rating of"""
c2_4="""As a consumer conveying my perspective, I began by opting for a star rating within the "1" to "5" spectrum, and then detailed my reasoning in the subsequent review passage: "{}"
The review delineates the grounds for my conferred rating of"""

# Prompt group 0 (index 0 of `prompt_groups`): data-annotator style sentiment
# classification prompts. The review text is substituted for `{}` and the prompt
# ends with a cue for the "1".."5" sentiment value.
# NOTE(review): c0_4 repeats the instruction sentence of c0_1 verbatim; only the
# final cue differs ("Sentiment Assessment:" vs "Sentiment Score:") -- confirm
# this is intended and not a copy/paste slip.
c0_0="""You are an experienced and responsible data annotator for natural language processing (NLP) tasks. In the following, you will annotate some data for sentiment classification. Specifically, given the task description and the review text, you need to annotate the sentiment in terms of "1" (most negative), "2", "3", "4", and "5" (most positive).
Review Text: "{}"
Sentiment:"""
c0_1="""As a proficient data annotator in natural language processing (NLP), your responsibility is to determine the sentiment of the given review text. Please assign a sentiment value from "1" (very negative) to "5" (very positive).
Review Text: "{}"
Sentiment Score:"""
c0_2="""As a skilled data annotator in the field of natural language processing (NLP), your task is to evaluate the sentiment of the given review text. Please classify the sentiment using a scale from "1" (highly negative) to "5" (highly positive).
Review Text: "{}"
Sentiment Rating:"""
c0_3="""As an expert data annotator for NLP tasks, you are required to assess the sentiment of the provided review text. Kindly rate the sentiment on a scale of "1" (extremely negative) to "5" (extremely positive).
Review Text: "{}"
Sentiment Evaluation:"""
c0_4="""As a proficient data annotator in natural language processing (NLP), your responsibility is to determine the sentiment of the given review text. Please assign a sentiment value from "1" (very negative) to "5" (very positive).
Review Text: "{}"
Sentiment Assessment:"""

In [2]:
import torch
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import numpy as np
import pickle

import pandas as pd

import io

class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that materialises pickled torch storages on the CPU.

    When the pickle stream asks for ``torch.storage._load_from_bytes`` a
    replacement loader is handed back that calls ``torch.load`` with
    ``map_location='cpu'``. Every other lookup is delegated unchanged to
    ``pickle.Unpickler.find_class``.

    Unpickling runs code chosen by the file, so only use this on trusted dumps.
    """

    def find_class(self, module, name):
        """Resolve ``module.name`` for the stream, swapping in the CPU-only loader."""
        is_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not is_storage_loader:
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            # Same as torch's own hook, except the target device is forced to CPU.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu


from transformers import LlamaForCausalLM, LlamaTokenizer,AutoTokenizer

## get groups

In [4]:
# Tokenizer used further down to locate sentence tokens inside the full prompt.
tokenizer = LlamaTokenizer.from_pretrained("/llama-7b-hf")

# Document-level table, sentence-level aggregates and the list of valid review ids.
df = pd.read_csv("./yelp_doc_senti_true.csv")
df_sent_tmp = pd.read_csv("./yelp_sent_sentiment_tmp.csv")
df_valid_ids = pd.read_csv("./yelp_id_text_label.csv", encoding='utf8')

# Left joins: keep every valid review id, attach whatever the other tables know about it.
df = df.merge(df_sent_tmp, on='review_id', how='left')
df = df_valid_ids.merge(df, on='review_id', how='left')

# Map the 0..4 class label onto a symmetric sentiment scale.
# FIX: label 1 was mapped to +0.5 (identical to label 3), which broke the
# symmetry of the scale; it must be -0.5.
mapping = {0: -1, 1: -0.5, 2: 0, 3: 0.5, 4: 1}

# Keep the original label shifted by one (presumably the 1..5 star rating -- confirm).
df['label_org'] = df['label'] + 1

# Rescaled label; the factor 10 presumably matches the range of
# `peak_end_avg` / `all_sent_avg` -- TODO confirm against the sentence-score files.
df['label'] = df['label'].replace(mapping) * 10

# Distance of the label to the peak-end average and to the all-sentence average.
df = df.assign(label_peak_end_avg_abs_diff=abs(df.label - df.peak_end_avg))
df = df.assign(label_all_sent_avg_abs_diff=abs(df.label - df.all_sent_avg))

# 'C2' when the label is strictly closer to the peak-end average, otherwise 'C1'.
df = df.assign(
    pred_class=np.where(
        df.label_peak_end_avg_abs_diff < df.label_all_sent_avg_abs_diff, 'C2', 'C1'
    )
)

# Slim lookup table that is merged into the tracing results later on.
df_ids = df.loc[:, ['review_id', 'pred_class', 'label_org', 'text']]

In [5]:
# Sentence-level table; the `review_id` and `sentence_text` columns are read later on.
sentences = pd.read_csv("./yelp_sent_senti_pred.csv")

## llama

In [6]:
# Read the two causal-trace result dumps. CPU_Unpickler forces any pickled torch
# storages onto the CPU, so the files load on a machine without a GPU.
# NOTE: unpickling executes code chosen by the file -- only load trusted dumps.
with open('./pkl_pred/2lay_trace_llama7b.pkl', 'rb') as first_dump:
    contents0 = CPU_Unpickler(first_dump).load()

with open('./pkl_pred/2lay_trace_llama7b_72.pkl', 'rb') as second_dump:
    contents1 = CPU_Unpickler(second_dump).load()



In [7]:
# Concatenate the records of both dumps and put them into one frame
# (one row per record).
contents = contents0 + contents1
dx = pd.DataFrame(contents)

In [8]:
# Unpack the per-record baseline score and the first element of the answer
# from the nested result dict.
dx['low_score'] = dx['res'].apply(lambda result: result['low_score'])
dx['label'] = dx['res'].apply(lambda result: result['answer'][0])

# Attach group / original label / review text for every traced review.
dx = dx.merge(df_ids, on='review_id', how='left')

dx['label_org'] = dx['label_org'].apply(str)

# For every record pick the score table stored under the key " <label>"
# (leading space included) and transpose it.
c_values = [
    result['scores'][' ' + str(answer)].T
    for answer, result in zip(dx['label'], dx['res'])
]

dx['vals'] = c_values

# Express the scores relative to the record's baseline.
dx['vals'] = dx['vals'] - dx['low_score']

In [9]:
# prompt_groups[prompt_type][prompt_id] -> template string with one `{}` slot.
prompt_groups = [
    [c0_0, c0_1, c0_2, c0_3, c0_4],
    [c1_0, c1_1, c1_2, c1_3, c1_4],
    [c2_0, c2_1, c2_2, c2_3, c2_4],
]

In [10]:
def find_sub_list(sl,l):
    """Locate every occurrence of the sequence ``sl`` inside the sequence ``l``.

    Parameters
    ----------
    sl : sequence
        The pattern to look for.
    l : sequence
        The sequence that is searched. Slices of ``l`` are compared to ``sl``
        with ``==``, so both should be of the same sequence type.

    Returns
    -------
    list of (int, int)
        One ``(start, end)`` tuple per match, in order of appearance, where
        ``end`` is the index of the last matched element (inclusive).
        Overlapping matches are all reported. An empty ``sl`` yields ``[]``
        (previously this raised ``IndexError`` whenever ``l`` was non-empty).
    """
    pattern_len = len(sl)
    if pattern_len == 0:
        # Nothing to search for; avoids indexing sl[0] below.
        return []

    head = sl[0]
    return [
        (start, start + pattern_len - 1)
        for start, element in enumerate(l)
        # Cheap first-element check before paying for the slice comparison.
        if element == head and l[start:start + pattern_len] == sl
    ]

## group by sentence

In [None]:
# For every traced record, average the token-level values in `vals` over the
# token span of each sentence of the review. The result per record is
# `np.stack(sent).T`, i.e. one column per kept sentence.
all_rows=[]
for i,d in dx.iterrows():
    # Rebuild the exact prompt that was traced and tokenize it.
    prompt=prompt_groups[d['prompt_type']][d['prompt_id']].format(d['text'])
    tokens=tokenizer.encode(prompt)
    # assumes `vals` is 2-D with prompt token positions on axis 1 (it is sliced
    # with token indices below) -- axis 0 is presumably the layer; TODO confirm
    array=d['vals']
    # Tokenize every sentence of this review on its own.
    encoded_sent=tokenizer.batch_encode_plus(list(sentences.loc[sentences.review_id==d['review_id'],:].sentence_text.values))['input_ids']
    sent=[]
    for k in range(len(encoded_sent)):
        el=encoded_sent[k]
        offset=0
        # Trim the stand-alone encoding so it can be found inside `tokens`.
        # NOTE(review): the trimming presumably removes the BOS token and tokens
        # that are tokenized differently at a sentence start / before the closing
        # quote -- verify against the tokenizer.
        if k==0:
            # First sentence: drop the first three tokens, then widen the span by
            # two positions to the left when averaging.
            el=el[3:]
            offset=2
        elif k==len(encoded_sent)-1:
            # Last sentence (when there is more than one): drop first and last token.
            el=el[1:-1]
        else:
            # Middle sentences: drop the first token only.
            el=el[1:]
        # (start, end) spans, end inclusive; only the first match is used.
        # NOTE(review): `r[0]` raises IndexError when the trimmed sentence is not
        # found in the prompt tokens.
        r=find_sub_list(el,tokens)
        # Mean over the sentence's token columns.
        computed_val=array[:,r[0][0]-offset:r[0][1]+1].mean(axis=1)
        if np.isnan(computed_val).any():
            # Sentences whose mean contains NaN are reported and skipped.
            print(i,computed_val)
        else:
            sent.append(computed_val)
    # NOTE(review): np.stack raises ValueError if every sentence was skipped.
    all_rows.append(np.stack(sent).T)

In [12]:
# Store the per-sentence averages next to the record they were computed from.
dx['sent_vals'] = all_rows

## group by bin

In [13]:
# Number of equal-width bins the relative sentence position [0, 1] is cut into.
num_bins = 10

# Accumulators (initialised empty).
mean_array = []
std_array = []
all_arrays = []

In [14]:
def _bin_means(values, n_bins):
    """Average ``values`` into ``n_bins`` equal-width bins of relative position.

    Element ``i`` of ``values`` sits at position ``np.linspace(0, 1, len)[i]``.
    Bin ``b`` covers ``[b / n_bins, (b + 1) / n_bins)``; the last bin also
    includes position 1.0. An empty bin repeats the value of the previous bin
    (``None`` if there is no previous value, i.e. ``values`` is empty).

    Returns a list of length ``n_bins``.
    """
    # Hoisted: the positions do not depend on the bin.
    positions = np.linspace(0, 1, values.shape[0])
    binned = []
    current = None
    for bin_index in range(n_bins):
        lo = bin_index / n_bins
        hi = (bin_index + 1) / n_bins
        # FIX: linspace ends at exactly 1.0, which a half-open last bin
        # [0.9, 1.0) never contains -- the final sentence was silently dropped.
        is_last_bin = bin_index == n_bins - 1
        members = [
            values[idx]
            for idx, pos in enumerate(positions)
            if lo <= pos and (pos < hi or is_last_bin)
        ]
        if members:
            current = sum(members) / len(members)
        # else: keep `current`, i.e. forward-fill the empty bin (this replaces
        # the former bare `except: pass` around the division).
        binned.append(current)
    return binned


# One matrix per record: a row for every row of `sent_vals`, a column for every bin.
all_matrices=[]
for j,d in dx.iterrows():
    heatmap_mat = [_bin_means(sub_array, num_bins) for sub_array in d['sent_vals']]
    all_matrices.append(np.array(heatmap_mat))

In [15]:
# Keep the binned matrix of every record in the frame.
dx['grouped_vals'] = all_matrices

In [16]:
# Turn the list of per-record matrices into a single array.
all_matrices = np.array(all_matrices)

In [17]:
# Average over the first axis (the records).
all_matrices_mean = np.mean(all_matrices, axis=0)

In [18]:
# Binned matrices of the records traced with prompt type 1 and prompt type 2.
is_type_1 = dx['prompt_type'] == 1
is_type_2 = dx['prompt_type'] == 2
pt1 = dx.loc[is_type_1, 'grouped_vals'].values
pt2 = dx.loc[is_type_2, 'grouped_vals'].values

In [19]:
def _mean_grouped(prompt_type):
    """Element-wise mean of the binned matrices of all records with this prompt type.

    Reads from ``dx`` instead of overwriting its own input, so re-running the
    cell gives the same result (the former ``pt1 = np.mean(pt1)`` collapsed the
    matrix to a scalar on a second run). Raises ``ValueError`` if no record has
    the requested prompt type.
    """
    matrices = dx.loc[dx['prompt_type'] == prompt_type, 'grouped_vals'].values
    # Explicit stack + mean over the record axis instead of relying on
    # np.mean over an object array of arrays.
    return np.stack(list(matrices)).mean(axis=0)


pt1 = _mean_grouped(1)
pt2 = _mean_grouped(2)

### Llama 7b

In [None]:
# Heatmaps of the averaged causal-trace values for prompt types 1 and 2:
# x = layer, y = relative sentence position (bins), one panel per prompt type.
panels = [("Prompt C1", pt1), ("Prompt C2", pt2)]

# FIX: both panels share one colorbar, so they must share one colour scale.
# Previously each panel was normalised to its own maximum while the colorbar
# only described the right-hand panel.
shared_vmax = max(np.nanmax(matrix) for _, matrix in panels)

fig, ax = plt.subplots(1, 2, figsize=(5, 3), dpi=200)
meshes = []
for axis, (title, matrix) in zip(ax, panels):
    grid = matrix.T  # rows -> position bins (y), columns -> layers (x)
    n_bins, n_layers = grid.shape
    meshes.append(axis.pcolor(grid, cmap="Purples", vmin=0, vmax=shared_vmax))
    axis.set_title(title)

    # Label every second bin at its centre with the bin's upper edge
    # (0.2, 0.4, ..., 1 for ten bins); derived from the data so a different
    # number of bins does not break the tick/label count.
    y_ticks = [1.5 + 2 * i for i in range(n_bins // 2)]
    axis.set_yticks(y_ticks)
    axis.set_yticklabels(['{:g}'.format((tick + 0.5) / n_bins) for tick in y_ticks])

    # One tick per layer, centred on the cell.
    axis.set_xticks([0.5 + i for i in range(n_layers)])
    axis.set_xticklabels(list(range(n_layers)))

    # Put position 0 (start of the review) at the top.
    axis.invert_yaxis()

h1, h2 = meshes
ax[0].set_ylabel("Sentence position")
fig.text(0.5, 0.02, 'Layers', ha='center', va='center')  # shared x label

# Lay out the two panels first; the colorbar axes is placed manually afterwards
# because tight_layout cannot handle axes created with fig.add_axes.
plt.tight_layout()
cbar_ax = fig.add_axes([0.99, 0.18, 0.02, 0.7])  # [left, bottom, width, height]
cbar = plt.colorbar(h2, cax=cbar_ax)

plt.savefig('causal_trace_llama.pdf', format='pdf', bbox_inches='tight',dpi=100)
plt.show()