In [132]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import os
import random

In [133]:
# Trainer ID -> list of case IDs associated with that trainer.
# sample_examples_unseen_surgeon uses this to hold out all cases of one trainer.
trainer2cases = {
    'A1': [1, 2, 6, 7, 21, 22, 33, 35],
    'A2': [3, 4, 5, 8, 11, 12, 13, 14, 16, 18, 20, 24, 26, 28, 29, 30, 31, 32],
    'A3': [9, 15, 17, 23, 25, 27],
    'A4': [10, 19, 34]
}
# Only these case IDs are eligible as a source of retrieved examples.
valid_cases = [1, 2, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 29, 33]

model = SentenceTransformer('all-MiniLM-L6-v2')
aligned_fb_detection = pd.read_csv("results/extract_dialogue/aligned_fb_detection/LFB1_full 'all phrases'.csv")
rag_embeddings_dir = "results/extract_dialogue/rag_embeddings/context+phrase"
annotations_dir = os.path.join(rag_embeddings_dir, 'annotations')

# case_id -> annotation DataFrame, loaded in ascending case-id order.
# The case id is parsed from the file name: the first '_'-separated token with
# its first three characters dropped (presumably names like "LFB12_..." — TODO confirm).
all_annotations = {}
# Filter to CSV files *before* sorting: the sort key parses an integer out of
# every name, so a stray non-matching entry would otherwise raise ValueError.
annotation_files = [name for name in os.listdir(annotations_dir) if name.endswith('.csv')]
for file in sorted(annotation_files, key=lambda x: int(x.split('_')[0][3:])):
    case_id = int(file.split('_')[0][3:])
    # Booleans stored as the strings 'True'/'False' are converted to real bools.
    all_annotations[case_id] = pd.read_csv(
        os.path.join(annotations_dir, file), index_col=0
    ).replace({'True': True, 'False': False})



In [134]:
def sample_unseen(available_annotations, fb_k=None, no_fb_k=None, random_state=None):
    """Randomly draw feedback and non-feedback rows from a pool of annotations.

    Parameters
    ----------
    available_annotations : dict
        Mapping whose values are DataFrames, each with an ``fb_instance`` column.
        All frames are concatenated (index reset) before sampling.
    fb_k : int, optional
        Number of rows to draw where ``fb_instance == True``.
        Defaults to all such rows.
    no_fb_k : int, optional
        Number of rows to draw where ``fb_instance == False``.
        Defaults to all such rows.
    random_state : optional
        Forwarded to ``DataFrame.sample`` for both draws so a run can be
        reproduced. ``None`` (default) keeps the previous unseeded behaviour.

    Returns
    -------
    (fb_sample, no_fb_sample) : tuple of DataFrame
        Rows sampled without replacement from each subset.

    Raises
    ------
    ValueError
        If either requested count exceeds the number of available rows
        (``pd.concat`` also raises ValueError when the mapping is empty).
    """
    df = pd.concat(list(available_annotations.values()), ignore_index=True)

    # Build each subset once. The explicit == comparison is kept on purpose:
    # it also works when the column has object dtype, and rows that are
    # neither True nor False (e.g. NaN) fall into neither subset.
    fb_df = df[df['fb_instance'] == True]
    no_fb_df = df[df['fb_instance'] == False]

    fb_k = len(fb_df) if fb_k is None else fb_k
    no_fb_k = len(no_fb_df) if no_fb_k is None else no_fb_k
    if fb_k > len(fb_df) or no_fb_k > len(no_fb_df):
        raise ValueError("k is greater than the number of available instances")

    fb_sample = fb_df.sample(fb_k, random_state=random_state)
    no_fb_sample = no_fb_df.sample(no_fb_k, random_state=random_state)

    return fb_sample, no_fb_sample

def most_similar(datapoint, model: SentenceTransformer, fb_sample, no_fb_sample, num_examples=3):
    """Pick the candidates most similar to ``datapoint`` from each sample.

    The embedding of ``datapoint`` and of every candidate row is read with
    ``np.load`` from the row's ``embedding_path``. Similarity scores come from
    ``model.similarity`` and are stored in a new ``similarity`` column.

    Parameters
    ----------
    datapoint : row-like with an ``embedding_path`` entry.
    model : object providing ``similarity(a, b)``.
    fb_sample, no_fb_sample : DataFrame with an ``embedding_path`` column.
        Neither input is modified; the function works on copies.
    num_examples : int
        Number of top-scoring rows kept per sample.

    Returns
    -------
    (fb_top, no_fb_top) : tuple of DataFrame
        Each sorted by ``similarity`` in descending order.
    """
    query_vector = np.load(datapoint['embedding_path'])

    def _top_matches(candidates):
        # Score a private copy of the candidates and keep the best rows.
        scored = candidates.copy()
        candidate_vectors = np.array([np.load(path) for path in scored['embedding_path']])
        # Transposed so the scores line up with the rows as a single column.
        scored['similarity'] = model.similarity(query_vector, candidate_vectors).T
        return scored.sort_values('similarity', ascending=False).head(num_examples)

    return _top_matches(fb_sample), _top_matches(no_fb_sample)
    
def sample_examples_unseen_case(datapoint, model: SentenceTransformer, num_examples=3, fb_k=None, no_fb_k=None):
    """Retrieve similar examples while holding out the datapoint's own case.

    Candidates are taken from every entry of ``all_annotations`` whose case id
    is listed in ``valid_cases`` and differs from ``datapoint['case_id']``.
    They are sampled with ``sample_unseen`` and ranked with ``most_similar``.

    Returns the ``(fb_examples, no_fb_examples)`` pair produced by ``most_similar``.
    """
    held_out_case = datapoint['case_id']

    candidate_pool = {}
    for cid, frame in all_annotations.items():
        # Skip the datapoint's own case and any case not approved for retrieval.
        if cid == held_out_case or cid not in valid_cases:
            continue
        candidate_pool[cid] = frame

    fb_pool, no_fb_pool = sample_unseen(candidate_pool, fb_k, no_fb_k)
    return most_similar(datapoint, model, fb_pool, no_fb_pool, num_examples)
    
def sample_examples_unseen_surgeon(datapoint, model: SentenceTransformer, num_examples=3, fb_k=None, no_fb_k=None):
    """Retrieve similar examples while holding out the datapoint's surgeon.

    The surgeon is the ``trainer2cases`` key whose case list contains
    ``datapoint['case_id']``. Candidates are taken from every entry of
    ``all_annotations`` whose case id is listed in ``valid_cases`` and does not
    belong to that surgeon. They are sampled with ``sample_unseen`` and ranked
    with ``most_similar``.

    Returns the ``(fb_examples, no_fb_examples)`` pair produced by ``most_similar``.

    Raises
    ------
    KeyError
        If the datapoint's case id is not listed under any trainer.
    """
    datapoint_case_id = datapoint['case_id']
    surgeon_id = next(
        (trainer for trainer, cases in trainer2cases.items() if datapoint_case_id in cases),
        None,
    )
    if surgeon_id is None:
        # Previously this surfaced later as an opaque `KeyError: None`.
        raise KeyError(f"case_id {datapoint_case_id!r} is not assigned to any trainer in trainer2cases")

    # Looked up once instead of on every iteration of the comprehension.
    surgeon_cases = trainer2cases[surgeon_id]
    available_annotations = {
        case_id: annotations
        for case_id, annotations in all_annotations.items()
        if case_id in valid_cases and case_id not in surgeon_cases
    }

    fb_sample, no_fb_sample = sample_unseen(available_annotations, fb_k, no_fb_k)

    return most_similar(datapoint, model, fb_sample, no_fb_sample, num_examples)

In [135]:
# fb_sample, no_fb_sample = sample_unseen(all_annotations)

# for i in range(10):
#     print(no_fb_sample.iloc[i]['context_dialogue'])

In [136]:
# Demo: retrieve examples for the first annotated row of case 1, holding out
# that case. `model` is reused from the setup cell; loading
# 'all-MiniLM-L6-v2' a second time here was redundant.
datapoint = all_annotations[1].iloc[0]

fb_examples, no_fb_examples = sample_examples_unseen_case(datapoint, model, num_examples=3)



In [137]:
# Show the dialogue context of the query datapoint for comparison with the retrieved examples.
print(datapoint['context_dialogue'])

[
  0: ['trainer': 'All right, let's clean and then can you up.']
  1: ['trainee': 'So I've just been learning to do the bladder neck pull out with Dr. Aaron, so the way he does it, and I'm happy to do it any way you want me to do it, he goes like, he likes to go lateral first and like really define the contour of the prostate before it comes a little bit more towards the midline. Okay, don't do that.']
  2: ['trainee': 'I didn't know I wasn't going to do it that way because I know you don't do it that way, but I'll uh...']
  3: ['trainer': 'So let me give you kind of a, I won't.']
  4: ['trainee': 'For the posterior part is what I'm not going to do.']
  5: ['trainer': 'OK, so I'll stop you if you're doing something funny. All right, let's do check on the left side, just see what's a Uzi there.']
]


In [138]:
# Print each retrieved feedback example: its dialogue context, then its
# similarity score, then a blank separator line.
for row_pos in range(len(fb_examples)):
    fb_row = fb_examples.iloc[row_pos]
    print(fb_row['context_dialogue'], fb_row['similarity'], sep='\n', end='\n\n')

[
  0: ['trainer': 'He likes to take a little bit further if you want to. If not, let's work on the other side.']
  1: ['trainer': 'Okay, can I? I'll come back. Yeah, that's']
  2: ['trainer': 'Can you get it deeper? Side of the bladder.']
  3: ['trainer': 'Sorry. Now, press your left hand on the prostate. You got your left hand always has, is this, this part is all about traction.']
  4: ['trainer': 'So drop, here, I'll do it with my sucker and I'll show you. Pull out.']
  5: ['trainer': 'And then you can flip and tease laterally. Yes, like that. Okay? So you do it.']
]
0.86223257

[
  0: ['trainee': 'I think the first time getting the right thing is...']
  1: ['trainer': 'This is the plane.']
  2: ['trainer': 'He likes to take a little bit further if you want to. If not, let's work on the other side.']
  3: ['trainer': 'Okay, can I? I'll come back. Yeah, that's']
  4: ['trainer': 'Can you get it deeper? Side of the bladder.']
  5: ['trainer': 'Sorry. Now, press your left hand on the 

In [139]:
# Print each retrieved non-feedback example: its dialogue context, then its
# similarity score, then a blank separator line.
for row_pos in range(len(no_fb_examples)):
    no_fb_row = no_fb_examples.iloc[row_pos]
    print(no_fb_row['context_dialogue'], no_fb_row['similarity'], sep='\n', end='\n\n')

[
  0: ['trainer': 'This is the plane.']
  1: ['trainer': 'He likes to take a little bit further if you want to. If not, let's work on the other side.']
  2: ['trainer': 'Okay, can I? I'll come back. Yeah, that's']
  3: ['trainer': 'Can you get it deeper? Side of the bladder.']
  4: ['trainer': 'Sorry. Now, press your left hand on the prostate. You got your left hand always has, is this, this part is all about traction.']
  5: ['trainer': 'So drop, here, I'll do it with my sucker and I'll show you. Pull out.']
]
0.8528582

[
  0: ['trainee': 'Keep going so this part like the contour will be yeah, yeah, yeah keep going']
  1: ['trainee': 'Can I have a lens clean?']
  2: ['trainer': 'probably get in now. You're wide enough. The point is you just don't want to go in a deep hole.']
  3: ['trainer': 'Just just get in just why why you're hold on hold on why are you just yeah?']
  4: ['trainee': 'I don't know why you're encroaching closer to the prostate. Here? Come down just a touch. Uh-huh.