In [None]:
import ollama
import pandas as pd
from tqdm import tqdm
from pydantic import BaseModel
from sklearn.metrics import confusion_matrix
import numpy as np

def ollamaResponse(content, model='llama3'):
    """Send a single user message to an Ollama chat model and return the reply text.

    Args:
        content: Text placed in the ``content`` field of the one ``user`` message.
        model: Name of the Ollama model to query. Defaults to ``'llama3'``,
            the value that was previously hard-coded, so existing callers
            are unaffected.

    Returns:
        The ``content`` entry of the ``message`` field of the chat response.
    """
    # A fresh single-message conversation is built on every call; no history is kept.
    messages = [{'role': 'user', 'content': content}]
    response = ollama.chat(model=model, messages=messages)
    return response['message']['content']

class Response(BaseModel):
    """Schema for one model reply: an object with a single integer ``response`` field."""
    # Integer score taken from the reply. The prompt used in this notebook asks for
    # a value from 0 to 10, but no range is enforced by this model.
    response: int

In [None]:
# Name of the column compared against the classifier output further down
# (presumably the manual annotations — TODO confirm).
label = 'alex'

# NOTE(review): absolute, machine-specific path; consider a configurable data
# directory before sharing this notebook.
path = '/Users/Alex/Desktop/2.FutureTech/OSFM/uniform_sample/results/post_classification_random_sample_8-28 copy.csv'
df = pd.read_csv(path)

# Missing confidence values are filled with 10. The result is assigned back to the
# column instead of calling fillna(inplace=True) on the selection: the in-place
# form on a column selection is deprecated and leaves `df` unchanged when pandas
# Copy-on-Write is active.
df['alex_confidence'] = df['alex_confidence'].fillna(10)

# Keep rows where either the 'classification' column or the label column is 'extends'.
# .copy() makes df_extends an independent frame, so columns added to it later
# do not raise SettingWithCopyWarning.
df_extends = df[(df['classification'] == 'extends') | (df[label] == 'extends')].copy()


In [None]:
# Prompt template filled in with str.format(sentence=...).
# - `{sentence}` is the only placeholder.
# - The doubled braces in `{{"response": int}}` are format-escapes, so the text sent
#   to the model contains a literal `{"response": int}`.
# NOTE(review): "sentences as from" looks like a typo for "are from". It is left
# untouched here because any change to the prompt text can change model outputs.
prompt = "The following sentences as from an academic paper, and cites a foundation model indicated by these <cite> citation brackets </cite>. An automated classifier determined that the authors fine-tune or train the cited model, but we want to determine whether this is an error, and in fact other papers or references fine-tuned the cited model. For example, the sentences might say 'Frank et. al. fine-tuned BigModel <cite>[30]</cite> and improved performance by 10%', which would indicate the classifier made an error, as despite mentioning fine-tuning, the authors did not fine-tune themselves. For the following sentences, respond in the JSON format {{\"response\": int}} to indicate whether this is a misattribution, with 0 being highly unlikely, and 10 being highly likely. Give one response for the whole set of sentences, without explanation. The sentence is as follows: {sentence}"

def is_misattribution(sentence):
    """Query the model about whether `sentence` is a misattributed fine-tuning claim.

    The sentence is substituted into the module-level `prompt` template and the
    filled-in text is passed to `ollamaResponse`.

    Returns:
        The raw reply string from `ollamaResponse`; it is not parsed here.
    """
    filled_prompt = prompt.format(sentence=sentence)
    reply = ollamaResponse(filled_prompt)
    return reply

# Number of independent model queries made for every row.
iters = 5

misattributions = []
for sentence in tqdm(df_extends['multisentence'], total=len(df_extends)):
    # Ask the model `iters` times; each reply is lowercased before JSON validation.
    responses = [is_misattribution(sentence).lower() for _ in range(iters)]
    # model_validate_json raises if a reply is not JSON matching the Response schema,
    # which aborts the loop.
    scores = [Response.model_validate_json(reply).response for reply in responses]
    # The prompt asks for a 0-10 score per reply. The summed scores are divided by 10
    # (not by `iters`), so the stored value is a total rather than a mean.
    total = np.array(scores).sum() / 10
    misattributions.append(total)

# Build a new frame with the extra column instead of setting it on what may be a
# slice of `df`; this avoids SettingWithCopyWarning.
df_extends = df_extends.assign(misattributions=misattributions)


In [None]:
# Rows whose misattribution total is at or above this value are relabelled 'context'.
threshold = 2

# Keep the original classification where the total is below the threshold and
# replace it with 'context' everywhere else (vectorised; same result as a
# row-by-row conditional). assign() returns a new frame, so no column is set
# on a possible slice of `df`.
corrected = df_extends['classification'].where(
    df_extends['misattributions'] < threshold, 'context'
)
df_extends = df_extends.assign(corrected_classification=corrected)

# Evaluate only on rows whose confidence is >= 5 once coerced to a number, or
# whose confidence is missing. Values that cannot be parsed become NaN under
# errors='coerce' and fail the >= 5 test.
confidence = pd.to_numeric(df_extends['alex_confidence'], errors='coerce')
mask = (confidence >= 5) | df_extends['alex_confidence'].isna()
df_filtered = df_extends[mask]

# First argument: the `label` column; second: the corrected classification.
cm = confusion_matrix(df_filtered[label], df_filtered['corrected_classification'])
cm

In [None]:
# Display the rows of df_extends whose 'classification' column equals 'context'.
df_extends.loc[df_extends['classification'].eq('context')]