<a href="https://colab.research.google.com/github/greek-nlp/benchmark/blob/main/impostors_llama70biV1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Llama-70b-Instruct-V1

In [1]:
%%capture
!pip install boto3
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError
from tqdm.notebook import tqdm
from google.colab import files
import os
import json
import random
import importlib
import pandas as pd

In [2]:
# Ask the user to upload the AWS credentials file into the Colab working directory.
print ('Upload the `aws.json` file: ')
files.upload()
# Read the credentials through a context manager so the file handle is closed
# as soon as parsing finishes (the bare open() used before leaked the handle).
with open('aws.json') as credentials_file:
  credentials = json.load(credentials_file)

Upload the `aws.json` file: 


Saving aws.json to aws.json


In [8]:
# Build the Bedrock Runtime client used for native-inference calls to Meta Llama 3.
# Keys and region are taken from the uploaded `credentials` mapping.
_bedrock_client_kwargs = {
    "aws_access_key_id": credentials['aws_access_key_id'],
    "aws_secret_access_key": credentials['aws_secret_access_key'],
    "region_name": credentials['aws_region'],
}
client = boto3.client("bedrock-runtime", **_bedrock_client_kwargs)

def llama_prompt(text,
                 instruction,
                 model_id,
                 shots="",
                 max_len=16,
                 temperature=0.3,
                 client=client):
  """Send a single user turn to a Llama 3 model and return the generated text.

  Args:
    text: The input the model should respond to (placed after ``user:``).
    instruction: Task description placed at the top of the user turn.
    model_id: Model identifier passed to ``client.invoke_model``.
    shots: Optional few-shot examples inserted between the instruction and
      the input. Defaults to an empty string (zero-shot).
    max_len: Value sent as ``max_gen_len`` in the request payload.
    temperature: Value sent as ``temperature`` in the request payload.
    client: Object exposing ``invoke_model(modelId=..., body=...)``; defaults
      to the module-level client.

  Returns:
    The ``"generation"`` field of the decoded JSON response.

  Raises:
    RuntimeError: If invoking the model fails for any reason; the original
      exception is attached as ``__cause__``.
  """
  # Embed the prompt in Llama 3's instruction format.
  # The template (including its leading whitespace) is kept exactly as is so
  # that results remain comparable with earlier runs.
  formatted_prompt = f"""
  <|begin_of_text|><|start_header_id|>user<|end_header_id|>
  {instruction}
  {shots}
  user: {text}
  <|eot_id|>
  <|start_header_id|>assistant<|end_header_id|>
  """

  # Format the request payload using the model's native structure and
  # serialise it to JSON.
  request = json.dumps({
      "prompt": formatted_prompt,
      "max_gen_len": max_len,
      "temperature": temperature,
  })

  try:
    # Invoke the model with the request.
    response = client.invoke_model(modelId=model_id, body=request)
  except Exception as e:
    # Raise instead of calling exit(): inside a notebook exit() does not
    # reliably stop execution, which would leave `response` unbound below.
    raise RuntimeError(f"ERROR: Can't invoke '{model_id}'. Reason: {e}") from e

  # Decode the response body.
  model_response = json.loads(response["body"].read())

  # Extract and return the response text.
  return model_response["generation"]

# Access the data

In [5]:
# Fetch the dataset archive (barzokas_10excerpts.csv.gz) from Google Drive by its file id.
!gdown 17Ks0Q8GFsunj5w3PoHwcG1vTm_bfLhpL

Downloading...
From: https://drive.google.com/uc?id=17Ks0Q8GFsunj5w3PoHwcG1vTm_bfLhpL
To: /content/barzokas_10excerpts.csv.gz
100% 19.7M/19.7M [00:00<00:00, 45.7MB/s]


In [7]:
# Load the downloaded excerpts table, using the first CSV column as the index,
# then display one randomly chosen row as a sanity check.
barzokas_pop = pd.read_csv(
    filepath_or_buffer='barzokas_10excerpts.csv.gz',
    index_col=0,
)
barzokas_pop.sample(n=1)

Unnamed: 0,id,title,author,type,publishedYear,isbn,filename,postUrl,attachmentUrl,authorYearOfBirth,...,excerpt_1,excerpt_2,excerpt_3,excerpt_4,excerpt_5,excerpt_6,excerpt_7,excerpt_8,excerpt_9,excerpt_10
1031,openBook158,Μπράνγουελ,Κατερίνα Καζολέα,Θεατρικό έργο,2019,,Μπράνγουελ.pdf,https://www.openbook.gr/branwell/,http://bit.ly/2GCnw2p,,...,ΑΝ: Νομίζω… εννοούσε αυτό που φοβόμαστε. ΣΑ...,ης νύχτας. ΣΑΡΛΟΤ: Σκέφτομαι πόση ενέργει...,λά να μπαίνει στη ζωή του. ΣΑΡΛΟΤ: Πρέπει ν...,ό πλήγμα για τον Μπραν! ΣΑΡΛΟΤ: Έδωσε την ευ...,ΣΚΗΝΗ ΕΝΔΕΚΑΤΗ Στο δωμάτιο είναι η Σάρλοτ ...,ιλι έχει λαχανιάσει και ανασαίνει πολύ γρήγορα...,κυκλοφορούμε; Θα γίνουμε έγκλειστες για να τ...,πλησιάσω. Η κατάστασή του δημιουργούσε χάσμα...,ία Ρόμπινσον; ΕΜΙΛΙ: Ναι. Έναν πλούσιο χήρο...,η Έμιλι ξαναγυρίζει αναστατωμένη. ΕΜΙΛΙ: Θ...


In [10]:
# Candidate author list, in the order produced by value_counts()
# (most frequent author first).
author_counts = barzokas_pop['author'].value_counts()
pop_authors = author_counts.index.tolist()
pop_authors

['Θανάσης Τριαρίδης',
 'Γιάννης Αντάμης',
 'Πάνος Κολιόπουλος',
 'Κώστας Βουλαζέρης',
 'Δημήτρης Τζουβάλης',
 'Plato',
 'Ράνια Συνοδινού',
 'Γιώργος Σ. Κόκκινος',
 'Ευρυδίκη Αμανατίδου',
 'Πάνος Α. Ζέρβας',
 'Φρίντα Κριτσωτάκη',
 'Πασχάλης Παπαβασιλείου',
 'Αύγουστος Κορτώ',
 'Χάρης Γαντζούδης',
 'Βασίλειος Κάππας',
 'Τέος Ρόμβος',
 'Κατερίνα Καζολέα']

In [11]:
from tqdm.notebook import tqdm
# Zero-shot instruction listing every candidate author.
# NOTE(review): the author list and the trailing sentence are joined without
# punctuation; left unchanged so the prompt matches previously reported runs.
instruct = "Given an excerpt from a Greek book, return the author it is from by picking from the following authors: " + ', '.join(pop_authors) +' Return only the name of the author, nothing else.'
model_id="meta.llama3-70b-instruct-v1:0"
# One prediction column (llama_1 .. llama_10) per excerpt column (excerpt_1 .. excerpt_10).
for i in tqdm(range(1, 11)):
  excerpts = barzokas_pop[f'excerpt_{i}'].values
  # `shots` is passed explicitly as an empty string: llama_prompt requires it
  # and this benchmark is zero-shot, so omitting it raised a TypeError.
  barzokas_pop[f'llama_{i}'] = [
      llama_prompt(text=t, instruction=instruct, model_id=model_id, shots="", temperature=0.3)
      for t in tqdm(excerpts)
  ]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

In [12]:
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
# Score only the first 175 rows of the table.
barzokas_pop_mini = barzokas_pop[:175]
p,r,f = [],[],[]
# Arguments shared by all three macro-averaged scorers.
macro_kwargs = dict(labels=pop_authors, zero_division=0, average='macro')
for i in range(1,11):
  gold = barzokas_pop_mini.author
  # Strip surrounding whitespace from the generations once per column.
  predicted = barzokas_pop_mini[f'llama_{i}'].str.strip()
  p.append(precision_score(gold, predicted, **macro_kwargs))
  r.append(recall_score(gold, predicted, **macro_kwargs))
  f.append(f1_score(gold, predicted, **macro_kwargs))

# Report mean and standard error of the mean over the ten prediction columns.
for tag, scores in (('P', p), ('R', r), ('F', f)):
  score_series = pd.Series(scores)
  print(f'{tag}: {score_series.mean():.3f} ({score_series.sem():.3f})')


P: 0.333 (0.012)
R: 0.237 (0.005)
F: 0.242 (0.008)


In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

num_runs = 10
num_samples = 175
np.random.seed(42)

# Per-author score lists. Entry k of each list belongs to prediction column
# llama_{k+1}, so every list ends up with exactly `num_runs` values.
author_metrics = {
    author: {'P': [], 'R': [], 'F': []} for author in pop_authors
}

for i in range(1, num_runs + 1):
    pred_col = f'llama_{i}'
    y_true = barzokas_pop_mini['author']
    y_pred = barzokas_pop_mini[pred_col].str.strip() # Ensure stripping is applied here

    # Get the classification report dictionary for the current run
    report = classification_report(
        y_true,
        y_pred,
        labels=pop_authors,
        zero_division=0,
        output_dict=True
    )

    # Store P, R, F scores for each individual author. `labels=pop_authors`
    # means the report has an entry for every author, so index directly: a
    # silent skip would leave the per-author lists with unequal lengths.
    for author in pop_authors:
        author_metrics[author]['P'].append(report[author]['precision'])
        author_metrics[author]['R'].append(report[author]['recall'])
        author_metrics[author]['F'].append(report[author]['f1-score'])

# Calculate mean and SEM (across runs) for each author's metrics
results = {}
for author, metrics in author_metrics.items():
    results[author] = {
        'P_mean': np.mean(metrics['P']),
        'P_sem': pd.Series(metrics['P']).sem(),
        'R_mean': np.mean(metrics['R']),
        'R_sem': pd.Series(metrics['R']).sem(),
        'F_mean': np.mean(metrics['F']),
        'F_sem': pd.Series(metrics['F']).sem()
    }

# Report the per-author results
print("--- Mean (SEM) Performance Per Author Across 10 Runs ---")
for author in pop_authors:
    res = results[author]
    print(f"\nAuthor: {author}")
    print(f"  P: {res['P_mean']:.3f} ({res['P_sem']:.3f})")
    print(f"  R: {res['R_mean']:.3f} ({res['R_sem']:.3f})")
    print(f"  F1: {res['F_mean']:.3f} ({res['F_sem']:.3f})")


def _macro_per_run(metric):
    """Return one macro score per run: the unweighted mean over all authors."""
    # Rows are authors, columns are runs; averaging over axis 0 collapses authors.
    return np.mean([author_metrics[a][metric] for a in pop_authors], axis=0)


# Overall macro-averages. The SEM is taken across the per-run macro scores
# (one value per run), not across the per-author means as before, which
# measured the spread between authors rather than between runs.
p_macro = _macro_per_run('P')
r_macro = _macro_per_run('R')
f_macro = _macro_per_run('F')

macro_p_mean = np.mean(p_macro)
macro_r_mean = np.mean(r_macro)
macro_f_mean = np.mean(f_macro)

p_sem = pd.Series(p_macro).sem()
r_sem = pd.Series(r_macro).sem()
f_sem = pd.Series(f_macro).sem()

print("\n--- Overall Macro-Averages (10-Run Mean) ---")
print(f"P: {macro_p_mean:.3f} ({p_sem:.3f})")
print(f"R: {macro_r_mean:.3f} ({r_sem:.3f})")
print(f"F: {macro_f_mean:.3f} ({f_sem:.3f})")

--- Mean (SEM) Performance Per Author Across 10 Runs ---

Author: Θανάσης Τριαρίδης
  P: 0.210 (0.006)
  R: 0.679 (0.031)
  F1: 0.320 (0.010)

Author: Γιάννης Αντάμης
  P: 0.248 (0.018)
  R: 0.152 (0.017)
  F1: 0.186 (0.017)

Author: Πάνος Κολιόπουλος
  P: 0.100 (0.067)
  R: 0.012 (0.008)
  F1: 0.021 (0.014)

Author: Κώστας Βουλαζέρης
  P: 0.960 (0.027)
  R: 0.176 (0.021)
  F1: 0.291 (0.032)

Author: Δημήτρης Τζουβάλης
  P: 0.918 (0.034)
  R: 0.308 (0.028)
  F1: 0.450 (0.029)

Author: Plato
  P: 0.983 (0.017)
  R: 0.825 (0.033)
  F1: 0.895 (0.025)

Author: Ράνια Συνοδινού
  P: 0.191 (0.034)
  R: 0.175 (0.029)
  F1: 0.181 (0.031)

Author: Γιώργος Σ. Κόκκινος
  P: 0.453 (0.029)
  R: 0.487 (0.029)
  F1: 0.465 (0.024)

Author: Ευρυδίκη Αμανατίδου
  P: 0.129 (0.031)
  R: 0.157 (0.040)
  F1: 0.141 (0.035)

Author: Πάνος Α. Ζέρβας
  P: 0.000 (0.000)
  R: 0.000 (0.000)
  F1: 0.000 (0.000)

Author: Φρίντα Κριτσωτάκη
  P: 0.000 (0.000)
  R: 0.000 (0.000)
  F1: 0.000 (0.000)

Author: Πασχάλης Παπ