In [2]:
import pandas as pd

from pathlib import Path

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
# Project layout, resolved relative to the parent of the current working directory.
PROJ_DIR = Path.cwd().parent
DATA_DIR = PROJ_DIR.joinpath('data')
LOG_DIR = PROJ_DIR.joinpath('logs')

# Create the log root if it is missing (no error when it already exists).
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_SAVE_DIR = LOG_DIR.joinpath('youcook_multihead_transformer')

In [16]:
# Load the master table from its pickle and the validation performance log from CSV.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
master_pickle_path = DATA_DIR / 'full_master_updated.pkl'
val_csv_path = LOG_SAVE_DIR / 'val_performance.csv'

master_df = pd.read_pickle(master_pickle_path)
val_df = pd.read_csv(val_csv_path)

## Generate Knowledge Extraction metrics based on best Key-clip Prediction model's predictions

In [17]:
def best_keyclip_prediction_true_positives(row):
    """Return the row's true-positive count, or 0 when the prediction is not flagged useful.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            ``'IsPredUseful'`` and ``'true_positives'`` entries.

    Returns:
        ``row['true_positives']`` when ``row['IsPredUseful']`` equals 1,
        otherwise 0.
    """
    if row['IsPredUseful'] == 1:
        return row['true_positives']
    return 0

def generate_metrics_from_master_and_val_df(master_df, val_df, master_key='Sentence', val_key='text'):
    """Compute corpus-level precision, recall and F1 over the rows shared by both frames.

    The two frames are inner-joined on ``master_key`` / ``val_key``. True
    positives are only counted for rows whose ``IsPredUseful`` equals 1; every
    other row contributes 0 true positives but still contributes its
    ``num_predicted`` and ``num_gold`` counts.

    Args:
        master_df: DataFrame joined on its ``master_key`` column.
        val_df: DataFrame joined on its ``val_key`` column.
        master_key: Join column name in ``master_df``.
        val_key: Join column name in ``val_df``.

    The merged frame must expose the columns ``'true_positives'``,
    ``'IsPredUseful'``, ``'num_gold'`` and ``'num_predicted'``.

    Returns:
        Tuple ``(precision, recall, f1)`` of Python floats, each rounded to two
        decimals. A ratio with a zero denominator (e.g. no rows matched) is
        reported as 0.0.

    Side effects:
        Prints the aggregate counts and the three metrics.

    NOTE(review): an inner merge repeats rows when a key occurs more than once
    on either side, which would inflate the totals -- confirm keys are unique.
    """
    def _safe_ratio(numerator, denominator):
        # An empty merge or all-zero counts would otherwise divide by zero.
        return float(numerator) / float(denominator) if denominator else 0.0

    combined_df = pd.merge(master_df, val_df, left_on=master_key, right_on=val_key, how='inner')

    # Vectorised form of "true_positives if IsPredUseful == 1 else 0"; unlike a
    # row-wise apply it also yields a proper (empty) column when nothing matched.
    combined_df['best_kc_pred_true_positives'] = combined_df['true_positives'].where(
        combined_df['IsPredUseful'] == 1, 0
    )

    total_num_gold = combined_df['num_gold'].sum()
    total_num_predicted = combined_df['num_predicted'].sum()
    total_true_pos = combined_df['best_kc_pred_true_positives'].sum()

    # Keep full precision until the very end: deriving F1 from already-rounded
    # precision/recall compounds the rounding error.
    raw_precision = _safe_ratio(total_true_pos, total_num_predicted)
    raw_recall = _safe_ratio(total_true_pos, total_num_gold)
    raw_f1 = _safe_ratio(2 * raw_precision * raw_recall, raw_precision + raw_recall)

    precision = round(raw_precision, 2)
    recall = round(raw_recall, 2)
    f1 = round(raw_f1, 2)

    print(f"Total true positives (predicted words overlap with gold words): {total_true_pos}")
    print(f"Total predicted words: {total_num_predicted}")
    print(f"Total gold words: {total_num_gold}")
    print(f"Precision: {precision}, Recall: {recall}, F1: {f1}")

    return precision, recall, f1

In [32]:
def get_words(sentence, num=3):
    """Return the first ``num`` space-separated tokens of ``sentence``, joined by single spaces.

    Splitting is done on a single literal space, so consecutive spaces produce
    empty tokens that count toward ``num``.

    Args:
        sentence: Text to truncate. Any non-string value (e.g. ``None`` or a
            NaN float from a missing cell) yields an empty string.
        num: Maximum number of tokens to keep.

    Returns:
        The truncated string, or ``""`` when ``sentence`` is not a string.

    Raises:
        TypeError: If ``num`` is not usable as a slice bound. Such errors are
            surfaced rather than silently turned into ``""``.
    """
    # Handle the expected "missing value" case explicitly instead of hiding
    # every possible error behind a blanket ``except Exception``.
    if not isinstance(sentence, str):
        return ""
    return " ".join(sentence.split(" ")[:num])

In [39]:
# Build a join key in each frame from the first four space-separated tokens of its text column.
# NOTE(review): get_words maps non-string entries to "", so all such rows share one key --
# confirm that is intended before merging on 'words'.
master_df['words'] = master_df['Key steps'].apply(get_words, num=4)
val_df['words'] = val_df['summary'].apply(get_words, num=4)

In [40]:
# Score the predictions by joining both frames on their 'words' column.
generate_metrics_from_master_and_val_df(
    master_df,
    val_df,
    master_key='words',
    val_key='words',
)

Total true positives (predicted words overlap with gold words): 1688
Total predicted words: 5657
Total gold words: 5466
Precision: 0.3, Recall: 0.31, F1: 0.3


(0.3, 0.31, 0.3)