In [7]:
import pandas as pd
from tqdm import tqdm

from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import r2_score

from caml import config
from caml.utils import utils_preprocess_text
from caml.similarity import MLModel

In [2]:
# Load the product annotation table from a local pickle.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
annotation_df = pd.read_pickle('../data/6k_grocery_products_annotations.pkl')

In [3]:
# Load the NAICS code table from a local pickle (later cells read its
# naics_code, naics_desc, eio_co2 and text_clean columns).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
naics_df = pd.read_pickle('../data/naics_codes.pkl')

In [4]:
# Attach the eio_co2 value of each product's annotated NAICS code, then drop
# the raw annotation column and any fully duplicated rows.
# NOTE: annotation_df is overwritten, so this cell cannot be re-run without
# reloading the pickle first (raw_annotations no longer exists afterwards).
annotation_df = (
    annotation_df
    .merge(
        naics_df[['naics_code', 'eio_co2']],
        left_on='naics_code',
        right_on='naics_code',
        how='left',
    )
    .drop(columns='raw_annotations')
    .drop_duplicates()
)

In [5]:
# Preview the first rows of the merged annotation table.
annotation_df.head()

Unnamed: 0,product_code,naics_code,product_text,eio_co2
0,B01F7AW9MO,311821,Hail Merry Chocolate Chip Cookie Dough Macaroo...,0.875952
9,B01LY5ZSPH,339913,90 Cake Topper Gold Premium Metal Number 90 B...,0.287076
15,B000WO434S,325998,NielsenMassey Pure Orange Extract with Gift bo...,0.799217
102,B075R61D2B,0,Nature Made Energy B12 1000 mcg Gummies 80 Cou...,
103,B00HES9CMS,311320,Viva Naturals 1 Best Selling Certified Organic...,0.746672


In [6]:
# Build the cleaned product text used for similarity matching:
#   1. run the project preprocessing helper (lemmatisation on, stemming off),
#   2. remove digit runs,
#   3. turn underscores into spaces,
#   4. collapse runs of spaces and trim both ends.
annotation_df["text_clean"] = (
    annotation_df["product_text"]
    .apply(lambda text: utils_preprocess_text(text, flg_stemm=False, flg_lemm=True))
    # regex=True must be explicit: since pandas 2.0 str.replace defaults to a
    # literal match, which would leave every digit in place. The raw string
    # avoids the invalid '\d' escape warning.
    .str.replace(r'\d+', '', regex=True)
    .str.replace('_', ' ', regex=False)
    # One pattern collapses space runs of any length; the previous fixed
    # 3-space / 2-space passes left a double space for runs of five or more.
    .str.replace(r' {2,}', ' ', regex=True)
    .str.strip()
)

  annotation_df['text_clean'] = annotation_df['text_clean'].str.replace('\d+', '')


In [None]:
# Collect the cleaned texts of both sides as arrays.
product_list = annotation_df["text_clean"].values
naics_list = naics_df["text_clean"].values

# Instantiate the similarity model named in the project config and score the
# product texts against the NAICS texts.
model = MLModel(config.model_name)
cosine_scores = model.compute_similarity_scores(product_list, naics_list)

In [8]:
# Reset the evaluation dataframe in which the per-product predictions are
# accumulated; re-run this cell before re-running the evaluation loop.
evaluation_df = pd.DataFrame()

In [None]:
## Evaluate the products in the test set
# For each product: rank the NAICS candidates, collapse them to one row per
# NAICS code (first row of each code plus a 'votes' count of how many ranked
# candidates shared that code), and keep only the best code (top-1).
top_predictions = []
for ix in tqdm(range(len(annotation_df))):
    similarity_score = model.rank_similarity_scores(annotation_df, cosine_scores, ix, naics_df)
    candidates_by_code = similarity_score.groupby('naics_code')
    aggregated_scores = candidates_by_code.first()
    aggregated_scores['votes'] = candidates_by_code.size()
    # Highest cosine score wins; the vote count breaks ties.
    aggregated_scores = aggregated_scores.sort_values(['cosine_score', 'votes'], ascending=False).reset_index()
    top_predictions.append(aggregated_scores.head(1))

# Concatenate once after the loop: growing the frame inside the loop copies
# all previous rows on every iteration (quadratic in the number of products).
evaluation_df = pd.concat([evaluation_df, *top_predictions])

In [None]:
## Compute the top-1 accuracy of the model
# Use annotation_df dataframe if you want to evaluate against all the products at the same time
# instead of just the test set

# Human side: annotations plus one description per NAICS code.
adf = annotation_df.merge(naics_df[['naics_code','naics_desc']].groupby('naics_code').first(),
                     left_on='naics_code', right_on='naics_code', how='left')
# Model side: predicted NAICS codes plus the eio_co2 value of the predicted code.
edf = evaluation_df.merge(naics_df[['naics_code','eio_co2']], left_on='naics_code',
                          right_on='naics_code', how='left')

# Left-join from the side with fewer distinct products. Whichever frame ends
# up on the left, human columns must get '_human' and model columns '_model'.
if len(edf.product_code.unique()) > len(adf.product_code.unique()):
    df = adf.set_index("product_code").join(edf.set_index("product_code"), lsuffix='_human', rsuffix='_model')
else:
    # edf (model output) is the left frame here, so the suffixes are swapped
    # relative to the branch above. Labelling it '_human' would exchange
    # y_true and y_pred in the error metrics below.
    df = edf.set_index("product_code").join(adf.set_index("product_code"), lsuffix='_model', rsuffix='_human')

# The join can repeat a product over several rows, so count unique index values.
total_products = len(df.index.unique())

rf = df[df.naics_code_human == df.naics_code_model]
correct_naics = len(rf.index.unique())
print("Top-1 accuracy w.r.t NAICS codes: ", correct_naics/total_products)
print("Correct predictions: {}, Total Products: {}".format(correct_naics, total_products))

## Compute the top-1 accuracy with respect to BEA codes (first 4 digits of a 6-digit NAICS code)
# Plain Series are used instead of `df.bea_human = ...`: attribute assignment
# does not create a DataFrame column and makes pandas emit a UserWarning.
bea_human = df.naics_code_human//100
bea_model = df.naics_code_model//100
bf = df[bea_human == bea_model]
correct_bea = len(bf.index.unique())
print("Top-1 accuracy w.r.t BEA codes: ", correct_bea/total_products)
print("Correct predictions: {}, Total Products: {}".format(correct_bea, total_products))

# Compute the mean absolute percentage error and R^2 value w.r.t EIF for zero-shot prediction
# dropna() removes every row with a missing value in any column, e.g. products
# whose NAICS code has no eio_co2 entry.
df = df.dropna()
# One row per product: first non-null value of each column.
per_product = df.groupby(df.index).first()
y_true = per_product.eio_co2_human
y_pred = per_product.eio_co2_model
print("The mean absolute percentage error for predicting kgCO2e/$: ", mape(y_true, y_pred))
print("The R2 value for predicting kgCO2e/$: ", r2_score(y_true, y_pred))