In [1]:
import sys
import json
import pandas as pd
sys.path.append('../')
from pandaRec import Recipe, Recommender
from pandaRec.strategies import NameSearch, FuzzySearchName, FuzzySearchDescription, IndexSearch, SemanticSearch, RankingStrategy, OpenAIEmbeddings

In [2]:
# Evaluation set: a semicolon-separated file. Column names are supplied here,
# so pandas treats every line of the file as data rather than as a header.
df = pd.read_csv(
    'test_data.csv',
    sep=';',
    names=["name", "query", "correct"],
)

In [3]:
# Parse the recipe catalogue and turn each JSON entry into a Recipe object
# in one step; `recipes` ends up as a list of Recipe instances.
with open('../recipes/recipes.json') as f:
    recipes = [Recipe.from_dict(entry) for entry in json.load(f)]

In [4]:
# Read the OpenAI key from a local file so it is never hard-coded in the notebook.
# The context manager closes the file handle (the bare open().read() left it open),
# and strip() removes the trailing newline/whitespace that would otherwise become
# part of the credential.
with open("../examples/api_key.txt", "r") as key_file:
    api_key = key_file.read().strip()

In [5]:
import openai
# Hand the key loaded from api_key.txt to the openai module. None of the
# recommenders built below calls OpenAI while the OpenAIEmbeddings line stays
# commented out, so this only matters when that strategy is re-enabled.
openai.api_key = api_key

In [6]:
# One Recommender per search strategy under evaluation, all sharing the same
# recipe list and evaluation frame. Construction order matches the order in
# which results are reported further down.
recommenders = [
    Recommender(recipes, df, NameSearch()),
    Recommender(recipes, df, FuzzySearchName()),
    Recommender(recipes, df, FuzzySearchDescription()),
    Recommender(recipes, df, IndexSearch(recipes, '../recipes/search_index.pkl')),
    # Disabled variants, kept for reference:
    # Recommender(recipes, df, SemanticSearch(recipes, '../recipes/embeddings.pt')),
    # Recommender(recipes, df, OpenAIEmbeddings(recipes, '../recipes/openai_embeddings.pt')),  # left out because of pricing
    Recommender(recipes, df, SemanticSearch(recipes, model='all-MiniLM-L6-v2')),
]

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [7]:
def get_recommender_result(recommender, query, top_k=5):
    """Run one query through a recommender and return the top recipe names.

    Parameters
    ----------
    recommender : object
        Must provide ``set_search(query)``, ``recommend()`` and
        ``show_results(k)``; each item returned by ``show_results`` must expose
        ``.recipe.name``. The recommender is mutated: its current search is
        replaced by ``query``.
    query : str
        Free-text search string.
    top_k : int, default 5
        Number of results requested from ``show_results``. The default keeps
        the previously hard-coded cutoff of 5.

    Returns
    -------
    list
        Recipe names in the order ``show_results`` returned them.
    """
    recommender.set_search(query)
    recommender.recommend()
    hits = recommender.show_results(top_k)
    return [hit.recipe.name for hit in hits]

# Store each strategy's result list per query in a column named after the
# strategy class, printing the class name as a progress marker.
for recommender in recommenders:
    strategy_name = recommender.strategy.__class__.__name__
    print(strategy_name)
    df[strategy_name] = df['query'].apply(
        lambda query: get_recommender_result(recommender, query)
    )


NameSearch
FuzzySearchName
FuzzySearchDescription
IndexSearch
SemanticSearch


In [8]:
import math

def ndcg(correct, result, n=5):
    """Compute a binary-relevance NDCG@n for one ranked result list.

    Parameters
    ----------
    correct : str or missing value
        Comma-separated names of the relevant recipes. Whitespace around each
        name is ignored, as are empty fragments and repeated names. A missing
        value (anything ``pd.isnull`` reports as null) means the query has no
        ground truth.
    result : sequence of str
        Recommended recipe names, best match first.
    n : int, default 5
        Rank cutoff; only the first ``n`` entries of ``result`` are scored.

    Returns
    -------
    float or pd.NA
        DCG of ``result`` divided by the DCG of an ideal ranking, in [0, 1].
        ``pd.NA`` when the score is undefined: no ground truth, no usable
        label in ``correct``, or ``n`` < 1.
    """
    if pd.isnull(correct):
        return pd.NA

    # Normalise labels so "head, tail" matches the same names as "head,tail".
    relevant = {label.strip() for label in correct.split(',')} - {''}

    # Discount for rank r (0-based) is 1 / log2(r + 2); rank 0 therefore weighs 1.
    gains = [1 / math.log2(rank + 2) for rank in range(n)]

    # The ideal ranking puts every relevant recipe first, capped at the cutoff.
    ideal_score = sum(gains[:len(relevant)])
    if ideal_score == 0:
        # No labels or an empty cutoff: dividing would raise ZeroDivisionError.
        return pd.NA

    score = 0.0
    credited = set()
    for gain, name in zip(gains, result):
        # Credit each relevant recipe once; a repeated name in `result` must
        # not push the score above the ideal.
        if name in relevant and name not in credited:
            credited.add(name)
            score += gain
    return score / ideal_score

In [9]:
# Score each strategy's result column against the labelled answers, row by
# row, and store the outcome in "<StrategyName>_ndcg".
for recommender in recommenders:
    strategy_name = recommender.strategy.__class__.__name__
    df[strategy_name + '_ndcg'] = df.apply(
        lambda row: ndcg(row['correct'], row[strategy_name]),
        axis=1,
    )

In [10]:
# Print the mean NDCG of every strategy, one name/value pair per strategy.
for recommender in recommenders:
    strategy_name = recommender.strategy.__class__.__name__
    ndcg_column = strategy_name + '_ndcg'
    print(strategy_name)
    print(df[ndcg_column].mean())

# Recorded figure for the strategy that is commented out above
# (presumably from an earlier run with it enabled):
# OpenAIEmbeddings
# 0.5468005259704043

NameSearch
0.0
FuzzySearchName
0.3889713302547472
FuzzySearchDescription
0.2552236998763563
IndexSearch
0.21272152092359817
SemanticSearch
0.5061963464323941


In [11]:
# Show the evaluation frame: the input columns, one result-list column per
# strategy, and the matching "<StrategyName>_ndcg" score columns.
df

Unnamed: 0,name,query,correct,NameSearch,FuzzySearchName,FuzzySearchDescription,IndexSearch,SemanticSearch,NameSearch_ndcg,FuzzySearchName_ndcg,FuzzySearchDescription_ndcg,IndexSearch_ndcg,SemanticSearch_ndcg
0,read file,read a file to a pandas dataframe,,[],"[to_clipboard, to_csv, to_dict, to_excel, to_f...","[backfill, shift, tz_localize, abs, agg]","[to_latex, describe, info, to_csv, to_excel]","[to_xarray, to_hdf, to_json, infer_objects, get]",,,,,
1,head,show the first or last 5 rows,"head,tail",[],"[first, last, combine_first, first_valid_index...","[drop_duplicates, fillna, first, head, tail]","[asof, first, groupby, head, hist]","[tail, head, nlargest, last, nsmallest]",0.0,0.0,0.501266,0.264068,1.0
2,columns,show the column names,columns,[],"[cov, rename, cummin, keys, min]","[add_prefix, add_suffix, append, astype, at_time]","[compare, groupby, info, plot, to_string]","[to_latex, to_string, add_suffix, loc, filter]",0.0,0.0,0.0,0.0,0.0
3,drop,drop unnecessary columns,drop,[],"[drop, drop_duplicates, dropna, cummin, ne]","[asof, astype, autocorr, compare, convert_dtypes]","[drop, resample, reset_index, add_prefix, add_...","[dropna, all, truncate, droplevel, any]",0.0,1.0,0.0,1.0,0.0
4,len,get the length of the dataframe,len,[],"[rename, to_frame, head, asfreq, mean]","[add_prefix, add_suffix, asfreq, asof, astype]","[all, between_time, corr, cov, describe]","[tail, squeeze, head, memory_usage, to_frame]",0.0,0.0,0.0,0.0,0.0
5,query,show rows that meet a condition,,[],"[rpow, at_time, last, count, round]","[loc, isin, add, astype, filter]","[plot, compare, groupby, info, loc]","[notna, notnull, last_valid_index, isna, isnull]",,,,,
6,iloc,get a subset of the dataframe,iloc,[],"[rename, to_frame, head, rsub, asfreq]","[pct_change, align, any, append, apply]","[all, asof, between_time, describe, explode]","[asof, head, get, tail, filter]",0.0,0.0,0.0,0.0,0.0
7,dtypes,show the types of the columns,dtypes,[],"[asof, astype, cummin, value_counts, convert_d...","[append, to_csv, to_list, abs, agg]","[groupby, rank, align, astype, compare]","[to_latex, convert_dtypes, to_string, loc, all]",0.0,0.0,0.0,0.0,0.0
8,select dtypes,only show certain data types,,[],"[astype, tail, keys, convert_dtypes, align]","[append, dot, info, abs, add]","[groupby, rank, truncate, abs, add]","[convert_dtypes, to_string, notna, notnull, is...",,,,,
9,insert,insert a column in the specified position,insert,[],"[cummin, sort_values, set_axis, to_string, kur...","[mad, any, sort_index, between_time, drop]","[ewm, loc, plot, take, to_latex]","[searchsorted, to_latex, reindex_like, slice_s...",0.0,0.0,0.0,0.0,0.0
