In [1]:
import sys
import json
import pandas as pd
sys.path.append('../')
from pandaRec import Recipe, Recommender
from pandaRec.strategies import NameSearch, FuzzySearchName, FuzzySearchDescription, IndexSearch, SemanticSearch, RankingStrategy, OpenAIEmbeddings

In [2]:
# Evaluation set: a semicolon-separated file read as headerless, with the
# three column names supplied explicitly.
df = pd.read_csv(
    'test_data.csv',
    sep=';',
    names=["name", "query", "correct"],
)

In [3]:
# Load the recipe definitions and wrap each entry in a Recipe object.
# The file is decoded explicitly as UTF-8 so the result does not depend on the
# platform's default encoding, and `recipes` is only ever bound to the final
# list of Recipe objects (never to the intermediate raw dicts).
with open('../recipes/recipes.json', encoding='utf-8') as f:
    recipes = [Recipe.from_dict(r) for r in json.load(f)]

In [4]:
# Read the API key from a local file. The context manager closes the file
# handle deterministically, and strip() drops the trailing newline that text
# editors usually append, which would otherwise become part of the key.
with open("../examples/api_key.txt", "r") as key_file:
    api_key = key_file.read().strip()

In [5]:
import openai
# Register the key on the openai module.
# NOTE(review): presumably only the OpenAIEmbeddings strategy needs this, and
# that recommender is commented out in the setup cell — confirm before removing.
openai.api_key = api_key

In [6]:
# One Recommender per search strategy under evaluation. The list order matters:
# the position of each entry is later used as a suffix in the result column names.
recommenders = [
    Recommender(recipes, df, NameSearch()),
    Recommender(recipes, df, FuzzySearchName()),
    Recommender(recipes, df, FuzzySearchDescription()),
    Recommender(recipes, df, IndexSearch(recipes, '../recipes/search_index.pkl')),
    Recommender(recipes, df, SemanticSearch(recipes, '../recipes/embeddings.pt')),
    # Recommender(recipes, df, OpenAIEmbeddings(recipes, '../recipes/openai_embeddings.pt')),  # left out because of pricing
    Recommender(recipes, df, SemanticSearch(recipes, model='all-MiniLM-L6-v2')),
    Recommender(recipes, df, SemanticSearch(recipes, model='sentence-t5-base')),
    Recommender(recipes, df, SemanticSearch(recipes, model='krlvi/sentence-t5-base-nlpl-code_search_net')),
]

Downloading (…)2bb58/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)/2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

Downloading rust_model.ot:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

Downloading (…)21dd52bb58/README.md:   0%|          | 0.00/2.01k [00:00<?, ?B/s]

Downloading (…)dd52bb58/config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading (…)52bb58/convert.ipynb:   0%|          | 0.00/74.6k [00:00<?, ?B/s]

Downloading (…)8/convert_to_fp16.py:   0%|          | 0.00/198 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/219M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)2bb58/tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

Downloading (…)d52bb58/modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

In [7]:
def get_recommender_result(recommender, query, top_k=5):
    """Run one query through a recommender and return the top recipe names.

    Args:
        recommender: Object providing ``set_search(query)``, ``recommend()``
            and ``show_results(k)``; each returned result must expose
            ``.recipe.name``.
        query: Search text handed to ``set_search``.
        top_k: Number of results requested from ``show_results``. Defaults
            to 5, the value that was previously hard-coded.

    Returns:
        A list with the ``recipe.name`` of every result, in the order the
        recommender returned them.
    """
    recommender.set_search(query)
    recommender.recommend()
    return [hit.recipe.name for hit in recommender.show_results(top_k)]

# Store every recommender's results for each query in its own column. The
# position suffix keeps column names unique when several recommenders share
# the same strategy class.
for idx, recommender in enumerate(recommenders):
    strategy_name = recommender.strategy.__class__.__name__
    print(strategy_name)
    df[f"{strategy_name}{idx}"] = df['query'].apply(
        lambda text: get_recommender_result(recommender, text)
    )


NameSearch
FuzzySearchName
FuzzySearchDescription
IndexSearch
SemanticSearch
SemanticSearch
SemanticSearch
SemanticSearch


In [8]:
import math

def ndcg(correct, result, n=5):
    """Binary-relevance NDCG@n of a ranked result list.

    Args:
        correct: Comma-separated string of relevant names, or a missing value
            (None / NaN / pd.NA) when the query has no labelled answer.
        result: Ranked sequence of returned names, best first.
        n: Cut-off rank; only the first ``n`` results are scored.

    Returns:
        ``pd.NA`` when ``correct`` is missing, otherwise a float in [0, 1]:
        the discounted gain of the relevant results divided by the gain of an
        ideal ranking that lists all relevant names first.

    Raises:
        ValueError: if ``n`` is smaller than 1 (the ideal gain would be zero).
    """
    if pd.isnull(correct):
        return pd.NA
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Strip whitespace so labels written as "head, tail" still match, and use a
    # set so a label listed twice does not inflate the ideal score.
    relevant = {label.strip() for label in correct.split(',')}

    # Discount for rank r (0-based) is 1 / log2(r + 2).
    discounts = [1 / math.log2(rank + 2) for rank in range(n)]
    ideal_score = sum(discounts[:len(relevant)])

    # Credit each relevant name at most once; a result list that repeats a
    # relevant name would otherwise push the score above 1.
    uncredited = set(relevant)
    score = 0.0
    for rank, name in enumerate(result[:n]):
        if name in uncredited:
            uncredited.discard(name)
            score += discounts[rank]
    return score / ideal_score

In [9]:
# NOTE(review): `index` is not read anywhere in the visible cells; kept as-is.
index = 0
# Score each recommender's result column row by row and store the NDCG next to it.
for idx, recommender in enumerate(recommenders):
    results_col = recommender.strategy.__class__.__name__ + str(idx)
    df[f"{results_col}_ndcg"] = df.apply(
        lambda row: ndcg(row['correct'], row[results_col]),
        axis=1,
    )

In [10]:

# NOTE(review): `index` is not read anywhere in the visible cells; kept as-is.
index = 0
# Print the mean NDCG of every recommender, labelled by strategy name + position.
for idx, recommender in enumerate(recommenders):
    label = recommender.strategy.__class__.__name__ + str(idx)
    print(label)
    print(df[f"{label}_ndcg"].mean())

# Recorded figure for the OpenAIEmbeddings strategy, which is commented out in
# the recommender setup (presumably the mean NDCG from an earlier run — confirm):
# OpenAIEmbeddings
# 0.5468005259704043

NameSearch0
0.0
FuzzySearchName1
0.3889713302547472
FuzzySearchDescription2
0.2552236998763563
IndexSearch3
0.21272152092359817
SemanticSearch4
0.6173577743841728
SemanticSearch5
0.5061963464323941
SemanticSearch6
0.5292321446886158
SemanticSearch7
0.4762389074160669


In [11]:
# Show the full frame: each query with every recommender's result list and its NDCG score.
df

Unnamed: 0,name,query,correct,NameSearch0,FuzzySearchName1,FuzzySearchDescription2,IndexSearch3,SemanticSearch4,SemanticSearch5,SemanticSearch6,SemanticSearch7,NameSearch0_ndcg,FuzzySearchName1_ndcg,FuzzySearchDescription2_ndcg,IndexSearch3_ndcg,SemanticSearch4_ndcg,SemanticSearch5_ndcg,SemanticSearch6_ndcg,SemanticSearch7_ndcg
0,read file,read a file to a pandas dataframe,,[],"[to_clipboard, to_csv, to_dict, to_excel, to_f...","[backfill, shift, tz_localize, abs, agg]","[to_latex, describe, info, to_csv, to_excel]","[to_hdf, to_markdown, to_csv, to_clipboard, to...","[to_xarray, to_hdf, to_json, infer_objects, get]","[to_xarray, tail, to_hdf, to_json, head]","[to_hdf, to_csv, to_xarray, to_pickle, to_excel]",,,,,,,,
1,head,show the first or last 5 rows,"head,tail",[],"[first, last, combine_first, first_valid_index...","[drop_duplicates, fillna, first, head, tail]","[asof, first, groupby, head, hist]","[tail, head, to_string, last, truncate]","[tail, head, nlargest, last, nsmallest]","[nlargest, nsmallest, tail, head, to_string]","[tail, head, asof, last_valid_index, to_string]",0.0,0.0,0.501266,0.264068,1.0,1.0,0.570642,1.0
2,columns,show the column names,columns,[],"[cov, rename, cummin, keys, min]","[add_prefix, add_suffix, append, astype, at_time]","[compare, groupby, info, plot, to_string]","[to_latex, to_string, add_prefix, add_suffix, ...","[to_latex, to_string, add_suffix, loc, filter]","[to_latex, to_string, add_suffix, rename_axis,...","[to_string, loc, rename_axis, to_latex, filter]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,drop,drop unnecessary columns,drop,[],"[drop, drop_duplicates, dropna, cummin, ne]","[asof, astype, autocorr, compare, convert_dtypes]","[drop, resample, reset_index, add_prefix, add_...","[droplevel, dropna, drop, reset_index, truncate]","[dropna, all, truncate, droplevel, any]","[dropna, all, any, drop_duplicates, truncate]","[drop_duplicates, convert_dtypes, reset_index,...",0.0,1.0,0.0,1.0,0.5,0.0,0.0,0.0
4,len,get the length of the dataframe,len,[],"[rename, to_frame, head, asfreq, mean]","[add_prefix, add_suffix, asfreq, asof, astype]","[all, between_time, corr, cov, describe]","[tail, head, info, count, nunique]","[tail, squeeze, head, memory_usage, to_frame]","[tail, head, get, info, count]","[tail, head, item, backfill, pad]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,query,show rows that meet a condition,,[],"[rpow, at_time, last, count, round]","[loc, isin, add, astype, filter]","[plot, compare, groupby, info, loc]","[filter, head, isin, all, any]","[notna, notnull, last_valid_index, isna, isnull]","[head, tail, loc, idxmin, to_string]","[loc, filter, equals, all, notna]",,,,,,,,
6,iloc,get a subset of the dataframe,iloc,[],"[rename, to_frame, head, rsub, asfreq]","[pct_change, align, any, append, apply]","[all, asof, between_time, describe, explode]","[filter, head, xs, truncate, loc]","[asof, head, get, tail, filter]","[get, filter, to_xarray, xs, head]","[filter, pad, first_valid_index, last_valid_in...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,dtypes,show the types of the columns,dtypes,[],"[asof, astype, cummin, value_counts, convert_d...","[append, to_csv, to_list, abs, agg]","[groupby, rank, align, astype, compare]","[to_latex, convert_dtypes, plot, to_string, info]","[to_latex, convert_dtypes, to_string, loc, all]","[to_string, to_latex, convert_dtypes, infer_ob...","[convert_dtypes, astype, infer_objects, loc, t...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,select dtypes,only show certain data types,,[],"[astype, tail, keys, convert_dtypes, align]","[append, dot, info, abs, add]","[groupby, rank, truncate, abs, add]","[convert_dtypes, to_string, infer_objects, to_...","[convert_dtypes, to_string, notna, notnull, is...","[all, describe, loc, any, filter]","[astype, to_list, loc, view, to_string]",,,,,,,,
9,insert,insert a column in the specified position,insert,[],"[cummin, sort_values, set_axis, to_string, kur...","[mad, any, sort_index, between_time, drop]","[ewm, loc, plot, take, to_latex]","[searchsorted, add_prefix, to_excel, add_suffi...","[searchsorted, to_latex, reindex_like, slice_s...","[to_latex, to_string, to_hdf, add_prefix, add_...","[iloc, searchsorted, update, fillna, to_sql]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
