In [1]:
import sys
import json
import pandas as pd
sys.path.append('../')
from pandaRec import Recipe, Recommender
from pandaRec.strategies import NameSearch, FuzzySearchName, FuzzySearchDescription, IndexSearch, SemanticSearch

In [2]:
# Load the evaluation queries from a semicolon-separated file that has no
# header row; the two columns are labelled "name" and "query" here.
df = pd.read_csv(
    'test_data.csv',
    sep=';',
    header=None,  # same behaviour pandas applies implicitly when `names` is given
    names=["name", "query"],
)

In [3]:
# Read the recipe definitions from disk. The encoding is pinned to UTF-8 so the
# file decodes the same way regardless of the platform's locale default.
with open('../recipes/recipes.json', encoding='utf-8') as f:
    recipe_dicts = json.load(f)

# Convert each decoded JSON entry into a Recipe via Recipe.from_dict. The raw
# entries keep their own name so `recipes` only ever refers to Recipe objects.
recipes = [Recipe.from_dict(entry) for entry in recipe_dicts]

In [4]:
# Sanity check: display how many Recipe objects were built from the JSON file.
len(recipes)

185

In [5]:
# Reserve one slot per search strategy under evaluation (five in total).
recommenders = [None for _ in range(5)]

In [6]:
# One Recommender per search strategy, all sharing the same recipes and the
# same query DataFrame. The list is built in a single literal so this cell does
# not rely on a pre-sized placeholder list or on hard-coded slot indices.
# Elements are evaluated top to bottom, so construction order is unchanged.
recommenders = [
    Recommender(recipes, df, NameSearch()),
    Recommender(recipes, df, FuzzySearchName()),
    Recommender(recipes, df, FuzzySearchDescription()),
    Recommender(recipes, df, IndexSearch(recipes)),
    Recommender(recipes, df, SemanticSearch(recipes, '../recipes/embeddings.pt')),
]

185


In [7]:
from pandaRec.search_index import save_search_index

In [8]:
# Display the recipe count again after the recommenders have been constructed
# (presumably to confirm the list was left unmodified — TODO confirm intent).
len(recipes)

185

In [10]:
def get_recommender_result(recommender, query, n_results=5):
    """Run one query through a recommender and return the resulting recipe names.

    Args:
        recommender: Object exposing ``set_search(query)``, ``recommend()`` and
            ``show_results(n)``. Every item returned by ``show_results`` must
            provide a ``recipe.name`` attribute.
        query: Search text handed to ``recommender.set_search``.
        n_results: Value passed to ``recommender.show_results``. Defaults to 5,
            the count that was previously hard-coded, so existing two-argument
            calls behave exactly as before.

    Returns:
        list: ``recipe.name`` of each item returned by ``show_results``, in the
        order the recommender returned them.
    """
    # The recommender is stateful: the query is stored first, then recommend()
    # is invoked, and only afterwards are the results read back.
    recommender.set_search(query)
    recommender.recommend()
    return [hit.recipe.name for hit in recommender.show_results(n_results)]

# Evaluate every strategy on every test query. Each pass prints the strategy's
# class name and stores that strategy's result lists in a new df column that
# carries the same name.
for recommender in recommenders:
    strategy_name = recommender.strategy.__class__.__name__
    print(strategy_name)
    df[strategy_name] = df['query'].apply(
        # Bind the current recommender explicitly rather than via the closure.
        lambda query, active=recommender: get_recommender_result(active, query)
    )


NameSearch
FuzzySearchName
FuzzySearchDescription
IndexSearch
SemanticSearch


In [11]:
# Display the evaluation table: the test queries plus one column per search
# strategy holding the recipe names that strategy returned for each query.
df

Unnamed: 0,name,query,NameSearch,FuzzySearchName,FuzzySearchDescription,IndexSearch,SemanticSearch
0,read file,read a file to a pandas dataframe,[],"[to_clipboard, to_csv, to_dict, to_excel, to_f...","[backfill, shift, tz_localize, abs, agg]","[to_latex, describe, info, to_csv, to_excel]","[to_hdf, to_markdown, to_csv, to_clipboard, to..."
1,head,show the first 5 rows,[],"[first, combine_first, hist, rpow, to_list]","[compare, first, head, hist, info]","[groupby, argmax, argmin, asof, compare]","[head, tail, to_string, first, truncate]"
2,tail,show the last 5 rows,[],"[last, rpow, to_list, abs, pow]","[hist, isna, isnull, notna, notnull]","[asof, compare, first, groupby, head]","[tail, head, last, to_string, truncate]"
3,columns,show the column names,[],"[cov, rename, cummin, keys, min]","[add_prefix, add_suffix, append, astype, at_time]","[compare, groupby, info, plot, to_string]","[to_latex, to_string, add_prefix, add_suffix, ..."
4,drop,drop unnecessary columns,[],"[drop, drop_duplicates, dropna, cummin, ne]","[asof, astype, autocorr, compare, convert_dtypes]","[drop, resample, reset_index, add_prefix, add_...","[droplevel, dropna, drop, reset_index, truncate]"
5,len,get the length of the dataframe,[],"[rename, to_frame, head, asfreq, mean]","[add_prefix, add_suffix, asfreq, asof, astype]","[all, between_time, corr, cov, describe]","[tail, head, info, count, nunique]"
6,query,show rows that meet a condition,[],"[rpow, at_time, last, count, round]","[loc, isin, add, astype, filter]","[plot, compare, groupby, info, loc]","[filter, head, isin, all, any]"
7,iloc,get a subset of the dataframe,[],"[rename, to_frame, head, rsub, asfreq]","[pct_change, align, any, append, apply]","[all, asof, between_time, describe, explode]","[filter, head, xs, truncate, loc]"
8,dtypes,show the types of the columns,[],"[asof, astype, cummin, value_counts, convert_d...","[append, to_csv, to_list, abs, agg]","[groupby, rank, align, astype, compare]","[to_latex, convert_dtypes, plot, to_string, info]"
9,select dtypes,only show certain data types,[],"[astype, tail, keys, convert_dtypes, align]","[append, dot, info, abs, add]","[groupby, rank, truncate, abs, add]","[convert_dtypes, to_string, infer_objects, to_..."
