# IMPORTS

In [None]:
%run ipynb_setup.ipynb

In [None]:
%run class_Dataset.ipynb

In [None]:
%run class_TokenSearch.ipynb

# CLASS DEF

In [None]:
class ProductSearch():
    """Search products by token matching, topped up with nearest-neighbour candidates.

    The search runs in two stages:

    1. Token matching: products whose descriptor matches tokens of the search
       string, ordered by their share of 4- and 5-star ratings.
    2. Nearby products: when token matching yields too few hits, neighbours of
       those hits are collected as extra candidates. Ranking these candidates
       (by image similarity) is not implemented yet, so they are currently
       gathered but not added to the returned results.
    """

    def __init__(
        self,
        ) -> None :
        """Create the dataset and the two searchers that share it."""
        self.dataset       = Dataset() # initialize Wish dataset
        self.token_search  = TokenSearch(dataset=self.dataset) # prep token searcher
        # NOTE(review): no `%run` cell visible in this notebook loads NeighbourSearch;
        # presumably it comes from ipynb_setup or an earlier session - confirm.
        self.nearby_search = NeighbourSearch(dataset=self.dataset) # prep nearest neighbor searcher

    def reorder_on_top_ratings(
        self,
        res : pd.DataFrame,
        ) -> pd.DataFrame :
        """Return `res` with rows ordered by their share of top ratings, best first.

        The share is (rating_five_count + rating_four_count) / rating_count.
        This gives top rated products precedence over units sold, which allows
        good products to trend rather than old products to stay at the top.

        Args:
            res: products carrying the `rating_five_count`, `rating_four_count`
                and `rating_count` columns. Index labels are assumed to be
                unique; duplicated labels would be repeated by the `.loc`
                lookup below.

        Returns:
            The same rows as `res`, reordered. Rows whose share is NaN
            (0 top ratings out of 0 ratings) end up last, which is the
            `sort_values` default.
        """
        top_rating_pctage = (res['rating_five_count'] + res['rating_four_count']) / res['rating_count']
        ordered_labels = top_rating_pctage.sort_values(ascending=False).index
        return res.loc[ordered_labels, :]

    def token_matching_results(
        self,
        search_string  : str,
        exact_match    : bool = False,
        case_sensitive : bool = False,
        verbose        : int  = 0
        ) -> pd.DataFrame :
        """Find products that match purely on tokens, sorted on "top ratings".

        Sorting on top ratings allows new top rated products to trend rather
        than old products with many units sold to dominate the results.

        Args:
            search_string: text whose tokens are matched against the products.
            exact_match: forwarded to `TokenSearch.tokens_found_count`.
            case_sensitive: forwarded to `TokenSearch.tokens_found_count`.
            verbose: forwarded to `TokenSearch.tokens_found_count`.

        Returns:
            The token search hits, reordered by `reorder_on_top_ratings`.
        """
        # step 1 = apply text search to match tokens in search string to that of the product descriptor
        token_search_results = self.token_search.tokens_found_count(
            search_string  = search_string,
            exact_match    = exact_match,
            case_sensitive = case_sensitive,
            verbose        = verbose,
        )

        # step 2 = reorder results on "top ratings"
        return self.reorder_on_top_ratings(token_search_results)

    def nearby_results(
        self,
        source_results         : pd.DataFrame,
        total_required_results : int,
        ):
        """Grab incrementally more nearest neighbours of `source_results` until there are enough.

        Args:
            source_results: products to search outwards from.
            total_required_results: stop as soon as at least this many results
                have been collected.

        Returns:
            Whatever `NeighbourSearch.get_n_nearest_from_locs` returned on the
            last iteration, or `source_results.index` when no neighbour lookup
            was needed (enough source results already, or none at all).
        """
        res = source_results.index

        # Nothing to search outwards from: the loop below could never collect
        # anything and would spin forever.
        if len(source_results) == 0:
            return res

        # look outwards and find nearest results (including self) until we get at least what we need
        # NOTE(review): termination relies on the neighbour search returning more
        # results as `n_nearest` grows - confirm it cannot stall on small datasets.
        n_nearest = 1
        while len(res) < total_required_results:
            # get next nearest result from each source_result
            res = self.nearby_search.get_n_nearest_from_locs(
                n_nearest = n_nearest,
                locs      = source_results,
            )
            n_nearest += 1

        return res

    def query(
        self,
        search_string       : str,
        exact_match         : bool  = False,
        case_sensitive      : bool  = False,
        required_results    : int   = 20, # number of token matching results we would like to have
        extra_results_ratio : float = 1.5, # ratio vs `required_results` of candidates to gather for the image search refinement
        show_top_n          : int   = 5,
        chatty              : bool  = True,
        verbose             : int   = 0,
        ) -> pd.DataFrame :
        """Run a product search, show the top hits and return the token matching results.

        Args:
            search_string: text to search for.
            exact_match: forwarded to the token search.
            case_sensitive: forwarded to the token search.
            required_results: number of results wanted; when token matching
                finds fewer, nearby products are gathered as extra candidates.
            extra_results_ratio: the nearby search collects
                `int(required_results * extra_results_ratio)` candidates.
            show_top_n: how many results `Dataset.show_top_n` is asked to show.
            chatty: print a short comment about the number of hits.
            verbose: forwarded to the token search.

        Returns:
            The token matching results ordered on top ratings.

        Raises:
            ValueError: if `required_results` is smaller than 1.
        """
        if required_results < 1:
            raise ValueError('required_results needs to be >= 1')

        ###########################################################
        # step 1 = get token_matching_results
        ###########################################################
        token_results = self.token_matching_results(
            search_string  = search_string,
            exact_match    = exact_match,
            case_sensitive = case_sensitive,
            verbose        = verbose,
        )

        ###########################################################
        # step 2 = if token_matching_results not enough, grab "nearby products"
        ###########################################################
        nearby_target = int(required_results * extra_results_ratio)
        needs_top_up  = len(token_results) < required_results
        # The nearby search needs at least one hit to start from, and only hands
        # back neighbour results (rather than the bare source index) when it
        # actually has to look for more than we already have.
        can_top_up    = 0 < len(token_results) < nearby_target
        if needs_top_up and can_top_up:
            # get more results with nearest neighbour, using the token matches as the source
            nearby = self.nearby_results(
                source_results         = token_results,
                total_required_results = nearby_target,
            )

            # get unique nearby results
            nearby = nearby[nearby != 0] # ignore home truths
            unique_nearby_results = nearby.index.unique() # throw away duplicates if they exist

            # TODO: order `unique_nearby_results` on image similarity (ImageSearch),
            # work out how many more results are still needed on top of
            # len(token_results), and append that many to the returned results.

        ###########################################################
        # plot_top_n results
        ###########################################################
        self.dataset.show_top_n(token_results, n=show_top_n)

        ###########################################################
        # results chat (more than 10 hits is deliberately left uncommented)
        ###########################################################
        if chatty:
            n_hits = len(token_results)
            if n_hits == 0:    print(f'results: I got nothing! T⌓T') # unhappy
            elif n_hits == 1:  print(f'results: only 1 hit ￣ω￣, I hope it\'s what you wanted!') # unsure
            elif n_hits <= 5:  print(f'results: {n_hits} results, see anything you like?') # tight search
            elif n_hits <= 10: print(f'results: {n_hits} results found') # normal

        return token_results

In [None]:
# Scratch Series with repeated index labels, used to try out index de-duplication.
res = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=[1, 1, 1, 2, 3],
)

In [None]:
# Distinct index labels of `res`, in order of first appearance; this is the same
# call ProductSearch.query uses to throw away duplicate nearby results.
res.index.unique()

In [None]:
# Build the search engine and run one sample query; `ps` and `res` are reused by the cells below.
ps=ProductSearch()
res=ps.query('harajuku')
# Other sample queries to try (progressively more specific):
#ps.query('harajuku beach')
#ps.query('harajuku pop')
#ps.query('dress')
#ps.query('dress beach')
#ps.query('dress beach flower')
#ps.query('dress beach flower sleeve')
#ps.query('dress beach flower blue')

In [None]:
# Re-apply the top-ratings ordering to the query results to inspect it.
ps.reorder_on_top_ratings(res)

In [None]:
# Show units_sold and rating first, followed by every other column in its original order.
leading_cols = ['units_sold', 'rating']
remaining_cols = [col for col in res.columns if col not in leading_cols]
res.loc[:, leading_cols + remaining_cols]

In [None]:
# Share of 4- and 5-star ratings per product in `res`, best first.
top_rating_counts = res['rating_five_count'] + res['rating_four_count']
(top_rating_counts / res['rating_count']).sort_values(ascending=False)

In [None]:
# Just the two popularity columns of the query results.
res.loc[:, ['units_sold', 'rating']]

In [None]:
# Share of 4- and 5-star ratings for every product in the full dataset.
all_products = ps.dataset.df
(all_products['rating_five_count'] + all_products['rating_four_count']) / all_products['rating_count']

In [None]:
# For comparison: order the results by units sold, then rating, both descending.
res.sort_values(by=['units_sold', 'rating'], ascending=False)