In [1]:
import pandas as pd
from collections import defaultdict
from fuzzywuzzy import fuzz  # Assuming fuzzywuzzy is installed

class OfferSearchTool:
    """Fuzzy search over offers by brand, product category, or retailer.

    The tool works on ``self.merged_df``, a single DataFrame produced by
    joining the offers, brand/category and category-hierarchy CSV files
    (see ``load_datasets``).
    """

    # Column of the merged DataFrame that each supported query type is scored against.
    SEARCH_COLUMNS = {
        'category': 'PRODUCT_CATEGORY',
        'brand': 'BRAND',
        'retailer': 'RETAILER',
    }

    # A parent-category match is only credited when its best score reaches this value.
    PARENT_MATCH_THRESHOLD = 80

    def __init__(self):
        """Load and merge the datasets from their default CSV paths."""
        self.merged_df = self.load_datasets()

    def load_datasets(self, offers_path='offer_retailer.csv',
                      brands_path='brand_category.csv',
                      categories_path='categories.csv'):
        """Read the three CSV files and join them into one DataFrame.

        Args:
            offers_path: CSV with the offers; joined on its ``BRAND`` column.
            brands_path: CSV mapping ``BRAND`` to ``BRAND_BELONGS_TO_CATEGORY``.
            categories_path: CSV keyed by ``PRODUCT_CATEGORY``.

        Returns:
            The left-joined DataFrame with every missing value replaced by
            the placeholder string ``'Unknown'``.
        """
        offers_df = pd.read_csv(offers_path)
        brands_df = pd.read_csv(brands_path)
        categories_df = pd.read_csv(categories_path)

        # Left joins keep every offer even when its brand or category is unmapped.
        merged_df = pd.merge(offers_df, brands_df, on='BRAND', how='left')
        merged_df = pd.merge(
            merged_df,
            categories_df,
            left_on='BRAND_BELONGS_TO_CATEGORY',
            right_on='PRODUCT_CATEGORY',
            how='left',
        )

        # Placeholder keeps later string operations from hitting NaN.
        return merged_df.fillna('Unknown')

    def search(self, query, query_type, scorer=None, min_score=0):
        """Return the offers that best match ``query``, one row per offer.

        Args:
            query: Text to look for; compared case-insensitively.
            query_type: ``'brand'``, ``'category'`` or ``'retailer'``. Any
                other value yields an empty result.
            scorer: Callable ``scorer(query_text, candidate_text)`` returning
                a numeric similarity. Defaults to ``fuzz.partial_ratio``.
            min_score: Rows scoring below this value are dropped. Rows with
                a score of 0 are always dropped.

        Returns:
            A new DataFrame holding, for each distinct ``OFFER``, the first
            row that achieved that offer's highest score, plus a
            ``'Search Score'`` column, sorted by score in descending order.
        """
        frame = self.merged_df
        column = self.SEARCH_COLUMNS.get(query_type)
        if column is None or frame.empty:
            return frame.head(0).assign(**{'Search Score': pd.Series(dtype='int64')})

        if scorer is None:
            scorer = fuzz.partial_ratio

        needle = str(query).lower()
        memo = {}

        def similarity(value):
            # Many rows share the same brand/category/retailer text, so each
            # distinct text is scored only once. str() guards non-string cells.
            text = str(value).lower()
            if text not in memo:
                memo[text] = scorer(needle, text)
            return memo[text]

        scores = frame[column].map(similarity)

        if query_type == 'category':
            parent_scores = frame['IS_CHILD_CATEGORY_TO'].map(similarity)
            best_parent = parent_scores.max()
            # When the parent category is the stronger, confident match, credit
            # every row under the best-matching parent category (not just the
            # first one encountered).
            if best_parent >= self.PARENT_MATCH_THRESHOLD and best_parent > scores.max():
                scores = scores.mask(parent_scores == best_parent, best_parent)

        scored = frame.assign(**{'Search Score': scores})
        relevant = (scored['Search Score'] > 0) & (scored['Search Score'] >= min_score)

        # A stable sort keeps the original row order among equal scores, so
        # drop_duplicates retains the first row that reached each offer's best score.
        ranked = scored[relevant].sort_values(
            by='Search Score', ascending=False, kind='mergesort'
        )
        return ranked.drop_duplicates(subset='OFFER', keep='first')

    def run_cli(self):
        """Run an interactive prompt loop until the user types ``exit``.

        Each round asks for a query type and a query, runs ``search`` and
        prints one line per matching offer with its score.
        """
        while True:
            # Surrounding whitespace is stripped so that e.g. "brand " is accepted.
            query_type = input("Enter the query type ('brand', 'category', 'retailer' or 'exit' to stop): ").strip().lower()
            if query_type == 'exit':
                break

            if query_type not in self.SEARCH_COLUMNS:
                print("Query should be one of the following: 'brand', 'category', 'retailer'.")
                continue

            query = input("Enter your search query: ").strip().lower()
            if query == 'exit':
                break

            results_df = self.search(query, query_type)

            if results_df.empty:
                print("No results found.\n\n")
                continue

            for offer, score in zip(results_df['OFFER'], results_df['Search Score']):
                print(f"Offer: {offer}, Score: {score}")
            print("\n---\n\n")

# Script entry point: build the search tool and start its interactive prompt
if __name__ == "__main__":
    OfferSearchTool().run_cli()


Enter the query type ('brand', 'category', 'retailer' or 'exit' to stop): brand
Enter your search query: huggies
Offer: General Mills™ snacks, select brands, spend $10, Score: 67
Offer: Snuggle® Liquid Fabric Softener, at Walmart, Score: 62
Offer: Egglife Egg White Wraps at Aldi, Score: 57
Offer: Bubbies Pickles, Sauerkraut, OR Condiments at Whole Foods or Sprouts, Score: 57
Offer: Bubbies Pickles, Sauerkraut, and Condiments at Whole Foods or Sprouts, Score: 57
Offer: Welch's® Fruit Snacks, select varieties, 6 count+, Score: 43
Offer: Welch's® Fruit 'n Yogurt® Snacks, 18 count, at Walmart, Score: 43
Offer: Hidden Valley® Ranch Seasoning Shaker, Score: 43
Offer: Welch's® Juicefuls® Juicy Fruit Snacks, buy 2, Score: 43
Offer: Hidden Valley® Ranch Seasoning, select varieties, Score: 43
Offer: Hidden Valley® Ranch Salad Dressing OR Secret Sauce, select varieties, Score: 43
Offer: Welch's® Fruit Snacks, select varieties, 6 count+, buy 3, Score: 43
Offer: Spend $15 at Burger King, Score: 43


Enter the query type ('brand', 'category', 'retailer' or 'exit' to stop): category
Enter your search query: diapers
Offer: Dove Hand Wash, select varieties at Target, Score: 71
Offer: Dove Hand Wash, select varieties, buy 2 at Target, Score: 71
Offer: Degree for Men OR Women Dry Spray, Score: 71
Offer: Sign up for McAlister's Deli Rewards, tap for details, Score: 60
Offer: Brita® Pitcher OR Dispenser, Score: 60
Offer: Bai® Antioxidant, 6 pack, buy 2 at Walmart, Score: 60
Offer: Bai® Antioxidant, 6 pack, at Walmart, Score: 60
Offer: Brita® Standard OR Elite Filters, Score: 60
Offer: GOYA® Coconut Water, Score: 60
Offer: Brita® Pitcher AND Filter, Score: 60
Offer: CORE® Hydration, select varieties, buy 2 at Walmart, Score: 60
Offer: CORE® Hydration, select varieties, at Walmart, Score: 60
Offer: GOYA® Adobo Seasoning, 8 ounce+, Score: 60
Offer: Chosen Foods® Dressings, Score: 57
Offer: Hidden Valley® Ranch Night Seasoning Mix, 1 count, Score: 57
Offer: CESAR® Wet Dog Food Singles, Score:

Enter the query type ('brand', 'category', 'retailer' or 'exit' to stop): retailer
Enter your search query: target
Offer: Back to the Roots Grow Seed Starting Pots OR Germination Trays, at Walmart or Target, Score: 100
Offer: Back to the Roots Organic 3-In-1 Seed Starting Mix 12 quart, at Walmart or Target, Score: 100
Offer: Back to the Roots Organic Kits and Planters, at Target, Score: 100
Offer: Welch's® Juicefuls® Juicy Fruit Snacks, 14 count, at Target, Score: 100
Offer: Durex® Massage & Play 2 in 1 Pleasure Gel, select varieties, at Walmart or Target, Score: 100
Offer: L'Oréal Paris Excellence Hair Color at Target, Score: 100
Offer: Back to the Roots Raised Bed Gardening Kit with Soil, Seeds and Plant Food, at Target, Score: 100
Offer: Colgate® Toothpaste AND Colgate® Toothbrush, select varieties at Walmart or Target, Score: 100
Offer: L'Oréal Paris Hair color, select varieties, spend $25 at Target, Score: 100
Offer: Dove Hand Wash, select varieties at Target, Score: 100
Offer: Do

Enter the query type ('brand', 'category', 'retailer' or 'exit' to stop): exit
