In [1]:
import csv
from collections import defaultdict
from fuzzywuzzy import fuzz

# Paths to the three CSV inputs. They are relative, so they resolve against the
# notebook's current working directory when OfferSearchTool.load_datasets opens them.
offer_retailer = 'offer_retailer.csv'  # offer rows; the search reads OFFER, BRAND and RETAILER
brand_category= 'brand_category.csv'  # BRAND -> BRAND_BELONGS_TO_CATEGORY
categories = 'categories.csv'  # PRODUCT_CATEGORY -> IS_CHILD_CATEGORY_TO


In [2]:
class OfferSearchTool:
    """Interactive fuzzy search over the offers dataset.

    A query is classified once as a category name, a brand name or (by default)
    a retailer name. Every offer is then scored against the corresponding field
    with ``fuzz.partial_ratio``.
    """

    def __init__(self):
        # One dict per row of the offer CSV (keys used here: OFFER, BRAND, RETAILER).
        self.offers = []
        # BRAND -> BRAND_BELONGS_TO_CATEGORY
        self.brands = {}
        # PRODUCT_CATEGORY -> IS_CHILD_CATEGORY_TO
        self.categories = {}
        self.load_datasets()

    def load_datasets(self):
        """Populate ``offers``, ``brands`` and ``categories`` from the CSV files.

        The file locations come from the module-level path constants
        ``offer_retailer``, ``brand_category`` and ``categories``.
        """
        # newline='' is what the csv module asks for: it lets the reader do its
        # own newline handling so quoted fields with line breaks parse correctly.
        with open(offer_retailer, 'r', newline='') as file:
            self.offers.extend(csv.DictReader(file))

        with open(brand_category, 'r', newline='') as file:
            for row in csv.DictReader(file):
                self.brands[row['BRAND']] = row['BRAND_BELONGS_TO_CATEGORY']

        with open(categories, 'r', newline='') as file:
            for row in csv.DictReader(file):
                self.categories[row['PRODUCT_CATEGORY']] = row['IS_CHILD_CATEGORY_TO']

    def search(self, query):
        """Score every offer against ``query`` and rank the offers.

        Args:
            query: Free-text search string.

        Returns:
            A list of ``(offer_text, score)`` tuples sorted by descending score.
            Rows that share the same OFFER text have their scores summed.
        """
        results = defaultdict(float)
        query_key = query.lower()

        # The dictionaries keep the casing found in the CSV files, so the
        # membership test has to be case-insensitive; comparing the lowered
        # query with the raw keys would miss every capitalised name.
        category_names = {(name or '').lower() for name in self.categories}
        brand_names = {(name or '').lower() for name in self.brands}

        # The query type does not depend on the offer, so decide it once.
        is_category = query_key in category_names
        is_brand = (not is_category) and query_key in brand_names

        for offer in self.offers:
            if is_category:
                # Compare the query with the category the offer's brand belongs to.
                candidate = self.brands.get(offer['BRAND'], '')
            elif is_brand:
                candidate = offer['BRAND']
            else:
                # Neither a known category nor a known brand: treat as a retailer.
                candidate = offer['RETAILER']
            results[offer['OFFER']] += self.score_similarity(query, candidate or '')

        return sorted(results.items(), key=lambda item: item[1], reverse=True)

    def score_similarity(self, query, data_point):
        """Return the ``fuzz.partial_ratio`` of the two strings, ignoring case.

        A missing ``data_point`` (``None``) is treated as an empty string.
        """
        return fuzz.partial_ratio(query.lower(), (data_point or '').lower())

    def run_cli(self):
        """Prompt for queries in a loop and print ranked offers until 'exit'."""
        while True:
            query = input("Enter your search query (type 'exit' to stop): ")
            if query.lower() == 'exit':
                break
            results = self.search(query)
            if not results:
                print("No results found.")
                continue
            for offer, score in results:
                print(f"Offer: {offer}, Score: {score}")


# Entry point: constructing the tool loads the CSV files, then run_cli() prompts
# for queries until the user types 'exit'.
# `tool` is deliberately a module-level name; calculate_precision_at_k reads it.
if __name__ == "__main__":
    tool = OfferSearchTool()
    tool.run_cli()


Enter your search query (type 'exit' to stop): diapering
Offer: Spend $210 at Randalls, Score: 58.0
Offer: Any Randalls receipt, Score: 58.0
Offer: Shop 2 times at Randalls, Score: 58.0
Offer: Reese's Peanut Butter Bar, 6 count, at GIANT OR MARTIN’S, Score: 57.0
Offer: Spend $15 at Burger King, Score: 56.0
Offer: Reese's Chocolate Cones, 8 count at GIANT OR MARTIN’S, Score: 55.0
Offer: GATORADE® Fast Twitch®, 12-ounce single serve, buy 2 at Kroger, Score: 55.0
Offer: Butterball, select varieties, spend $10 at Mariano's, Score: 53.0
Offer: Shop 2 times at ACME, Score: 50.0
Offer: Spend $90 at ACME, Score: 50.0
Offer: Spend $250 at ACME, Score: 50.0
Offer: Blue Apron Meal Kits, Score: 50.0
Offer: Any ACME receipt, Score: 50.0
Offer: Spend $130 at ACME, Score: 50.0
Offer: Dr Pepper®, Regular OR Zero Sugar Strawberries and Cream 12 pack, at select retailers, Score: 44.0
Offer: Gorton's, at select retailers, Score: 44.0
Offer: Any Albertsons receipt, Score: 44.0
Offer: Shop 2 times at Alber

Enter your search query (type 'exit' to stop): exit


In [3]:
import csv
from collections import defaultdict
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Paths to the three CSV inputs. They are relative, so they resolve against the
# notebook's current working directory when OfferSearchTool.load_datasets opens them.
offer_retailer = 'offer_retailer.csv'  # offer rows; the search reads OFFER, BRAND and RETAILER
brand_category = 'brand_category.csv'  # BRAND -> BRAND_BELONGS_TO_CATEGORY
categories = 'categories.csv'  # CATEGORY_ID, PRODUCT_CATEGORY, IS_CHILD_CATEGORY_TO

class OfferSearchTool:
    """Fuzzy offer search that combines category, brand and retailer signals.

    Each offer's score is the sum of:
      * a fixed bonus when the brand's category equals a category that
        fuzzy-matches the query, or a smaller bonus when the brand's category is
        the parent of such a category;
      * ``fuzz.partial_ratio`` of the query against the offer's brand;
      * ``fuzz.partial_ratio`` of the query against the offer's retailer.
    """

    # Tunable scoring parameters (previously inline magic numbers).
    CATEGORY_MATCH_BONUS = 100  # brand's category is a fuzzy match for the query
    PARENT_MATCH_BONUS = 75     # brand's category is the parent of a matched category
    NEAR_MATCH_THRESHOLD = 80   # fuzzy scores must be strictly above this value
    NEAR_MATCH_LIMIT = 10       # maximum candidates requested from process.extract

    def __init__(self):
        # One dict per row of the offer CSV (keys used here: OFFER, BRAND, RETAILER).
        self.offers = []
        # BRAND -> BRAND_BELONGS_TO_CATEGORY
        self.brands = {}
        # PRODUCT_CATEGORY -> CATEGORY_ID
        self.categories = {}
        # PRODUCT_CATEGORY -> IS_CHILD_CATEGORY_TO (only for rows where it is non-empty)
        self.child_to_parent_category = {}
        self.load_datasets()

    def load_datasets(self):
        """Populate the lookup tables from the three CSV files.

        The file locations come from the module-level path constants
        ``offer_retailer``, ``brand_category`` and ``categories``.
        """
        # newline='' is what the csv module asks for: it lets the reader do its
        # own newline handling so quoted fields with line breaks parse correctly.
        with open(offer_retailer, 'r', newline='') as file:
            self.offers.extend(csv.DictReader(file))

        with open(brand_category, 'r', newline='') as file:
            for row in csv.DictReader(file):
                self.brands[row['BRAND']] = row['BRAND_BELONGS_TO_CATEGORY']

        with open(categories, 'r', newline='') as file:
            for row in csv.DictReader(file):
                self.categories[row['PRODUCT_CATEGORY']] = row['CATEGORY_ID']
                if row['IS_CHILD_CATEGORY_TO']:
                    self.child_to_parent_category[row['PRODUCT_CATEGORY']] = row['IS_CHILD_CATEGORY_TO']

    def find_near_matches(self, query, choices):
        """Return the choices whose ``token_set_ratio`` to ``query`` exceeds the threshold.

        Args:
            query: Search string.
            choices: Iterable of candidate strings.

        Returns:
            The matching choices as returned by ``process.extract`` (at most
            ``NEAR_MATCH_LIMIT``), keeping only scores strictly above
            ``NEAR_MATCH_THRESHOLD``.
        """
        matches = process.extract(
            query, choices, limit=self.NEAR_MATCH_LIMIT, scorer=fuzz.token_set_ratio
        )
        return [match[0] for match in matches if match[1] > self.NEAR_MATCH_THRESHOLD]

    def search(self, query):
        """Score every offer against ``query`` and rank the offers.

        Args:
            query: Free-text search string.

        Returns:
            A list of ``(offer_text, score)`` tuples sorted by descending score.
            Rows that share the same OFFER text have their scores summed.
        """
        results = defaultdict(float)
        query_lower = query.lower()

        category_matches = self.find_near_matches(query_lower, self.categories.keys())
        # Resolve each matched category's parent once instead of once per offer.
        matched_with_parent = [
            (cat, self.child_to_parent_category.get(cat)) for cat in category_matches
        ]

        for offer in self.offers:
            # Local names are kept distinct from the module-level path constants
            # `offer_retailer` and `brand_category`. DictReader can yield None for
            # fields missing from a short row, hence the `or ''`.
            title = offer['OFFER']
            brand_name = offer['BRAND'] or ''
            retailer_name = offer['RETAILER'] or ''
            category_of_brand = self.brands.get(brand_name) or ''

            # Skip category scoring for brands without a category: a missing
            # value must never compare equal to a missing parent.
            if category_of_brand:
                for cat, parent in matched_with_parent:
                    if category_of_brand == cat:
                        results[title] += self.CATEGORY_MATCH_BONUS
                    elif parent == category_of_brand:
                        results[title] += self.PARENT_MATCH_BONUS

            results[title] += fuzz.partial_ratio(query_lower, brand_name.lower())
            results[title] += fuzz.partial_ratio(query_lower, retailer_name.lower())

        return sorted(results.items(), key=lambda item: item[1], reverse=True)

    def run_cli(self):
        """Prompt for queries in a loop and print ranked offers until 'exit'."""
        while True:
            query = input("Enter your search query (type 'exit' to stop): ")
            if query.lower() == 'exit':
                break
            results = self.search(query)
            if not results:
                print("No results found.")
                continue
            for offer, score in results:
                print(f"Offer: {offer}, Score: {score}")

# Entry point: constructing the tool loads the CSV files, then run_cli() prompts
# for queries until the user types 'exit'.
# `tool` is deliberately a module-level name; calculate_precision_at_k reads it.
if __name__ == "__main__":
    tool = OfferSearchTool()
    tool.run_cli()


Enter your search query (type 'exit' to stop): diapering
Offer: Spend $15 at Burger King, Score: 112.0
Offer: Spend $210 at Randalls, Score: 108.0
Offer: Any Randalls receipt, Score: 108.0
Offer: Shop 2 times at Randalls, Score: 108.0
Offer: Reese's Chocolate Cones, 8 count at GIANT OR MARTIN’S, Score: 105.0
Offer: GATORADE® Fast Twitch®, 12-ounce single serve, buy 2 at Kroger, Score: 105.0
Offer: Gorton's, at select retailers, Score: 102.0
Offer: Reese's Peanut Butter Bar, 6 count, at GIANT OR MARTIN’S, Score: 101.0
Offer: Shop 2 times at ACME, Score: 100.0
Offer: Spend $90 at ACME, Score: 100.0
Offer: Spend $250 at ACME, Score: 100.0
Offer: Blue Apron Meal Kits, Score: 100.0
Offer: Any ACME receipt, Score: 100.0
Offer: Spend $130 at ACME, Score: 100.0
Offer: Gorton's at select retailers, Score: 98.0
Offer: Persil® ProClean®, select varieties, at Walmart, Score: 96.0
Offer: PepsiCo® Beverage, 7.5-ounce 10 pack, select varieties, at Amazon Storefront*, Score: 93.0
Offer: PepsiCo® Varie

Enter your search query (type 'exit' to stop): Huggies
Offer: Reese's Peanut Butter Bar, 6 count, at GIANT OR MARTIN’S, Score: 130.0
Offer: Gorton's, at select retailers, Score: 101.0
Offer: Reese's Chocolate Cones, 8 count at GIANT OR MARTIN’S, Score: 87.0
Offer: Sign up for The Club Card or The Club+ Card full-priced membership* (New Members Only), Score: 86.0
Offer: Bubbies Pickles, Sauerkraut, and Condiments at Whole Foods or Sprouts, Score: 86.0
Offer: Sign up for McAlister's Deli Rewards, tap for details, Score: 86.0
Offer: Bubbies Pickles, Sauerkraut, OR Condiments at Whole Foods or Sprouts, Score: 86.0
Offer: Spend $15 at Burger King, Score: 86.0
Offer: Egglife Egg White Wraps at Aldi, Score: 82.0
Offer: Welch's® Juicefuls® Juicy Fruit Snacks, 14 count, at Target, Score: 76.0
Offer: Colgate® Toothpaste AND Colgate® Toothbrush, select varieties at Walmart or Target, Score: 76.0
Offer: Kradle, select varieties, at Pet Supplies Plus, Score: 74.0
Offer: Gorton's at select retailers

Enter your search query (type 'exit' to stop): Target
Offer: Arber, at Target, Score: 160.0
Offer: Colgate® Toothpaste AND Colgate® Toothbrush, select varieties at Walmart or Target, Score: 150.0
Offer: Durex® Massage & Play 2 in 1 Pleasure Gel, select varieties, at Walmart or Target, Score: 140.0
Offer: Spend $25 at Fresh Thyme Market, Score: 134.0
Offer: Spend $90 at Star Market, Score: 134.0
Offer: Shop 2 times at Star Market, Score: 134.0
Offer: GATORADE® Fast Twitch®, 12-ounce single serve, buy 2 at Kroger, Score: 134.0
Offer: Spend $120 at Star Market, Score: 134.0
Offer: Spend $220 at Star Market, Score: 134.0
Offer: Spend $50 at Fresh Thyme Market, Score: 134.0
Offer: Any Star Market receipt, Score: 134.0
Offer: Beyond Steak™ Plant-Based seared tips, 10 ounce at Target, Score: 133.0
Offer: L'Oréal Paris Hair color, select varieties, spend $19 at Target, Score: 133.0
Offer: L'Oréal Paris Makeup, spend $35 at Target, Score: 133.0
Offer: Back to the Roots Grow Seed Starting Pots O

Enter your search query (type 'exit' to stop): exit


In [None]:
import csv
from collections import defaultdict
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Paths to the three CSV inputs. They are relative, so they resolve against the
# notebook's current working directory when OfferSearchTool.load_datasets opens them.
offer_retailer = 'offer_retailer.csv'  # offer rows; the search reads OFFER, BRAND and RETAILER
brand_category = 'brand_category.csv'  # BRAND -> BRAND_BELONGS_TO_CATEGORY
categories_csv = 'categories.csv'  # CATEGORY_ID, PRODUCT_CATEGORY, IS_CHILD_CATEGORY_TO

class OfferSearchTool:
    """Offer search where the caller states the query type explicitly.

    ``search(query, query_type)`` supports three query types:
      * ``'category'``: fuzzy-match the query against category names and return
        offers whose brand belongs to a matched category;
      * ``'brand'``: return offers whose brand fuzzy-matches the query;
      * ``'retailer'``: return offers whose retailer fuzzy-matches the query.
    """

    def __init__(self):
        # One dict per row of the offer CSV (keys used here: OFFER, BRAND, RETAILER).
        self.offers = []
        # BRAND -> BRAND_BELONGS_TO_CATEGORY
        self.brands_to_categories = {}
        # PRODUCT_CATEGORY -> CATEGORY_ID
        self.categories = {}
        # PRODUCT_CATEGORY -> IS_CHILD_CATEGORY_TO (only for rows where it is non-empty)
        self.child_to_parent_category = {}
        # IS_CHILD_CATEGORY_TO -> list of PRODUCT_CATEGORY values that name it as parent
        self.parent_to_child_categories = defaultdict(list)
        self.load_datasets()

    def load_datasets(self):
        """Populate the lookup tables from the three CSV files.

        The file locations come from the module-level path constants
        ``offer_retailer``, ``brand_category`` and ``categories_csv``.
        """
        # newline='' is what the csv module asks for: it lets the reader do its
        # own newline handling so quoted fields with line breaks parse correctly.
        with open(offer_retailer, 'r', newline='') as file:
            self.offers.extend(csv.DictReader(file))

        with open(brand_category, 'r', newline='') as file:
            for row in csv.DictReader(file):
                self.brands_to_categories[row['BRAND']] = row['BRAND_BELONGS_TO_CATEGORY']

        with open(categories_csv, 'r', newline='') as file:
            for row in csv.DictReader(file):
                name = row['PRODUCT_CATEGORY']
                parent = row['IS_CHILD_CATEGORY_TO']
                self.categories[name] = row['CATEGORY_ID']
                if parent:
                    # Both maps are keyed by category *name*: find_near_matches
                    # looks them up with PRODUCT_CATEGORY strings, so keying the
                    # child map by CATEGORY_ID meant the child check never fired.
                    # NOTE(review): this assumes IS_CHILD_CATEGORY_TO holds the
                    # parent's name rather than its id - confirm against the CSV.
                    self.child_to_parent_category[name] = parent
                    self.parent_to_child_categories[parent].append(name)

    def find_near_matches(self, query, choices):
        """Fuzzy-match ``query`` against ``choices`` and weight the scores.

        Args:
            query: Search string.
            choices: Iterable of category names.

        Returns:
            A list of ``(choice, weighted_score)`` tuples for candidates whose
            ``token_set_ratio`` is strictly above 80. A category that is a child
            and not itself a parent has its score multiplied by 0.75; every
            other category keeps its score (as a float).
        """
        matches = process.extract(query, choices, limit=10, scorer=fuzz.token_set_ratio)
        near_matches = []
        for choice, score in matches:
            if score <= 80:
                continue
            # Parent status takes precedence, matching the original branch order.
            is_child_only = (
                choice in self.child_to_parent_category
                and choice not in self.parent_to_child_categories
            )
            weight = 0.75 if is_child_only else 1.0
            near_matches.append((choice, score * weight))
        return near_matches

    def search(self, query, query_type):
        """Return offers matching ``query`` for the given ``query_type``.

        Args:
            query: Free-text search string.
            query_type: One of ``'category'``, ``'brand'`` or ``'retailer'``.

        Returns:
            A list of ``(offer_text, score)`` tuples sorted by descending score.
            The list is empty when nothing matches or ``query_type`` is not
            recognised.
        """
        results = defaultdict(float)
        query_lower = query.lower()

        if query_type == 'category':
            category_matches = self.find_near_matches(query_lower, self.categories.keys())
            for cat, score in category_matches:
                for offer in self.offers:
                    if self.brands_to_categories.get(offer['BRAND'], '') == cat:
                        results[offer['OFFER']] += score  # weighted category score

        elif query_type == 'brand':
            for offer in self.offers:
                # `or ''` guards against a None field, as the retailer branch does.
                if fuzz.token_set_ratio(query_lower, (offer['BRAND'] or '').lower()) > 80:
                    results[offer['OFFER']] += 100  # direct brand match

        elif query_type == 'retailer':
            for offer in self.offers:
                if fuzz.token_set_ratio(query_lower, (offer['RETAILER'] or '').lower()) > 80:
                    results[offer['OFFER']] += 100  # direct retailer match

        # Sorting and returning must happen for every query type. Nested inside
        # the retailer branch, they made category and brand searches return None.
        return sorted(results.items(), key=lambda item: item[1], reverse=True)

    def run_cli(self):
        """Prompt for a query type and a query in a loop until 'exit' is entered."""
        while True:
            query_type = input("Enter the query type (brand/category/retailer or 'exit' to stop): ").lower()
            if query_type == 'exit':
                break

            if query_type not in ['brand', 'category', 'retailer']:
                print("Invalid query type. Please choose 'brand', 'category', or 'retailer'.")
                continue

            query = input("Enter your search query: ")
            results = self.search(query, query_type)
            if not results:
                print("No results found.")
                continue

            for offer, score in results:
                print(f"Offer: {offer}, Score: {score}")

# Entry point: constructing the tool loads the CSV files, then run_cli() prompts
# for a query type and a query until the user types 'exit'.
# NOTE(review): calculate_precision_at_k calls tool.search(query) with a single
# argument, but this class's search() also requires query_type. If the `tool`
# bound here is the one in scope when that function runs, the call raises TypeError.
if __name__ == "__main__":
    tool = OfferSearchTool()
    tool.run_cli()

Enter the query type (brand/category/retailer or 'exit' to stop): brand
Enter your search query: Huggies
No results found.
Enter the query type (brand/category/retailer or 'exit' to stop): retailer
Enter your search query: Target
Offer: Beyond Steak™ Plant-Based seared tips, 10 ounce at Target, Score: 100.0
Offer: L'Oréal Paris Hair color, select varieties, spend $19 at Target, Score: 100.0
Offer: L'Oréal Paris Makeup, spend $35 at Target, Score: 100.0
Offer: Back to the Roots Grow Seed Starting Pots OR Germination Trays, at Walmart or Target, Score: 100.0
Offer: Dove Hand Wash, select varieties, buy 2 at Target, Score: 100.0
Offer: Durex® Massage & Play 2 in 1 Pleasure Gel, select varieties, at Walmart or Target, Score: 100.0
Offer: Welch's® Juicefuls® Juicy Fruit Snacks, 14 count, at Target, Score: 100.0
Offer: L'Oréal Paris Excellence Hair Color at Target, Score: 100.0
Offer: Back to the Roots Raised Bed Gardening Kit with Soil, Seeds and Plant Food, at Target, Score: 100.0
Offer: C

In [5]:
def calculate_precision_at_k(test_queries, expected_results, k, search_tool=None):
    """Compute the mean precision@k of a search tool over a set of queries.

    For each query, precision@k is the number of the top-``k`` returned offers
    that appear in that query's expected list, divided by ``k``.

    Args:
        test_queries: Sequence of query strings.
        expected_results: Sequence, parallel to ``test_queries``, of collections
            of offer texts considered relevant for each query.
        k: Number of top results to evaluate; must be positive.
        search_tool: Object with a ``search(query)`` method that returns
            ``(offer_text, score)`` pairs in ranked order. Defaults to the
            module-level ``tool`` so existing three-argument calls keep working.

    Returns:
        The average precision@k as a float, or ``0.0`` when there are no queries.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")
    if search_tool is None:
        # Fall back to the notebook-global instance. Passing the tool in
        # explicitly avoids depending on which cell last assigned `tool`.
        search_tool = tool

    precisions = []
    for query, expected in zip(test_queries, expected_results):
        top_k = search_tool.search(query)[:k]
        hits = sum(1 for offer_text, _ in top_k if offer_text in expected)
        # Divide by k (not by the number of results returned) so that short
        # result lists are penalised, as in the original implementation.
        precisions.append(hits / k)

    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

# Example usage: each query is paired by position with the list of offer texts
# considered relevant for it. The first pair is an empty query with no expected
# offers, so it contributes a precision of 0 to the average.
test_queries = ['', 'CVS']
expected_results = [[], ['Spend $10 at CVS', 'Spend $30 at CVS']]
precision_at_3 = calculate_precision_at_k(test_queries, expected_results, 3)
print(f"Average Precision at 3: {precision_at_3}")


Average Precision at 3: 0.0
