## Data Collection

In [2]:
from serpapi import GoogleSearch
import itertools
import pickle
import os

# Query Google Shopping for one shoe type / color pair and trim the hits
def get_shopping_results(shoe_type, color):
    """Search Google Shopping for ``"<shoe_type> <color>"`` and return trimmed records.

    The query string is printed before the request is made. The request goes
    through ``GoogleSearch`` using the module-level ``api_key``, which is looked
    up when the function is called.

    Args:
        shoe_type: Shoe category used as the first word of the query.
        color: Color used as the second word of the query.

    Returns:
        A list with one dict per entry of the response's ``shopping_results``
        (an empty list when that key is absent). Each dict carries the
        ``shoe_type`` and ``color`` arguments plus ``title``, ``link``,
        ``source``, ``extracted_price``, ``rating`` and ``reviews`` copied from
        the hit, with ``''`` standing in for any field the hit lacks.
    """
    query = f"{shoe_type} {color}"
    print(query)

    search = GoogleSearch({
        "api_key": api_key,
        "engine": "google_shopping",
        "q": query,
        "google_domain": "google.com",
    })
    response = search.get_dict()

    # Fields copied verbatim from each hit, in the order they appear in the record.
    kept_fields = ('title', 'link', 'source', 'extracted_price', 'rating', 'reviews')
    return [
        {
            'shoe_type': shoe_type,
            'color': color,
            **{field: hit.get(field, '') for field in kept_fields},
        }
        for hit in response.get('shopping_results', [])
    ]

def save_to_cache(data, filename='shoes_data_cache.pkl'):
    """Pickle ``data`` to ``filename``, replacing any existing file.

    Call this explicitly once the data to persist exists, e.g.
    ``save_to_cache(all_results)``. The function must not call itself: an
    unconditional self-call in the body would recurse without end.

    Args:
        data: Any picklable object.
        filename: Path of the cache file to write.

    Returns:
        None.
    """
    with open(filename, 'wb') as file:
        pickle.dump(data, file)


def load_from_cache(filename='data_cache.pkl'):
    """Return the object pickled in ``filename``, or ``None`` on a cache miss.

    A missing file, an empty file, or a truncated pickle all count as a miss,
    so the caller can fall back to rebuilding the data.

    NOTE(review): the default here ('data_cache.pkl') differs from the default
    of ``save_to_cache`` ('shoes_data_cache.pkl'); pass the filename explicitly
    when the two are meant to share a file.

    SECURITY: unpickling can execute arbitrary code, so only load cache files
    that this notebook itself wrote.

    Args:
        filename: Path of the cache file to read.

    Returns:
        The unpickled object, or ``None`` when no usable cache exists.
    """
    try:
        # Open directly instead of checking os.path.exists() first, so a file
        # that vanishes between check and open cannot raise.
        with open(filename, 'rb') as file:
            return pickle.load(file)
    except FileNotFoundError:
        return None  # Handle case where cache does not exist
    except (EOFError, pickle.UnpicklingError):
        return None  # Empty or truncated cache file: treat as a miss

In [5]:
# Define your shoe types and colors
shoes_type = ["sneaker"]
colors = ["red", "black"]
# itertools.product returns a one-shot iterator; keep the pairs in a list so
# any cell that loops over them can be re-run and still see every pair.
combinations = list(itertools.product(shoes_type, colors))
# API Key: read from the SERPAPI_API_KEY environment variable when it is set,
# so the real key never has to be written into the notebook.
api_key = os.environ.get("SERPAPI_API_KEY", "APIKEY")  # Replace with your API key

In [6]:
# Load data
# Read the file that save_to_cache() writes by default; a bare
# load_from_cache() call would look for 'data_cache.pkl' instead.
cached_data = load_from_cache('shoes_data_cache.pkl')
if cached_data is not None:
    all_results = cached_data
else:
    # Aggregate results from all searches
    all_results = []
    for shoe, color in combinations:
        shopping_results = get_shopping_results(shoe, color)
        all_results.extend(shopping_results)

sneaker red
sneaker black


## Statistics of the Results

In [62]:
# Number of result records held in all_results
len(all_results)

120

## Building Tree

In [64]:
from IPython.display import HTML, display
import tabulate

# Define a function to categorize price
def categorize_price(price):
    """Bucket a price into 'Low', 'Medium', 'High' or 'Unknown'.

    Args:
        price: A number, or anything ``float()`` accepts (e.g. ``"49.99"``).

    Returns:
        'Unknown' when ``price`` cannot be converted to a float (including
        ``''`` and ``None``); 'Low' for prices up to and including 50;
        'Medium' for prices above 50 up to and including 100; 'High' otherwise.
    """
    try:
        price = float(price)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects; ValueError
        # covers strings such as ''.
        return 'Unknown'  # or you could handle this differently
    if price <= 50:
        return 'Low'
    # No lower bound here: everything above 50 reaches this point, so a price
    # such as 50.5 lands in 'Medium' rather than slipping through to 'High'.
    if price <= 100:
        return 'Medium'
    return 'High'

# Define a function to categorize rating
def categorize_rating(rating):
    """Bucket a rating into '1 Star' through '5 Star', or 'Unknown'.

    Args:
        rating: A number, or anything ``float()`` accepts (e.g. ``"4.3"``).

    Returns:
        'Unknown' when ``rating`` cannot be converted to a float (including
        ``''`` and ``None``). Otherwise the label of the first upper bound the
        rating does not exceed: up to 1 is '1 Star', up to 2 is '2 Star', up to
        3 is '3 Star', up to 4 is '4 Star', and anything above 4 is '5 Star'.
    """
    try:
        rating = float(rating)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects; ValueError
        # covers strings such as ''.
        return 'Unknown'  # or you could handle this differently

    # Checked in ascending order, so each bound implicitly excludes the
    # ranges handled before it.
    for upper_bound, label in ((1, '1 Star'), (2, '2 Star'), (3, '3 Star'), (4, '4 Star')):
        if rating <= upper_bound:
            return label
    return '5 Star'

# Group the flat result records into a nested lookup tree
def build_tree(data):
    """Nest records as shoe type -> color -> source -> price range -> rating.

    Args:
        data: Iterable of dicts with the keys ``shoe_type``, ``source``,
            ``extracted_price``, ``rating``, ``title`` and ``link``; ``color``
            is optional and falls back to ``'Unknown'``.

    Returns:
        A dict of nested dicts, five levels deep. The innermost values are
        lists of ``{'title': ..., 'link': ...}`` dicts. The price and rating
        levels are keyed by the labels returned from ``categorize_price`` and
        ``categorize_rating``; the third level is keyed by the record's
        ``source`` field.
    """
    tree = {}
    for record in data:
        path = (
            record['shoe_type'],
            record.get('color', 'Unknown'),
            record['source'],
            categorize_price(record['extracted_price']),
            categorize_rating(record['rating']),
        )
        *branch_keys, leaf_key = path

        # Walk down, creating missing levels, until the parent of the leaf list.
        node = tree
        for key in branch_keys:
            node = node.setdefault(key, {})

        bucket = node.setdefault(leaf_key, [])
        bucket.append({'title': record['title'], 'link': record['link']})
    return tree

def display_tree(tree):
    """Render every shoe in ``tree`` as one row of an HTML table.

    Each row holds the five tree keys leading to the shoe (shoe type, color,
    brand, price range, rating) and a link that opens in a new tab.

    All text coming from the tree is HTML-escaped before it is placed in the
    markup, so titles, store names or URLs containing ``<``, ``&`` or quotes
    cannot break the table or the ``href`` attribute.

    Args:
        tree: Nested dict in the shape produced by ``build_tree``.

    Returns:
        None. The table is shown through ``IPython.display.display``.
    """
    from html import escape  # stdlib; imported locally because the file does not import it

    rows = []
    for shoe_type, colors in tree.items():
        for color, brands in colors.items():
            for brand, prices in brands.items():
                for price, ratings in prices.items():
                    for rating, shoes in ratings.items():
                        for shoe in shoes:
                            href = escape(str(shoe['link']), quote=True)
                            label = escape(str(shoe['title']))
                            link = f"<a href='{href}' target='_blank'>{label}</a>"
                            plain_cells = [escape(str(cell)) for cell in
                                           (shoe_type, color, brand, price, rating)]
                            rows.append(plain_cells + [link])
    # tabulate's 'html' format escapes cell text, which would show the <a> tag
    # as literal text; 'unsafehtml' emits cells verbatim, which is why every
    # cell is escaped by hand above.
    # NOTE(review): needs a tabulate release that supports 'unsafehtml' - confirm.
    display(HTML(tabulate.tabulate(rows, tablefmt='unsafehtml', headers=["Shoe Type", "Color", "Brand", "Price Range", "Rating", "Link"])))

# Example usage: build the nested tree from the collected results
tree = build_tree(all_results)
# display_tree(tree)  # uncomment to show the tree as an HTML table


In [63]:
def print_tree(tree, indent=''):
    """Print a nested tree as an indented text outline.

    Every key is printed on its own line, prefixed by ``indent``. A dict value
    is printed recursively one level (four spaces) deeper; any other value is
    iterated and each element printed as ``"<title>: <link>"`` one level deeper.

    Args:
        tree: Dict whose values are either dicts or iterables of dicts that
            have ``title`` and ``link`` keys.
        indent: Prefix for the keys at this level.
    """
    deeper = indent + '    '
    for label, subtree in tree.items():
        print(indent + str(label))
        if not isinstance(subtree, dict):
            # Leaf level: the value holds the shoe entries themselves.
            for shoe in subtree:
                print(deeper + f"{shoe['title']}: {shoe['link']}")
            continue
        print_tree(subtree, deeper)

# Example usage: rebuild the tree from the collected results
tree = build_tree(all_results)
# print_tree(tree)  # uncomment to print the tree as an indented text outline

## Plot Tree Visually

In [59]:
import plotly.graph_objects as go

def create_interactive_tree(tree, plot_width=1200, plot_height=1500):
    """Lay the shoe tree out as a left-to-right plotly figure.

    Every tree key becomes a labelled marker, and every shoe becomes a marker
    labelled "Shoe" whose hover text is the shoe title. Depth decides the x
    position (one ``x_spacing`` step per level); each node gets its own row,
    with y decreasing by one per node in traversal order. Grey line traces
    connect each node to its parent.

    Parent coordinates are passed down the traversal explicitly rather than
    looked up by label, so labels that repeat across levels (for example a
    price range and a rating that are both 'Unknown') cannot pull an edge to
    the wrong node.

    Args:
        tree: Nested dict in the shape produced by ``build_tree``.
        plot_width: Figure width passed to the layout.
        plot_height: Figure height passed to the layout.

    Returns:
        A ``plotly.graph_objects.Figure`` with the edge traces followed by the
        node traces.
    """
    edges = []
    nodes = []

    def add_node(node_text, parent_pos, x, y, hovertext=''):
        """Record a node at (x, y), link it to ``parent_pos`` if given, and return (x, y)."""
        nodes.append(dict(type="scatter", x=[x], y=[y], text=node_text, mode="markers+text", textposition="bottom center",
                          hoverinfo="text", hovertext=hovertext,
                          marker=dict(size=2, color='blue'),  # Reduced node size
                          textfont=dict(size=8)))
        if parent_pos is not None:
            edges.append(dict(type="scatter", x=[x, parent_pos[0]], y=[y, parent_pos[1]],
                              mode="lines", line=dict(width=1, color='grey')))
        return (x, y)

    y = 0
    x_spacing = 1.5  # Adjust horizontal spacing
    for shoe_type, colors in tree.items():
        x = 0
        type_pos = add_node(shoe_type, None, x, y)
        y -= 1

        for color, brands in colors.items():
            color_pos = add_node(color, type_pos, x + x_spacing, y)
            y -= 1

            for brand, prices in brands.items():
                brand_pos = add_node(brand, color_pos, x + 2 * x_spacing, y)
                y -= 1

                for price, ratings in prices.items():
                    price_pos = add_node(price, brand_pos, x + 3 * x_spacing, y)
                    y -= 1

                    for rating, shoes in ratings.items():
                        rating_pos = add_node(rating, price_pos, x + 4 * x_spacing, y)
                        y -= 1

                        for shoe in shoes:
                            add_node("Shoe", rating_pos, x + 5 * x_spacing, y,
                                     hovertext=f"{shoe['title']}")
                            y -= 1

    layout = dict(title="Interactive Tree of Shoes", showlegend=False,
                  hovermode='closest',
                  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                  width=plot_width, height=plot_height)  # Set the size

    fig = go.Figure(data=edges + nodes, layout=layout)
    return fig


## Tree Structure

In [60]:
# Rebuild the tree and create the figure; as the cell's last expression, the
# returned figure becomes the cell output.
tree = build_tree(all_results)
create_interactive_tree(tree)

## Code that travels down the tree based on user input

In [61]:
def user_select_options(options, prompt):
    """Show a numbered menu and return the option the user picks.

    The prompt and the menu are printed once; the user is then asked for a
    number until a valid one is entered. Input that is not an integer, or that
    falls outside ``1..len(options)``, is rejected with a short message instead
    of crashing or (for 0 and negative numbers) silently selecting an entry
    counted from the end of the list.

    Args:
        options: Non-empty sequence of choices, shown numbered from 1.
        prompt: Heading printed above the menu.

    Returns:
        The element of ``options`` matching the number entered.

    Raises:
        ValueError: If ``options`` is empty, since no choice could ever be valid.
    """
    if not options:
        raise ValueError("No options to choose from.")

    print(prompt)
    for i, option in enumerate(options, 1):
        print(f"{i}. {option}")

    while True:
        raw_choice = input("Enter your choice (number): ")
        try:
            choice = int(raw_choice)
        except ValueError:
            print(f"Please enter a number between 1 and {len(options)}.")
            continue
        if 1 <= choice <= len(options):
            return options[choice - 1]
        print(f"Please enter a number between 1 and {len(options)}.")

def find_shoes(tree):
    """Walk the tree one level at a time, guided by the user's menu choices.

    The user picks, in order, a shoe type, a color, a brand, a price range and
    a rating via ``user_select_options``; each menu lists the keys available
    under the choices made so far. The shoes stored under the final choice are
    then printed with their title and link.

    Args:
        tree: Nested dict in the shape produced by ``build_tree``.
    """
    level_prompts = (
        "Choose a shoe type:",
        "Choose a color:",
        "Choose a brand:",
        "Choose a price range:",
        "Choose a rating:",
    )

    # Descend one level per prompt; after the last one `node` is the shoe list.
    node = tree
    for level_prompt in level_prompts:
        picked = user_select_options(list(node.keys()), level_prompt)
        node = node[picked]

    if not node:
        print("No shoes found with these criteria.")
        return

    print("\nFound shoes:")
    for shoe in node:
        print(f"Title: {shoe['title']}\nLink: {shoe['link']}\n")

# Example usage: rebuild the tree, then pick a path through it interactively
# (find_shoes prompts for each level via input()).
tree = build_tree(all_results)  # Assuming all_results is your dataset
find_shoes(tree)


Choose a shoe type:
1. sneaker
Enter your choice (number): 1
Choose a color:
1. red
2. black
Enter your choice (number): 1
Choose a brand:
1. adidas
2. Nike
3. DICK'S Sporting Goods
4. Vans
5. Amazon.com - Seller
6. Nordstrom Rack
7. GOAT
8. Zappos.com
9. The Edit LDN
10. farfetch.com
11. Finish Line
12. PUMA
13. Walmart - ATshoes.SHOES
14. Steve Madden
15. ALDO Shoes
16. ShopWSS
17. Converse
18. Jeffrey Campbell
19. Kids Foot Locker
20. Hibbett Sports
21. Macy's
22. Shop Premium Outlets
23. shopAKIRA.com | AKIRA
24. Foot Locker
25. DSW
26. JD Sports
27. Champs Sports
28. Dillard's
29. shoebacca.com
30. Flight Club
31. eBay
32. Tillys
33. SHEIN
34. T.U.K.
35. StockX
36. Soxy
37. Rack Room Shoes
38. Walmart
39. Academy Sports + Outdoors
40. Kohl's
41. Handball-Store US
42. Poshmark
43. Sears - Built4Speed
44. shop.com
45. Rainbow Shops
46. Coach Outlet
Enter your choice (number): 33
Choose a price range:
1. Low
Enter your choice (number): 1
Choose a rating:
1. Unknown
Enter your choice 