In [5]:
import pymongo
import json
import requests
import time

In [18]:
# Global variable for the database
db = None
CONNECTION_STRING = "mongodb://localhost:27017/"
COLLECTION_NAME = "TestingData"
FIELD_NAME = "ingredientList"
DATABASE_NAME = "GroceryDB"


In [20]:
def init_db(connection_string, database_name):
    """Initialize the MongoDB connection and set the global db variable."""
    global db
    try:
        client = pymongo.MongoClient(connection_string)
        db = client[database_name]
    except Exception as e:
        print("Error initializing database:", e)
        
init_db(CONNECTION_STRING,DATABASE_NAME)


In [21]:
def fetch_field_from_mongo(collection_name, field_name):
    """Fetch a specific field from a MongoDB collection and return as JSON."""
    global db
    if db is None:
        return json.dumps({"error": "Database not initialized. Run init_db cell first."})

    try:
        collection = db[collection_name]
        results = collection.find({}, {field_name: 1, "_id": 0})
        data = [doc.get(field_name, "Field not found") for doc in results]
        return data
    except Exception as e:
        print("The field fetch from DB failed!")
        return 

In [25]:
def count_response_codes(urls):
    
    """This function will give us the statistic on how many products are currently offered in the respective store."""
    
    status_counts = {"200": 0, "4xx": 0, "5xx": 0, "other": 0, "not_available_products": [] }

    for url in urls:
        try:
            print(url)
            response = requests.get(url, timeout=5)  # Timeout to prevent long waits
            status_code = response.status_code

            if status_code == 200:
                status_counts["200"] += 1
            elif 400 <= status_code < 500:
                status_counts["4xx"] += 1 
                status_counts["not_available_products"].append(url)
            elif 500 <= status_code < 600:
                status_counts["5xx"] += 1 
            else:
                status_counts["other"] += 1 

        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            status_counts["other"] += 1  # Count failed requests

    return status_counts

urls_in_db = fetch_field_from_mongo(COLLECTION_NAME,"url")
count_response_codes(urls_in_db[:5])


https://www.target.com/p/sunwarrior-organic-plant-protein-powder-chocolate-13-22oz/-/A-81995864
https://www.target.com/p/reese-hearts-of-palm-14-8oz/-/A-13022348
https://www.target.com/p/teddy-grahams-honey-graham-snacks-variety-pack-12oz-12ct/-/A-12945568
https://www.walmart.com/ip/396785612
https://products.wholefoodsmarket.com/product/bear-mango-apple-sour-yoyos-5-count-638d97


{'200': 3,
 '4xx': 2,
 '5xx': 0,
 'other': 0,
 'not_available_products': ['https://www.target.com/p/reese-hearts-of-palm-14-8oz/-/A-13022348',
  'https://www.target.com/p/teddy-grahams-honey-graham-snacks-variety-pack-12oz-12ct/-/A-12945568']}

In [37]:
def get_original_ids():
    """Fetch original IDs from MongoDB and limit to 5 values."""
    original_ids = fetch_field_from_mongo("TestingData", "original_ID")
    return original_ids[:5]

def build_url(product_id):
    """Construct the API URL using the product ID."""
    base_url = "https://redsky.target.com/redsky_aggregations/v1/web/pdp_client_v1?key=9f36aeafbe60771e321a7cc95a78140772ab3e96&tcin={}&is_bot=false&pricing_store_id=3324&visitor_id=019347C3E9160201A06B297D0CC373F6"
    return base_url.format(product_id)

def fetch_nutritional_data(url, headers):
    """Make a request to the API and fetch nutritional data."""
    try:
        response = requests.get(url, headers=headers, timeout=5)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Request failed for URL {url} with status code: {response.status_code}")
            return None
    except requests.exceptions.RequestException as e:
        print(f"Error while making the request: {e}")
        return None

def extract_nutritional_values(data):
    
    """Extract and format nutritional values from the API response."""
    nutritional_data = {}
    nutrition_facts = data.get('data', {}).get('product', {}).get('item', {}).get('enrichment',{}).get('nutrition_facts',{})
    nutrients = nutrition_facts.get('value_prepared_list', [])[0].get('nutrients')
    
    print(nutrients)
    
    for nutrient in nutrients:
        name = nutrient.get('name')
        quantity = nutrient.get('quantity')
        unit = nutrient.get('unit_of_measurement')
        if name and quantity is not None:
            nutritional_data[name] = {"quantity": quantity, "unit_of_measurement": unit}

    return nutritional_data

def fetch_nutritional_values():
    
    """Main function to fetch and store nutritional values."""
    original_ids = get_original_ids()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept": "application/json",
        "X-Requested-With": "XMLHttpRequest",
        "Content-Type": "application/json"
    }
    nutritional_values = {}
    
    for product_id in original_ids:
        
        if not product_id.startswith("tg"):
            print(product_id)
            continue
        new_id = product_id[3:]
        if new_id.isdigit():
            url = build_url(new_id)
            print(f"Making request for URL: {url}")
            
            data = fetch_nutritional_data(url, headers)
            if data:
                nutritional_values[new_id] = extract_nutritional_values(data)
                print(nutritional_values[new_id])
            time.sleep(5)
            
    print(nutritional_values)
    
    return nutritional_values

fetch_nutritional_values()

Making request for URL: https://redsky.target.com/redsky_aggregations/v1/web/pdp_client_v1?key=9f36aeafbe60771e321a7cc95a78140772ab3e96&tcin=81995864&is_bot=false&pricing_store_id=3324&visitor_id=019347C3E9160201A06B297D0CC373F6
[{'name': 'Total Fat', 'percentage': 3.0, 'quantity': 2.0, 'unit_of_measurement': 'grm'}, {'name': 'Saturated Fat', 'percentage': 0.0, 'quantity': 0.0, 'unit_of_measurement': 'grm'}, {'name': 'Trans Fat', 'percentage': 0.0, 'quantity': 0.0, 'unit_of_measurement': 'grm'}, {'name': 'Cholesterol', 'percentage': 0.0, 'quantity': 0.0, 'unit_of_measurement': 'mg'}, {'name': 'Sodium', 'percentage': 10.0, 'quantity': 220.0, 'unit_of_measurement': 'mg'}, {'name': 'Total Carbohydrate', 'percentage': 1.0, 'quantity': 2.0, 'unit_of_measurement': 'grm'}, {'name': 'Dietary Fiber', 'percentage': 7.0, 'quantity': 2.0, 'unit_of_measurement': 'grm'}, {'name': 'Total Sugars', 'percentage': 0.0, 'quantity': 1.0, 'unit_of_measurement': 'grm'}, {'name': 'Added Sugars', 'percentage':

{'81995864': {'Total Fat': {'quantity': 2.0, 'unit_of_measurement': 'grm'},
  'Saturated Fat': {'quantity': 0.0, 'unit_of_measurement': 'grm'},
  'Trans Fat': {'quantity': 0.0, 'unit_of_measurement': 'grm'},
  'Cholesterol': {'quantity': 0.0, 'unit_of_measurement': 'mg'},
  'Sodium': {'quantity': 220.0, 'unit_of_measurement': 'mg'},
  'Total Carbohydrate': {'quantity': 2.0, 'unit_of_measurement': 'grm'},
  'Dietary Fiber': {'quantity': 2.0, 'unit_of_measurement': 'grm'},
  'Total Sugars': {'quantity': 1.0, 'unit_of_measurement': 'grm'},
  'Added Sugars': {'quantity': 0.0, 'unit_of_measurement': 'grm'},
  'Protein': {'quantity': 16.0, 'unit_of_measurement': 'grm'},
  'Vitamin D': {'quantity': 0.0, 'unit_of_measurement': 'mcg'},
  'Calcium': {'quantity': 30.0, 'unit_of_measurement': 'mg'},
  'Iron': {'quantity': 5.6, 'unit_of_measurement': 'mg'},
  'Potassium': {'quantity': 120.0, 'unit_of_measurement': 'mg'},
  'Calories': {'quantity': 90.0, 'unit_of_measurement': 'kcal'}}}