# Walmart API overview
##### Search API used for look ups. We will use this first
##### Taxonomy API used for classifications.  We want to find a way to count classifications

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import urllib
import json
import sys
import pprint

# We are using 2 api Keys for redundancy and because
# there is a limit to the number of calls
api_key = "4tsbjxvbrwnpcjdsh277csqu"
api_alt = "anrtcd5dheh9758z5wgdy677"

# Base url for all API look ups
base_url = "http://api.walmartlabs.com/v1/" 

# Variables to specify the API
search = "search?"
paginated_products = "paginated/items?"

# Base url for search
search_url = base_url + search
paginated_url = base_url + paginated_products



# Search API

In [44]:
# Load search criteria for Walmart Search API.
# reference URL: https://developer.walmartlabs.com/docs/read/Search_API

format = "json"
categoryId = "3944"
query = "laptop"
numItems = 25 # max 25. 10 is default
start = 1
sort = "price"
order = "desc"
facet = "on" # boolean
min = str(0) # lower end of range for search within price range
max = str(200) # higher end of range price for search with price range
facet_range = (f"price:[{min}%20TO%20{max}]") # note the format [num1:num2] does not work. Use %20TO%20 for ":"



# NOTE: we are using 2 methods to create urls. Because the params argument does not format the output
# in the manner needed to get the API to work.  This is used only for price range lookups

# manual url. Used for price range lookup only
manual_url = (f"{search_url}apiKey={api_alt}&format={format}&categoryId={categoryId}&query={query}&numItems={numItems}&facet={facet}&facet.range={facet_range}")

print(manual_url)

# API parameters for other lookups
params = {
    "apiKey": api_alt,
    #"start": start, # note keep turned off.  We can't get this to work yet
    "format": "json", # json|xml
    "categoryId": categoryId,
    "query": query,
    "numItems": numItems,
    #"sort": sort, #allowed sort types are [relevance, price, title, bestseller, customerRating, new]
    #"order": order,  #allowed values are [asc, desc]
    #"facet":facet, # note keep turned off.  We can't get this to work yet
    #"facet.range":facet_range    # note keep turned off.  We can't get this to work yet
}

http://api.walmartlabs.com/v1/search?apiKey=anrtcd5dheh9758z5wgdy677&format=json&categoryId=3944&query=laptop&numItems=25&facet=on&facet.range=price:[0%20TO%20200]


In [None]:
# We are going to run through this one time to make sure we are pulling data correctly
# There is a limitation of 25 results per search.  
# Bellow we will create a loop to pull multiple pages
# We will try two methods: price range and paginated results (has a limit of 1000 items)
# Remember we are limited to 5000 requests per day

walmart_data = requests.get(search_url) # or requests.get(search_url, params=params)
walmart_url = walmart_data.url
walmart_data = walmart_data.json()
print(walmart_url)

numItems = walmart_data["numItems"]

#create dictionary to store walmart_data
result = {}
result["totalResults"] = walmart_data["totalResults"]
result["start"] = walmart_data["start"]
result["numItems"] = walmart_data["numItems"]
result["items"] = walmart_data["items"]



In [58]:
# Loop by pricing parameters to pull out entire data set for a product category
# to save time we are going to hard code some numbers then go back later if we have time

# Number of items found in our search
# there are 20,747 total results for laptop in electronics
# step 0 there are 3089 results in the price range $0-$200
# step 1 there are 3015 results in the price range $201-$400
# step 2 there are 2053 results in the price range $401-$600
# step 3 there are 2995 results in the price range $601-$1000
# step 4 there are 2021 results in the price range $1001-$1400
# step 5 there are 2726 results in the price range $1401-$1800
# step 6 there are 1873 results in the price range $1801-$2200
# step 7 there are 1011 results in the price range $2201-$2600
# step 8 there are 863 results in the price range $2601-3000
# step 9 there are 1096 results in the price range above $3000

category_total = 20747 #hard coded from broad search
step0 = 200*25//3089
step1 = 200*25//3015
step2 = 200*25//2053
step3 = 400*25//2995
step4 = 400*25//2021
step5 = 400*25//2726
step6 = 400*25//1873
step7 = 400*25//1011
step8 = 400*25//863

# create a function to perform the price search

def price_data(lowRange, highRange):
    facet_range = (f"price:[{lowRange}%20TO%20{highRange}]")
    price_url = (f"{search_url}apiKey={api_alt}&format={format}&categoryId={categoryId}&query={query}&numItems={numItems}&facet={facet}&facet.range={facet_range}")
    walmart_data2 = requests.get(price_url)
    walmart_data2 = walmart_data2.json()
    
    try:       
        a = []
        if not os.path.isfile('Walmart_datasets/walmart_laptops.json'):
            a.append(walmart_data2['items'])
            with open('Walmart_datasets/walmart_laptops.json', mode='w') as f:
                f.write(json.dumps(walmart_data2['items'], indent=4))
        else:
            with open('Walmart_datasets/walmart_laptops.json') as feedsjson:
                feeds = json.load(feedsjson)

            feeds.append(walmart_data2['items'])
            with open('Walmart_datasets/walmart_laptops.json', mode='w') as f:
                f.write(json.dumps(feeds, indent=4))

    except:
        print ("Skipped line")
                


In [67]:
# Use for loop to go through prices in the steps defined

searchCount = 0
itemCount = 0
low = 0
high = 0

for itemCount in range (0,21000):
    if high < 200:
        high = low + step0
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 400:
        high = low + step1
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 600:
        high = low + step2
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 1000:
        high = low + step3
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")            
    elif high < 1400:
        high = low + step4
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")    
    elif high < 1800:
        high = low + step5
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 2200:
        high = low + step6
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")        
    elif high < 2600:
        high = low + step7
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 3000:
        high = low + step8
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    elif high < 8000:
        high = low + step9
        price_data(low,high)
        low = high
        itemCount = itemCount + 25
        searchCount += 1
        print(f"search / item count = {searchCount} / {itemCount}")
    else:
        print ('finished')
        print (f'item count = {itemCount}')
        print (f'final search count = {searchCount}')
        print (f'final high price is ${high}')
        break
               
      



search / item count = 1 / 25
search / item count = 2 / 26
Skipped line
search / item count = 3 / 27
Skipped line
search / item count = 4 / 28
search / item count = 5 / 29
search / item count = 6 / 30
search / item count = 7 / 31
search / item count = 8 / 32
search / item count = 9 / 33
search / item count = 10 / 34
Skipped line
search / item count = 11 / 35
Skipped line
search / item count = 12 / 36
search / item count = 13 / 37
search / item count = 14 / 38
search / item count = 15 / 39
search / item count = 16 / 40
search / item count = 17 / 41
search / item count = 18 / 42
search / item count = 19 / 43
search / item count = 20 / 44
search / item count = 21 / 45
search / item count = 22 / 46
search / item count = 23 / 47
search / item count = 24 / 48
search / item count = 25 / 49
search / item count = 26 / 50
search / item count = 27 / 51
search / item count = 28 / 52
search / item count = 29 / 53
search / item count = 30 / 54
search / item count = 31 / 55
Skipped line
search / item 

search / item count = 253 / 277
search / item count = 254 / 278
search / item count = 255 / 279
search / item count = 256 / 280
search / item count = 257 / 281
search / item count = 258 / 282
search / item count = 259 / 283
search / item count = 260 / 284
search / item count = 261 / 285
search / item count = 262 / 286
search / item count = 263 / 287
search / item count = 264 / 288
search / item count = 265 / 289
search / item count = 266 / 290
search / item count = 267 / 291
search / item count = 268 / 292
search / item count = 269 / 293
search / item count = 270 / 294
search / item count = 271 / 295
search / item count = 272 / 296
search / item count = 273 / 297
search / item count = 274 / 298
search / item count = 275 / 299
search / item count = 276 / 300
search / item count = 277 / 301
search / item count = 278 / 302
search / item count = 279 / 303
search / item count = 280 / 304
search / item count = 281 / 305
search / item count = 282 / 306
search / item count = 283 / 307
search /

search / item count = 509 / 533
search / item count = 510 / 534
search / item count = 511 / 535
search / item count = 512 / 536
search / item count = 513 / 537
search / item count = 514 / 538
search / item count = 515 / 539
search / item count = 516 / 540
search / item count = 517 / 541
search / item count = 518 / 542
search / item count = 519 / 543
search / item count = 520 / 544
search / item count = 521 / 545
search / item count = 522 / 546
search / item count = 523 / 547
search / item count = 524 / 548
search / item count = 525 / 549
search / item count = 526 / 550
search / item count = 527 / 551
search / item count = 528 / 552
search / item count = 529 / 553
search / item count = 530 / 554
search / item count = 531 / 555
search / item count = 532 / 556
search / item count = 533 / 557
search / item count = 534 / 558
search / item count = 535 / 559
search / item count = 536 / 560
search / item count = 537 / 561
search / item count = 538 / 562
search / item count = 539 / 563
search /

JSONDecodeError: Expecting value: line 3 column 1 (char 2)

In [68]:
from pprint import pprint
import os
filepath = os.path.join('Walmart_datasets/walmart_laptops.json')
with open(filepath) as jsonfile:
    json_data = json.load(jsonfile)
    print (len(json_data))
    for i in range(len(json_data)):
        try:
            print (json_data['itemId'])
        except:
            print(f"skipped entry {i}")
    


518
skipped entry 0
skipped entry 1
skipped entry 2
skipped entry 3
skipped entry 4
skipped entry 5
skipped entry 6
skipped entry 7
skipped entry 8
skipped entry 9
skipped entry 10
skipped entry 11
skipped entry 12
skipped entry 13
skipped entry 14
skipped entry 15
skipped entry 16
skipped entry 17
skipped entry 18
skipped entry 19
skipped entry 20
skipped entry 21
skipped entry 22
skipped entry 23
skipped entry 24
skipped entry 25
skipped entry 26
skipped entry 27
skipped entry 28
skipped entry 29
skipped entry 30
skipped entry 31
skipped entry 32
skipped entry 33
skipped entry 34
skipped entry 35
skipped entry 36
skipped entry 37
skipped entry 38
skipped entry 39
skipped entry 40
skipped entry 41
skipped entry 42
skipped entry 43
skipped entry 44
skipped entry 45
skipped entry 46
skipped entry 47
skipped entry 48
skipped entry 49
skipped entry 50
skipped entry 51
skipped entry 52
skipped entry 53
skipped entry 54
skipped entry 55
skipped entry 56
skipped entry 57
skipped entry 58
ski