# NikeScrAPI

**Fall 2022**<br/>
**Author**: Artemio Mendoza

<hr style="height:2pt">

## Scrape products from NIKE.com using their webdev API
#### Note: This class will download ALL products listed in nike.com website, not only shoes. The shoes are filtered out.


In [4]:
import requests         
import json
import pandas as pd
import numpy as np
import os
from os import path
from tqdm import tqdm, trange
from time import sleep
from bs4 import BeautifulSoup  
from datetime import date, datetime

In [5]:
# initial time, in case we run everythin in one single pass
initial_time=datetime.now() 

## Scrape Clase

It is highly configurable, look the options to set it up correctly<br/>
The results are stored in a predefined file<br/>
Intermediate results for each shoe category are stored in temporary files

In [6]:
class NikeScrAPI:   
    '''
    Uses nike's website API to scrape data.
        NOTE: for production set max_pages = 200
    '''
    def __init__(self, country='US', 
                 lan='en', max_pages=1, 
                 get_description=True, 
                 single_category=None, 
                 debug=False, 
                 filename='nike',
                 path='data', ):
        
        self.__count = 24
        self.__page_size = 24
        self.__country = country
        self.__lan = lan
        self.__DEBUG = debug
        self.__url_base = "https://www.nike.com" 
        self.__DEFAULT_REQUESTS_TIMEOUT = (5, 15) # for example
        self.__filePrefix = filename
        self.__path = path
        
        # If TRUE, then it gets the full description and ratings from each product's url. 
        # Takes more time, but data is complet
        self.__full_description = get_description
        
        # Estimated max number of pages in each category
        self.__max_number_of_pages = max_pages  # recommended 200 for production, 1 for testing
        
        # Data Structure
        self.shoeDict = { 
            'UID':[],
            'cloudProdID':[],
            'productID':[],
            'shortID':[],
            'colorNum':[],
            'title':[],
            'subtitle':[],
            'category':[],
            'type':[],
            'currency':[],
            'fullPrice':[],
            'currentPrice':[],
            'sale':[],
            'TopColor':[],
            'channel':[],
            'short_description':[],
            'rating':[],
            
            'customizable': [],
            'ExtendedSizing': [],           
            'inStock': [],
            'ComingSoon': [],
            'BestSeller': [],
            'Excluded': [],
            'GiftCard': [],
            'Jersey': [],
            'Launch': [],
            'MemberExclusive': [],
            'NBA': [],
            'NFL': [],
            'Sustainable': [],
            'label': [],
            'prebuildId': [],
            'prod_url':[],

            'color-ID':[],
            'color-Description':[],
            'color-FullPrice':[],
            'color-CurrentPrice':[],
            'color-Discount':[],
            'color-BestSeller':[],
            'color-InStock':[],
            'color-MemberExclusive':[],
            'color-New':[],
            'color-Label':[],
            'color-Image-url':[],
        } 
        
        # Nike shoe categories
        if single_category:
            self.categories=[single_category] 
        else:
            self.categories=[
                'cycling',
                'jordan', 
                'running',
                'golf', 
                'training',
                'tennis',
                'football',
                'basketball',
                'boot',
                'baseball',
                'soccer',
                'hiit',
                'volleyball',
                'lifestyle',
            ]
        
    def __repr__(self):
        return f'{type(self).__name__}({self.__max_number_of_pages!r})'   

    def __log_exception(self,e, verb, url, kwargs):
        '''
        log get exceptions (code from https://stackoverflow.com/questions/16511337/correct-way-to-try-except-using-python-requests-module)
        '''
        print(f'exception - Error {e}, {verb}')
        # raw_tb = traceback.extract_stack()
        # if 'data' in kwargs and len(kwargs['data']) > 500: # anticipate giant data string
        #     kwargs['data'] = f'{kwargs["data"][:500]}...'  
        # msg = f'BaseException raised: {e.__class__.__module__}.{e.__class__.__qualname__}: {e}\n' \
        #     + f'verb {verb}, url {url}, kwargs {kwargs}\n\n' \
        #     + 'Stack trace:\n' + ''.join(traceback.format_list(raw_tb[:-2]))
        # logger.error(msg) 

    def __requests_call(self,verb, url, **kwargs):
        '''
        request wrapper call (code from https://stackoverflow.com/questions/16511337/correct-way-to-try-except-using-python-requests-module)
        '''
        response = None
        exception = None
        try:
            if 'timeout' not in kwargs:
                kwargs['timeout'] = self.__DEFAULT_REQUESTS_TIMEOUT
            response = requests.request(verb, url, **kwargs)
        except BaseException as e:
            self.__log_exception(e, verb, url, kwargs)
            exception = e
        return (response, exception)

    def __getRating(self,indiv_shoe_soup):
        '''
        try to get the ratings for a given product
        '''
        try:
            return indiv_shoe_soup.find('p', class_='d-sm-ib pl4-sm').text.split()[0]
        except AttributeError:
            return np.NaN


    def __getDescription(self,indiv_shoe_soup): 
        '''
        tries to get the short description of a given product
        '''
        div_desc = indiv_shoe_soup.find('div', attrs={'class':'description-preview'})
        try:
            description = div_desc.find('p').text
        except AttributeError:
            return np.NaN

        return description
            
    def getDescAndRatings(self, url):
        '''
        gets description and ratings at once, from product url
        '''
        # Gets website page from prod_url
        indiv_shoe_page, exception  = self.__requests_call('get',url)
                
        if not exception :
           indiv_shoe_soup = BeautifulSoup(indiv_shoe_page.text,'html.parser')
           short_desc = self.__getDescription(indiv_shoe_soup)
           rating     = self.__getRating(indiv_shoe_soup) 
        else:
           rating = np.NaN
           short_desc = np.NaN  
                
        return short_desc, rating
    
    def updateDescriptionAndRatings(self, df, category):
        '''
        iterates over a dataframe to get description and rating for each shoe,from product URL
        '''        
        old_product_id  = None

        for index in tqdm(df[df['category']==category].index, desc=category.upper()):   

            shoe = df.loc[index]
            new_product_id = shoe['productID']

            if new_product_id != old_product_id:
                if self.__DEBUG: print(f">>>New Product {new_product_id}")

                # Gets website page from prod_url
                url = shoe['prod_url']
                
                shor_desc, rating = self.getDescAndRatings(url) 

                if self.__DEBUG: print(f"({shoe['title']}), {short_desc}")
                if self.__DEBUG: print(url)

                old_product_id = new_product_id

            else:
                if self.__DEBUG: print('same product')

            df.at[index,'short_description'] = short_desc
            df.at[index,'rating'] = rating

    def __getProducts(self, category,  anchor=0):
        '''
        retrieve products from website
        '''    
        country = self.__country
        country_language = self.__lan 
        count=self.__page_size

        query = category
        
        # Nike website's API
        url = f'https://api.nike.com/cic/browse/v2?queryid=products&anonymousId=241B0FAA1AC3D3CB734EA4B24C8C910D&country={country}&endpoint=%2Fproduct_feed%2Frollup_threads%2Fv2%3Ffilter%3Dmarketplace({country})%26filter%3Dlanguage({country_language})%26filter%3DemployeePrice(true)%26searchTerms%3D{query}%26anchor%3D{anchor}%26consumerChannelId%3Dd9a5bc42-4b9c-4976-858a-f159cf99c647%26count%3D{count}&language={country_language}&localizedRangeStr=%7BlowestPrice%7D%E2%80%94%7BhighestPrice%7D'

        # Calls API 
        html, exception = self.__requests_call('get',url)
        
        output = json.loads(html.text)

        if self.__DEBUG : print(f'category:{query} anchor:{anchor} count:{count}')

        return output['data']['products']['products']

    def __setFilePrefix(self):
        '''
        set the timestamp to file prefix
        '''
        # get timestamp
        today = (datetime.now()).strftime('%d%b%Y_%H%M')
        timestamp = today.upper()
        
        self.__filePrefix = f'{self.__filePrefix}_{timestamp}'
        
    def __checkPath(self,path):
        '''
        verifies if path exits
        '''
        if not os.path.exists(path):
            os.makedirs(path)
            
    def __writeIntermediateFile(self, category):
        '''
        writes down an intermediate file with last category
        '''  
        
        
        # get number of current file (out of N categories)
        current = self.categories.index(category) + 1
        label = f'{category}_{current}_of_{len(self.categories)}'
        
        file_name = f'{self.__filePrefix}_{label}.csv'

        file_full_path = os.path.join(self.__path,'tmp',file_name) 
        
        # converts data dictionary to dataframe and removes duplicates
        shoes = pd.DataFrame(self.shoeDict)
        shoes = shoes.drop_duplicates(subset='UID')
        
        # get rows only for current category
        shoes = shoes[shoes['category']==category]
        
        shoes.to_csv(file_full_path)
        
        print(f"Intermediate file for category [{category}] saved as '{file_full_path}'")
        if self.__DEBUG: print(f'Saved itermediate file {file_full_path}')
    
    def __writeFinalFile(self, shoes):
        '''
        writes final file name
        '''
        file_name = f'{self.__filePrefix}.csv'
        
        file_full_path = os.path.join(self.__path, file_name)
        
        # Saves dataframe as CSV
        shoes.to_csv(file_full_path)        

    def __writeDictionary(self,category, k, item, color, short_desc, rating, prod_url):
        '''
        add rows to the Data Frame Dictionary
        '''
        # add surrogate IDs for shoe and color    
        self.shoeDict['colorNum'].append(k+1)
        self.shoeDict['UID'].append(item['cloudProductId']+color['cloudProductId'])
        self.shoeDict['productID'].append(item['id'])
        self.shoeDict['cloudProdID'].append(item['cloudProductId'])
        self.shoeDict['shortID'].append(item['id'][-12:]) 
        
        self.shoeDict['type'].append(item['productType'])
        self.shoeDict['category'].append(category)
        self.shoeDict['title'].append(item['title'])
        self.shoeDict['subtitle'].append(item['subtitle'])
        self.shoeDict['short_description'].append(short_desc)
        self.shoeDict['rating'].append(rating)                                                                                 
        
        self.shoeDict['currency'].append(item['price']['currency'])
        self.shoeDict['fullPrice'].append(item['price']['fullPrice'])
        self.shoeDict['sale'].append(item['price']['discounted'])
        self.shoeDict['currentPrice'].append(item['price']['currentPrice'])
        self.shoeDict['TopColor'].append(item['colorDescription'])
        self.shoeDict['channel'].append(item['salesChannel'])
        self.shoeDict['prod_url'].append(prod_url)

        self.shoeDict['customizable'].append(item['customizable'])
        self.shoeDict['ExtendedSizing'].append(item['hasExtendedSizing'])          
        self.shoeDict['inStock'].append(item['inStock'])
        self.shoeDict['ComingSoon'].append(item['isComingSoon'])
        self.shoeDict['BestSeller'].append(item['isBestSeller'])
        self.shoeDict['Excluded'].append(item['isExcluded'])
        self.shoeDict['GiftCard'].append(item['isGiftCard'])
        self.shoeDict['Jersey'].append(item['isJersey'])
        self.shoeDict['Launch'].append(item['isLaunch'])
        self.shoeDict['MemberExclusive'].append(item['isMemberExclusive'])
        self.shoeDict['NBA'].append(item['isNBA'])
        self.shoeDict['NFL'].append(item['isNFL'])
        self.shoeDict['Sustainable'].append(item['isSustainable'])
        self.shoeDict['label'].append(item['label'])
        self.shoeDict['prebuildId'].append(item['prebuildId'])

        # Color Components
        self.shoeDict['color-ID'].append(color['cloudProductId'])
        self.shoeDict['color-Description'].append(color['colorDescription'])
        self.shoeDict['color-FullPrice'].append(color['price']['fullPrice'])
        self.shoeDict['color-CurrentPrice'].append(color['price']['currentPrice'])
        self.shoeDict['color-Discount'].append(color['price']['discounted'])
        self.shoeDict['color-BestSeller'].append(color['isBestSeller'])
        self.shoeDict['color-Image-url'].append(color['images']['portraitURL']) 
        self.shoeDict['color-InStock'].append(color['inStock'])

        self.shoeDict['color-MemberExclusive'].append(color['isMemberExclusive'])
        self.shoeDict['color-New'].append(color['isNew'])
        self.shoeDict['color-Label'].append(color['label'])        
        
    def getData(self):
        '''
        Happy Scraping! 
        Main Method to Scrape Data. It cycles across all elements
        '''
        # reset file prefix for this run
        self.__setFilePrefix()
        # check temp and data directories exist
        self.__checkPath(self.__path)
        self.__checkPath(os.path.join(self.__path,'tmp'))
        
        # count stores the number of rows scrapped per page
        count = self.__count
        anchor = 0
        total_rows = 0

        # get info for each category in the website
        for category in (self.categories): 
            # print(f"Processing category '{category.upper()}'")
            page_number = 0

            # load new pages from the search engine
            for page_number in tqdm(range(self.__max_number_of_pages), desc=category.upper()):

                # Get new html page
                anchor = page_number * self.__page_size  
                output = self.__getProducts(category=category, anchor=anchor)
                page_number +=1
                if self.__DEBUG: print(f'category: {category}, rows: {total_rows}, type(output):{type(output)}')

                # If output is empty, breaks the loop, ending the search for this category
                if output == None:
                    if self.__DEBUG: print(f'End processing searched {i} pages, {rows} rows, {tenis_rows} footwear')
                    break
                else:

                    # Loop through products and print name
                    for j, item in enumerate(output):

                        # pick only footwear, filtering out everything else      
                        if item['productType'] == 'FOOTWEAR': 
                            
                            # Retrieve short description and ratings this makes the process 10X slower
                            prod_url = item['url'].replace('{countryLang}',self.__url_base)
                            
                            short_desc = np.NaN
                            rating = np.NaN
                            if self.__full_description:
                                short_desc, rating = self.getDescAndRatings(prod_url)

                            # Retrieves features for each color 
                            for k, color in enumerate(item['colorways']):
                                self.__writeDictionary(category, k, item, color, short_desc, rating, prod_url)
                                total_rows +=1
                                
                                if self.__DEBUG :
                                    print(f"{j}:{k}:{item['cloudProductId'][-12]+color['cloudProductId']}:{item['title']},{item['subtitle']},{color['colorDescription']}")
                          
            # writes intermediate file
            self.__writeIntermediateFile(category)      
        
        # Remove Dupes
        shoes = pd.DataFrame(self.shoeDict)
        shoes = shoes.drop_duplicates(subset='UID')
        
        self.__writeFinalFile(shoes)
        
        # final message
        print(f'\nScraping Finished, Total {total_rows} items processed')
        print(f"total rows in dataframe:{len(shoes['UID'])}, unique rows:{len(shoes['UID'].unique())}")
        
        file_full_path = os.path.join(f'{self.__filePrefix}.csv', self.__path) 
        print(f"final dataset file saved as '{file_full_path}'")
        
        return shoes

## Quick smoke test 
### Getting only one page of products (for one single category)

Parameters: (max_pages = 1, get_description=True, single_category='cycling')

In [7]:
# Scrape Nike!
# Object set up to quick iterate over 1 single category ... only for testing
# set max_pages = 1 to test it and = 300 to get the whole inventory
nikeAPI = NikeScrAPI(max_pages=1, get_description=True, single_category='cycling')

# get data
nike = nikeAPI.getData()

CYCLING: 100%|████████████████████████████████████| 1/1 [00:04<00:00,  4.15s/it]

Intermediate file for category [cycling] saved as 'data/tmp/nike_02JAN2023_1346_cycling_1_of_1.csv'

Scraping Finished, Total 6 items processed
total rows in dataframe:6, unique rows:6
final dataset file saved as 'nike_02JAN2023_1346.csv/data'





## Scraping Nike
### Saves full Scraped data to file: 
###       *data/nike_DDMMYYYY_HHMM.csv*</div>

#### Writes itermediate files per category to "data/temp"
#### hold your horses, this will take about 1 ~ 2 hours

Set max_pages = 300 to get the whole database

Parameters: ( max_pages=300, path='data')

You can look intermediate files for each category in the temporary file.

In [6]:
nikeAPI = NikeScrAPI(max_pages=300, path='data')

# Let's get some data!
nike = nikeAPI.getData()

CYCLING:   1%|          | 2/300 [00:05<13:05,  2.64s/it]
JORDAN:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [cycling] saved as 'data/tmp/nike_06DEC2022_1708_cycling_1_of_14.csv'


JORDAN:  18%|█▊        | 55/300 [06:02<26:55,  6.59s/it] 
RUNNING:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [jordan] saved as 'data/tmp/nike_06DEC2022_1708_jordan_2_of_14.csv'


RUNNING:  10%|█         | 30/300 [05:42<51:23, 11.42s/it]  
GOLF:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [running] saved as 'data/tmp/nike_06DEC2022_1708_running_3_of_14.csv'


GOLF:   4%|▎         | 11/300 [00:48<21:23,  4.44s/it]
TRAINING:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [golf] saved as 'data/tmp/nike_06DEC2022_1708_golf_4_of_14.csv'


TRAINING:  11%|█         | 32/300 [01:46<14:53,  3.33s/it] 
TENNIS:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [training] saved as 'data/tmp/nike_06DEC2022_1708_training_5_of_14.csv'


TENNIS:   2%|▏         | 7/300 [00:42<29:40,  6.08s/it]  
FOOTBALL:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [tennis] saved as 'data/tmp/nike_06DEC2022_1708_tennis_6_of_14.csv'


FOOTBALL:  29%|██▉       | 87/300 [01:51<04:32,  1.28s/it] 
BASKETBALL:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [football] saved as 'data/tmp/nike_06DEC2022_1708_football_7_of_14.csv'


BASKETBALL:  10%|█         | 30/300 [03:01<27:09,  6.04s/it] 
BOOT:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [basketball] saved as 'data/tmp/nike_06DEC2022_1708_basketball_8_of_14.csv'


BOOT:   1%|          | 3/300 [01:18<2:09:07, 26.08s/it]
BASEBALL:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [boot] saved as 'data/tmp/nike_06DEC2022_1708_boot_9_of_14.csv'


BASEBALL:  13%|█▎        | 38/300 [01:10<08:06,  1.86s/it] 
SOCCER:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [baseball] saved as 'data/tmp/nike_06DEC2022_1708_baseball_10_of_14.csv'


SOCCER:  17%|█▋        | 52/300 [02:44<13:05,  3.17s/it] 
HIIT:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [soccer] saved as 'data/tmp/nike_06DEC2022_1708_soccer_11_of_14.csv'


HIIT:   0%|          | 1/300 [00:19<1:36:27, 19.36s/it]
VOLLEYBALL:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [hiit] saved as 'data/tmp/nike_06DEC2022_1708_hiit_12_of_14.csv'


VOLLEYBALL:   2%|▏         | 6/300 [00:30<24:45,  5.05s/it]
LIFESTYLE:   0%|          | 0/300 [00:00<?, ?it/s]

Intermediate file for category [volleyball] saved as 'data/tmp/nike_06DEC2022_1708_volleyball_13_of_14.csv'


LIFESTYLE:  64%|██████▍   | 192/300 [25:00<14:04,  7.82s/it] 


Intermediate file for category [lifestyle] saved as 'data/tmp/nike_06DEC2022_1708_lifestyle_14_of_14.csv'

Scraping Finished, Total 5576 items processed
total rows in dataframe:4844, unique rows:4844
final dataset file saved as 'nike_06DEC2022_1708.csv/data'


## Looking basic data info (full fledge EDA provided in next step/notebook)

In [7]:
# Data Description
nike.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4844 entries, 0 to 5573
Data columns (total 44 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   UID                    4844 non-null   object 
 1   cloudProdID            4844 non-null   object 
 2   productID              4844 non-null   object 
 3   shortID                4844 non-null   object 
 4   colorNum               4844 non-null   int64  
 5   title                  4844 non-null   object 
 6   subtitle               4844 non-null   object 
 7   category               4844 non-null   object 
 8   type                   4844 non-null   object 
 9   currency               4844 non-null   object 
 10  fullPrice              4844 non-null   int64  
 11  currentPrice           4844 non-null   float64
 12  sale                   4844 non-null   bool   
 13  TopColor               4844 non-null   object 
 14  channel                4844 non-null   object 
 15  shor

In [8]:
nike.head(5)

Unnamed: 0,UID,cloudProdID,productID,shortID,colorNum,title,subtitle,category,type,currency,...,color-Description,color-FullPrice,color-CurrentPrice,color-Discount,color-BestSeller,color-InStock,color-MemberExclusive,color-New,color-Label,color-Image-url
0,43070a72-0b17-56cf-8335-aef1ff9dd04843070a72-0...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,1,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Black/Anthracite/Habanero Red/Phantom,120.0,58.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
1,43070a72-0b17-56cf-8335-aef1ff9dd048b4acd5f7-f...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,2,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Archaeo Pink/Metallic Mahogany/Dark Beetroot/L...,120.0,84.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
2,43070a72-0b17-56cf-8335-aef1ff9dd0483426a9a3-a...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,3,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,White/Black,120.0,84.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
3,43070a72-0b17-56cf-8335-aef1ff9dd048777d62b6-d...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,4,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Black/Hyper Crimson/Metallic Silver,120.0,58.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
4,43070a72-0b17-56cf-8335-aef1ff9dd048ac42be38-8...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,5,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,White/Black/Chutney/Volt,120.0,72.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."


In [9]:
nike.describe()

Unnamed: 0,colorNum,fullPrice,currentPrice,color-FullPrice,color-CurrentPrice
count,4844.0,4844.0,4844.0,4844.0,4844.0
mean,3.491742,107.501445,96.260006,107.443016,95.625136
std,3.301264,48.587458,47.233992,48.463076,46.811924
min,1.0,25.0,13.97,24.0,13.97
25%,1.0,70.0,60.0,70.0,60.0
50%,2.0,100.0,87.97,100.0,85.97
75%,5.0,140.0,127.97,140.0,125.0
max,32.0,350.0,305.0,350.0,305.0


In [10]:
# If we want to look a specific product description and rating 
# desc, rating = nikeAPI.getDescAndRatings(nike.loc[100,'prod_url'])
# print(desc, rating)

#### Looking shoes for Category = Cycling

In [10]:
# Only one category
nike[nike['category']=='cycling']

Unnamed: 0,UID,cloudProdID,productID,shortID,colorNum,title,subtitle,category,type,currency,...,color-Description,color-FullPrice,color-CurrentPrice,color-Discount,color-BestSeller,color-InStock,color-MemberExclusive,color-New,color-Label,color-Image-url
0,43070a72-0b17-56cf-8335-aef1ff9dd04843070a72-0...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,1,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Black/Anthracite/Habanero Red/Phantom,120,58.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
1,43070a72-0b17-56cf-8335-aef1ff9dd048b4acd5f7-f...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,2,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Archaeo Pink/Metallic Mahogany/Dark Beetroot/L...,120,84.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
2,43070a72-0b17-56cf-8335-aef1ff9dd0483426a9a3-a...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,3,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,White/Black,120,84.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
3,43070a72-0b17-56cf-8335-aef1ff9dd048777d62b6-d...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,4,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Black/Hyper Crimson/Metallic Silver,120,58.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
4,43070a72-0b17-56cf-8335-aef1ff9dd048ac42be38-8...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,5,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,White/Black/Chutney/Volt,120,72.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
5,43070a72-0b17-56cf-8335-aef1ff9dd048bc911d60-6...,43070a72-0b17-56cf-8335-aef1ff9dd048,e3f1f4d0-b408-4b2f-8135-b49874fa3cbe,b49874fa3cbe,6,Nike SuperRep Cycle,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Green Glow/Infinite Lilac/Laser Orange/Dark Sm...,120,58.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
6,f0a2dfed-0632-5e1c-a70a-c0e4ce541100f0a2dfed-0...,f0a2dfed-0632-5e1c-a70a-c0e4ce541100,b4a96baa-7c4f-4d87-9f1d-a438f912fd0a,a438f912fd0a,1,Nike SuperRep Cycle 2 Next Nature,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Black/Volt/Anthracite/White,120,96.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
7,f0a2dfed-0632-5e1c-a70a-c0e4ce541100c8d07ae8-c...,f0a2dfed-0632-5e1c-a70a-c0e4ce541100,b4a96baa-7c4f-4d87-9f1d-a438f912fd0a,a438f912fd0a,2,Nike SuperRep Cycle 2 Next Nature,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,Crimson Bliss/Total Orange/Pearl White,120,71.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."
8,f0a2dfed-0632-5e1c-a70a-c0e4ce541100a39e554f-c...,f0a2dfed-0632-5e1c-a70a-c0e4ce541100,b4a96baa-7c4f-4d87-9f1d-a438f912fd0a,a438f912fd0a,3,Nike SuperRep Cycle 2 Next Nature,Women's Indoor Cycling Shoes,cycling,FOOTWEAR,USD,...,White/Mint Foam/Laser Blue/Black,120,102.97,True,False,True,False,False,IN_STOCK,"https://static.nike.com/a/images/c_limit,w_400..."


#### Are there shoes without description?

In [12]:
# How many rows without description?
nike[nike['short_description'] == 'NaN']

Unnamed: 0,UID,cloudProdID,productID,shortID,colorNum,title,subtitle,category,type,currency,...,color-Description,color-FullPrice,color-CurrentPrice,color-Discount,color-BestSeller,color-InStock,color-MemberExclusive,color-New,color-Label,color-Image-url


#### Review some descriptions

In [13]:
# look some descriptions
nike.sort_values(by='short_description', ascending=False)['short_description']

1117    Your workhorse with wings returns. The Nike Co...
1118    Your workhorse with wings returns. The Nike Co...
1119    Your workhorse with wings returns. The Nike Co...
1120    Your workhorse with wings returns. The Nike Co...
1121    Your workhorse with wings returns. The Nike Co...
                              ...                        
5503                                                  NaN
5514                                                  NaN
5533                                                  NaN
5563                                                  NaN
5564                                                  NaN
Name: short_description, Length: 4844, dtype: object

#### Looking a specific shoe (index = 3970)

In [14]:
# Specific product
nike.loc[3970]

UID                      2c63c9e5-c45f-506e-9fde-a7f6b93adc582c63c9e5-c...
cloudProdID                           2c63c9e5-c45f-506e-9fde-a7f6b93adc58
productID                             7fa45b2e-3351-4484-b882-8fe027ab740d
shortID                                                       8fe027ab740d
colorNum                                                                 1
title                                              Nike Air Max 95 Recraft
subtitle                                                   Big Kids' Shoes
category                                                         lifestyle
type                                                              FOOTWEAR
currency                                                               USD
fullPrice                                                              145
currentPrice                                                        123.97
sale                                                                  True
TopColor                 

In [15]:
# brief time msg
final_time=datetime.now()        
print(f'Started at {initial_time}\nTotal Elapsed Time: {final_time - initial_time}')    
print('-o0o-The End-o0o-')

Started at 2022-12-06 17:07:58.846957
Total Elapsed Time: 0:51:08.916255
-o0o-The End-o0o-


### -o0o- The End -o0o-