In [1]:
# Aaron Jiang, StockX Webscraping 

In [2]:
import numpy as np
import pandas as pd
import requests
import json
from bs4 import BeautifulSoup
%matplotlib inline

In [3]:
# Request headers sent with every StockX API call in this notebook.
# Key order is kept as-is because it is the order the headers are sent in.
headers = {
    # Content negotiation.
    'accept': 'application/json',
    'accept-encoding': 'utf-8',
    'accept-language': 'en-US,en;q=0.9',
    # Site-specific values.
    'app-platform': 'Iron',
    'referer': 'https://stockx.com/en-gb',
    # Client hints and fetch metadata.
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
    'sec-ch-ua-mobile': '?1',
    'sec-ch-ua-platform': '"Android"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    # Client identification.
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Mobile Safari/537.36',
    'x-requested-with': 'XMLHttpRequest',
}

In [4]:
def sieve(data):
    """Flatten a list of product records into one DataFrame row per product.

    Parameters
    ----------
    data : list of dict
        Product records. Each record must provide ``'title'``, ``'brand'``,
        ``'retailPrice'``, ``'releaseDate'`` and a ``'market'`` dict. The
        ``'market'`` keys of the *first* record decide which market columns
        are produced.

    Returns
    -------
    pandas.DataFrame
        Columns ``item``, ``brand``, ``retail``, ``release`` followed by the
        retained market fields, in the order they appear in the first record.
        An empty ``data`` yields an empty frame with only the four base
        columns.

    Raises
    ------
    KeyError
        If a record lacks a required key or one of the retained market fields.
    """
    fields = ['item', 'brand', 'retail', 'release']
    if not data:
        # Nothing to inspect for market keys; still return a well-formed frame.
        return pd.DataFrame(columns=fields)

    # Market fields whose name matches this pattern are discarded.
    noise_pattern = '(?i)([^b]id|siz|At$|last[^s]|has|fea|par|[^b]ran|abs|tot)'
    # These two are always discarded when present; their absence is not an error.
    redundant = ('lastSaleDate', 'changeValue')

    market_keys = list(data[0]['market'].keys())
    is_noise = pd.Index(market_keys).str.extract(noise_pattern)[0].notna()
    kept = [
        key
        for key, drop in zip(market_keys, is_noise)
        if not drop and key not in redundant
    ]

    # Build every column up front and create the frame once, using explicit
    # column keys rather than attribute assignment.
    columns = {
        'item': [product['title'] for product in data],
        'brand': [product['brand'] for product in data],
        'retail': [product['retailPrice'] for product in data],
        'release': [product['releaseDate'] for product in data],
    }
    for key in kept:
        columns[key] = [product['market'][key] for product in data]
    return pd.DataFrame(columns)
# filter stuff out, too much data describing other data

In [5]:
def datascraper(query, timeout=30):
    """Fetch one page of the StockX sneaker browse listing as a DataFrame.

    Parameters
    ----------
    query : int or str
        Value substituted into the ``page=`` parameter of the browse URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` may block indefinitely.

    Returns
    -------
    pandas.DataFrame
        Result of ``sieve`` applied to the ``'Products'`` list of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the decoded response has no ``'Products'`` entry.
    """
    url = f'https://stockx.com/api/browse?browseVerticals=sneakers&page={query}'
    page = requests.get(url=url, headers=headers, timeout=timeout)
    # Surface blocked or rate-limited responses as an HTTP error instead of a
    # confusing JSON decode failure or missing-key error further down.
    page.raise_for_status()
    source = page.json()
    return sieve(source['Products'])
# browse results only go up to the 25th page
LAST_PAGE = 25
# Collect every page first and concatenate once: calling pd.concat inside the
# loop re-copies all previously gathered rows on each iteration.
df = pd.concat(
    [datascraper(page) for page in range(1, LAST_PAGE + 1)],
    ignore_index=True,
)

In [6]:
# Report the size of the scraped table, persist it, and preview the first rows.
n_rows, n_cols = df.shape
print(f'rows: {n_rows}\ncolumns: {n_cols}')
df.to_csv('sneakers.csv')
df.head()

rows: 1000
columns: 17


Unnamed: 0,item,brand,retail,release,lowestAsk,numberOfAsks,salesThisPeriod,highestBid,numberOfBids,annualHigh,annualLow,volatility,deadstockSold,pricePremium,averageDeadstockPrice,lastSale,changePercentage
0,Jordan 4 Retro Infrared,Jordan,200,2022-06-15,238,1921,2554,300,1002,2043,216,0.027657,3306,0.285,287,257,0.011811
1,adidas Yeezy Foam RNR Onyx,adidas,80,2022-06-08,153,3827,2338,233,1183,833,136,0.080396,10289,1.375,186,190,0.055556
2,Jordan 1 Retro High OG Visionaire,Jordan,170,2022-06-11,181,1368,420,200,580,343,137,0.04973,1378,0.253,206,213,0.105689
3,NikeCraft General Purpose Shoe Tom Sachs,Nike,110,2022-06-10,259,584,348,775,1136,2199,100,0.248204,836,5.355,552,699,0.035556
4,adidas Yeezy Boost 700 Hi-Res Red,adidas,300,2022-06-17,329,453,198,350,194,484,266,0.114064,218,-0.113,350,266,-0.139159


In [None]:
# `loss`, `profit` and `popular` are not defined in this cell; presumably they
# are row selectors and a list of brand names built earlier (TODO confirm).
losses = df.loc[loss]
profits = df.loc[profit]

# Listings whose brand is not in `popular` are pooled into a single bucket
# that is appended as the last entry of every row below.
rest = df[~df.brand.isin(popular)]
other_pr = len(profits[~profits.brand.isin(popular)])
other_lo = len(losses[~losses.brand.isin(popular)])
other_qt = rest.salesthisperiod.sum()
other_as = rest.asks.sum()
other_bd = rest.bids.sum()

# Each row label reads from its matching frame:
# 'Profitable' <- profits, 'Unprofitable' <- losses.
bars = {
    'Profitable': [len(profits[profits.brand == i]) for i in popular] + [other_pr],
    'Unprofitable': [len(losses[losses.brand == i]) for i in popular] + [other_lo],
    'Quarterly Sales': [df[df.brand == i].salesthisperiod.sum() for i in popular] + [other_qt],
    'Total Asks': [df[df.brand == i].asks.sum() for i in popular] + [other_as],
    'Total Bids': [df[df.brand == i].bids.sum() for i in popular] + [other_bd],
}
def convert(bars):
    """Rescale every row of ``bars`` in place to percentages of that row's sum.

    Parameters
    ----------
    bars : dict
        Maps a row label to a list of numbers. Each list is replaced by a
        list of the same length holding every value's share of the row sum,
        in percent, rounded to two decimals.

    Returns
    -------
    None
        ``bars`` is modified in place.
    """
    for label, values in bars.items():
        # Compute the row sum once instead of once per element.
        row_total = sum(values)
        if row_total == 0:
            # An all-zero row has no meaningful shares; report 0% for every
            # entry rather than dividing by zero.
            bars[label] = [0.0 for _ in values]
            continue
        bars[label] = [np.round(value / row_total * 100, 2) for value in values]
# Turn raw counts into per-row percentages, then lay the rows out as stacked
# horizontal bars (one segment per brand, plus the trailing 'other' bucket).
convert(bars)
tags = list(bars.keys())
vals = np.array(list(bars.values()))
# Running totals along each row give the right edge of every segment.
total = vals.cumsum(axis=1)

# One legend entry and one colour per segment. `rev_labels` is presumably a
# mapping from brand name to display label (defined elsewhere - TODO confirm).
labels = [rev_labels[i] for i in popular] + ['other']
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap works on both
# older and newer releases. The colour count follows the number of segments
# so zip() below never silently drops a segment.
colors = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, len(labels)))

fig, ax = plt.subplots(figsize=(9,5.5), tight_layout=False)

ax.invert_yaxis()
ax.xaxis.set_visible(False)
ax.set_xlim(0, np.sum(vals, axis=1).max())
for i, (col, color) in enumerate(zip(labels, colors)):
    width = vals[:, i]
    # Left edge of this segment = running total minus its own width.
    borders = total[:, i] - width
    bar = ax.barh(tags, width, left=borders, height=.5, label=col, color=color)
    ax.bar_label(bar, label_type='center', color='white')
# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend(ncol=len(labels), bbox_to_anchor=(0,1), loc='lower left', fontsize=12);
