### Import Libraries

In [1]:
# conda install -c conda-forge xgboost

In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer

from sklearn.linear_model import Lasso

### Preparing Data

In [2]:
# Load the consolidated StockX sales data and give the "Unnamed: 0" column
# (pandas' placeholder name for a header-less column) the explicit name "index".
data = pd.read_csv("../data/StockX-Data-Consolidated.csv").rename(
    columns={"Unnamed: 0": "index"}
)

In [3]:
# Separate the target from the features, then hold out 25% of the rows for testing.
# The columns listed below (the target itself among them) are excluded from the
# feature matrix.
excluded_columns = [
    "Pct_change", "Sale Price", "index",
    "California", "New York", "Oregon", "Florida", "Texas", "Other States",
    "Colorful",
]
y = data["Pct_change"]
x = data.drop(columns=excluded_columns)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

### Predictive Modeling

In [4]:
%%capture
random.seed(1)

# fit LASSO with 5-fold cross validation
lasso = Lasso()
parameters = {'alpha': [1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5]}
r2 = make_scorer(r2_score, greater_is_better=True)
clf = GridSearchCV(lasso, 
                   parameters, 
                   cv=5,
                   scoring=r2)
clf.fit(x_train, y_train)

In [5]:
# Collect the grid-search winner: the estimator, its hyper-parameters and its
# cross-validation score, then report the score.
lasso_best_estimator = clf.best_estimator_
lasso_best_params = clf.best_params_
lasso_score = clf.best_score_
print(f"Score: {lasso_score}")

Score: 0.7748125944333327


### The LASSO Model

In [6]:
# Final LASSO model trained on the full training data.
# GridSearchCV refits the winning configuration on everything passed to
# clf.fit (x_train, y_train) because refit=True is its default, so the best
# estimator is already the model we want. Reusing it avoids fitting an
# identical model a second time and repeating any solver warnings.
lasso = lasso_best_estimator
final_model = lasso

  positive)


In [7]:
# Report the fitted intercept, then one "<feature> <coefficient>" line per
# training column.
print("intercept:", final_model.intercept_)
variables = x_train.columns
coefficients = final_model.coef_
for name, coefficient in zip(variables, coefficients):
    print(name, coefficient)

intercept: 2.3556421430045162
Days Since Release 0.0008969888841417115
yeezy -2.138417515834348
airjordan 3.2365345343377365
airforce -0.9827397523426478
airmax90 -0.34401210920306713
airmax97 -0.2837801685576994
presto 0.6952847907426327
vapormax -1.5425138197890393
blazer 0.5806614566880155
zoom -2.1647266625242296
react -1.7049611560117834
size_freq 0.24334106122880125
Black 0.4907603583909212
White 0.709215356313826
Grey 0.23800636885293228
Red 1.7845337138263782
Green 0.6164592860192896
Neo 0.1910647317867885
Orange 0.7531739806945558
Tan/Brown 1.8276624000242716
Pink 0.7002403108432268
Blue -2.5354574513469417
Number of Sales -3.631622959562983e-05


In [8]:
# Score the final model on the held-out test split (R² and mean squared error).
y_pred = final_model.predict(x_test)
test_r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
print('R²: %.2f' % test_r2)
print("mean_squared_error: %.2f" % test_mse)

R²: 0.77
mean_squared_error: 0.53


### Undervalued Sneakers - Predicted as of 03/26/2020

In [10]:
# Jordan 1 Retro High Travis Scott --> Released 05/11/2019
# https://stockx.com/air-jordan-1-retro-high-travis-scott?utm_source=google&utm_medium=cpc&utm_campaign=US-JordanGeneralSneakers&utm_campaignid=413888648&content=191928849084&keyword=jordan%201%20retro%20high%20travis%20scott&gclid=CjwKCAjw3-bzBRBhEiwAgnnLCsujrgiqwccfPrfXmgB4yc0Gp_hy7sSxyNEdaB7hFEjdZWrpasJsohoCZtQQAvD_BwE
# Feature values for this one sneaker, keyed by the model's feature names.
undervalued_sneakers = {"Days Since Release": 320,
                        "yeezy": 0,
                        "airjordan": 1,
                        "airforce": 0,
                        "airmax90": 0,
                        "airmax97": 0,
                        "presto": 0,
                        "vapormax": 0,
                        "blazer": 0,
                        "zoom": 0,
                        "react": 0,
                        "size_freq": 0.198857,
                        "Black": 1,
                        "White": 1,
                        "Grey": 0,
                        "Red": 0,
                        "Green": 0,
                        "Neo": 0,
                        "Orange": 0,
                        "Tan/Brown": 1,
                        "Pink": 0, 
                        "Blue": 0,
                        "Number of Sales": 25745}

# Build a one-row frame and select the columns in the exact order used for
# training. A missing or misspelled key then fails loudly with a KeyError
# instead of depending on the hand-typed dict order matching x_train.
undervalued_sneaker = pd.DataFrame([undervalued_sneakers])[list(x_train.columns)]
pred = final_model.predict(undervalued_sneaker)
pred # current price premium 562.9% --> undervalued

array([8.02027998])

In [11]:
# Nike Blazer Mid 77 Vintage Slam Jam --> Released 01/08/2019
# https://stockx.com/nike-blazer-mid-77-vintage-slam-jam-special-slam-jam-box
# Feature values for this one sneaker, keyed by the model's feature names.
# NOTE(review): 01/08/2019 to the 03/26/2020 prediction date is 443 calendar
# days, not 441 -- confirm the release date or the day count. Value left as-is.
undervalued_sneakers = {"Days Since Release": 441,
                        "yeezy": 0,
                        "airjordan": 0,
                        "airforce": 0,
                        "airmax90": 0,
                        "airmax97": 0,
                        "presto": 0,
                        "vapormax": 0,
                        "blazer": 1,
                        "zoom": 0,
                        "react": 0,
                        "size_freq": 0.106677,
                        "Black": 1,
                        "White": 1,
                        "Grey": 0,
                        "Red": 0,
                        "Green": 0,
                        "Neo": 0,
                        "Orange": 0,
                        "Tan/Brown": 0,
                        "Pink": 0, 
                        "Blue": 0,
                        "Number of Sales": 78}

# Build a one-row frame and select the columns in the exact order used for
# training. A missing or misspelled key then fails loudly with a KeyError
# instead of depending on the hand-typed dict order matching x_train.
undervalued_sneaker = pd.DataFrame([undervalued_sneakers])[list(x_train.columns)]
pred = final_model.predict(undervalued_sneaker)
pred # current price premium 381.0% --> undervalued

array([4.55497764])

In [12]:
# adidas Yeezy Boost 350 V2 Tail Light --> Released 02/22/2020
# https://stockx.com/adidas-yeezy-boost-350-v2-tail-light
# Feature values for this one sneaker, keyed by the model's feature names.
undervalued_sneakers = {"Days Since Release": 33,
                        "yeezy": 1,
                        "airjordan": 0,
                        "airforce": 0,
                        "airmax90": 0,
                        "airmax97": 0,
                        "presto": 0,
                        "vapormax": 0,
                        "blazer": 0,
                        "zoom": 0,
                        "react": 0,
                        "size_freq": 0.198857,
                        "Black": 0,
                        "White": 0,
                        "Grey": 1,
                        "Red": 0,
                        "Green": 0,
                        "Neo": 0,
                        "Orange": 1,
                        "Tan/Brown": 0,
                        "Pink": 0, 
                        "Blue": 0,
                        "Number of Sales": 12848}

# Build a one-row frame and select the columns in the exact order used for
# training. A missing or misspelled key then fails loudly with a KeyError
# instead of depending on the hand-typed dict order matching x_train.
undervalued_sneaker = pd.DataFrame([undervalued_sneakers])[list(x_train.columns)]
pred = final_model.predict(undervalued_sneaker)
pred # current price premium 31.8% --> undervalued

array([2.55140481])