Here we'll try to create a new datapoint for the model to predict on, from a CraigsList post.

In [1]:
import requests, json, os, pickle, yagmail, sys
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

from scipy.special import inv_boxcox

import nltk, string, os
from nltk.corpus import stopwords
from nltk import word_tokenize, FreqDist
from nltk.stem.snowball import SnowballStemmer

from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
sys.path.insert(0, '../objects')
sys.path.insert(0, '../data')
sys.path.insert(0, '../pickles')

# Trawl for Prospective Deals

In [3]:
API_KEY = 'RobertBo-cinemaro-PRD-171ca9e35-af8bdcbb' # Enter your API Key/"App ID" Here. Mine was 40 chars long.

In [4]:
FIND_ADVANCED = "findItemsAdvanced" # This is the eBay API endpoint service we'll be querying.

In [5]:
ELEC_GUITARS = '33034'
USED = '3000'
AUCTION = "Auction"
AUCTIONWITHBIN = "AuctionWithBIN"

In [6]:
ct = datetime.utcnow()

In [7]:
days_ahead = 1

In [8]:
now = datetime.now()

In [9]:
two_days_from_now = now + timedelta(days=days_ahead)

In [10]:
utc_ct = f'{ct.year}-'
if len(str(ct.month)) < 2:
    utc_ct += '0'
utc_ct += f'{ct.month}-'
if len(str(ct.day)) < 2:
    utc_ct += '0'
utc_ct += str(ct.day + 2) + 'T'
if len(str(ct.hour)) < 2:
    utc_ct += '0'
utc_ct += f'{ct.hour}:'
if len(str(ct.minute)) < 2:
    utc_ct += '0'
utc_ct += f'{ct.minute}:'
if len(str(ct.second)) < 2:
    utc_ct += '0'
utc_ct += f'{ct.second}.'
if len(str(ct.microsecond)) < 2:    
    utc_ct += '0'
utc_ct += str(ct.microsecond)[:3] + 'Z'

In [11]:
ITEM_FILTER_0 = f'itemFilter(0).name=Condition&itemFilter(0).value={USED}' # Only used guitars
ITEM_FILTER_1 = f'itemFilter(1).name=HideDuplicateItems&itemFilter(1).value=true' # No duplicate listings
ITEM_FILTER_2 = f'itemFilter(2).name=MinPrice&itemFilter(2).value=1' # Only items that sell for > this value
ITEM_FILTER_3 = f'itemFilter(3).name=MaxQuantity&itemFilter(3).value=1' # No lots or batch sales. One item at a time
ITEM_FILTER_4 = f'itemFilter(4).name=MaxPrice&itemFilter(4).value=310' # Only items that sold for < this value
ITEM_FILTER_5 = f'itemFilter(5).name=EndTimeTo&itemFilter(5).value={utc_ct}' # Only ending soonish

In [12]:
def find_current_auctions(PAGE, keywords):
    '''Make a request to the eBay API and return the JSON text of this page number'''
    r = requests.get(
                 f'https://svcs.ebay.com/services/search/FindingService/v1?'
                 f'OPERATION-NAME={FIND_ADVANCED}&'
                 f'X-EBAY-SOA-SECURITY-APPNAME={API_KEY}&'
                 f'RESPONSE-DATA-FORMAT=JSON&'
                 f'REST-PAYLOAD&'
                 f'categoryId={ELEC_GUITARS}&'
                 f'descriptionSearch=true&'
                 f'{ITEM_FILTER_0}&' # USED
                 f'{ITEM_FILTER_1}&' # NO DUPES
                 f'{ITEM_FILTER_2}&' # MINPRICE
                 f'{ITEM_FILTER_3}&' # NO LOTS
                 f'{ITEM_FILTER_4}&' # MAX PRICE
                 f'{ITEM_FILTER_5}&' # END TIME
                 f'keywords={keywords}&'
                 f'paginationInput.pageNumber={str(PAGE)}') # value to be looped through when collecting lotsa data
    if r.json()['findItemsAdvancedResponse'][0].get('searchResult'):
        return r.json()['findItemsAdvancedResponse'][0]['searchResult'][0]['item']
    else:
        return None

In [13]:
# [item['title'] for item in find_current_auctions(1, 'fender+-%28squier%2Csquire%29')]
# [item['title'] for item in find_current_auctions(2, 'gibson+-%28epiphone%29')]

In [14]:
def get_specs(ITEM_ID):
    '''Return the specifics of a single eBay auction. String input.'''
    r = requests.get('http://open.api.ebay.com/shopping?'
                    f'callname=GetSingleItem&'
                    f'responseencoding=JSON&'
                    f'appid={API_KEY}&'
#                     f'siteid=0&' # USA Store
                    f'version=967&' # What is this?
                    f'ItemID={ITEM_ID}&'
                    f'IncludeSelector=Details,ItemSpecifics,TextDescription')
    try:
        return r.json()['Item']
    except KeyError:
        pass

In [15]:
# def persist_page_to_json(PAGE):
#     '''Saves a page of JSON responses to one json per axe'''
#     for i in range(len(PAGE)):
#         with open("trawler_listings/axe_%s.json" % (PAGE[i]['itemId'][0]), 'w') as f:  # writing JSON object
#             json.dump(PAGE[i], f)

In [16]:
# def persist_spec_to_json(spec):
#     '''Writes one page of Axe Specs to one json'''
#     try:
#         with open("trawler_specs/axe_%s.json" % (spec['ItemID']), 'w') as f:  # writing JSON object
#             json.dump(spec, f)
#     except TypeError:
#         pass
#     pass

In [17]:
def trawl_for_guitars(start_page, stop_page, fetch_function, keywords):
    '''Spams the eBay API for pages of AXE DATA'''
    j = 0
    k = 0
    existing_guitar_ids = []
    listings = []
    
    for i in range(start_page+1, stop_page+1):
        page = fetch_function(i, keywords)
        if page:
            for axe in page:
                k += 1
                if axe['itemId'][0] not in existing_guitar_ids:
                    existing_guitar_ids.append(axe['itemId'][0])
                    j += 1
                    print(f'Get {j}')
                    listings.append({'listing': axe,
                                     'specs': get_specs(axe['itemId'][0])})
                else:
                    print('Skip')
    
    print(f'\nChecked {k} guitars')
    print(f'\nGot {j} new guitars')
    
    return listings

### Trawl

In [18]:
prospects = []

In [19]:
prospects.extend(trawl_for_guitars(0,2,find_current_auctions,'american+-%28squier%2Csquire%2Cepiphone%2Cepi%29'))

Get 1
Get 2
Get 3
Get 4
Get 5
Get 6
Get 7
Skip
Skip
Skip
Skip
Skip
Skip
Skip

Checked 14 guitars

Got 7 new guitars


In [20]:
prospects.extend(trawl_for_guitars(0,2,find_current_auctions,'fender+-%28squier%2Csquire%29'))

Get 1
Get 2
Get 3
Get 4
Get 5
Get 6
Get 7
Get 8
Get 9
Get 10
Get 11
Get 12
Get 13
Get 14
Get 15
Get 16
Get 17
Get 18
Get 19
Get 20
Get 21
Get 22
Get 23
Get 24
Get 25
Get 26
Get 27
Get 28
Get 29
Get 30
Get 31
Get 32
Get 33
Get 34
Get 35
Get 36
Get 37
Get 38
Get 39
Get 40
Get 41
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip

Checked 82 guitars

Got 41 new guitars


In [21]:
prospects.extend(trawl_for_guitars(0,2,find_current_auctions, 'gibson+-%28epiphone%29'))

Get 1
Get 2
Get 3
Get 4
Get 5
Get 6
Get 7
Get 8
Get 9
Get 10
Get 11
Get 12
Get 13
Get 14
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip
Skip

Checked 28 guitars

Got 14 new guitars


***

## Load the Prospects and Make Price Estimates

In [22]:
from Axe_Object_memory import Axe

In [23]:
# prospects[0]

In [24]:
guitars = []
for prospect in prospects:
    if prospect.get('specs'):
        try:
            this_axe = Axe(prospect['listing'],prospect['specs'])
            if "LOT OF" not in this_axe.title.upper() and this_axe.price > 90 and this_axe.price < 800:
                if this_axe.string_config and this_axe.string_config < 5:
                    continue
                if this_axe.year and this_axe.year > 2019:
                    continue
                if this_axe.end_time > two_days_from_now or this_axe.end_time < now:
                    continue
                guitars.append(this_axe)
        except ValueError:
            print("skip")
            pass
    else:
        print("skip")
        pass

skip
skip
skip


In [25]:
len(guitars)

31

Establishing some default eBay auction settings here:

This is how you should sell stuff on eBay to maximize guitar price, basically.

In [26]:
# Properties
auction_duration    = pd.Series(np.full(len(guitars),7*24), name = 'auction_duration')
shipping_charged    = pd.Series(np.full(len(guitars),0), name = 'shipping_charged')          
seller_country_US   = pd.Series([1 for i in range(len(guitars))], name = 'seller_country_US')
autopay             = pd.Series([False for guitar in range(len(guitars))], name = 'autopay')
returns             = pd.Series([False for guitar in range(len(guitars))], name = 'returns')
listing_type_FixedPrice = pd.Series([True for guitar in range(len(guitars))], name = 'listing_type_FixedPrice')
ship_type_Free      = pd.Series([1 for guitar in range(len(guitars))], name = 'ship_type_Free')
ship_expedite       = pd.Series([0 for guitar in range(len(guitars))], name = 'ship_expedite')
start_hour          = pd.Series([1 for guitar in range(len(guitars))], name = 'start_hour_(7.667, 11.5]')
end_hour            = pd.Series([1 for guitar in range(len(guitars))], name = 'end_hour_(7.667, 11.5]')
start_weekday_6     = pd.Series([6 for guitar in range(len(guitars))], name = 'start_weekday_6')
end_weekday_6       = pd.Series([6 for guitar in range(len(guitars))], name = 'end_weekday_6')
# returns_time      = pd.Series([0 for guitar in range(len(guitars))], name = 'returns_time')
num_pics            = pd.Series([12 for guitar in range(len(guitars))], name = 'num_pics')
best_offer_enabled  = pd.Series([True for guitar in range(len(guitars))], name = 'best_offer_enabled')
ship_handling_time_2= pd.Series([1 for guitar in range(len(guitars))], name = 'ship_handling_time_2')
seller_positive_percent = pd.Series([1 for guitar in range(len(guitars))], name = 'seller_positive_percent_(99.5, 111.0]')
brand = pd.Series([guitar.brand for guitar in guitars], name = "brand")
body_type           = pd.Series([guitar.body_type for guitar in guitars], name = "body_type")
color               = pd.Series([guitar.color for guitar in guitars], name = "color")
right_left_handed   = pd.Series([guitar.right_left_handed for guitar in guitars], name = "right_left_handed")
string_config       = pd.cut(pd.Series([guitar.string_config for guitar in guitars], name = "string_config"),
                       [0,5,6,11,20])
country_manufacture = pd.Series([guitar.country_manufacture for guitar in guitars], name = "country_manufacture")
model_year = pd.cut(pd.Series([guitar.year for guitar in guitars], name = "model_year"), [1700,1975,1990,1995,2000,2005,2007,2010,2011,2012,2013,2015])

In [27]:
X_dummies = pd.concat([brand, color, country_manufacture, right_left_handed, best_offer_enabled, shipping_charged, returns, autopay, seller_country_US, ship_handling_time_2, listing_type_FixedPrice, ship_expedite, ship_type_Free, num_pics, auction_duration, start_hour, end_hour, start_weekday_6, end_weekday_6, seller_positive_percent, model_year, body_type, string_config],
              axis = 1)

X = pd.get_dummies(X_dummies, drop_first=True)

In [28]:
infile = open('../pickles/bonus_columns','rb')
bonus_columns = pickle.load(infile)
infile.close()

In [29]:
fillers = []
remove = []
for col in bonus_columns:
    if col not in X.columns:
#         print(f'YAYAYAYA: {col}')
        filler = pd.Series(np.full(len(guitars),0), name=col)
        fillers.append(filler)
for col in X.columns:
    if col not in bonus_columns:
#         print(f'NUNUNUNU: {col}')
        X.drop(col, axis=1, inplace=True)
        
fillers_df = pd.concat(fillers, axis=1)

In [30]:
X_nontext = pd.concat([X, fillers_df], axis=1)

In [31]:
infile = open('../pickles/saved_scaler','rb')
scaler = pickle.load(infile)
infile.close()

In [32]:
X_nontext_scaled = pd.DataFrame(scaler.transform(X_nontext))

  """Entry point for launching an IPython kernel.


In [33]:
stemmer = SnowballStemmer("english")

In [34]:
stopwords_list = stopwords.words('english') + list(string.punctuation)
stopwords_list += ["''", '""', '...', '``', ",", ".", ":", "'s", "--","â€™"]

In [35]:
def assemble_guitar_document(axe):
    document = axe.title + ' '
    if axe.year != None:
        document += (str(axe.year) + ' ')
    if axe.material != None:
        document += axe.material + ' '
    if axe.model != None:
        document += axe.model + ' ' 
    if axe.brand != None:
        document += axe.brand + ' '
    if axe.subtitle != None:
        document += axe.subtitle + ' '
    if axe.condition_description != None:
        document += axe.condition_description + ' '
    if axe.description != None:
        document += axe.description
    return document

In [36]:
def process_doc(doc):
    stopwords_removed = ''
    tokens = nltk.word_tokenize(doc)
    for i in range(len(tokens)):
        if tokens[i].lower() not in stopwords_list and tokens[i] not in string.punctuation:
            stopwords_removed += stemmer.stem(tokens[i]) + ' '
    return stopwords_removed

In [37]:
raw_corpus = [assemble_guitar_document(guitar).lower() for guitar in guitars]

In [38]:
processed_text = pd.Series(list(map(process_doc, raw_corpus)), name = 'text')

In [39]:
infile = open('../pickles/saved_vectorizer','rb')
vectorizer = pickle.load(infile)
infile.close()

In [40]:
tfidf = vectorizer.transform(processed_text)
tfidf_df = pd.DataFrame(tfidf.toarray(), columns=vectorizer.get_feature_names())

In [41]:
X_ready = pd.concat([X_nontext, tfidf_df], axis=1)

In [42]:
infile = open('../pickles/lasso_model','rb')
lasso_model = pickle.load(infile)
infile.close()

In [43]:
y_preds = lasso_model.predict(X_ready)

In [44]:
bxcx_lam = .3
y_preds_inv = inv_boxcox(y_preds, bxcx_lam)

But now...which ones could yield a respectable margin?

In [45]:
predicted_df = pd.concat([pd.Series(y_preds_inv), pd.Series([guitar.initial_price + guitar.price_shipping for guitar in guitars]),
                          pd.Series(y_preds_inv) / pd.Series([guitar.initial_price for guitar in guitars]),
                          pd.Series([guitar.title for guitar in guitars]), 
                          pd.Series([guitar.url for guitar in guitars]), 
                         pd.Series([guitar.pic for guitar in guitars])],
                         axis=1)

predicted_df.columns = ['Estimate', 'Price', 'Ratio','Title','Link', 'Pic']

In [46]:
highest_value = predicted_df.sort_values('Estimate', ascending=False)
hv_10 = highest_value.iloc[:10,:]

most_underrated = predicted_df.sort_values('Ratio', ascending=False)
m_u = most_underrated.iloc[:10,:]

In [47]:
hv_10
m_u

Unnamed: 0,Estimate,Price,Ratio,Title,Link,Pic
28,948.387416,129.98,9.484823,Slick SL-52 Project No Pickups Xavier,http://www.ebay.com/itm/Slick-SL-52-Project-No...,http://thumbs4.ebaystatic.com/m/mNcfDTTRuknWja...
8,947.019576,100.0,9.470196,Vantage Guitar With Fender 15G Amp And Chord,http://www.ebay.com/itm/Vantage-Guitar-Fender-...,http://thumbs2.ebaystatic.com/m/mz-xw9NJV2So5e...
10,949.467272,139.98,8.632305,Fender FA-135CE Electric Acoustic Guitar W/Roa...,http://www.ebay.com/itm/Fender-FA-135CE-Electr...,http://thumbs1.ebaystatic.com/m/my59PL5B8eHZO7...
21,989.878429,130.0,8.248987,Wudtone PRS DD Tremolo,http://www.ebay.com/itm/Wudtone-PRS-DD-Tremolo...,http://thumbs1.ebaystatic.com/m/mgt6Hgfkz1_8Hi...
4,865.819221,105.0,8.245897,Fender Standard Strat HSS with Locking Tremolo...,http://www.ebay.com/itm/Fender-Standard-Strat-...,http://thumbs2.ebaystatic.com/m/mXPq8C1Zc9S7Mk...
14,709.654542,139.99,7.097255,FENDER Electric Guitar STARCASTER CHINA (BE800...,http://www.ebay.com/itm/FENDER-Electric-Guitar...,http://thumbs2.ebaystatic.com/m/mRzOz26qAiHm7S...
16,943.701176,150.5,6.27044,WINDSONG Electric Guitar Semi Hollow Body H&H ...,http://www.ebay.com/itm/WINDSONG-Electric-Guit...,http://thumbs1.ebaystatic.com/m/mH70HyCSe0AmCP...
24,871.172752,148.0,5.886302,Fender Starcaster Stratocaster Electric Guitar...,http://www.ebay.com/itm/Fender-Starcaster-Stra...,http://thumbs2.ebaystatic.com/m/mZHy6tdjlGjSmY...
27,918.545446,159.0,5.777015,Dean Michael Schenker Flying V Guitar Gibson S...,http://www.ebay.com/itm/Dean-Michael-Schenker-...,http://thumbs3.ebaystatic.com/m/mGVJV0if6_d7gr...
22,891.405402,161.0,5.53668,Brownsville New York 6-String Electric Guitar ...,http://www.ebay.com/itm/Brownsville-New-York-6...,http://thumbs1.ebaystatic.com/m/muXVju6R-X2qxO...


In [48]:
email = "<h3>10 Most Underrated:</h3>\n"

email += "*******************************\n"

for i in range(10):
    email += f"<a href = {m_u['Link'].values[i]}>"
    email += f"\n{m_u['Title'].values[i]}\n\n:"
    email += f"<img src = {m_u['Pic'].values[i]}></img></a>\n"
    email += f"\nHigh bid: ${m_u['Price'].values[i]}\n\n"
    email += "*******************************\n"
    
email += '\n<h3>10 Highest Potential Value:</h3>\n'
email += "*******************************\n"
    
for i in range(10):
    email += f"<a href = {hv_10['Link'].values[i]}>"
    email += f"\n{hv_10['Title'].values[i]}\n\n"
    email += f"<img src = {hv_10.Pic.values[i]}></img></a>\n"
    email += f"\nHigh bid: ${hv_10['Price'].values[i]}\n\n"
    email += "*******************************\n"

In [49]:
# print(email)

In [50]:
receiver = "cinemarob1@gmail.com"
yag = yagmail.SMTP("gu1tarb1trag3@gmail.com")
yag.send(
    to=receiver,
    subject=f"GuitArbitrage - Auctions Ending Within 2 Days",
    contents=email)

{}