In [1]:
import boto3

s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping/')

keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping')['Contents'] if '.csv' in o['Key']]

keys

['shopping/08-03-2020.csv',
 'shopping/08-04-2020.csv',
 'shopping/08-05-2020.csv',
 'shopping/08-06-2020.csv',
 'shopping/08-07-2020.csv',
 'shopping/08-08-2020.csv',
 'shopping/08-09-2020.csv',
 'shopping/08-10-2020.csv',
 'shopping/08-12-2020.csv',
 'shopping/08-13-2020.csv',
 'shopping/08-14-2020.csv',
 'shopping/08-15-2020.csv',
 'shopping/08-16-2020.csv']

In [31]:
import pandas as pd
df = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in keys]).reset_index(drop=True)

In [32]:
import itemSpecificCleanUp as iscu 

df.loc[~df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[~df['ItemSpecifics-Type'].isna()]['ItemSpecifics-Type'].apply(iscu.cleanUpType)
df.loc[~df['ItemSpecifics-Brand'].isna(),'ItemSpecifics-Brand'] = df[~df['ItemSpecifics-Brand'].isna()]['ItemSpecifics-Brand'].apply(iscu.cleanUpBrand)
df.loc[~df['ItemSpecifics-Skill Level'].isna(), 'ItemSpecifics-Skill Level'] = df[~df['ItemSpecifics-Skill Level'].isna()]['ItemSpecifics-Skill Level'].apply(iscu.cleanUpSkill)

df.loc[df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[df['ItemSpecifics-Type'].isna()]['Title'].apply(iscu.extractTypeFromTitle)
df.loc[df['ItemSpecifics-Brand'].isna(),'ItemSpecifics-Brand'] = df[df['ItemSpecifics-Brand'].isna()]['Title'].apply(iscu.extractBrandFromTitle)

df.head()

Unnamed: 0,ItemID,EndTime,ViewItemURLForNaturalSearch,ListingType,Location,GalleryURL,PictureURL,PrimaryCategoryID,PrimaryCategoryName,BidCount,...,ConvertedBuyItNowPrice-_currencyID,ConvertedBuyItNowPrice-value,ItemSpecifics-NameValueList-Name,ItemSpecifics-NameValueList-Value,DiscountPriceInfo-OriginalRetailPrice-_currencyID,DiscountPriceInfo-OriginalRetailPrice-value,DiscountPriceInfo-PricingTreatment,DiscountPriceInfo-SoldOneBay,DiscountPriceInfo-SoldOffeBay,Quantity
0,303116830074,2020-08-04T12:56:22.000Z,https://www.ebay.com/itm/Jupiter-JAS-769GN-sax...,FixedPriceItem,"Saint Louis, Missouri",https://thumbs3.ebaystatic.com/pict/3031168300...,['https://i.ebayimg.com/00/s/MTM4MlgxMjk3/z/HU...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,
1,303635927434,2020-08-04T12:57:58.000Z,https://www.ebay.com/itm/Vintage-Conn-Baritone...,Chinese,"Deep River, Connecticut",https://thumbs3.ebaystatic.com/pict/3036359274...,['https://i.ebayimg.com/00/s/MTA2NlgxNjAw/z/gl...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,35,...,,,,,,,,,,
2,254668152053,2020-08-04T13:00:31.000Z,https://www.ebay.com/itm/LAMONTE-TENOR-SAXOPHO...,Chinese,"Trenton, New Jersey",https://thumbs2.ebaystatic.com/pict/2546681520...,['https://i.ebayimg.com/00/s/MTYwMFgxMTk5/z/Yd...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,
3,233663229380,2020-08-04T13:12:08.000Z,https://www.ebay.com/itm/Used-saxophon-alto-Ya...,Chinese,Bern,https://thumbs1.ebaystatic.com/pict/2336632293...,['https://i.ebayimg.com/00/s/NzE5WDEwMjQ=/z/rj...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,
4,233663229383,2020-08-04T13:12:08.000Z,https://www.ebay.com/itm/Used-saxophon-alto-Ya...,Chinese,Bern,https://thumbs4.ebaystatic.com/pict/2336632293...,['https://i.ebayimg.com/00/s/NzE4WDEwMjQ=/z/WA...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,


In [8]:

df['EndTime'] = pd.to_datetime(df['EndTime'])

In [84]:
import plotly.graph_objects as go
import plotly.express as px


def typePieFig():
    colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

    fig = go.Figure(data=[go.Pie(labels=df['ItemSpecifics-Type'].value_counts().index, 
    values = df['ItemSpecifics-Type'].value_counts().values,
    hole=.3,
    )])
    fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                    marker=dict(colors=colors, line=dict(color='#000000', width=2)))
    return fig

In [159]:
def sunburstFig():
    df1 = df.groupby(['ItemSpecifics-Brand','ItemSpecifics-Type'])['ConvertedCurrentPrice-value'].median().reset_index()
    df1['Count'] = df.groupby(['ItemSpecifics-Brand','ItemSpecifics-Type']).size().values
    fig = px.sunburst(df1, 
    path=['ItemSpecifics-Brand', 'ItemSpecifics-Type'], 
    values='Count',
    color = 'ConvertedCurrentPrice-value',
    color_continuous_scale='RdBu')
    return fig

In [168]:
def choroplethFig():
    fig = go.Figure(data = go.Choropleth(
        locations = df['ItemSpecifics-Country/Region of Manufacture'].value_counts().index,
        z =  df['ItemSpecifics-Country/Region of Manufacture'].value_counts().values,
        colorscale = 'purp',
        #colorbar = {'tickvals':[0,2,4,6,8,10],'ticktext':["0", "2", "4", "6", "8", "10+"]},
        #colorscale = ["rgb(230,230,230)", "rgb(115, 89, 164)"],
        #zmin = 0,
        #zmax = 10,
        # hoverinfo = 'skip',
        # autocolorscale=False,
        locationmode = 'country names',
        marker_line_width = 0.4,
    ))
    return fig


In [193]:
import numpy as np

df["ConvertedCurrentPrice-value(LOG)"] = df["ConvertedCurrentPrice-value"].apply(np.log10)
fig = go.Figure()
df
fig = px.histogram(df[~df["ItemSpecifics-Type"].isna()], 
x="ConvertedCurrentPrice-value(LOG)",
color="ItemSpecifics-Type")
fig.update_traces(opacity=0.75)
fig.update_xaxes(
    title="Cost",
    tickvals=[np.log10(x) for x in (100,500,1000, 5000, 10000)],
    ticktext=["$100", "$500", "$1k", "$5k", "$10k"],
)



In [235]:
order = df['ItemSpecifics-Type'].value_counts().index.values
df.groupby("ItemSpecifics-Type")["ConvertedCurrentPrice-value(LOG)"].apply(list).loc[order]

data = df.groupby("ItemSpecifics-Type")["ConvertedCurrentPrice-value(LOG)"].apply(list).loc[order].values
types = order
colors = sns.color_palette("Purples", len(df['ItemSpecifics-Type'].value_counts().values)).as_hex()[::-1]
fig = go.Figure()
for data_line, color, sax in zip(data, colors, types):
    fig.add_trace(go.Violin(x=data_line, line_color=color, name = sax))
fig.update_traces(orientation='h', side='positive', width=3, points=False)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.update_layout(plot_bgcolor='#7b7d8d')
fig.update_xaxes(
        title="Price",
        tickvals=[np.log10(x) for x in (100,500,1000, 5000, 10000)],
        ticktext=["$100", "$500", "$1k", "$5k", "$10k"],
    )
fig.update_layout(title = 'Distribution of Price for Sax Types',
)
fig.write_html('price-histogram.html')

'001'

In [1]:
import boto3
import pandas as pd

s3 = boto3.client('s3')
s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping/')

keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping')['Contents'] if '.csv' in o['Key']]

df = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in keys])

In [2]:
keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'description')['Contents'] if '.csv' in o['Key']]

df1 = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in keys])

In [3]:
df = df.drop_duplicates(subset='ItemID', keep='last')

df = df.set_index('ItemID').join(df1.set_index('ItemID'))



In [4]:
df.isna().sum()

EndTime                                                 0
ViewItemURLForNaturalSearch                             0
ListingType                                             0
Location                                                0
GalleryURL                                             69
PictureURL                                              2
PrimaryCategoryID                                       0
PrimaryCategoryName                                     0
BidCount                                                0
ListingStatus                                           0
TimeLeft                                                0
Title                                                   0
Country                                                 0
AutoPay                                                 0
ConditionID                                            15
ConditionDisplayName                                   15
GlobalShipping                                       3986
ConditionDescr

In [5]:
specifics = pd.read_csv('SaxophoneSpecifics.csv', 
delimiter = '\t', 
error_bad_lines=False, 
warn_bad_lines=False).groupby('Name')['Value'].unique()

mouthpieces = ['Berg Larsen', 'Otto Link', 'Schreiber', 'Vandoren']



In [6]:
import numpy as np
sax_brands = ['P. Mauriat', 'Dakota', 'Antigua', 'Buescher', 'Vito', 'Amati', 'Armstrong', 'B&S', 'Berg Larsen', 'Boosey & Co', 'Buescher','Buffet', 'Buffet Crampon', 'Bundy', 'Cannonball', 'Conn', 'Elkhart', 'Gemeinhardt', 'Jupiter', 'Keilwerth', 'King', 'Martin', 'Odyssey', 'Selmer', 'Trevor James', 'Yamaha', 'Yanagisawa', 'Unbranded', 'Jean Baptiste']

def cleanUpBrand(brand):
    if brand != brand:
        return np.nan
    if brand.title() in sax_brands:
        return brand.title()
    brand = brand.title()

    intersect = set(brand.split(" ")).intersection(sax_brands)
    if len(intersect) == 1:
        # print(f'Mapping "{brand}" to "{list(intersect)[0]}"')
        return list(intersect)[0]
    if 'Conn' in brand:
        # print(f'Mapping "{brand}" to "Conn"')
        return 'Conn'
    if 'unbranded' in brand.lower():
        # print(f'Mapping "{brand}" to "Unbranded"')
        return 'Other'
    if 'selmer' in brand.lower():
        return 'Selmer'
    else:
        return 'Other'

def extractBrandFromTitle(title):

    possible_values = [x for x in sax_brands if x in title.title()]

    if len(possible_values) == 1:
        return possible_values[0]
    if len(possible_values) > 1:
        # Remove Issues Like Selmer Bundy, Buffet Crampon and Conn Elkhart
        possible_values = [x for x in possible_values if x not in ['Buffet', 'Selmer', 'Conn']]
        if len(possible_values) == 1:
            return possible_values[0]

    return np.nan

print(df['ItemSpecifics-Brand'].isna().sum())

df['ItemSpecifics-Brand'] = df['ItemSpecifics-Brand'].apply(cleanUpBrand)

print(df['ItemSpecifics-Brand'].isna().sum())

df.loc[df['ItemSpecifics-Brand'].isna(), 'ItemSpecifics-Brand'] = df[df['ItemSpecifics-Brand'].isna()]['Title'].apply(extractBrandFromTitle)

print(df['ItemSpecifics-Brand'].isna().sum())

297
297
209


In [7]:
import itemSpecificCleanUp  as iscu

df.loc[~df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[~df['ItemSpecifics-Type'].isna()]['ItemSpecifics-Type'].apply(iscu.cleanUpType)
df.loc[df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[df['ItemSpecifics-Type'].isna()]['Title'].apply(iscu.extractTypeFromTitle)



In [8]:
from bs4 import BeautifulSoup

df1 = df[df['ItemSpecifics-Brand']=='Selmer']
df1['Model'] = df1['Title'].apply(iscu.selmerModel)
print(df1['Model'].isna().mean())

df1 = df[df['ItemSpecifics-Brand']=='Yamaha']
df1['Model'] = df1['Title'].apply(iscu.yamahaModel)
print(df1['Model'].isna().mean())


0.33553719008264465
0.1559633027522936


In [9]:

s3 = boto3.client('s3')
s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping/')

keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping')['Contents'] if '.csv' in o['Key']]

df = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in keys])

df.loc[~df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[~df['ItemSpecifics-Type'].isna()]['ItemSpecifics-Type'].apply(iscu.cleanUpType)
df.loc[~df['ItemSpecifics-Brand'].isna(),'ItemSpecifics-Brand'] = df[~df['ItemSpecifics-Brand'].isna()]['ItemSpecifics-Brand'].apply(iscu.cleanUpBrand)
df.loc[~df['ItemSpecifics-Skill Level'].isna(), 'ItemSpecifics-Skill Level'] = df[~df['ItemSpecifics-Skill Level'].isna()]['ItemSpecifics-Skill Level'].apply(iscu.cleanUpSkill)

df.loc[df['ItemSpecifics-Type'].isna(), 'ItemSpecifics-Type'] = df[df['ItemSpecifics-Type'].isna()]['Title'].apply(iscu.extractTypeFromTitle)
df.loc[df['ItemSpecifics-Brand'].isna(),'ItemSpecifics-Brand'] = df[df['ItemSpecifics-Brand'].isna()]['Title'].apply(iscu.extractBrandFromTitle)

df['Model'] = np.nan
df.loc[df['ItemSpecifics-Brand']=='Selmer', 'Model'] = df.loc[df['ItemSpecifics-Brand']=='Selmer', 'Title'].apply(iscu.selmerModel)
df.loc[df['ItemSpecifics-Brand']=='Yamaha', 'Model'] = df.loc[df['ItemSpecifics-Brand']=='Yamaha', 'Title'].apply(iscu.yamahaModel)
df.loc[df['ItemSpecifics-Brand']=='Yanagisawa', 'Model'] = df.loc[df['ItemSpecifics-Brand']=='Yanagisawa', 'Title'].apply(iscu.yanagisawaModel)

available_indicators = ['ItemSpecifics-Type', 'ItemSpecifics-Brand', 'ItemSpecifics-Skill Level', 'ConditionDisplayName']

In [10]:
import plotly.express as px
import plotly.graph_objects as go



def sunburstFig(df, parent_hierarchy, color_val = 'ConvertedCurrentPrice-value'):
    # df1 = df.groupby(['ItemSpecifics-Brand','ItemSpecifics-Type', 'Model'])['ConvertedCurrentPrice-value'].median().reset_index()
    
    values = []
    ids = []
    labels = []
    parents = []
    color = []

    for i in range(len(parent_hierarchy) , 0, -1):

        df1 = df.groupby(parent_hierarchy[:i])[color_val].median().reset_index()
        df1['Count'] = df.groupby(parent_hierarchy[:i]).size().values

        values += df1['Count'].values.tolist()
        ids += df1.apply(lambda x: ' - '.join([str(x[col]) for col in parent_hierarchy[:i]]), axis=1).values.tolist()
        labels += df1[parent_hierarchy[i-1]].values.tolist()
        parents += df1.apply(lambda x: ' - '.join([str(x[col]) for col in parent_hierarchy[:i-1]]), axis=1).values.tolist()
        color += df1['ConvertedCurrentPrice-value'].values.tolist()

    fig = go.Figure(data = go.Sunburst(
    ids = ids,
    values = values,
    parents = parents,
    labels = labels,
    branchvalues="total",
    # color = color
    marker = dict(
        # colors = color,
        # colorscale = 'Purp'
    )
    
    ))
    fig.update_layout(coloraxis_colorbar=dict(
        title="Price",),
        title = "Brand, Type, Price Sunburst Chart"
    )

    return fig

parent_hierarchy = ['ItemSpecifics-Brand','ItemSpecifics-Type']

fig = sunburstFig(df, parent_hierarchy)

fig.write_html('brand-type-sunburst.html')

In [11]:
df[df['ItemSpecifics-Type'].isna()]['Title']
df['ItemSpecifics-Type'].isna().mean()

0.12260536398467432

In [12]:
s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'description/')
description_keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'description')['Contents'] if '.csv' in o['Key']]
df_desc = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in description_keys])
df = df[~df['ItemID'].duplicated()].set_index('ItemID').join(df_desc[~df_desc['ItemID'].duplicated()].set_index('ItemID'))


In [13]:
df['Description'].sample(1).values

array(['Professional Eb Baritone Saxophone gold body Low A +case Material: high grade yellow BrassSurface: Gold surface，Tone: EbLow A key,High F key,high pitch F#,Front F#Free professional CaseFree MouthpieceFree ReedsFree necksFree Cleaning cloth,Great material, first-class workmanship. Great tone We will reply to your enquiries as immediately as possible. However, please note that we have 12-16 hours time difference with Western Europe and a 8 hours time difference with U.S.A. Therefore, we promise that we will do our best to answer your emails within 24 hours. I ship the item by Economy shipping , you can use the track number I provide to you, to watch the process of shiping. Normally, it will take 10-30 working days for you to get the item. INTERNATIONAL BUYERS - PLEASE NOTE: In order to smooth the rapid delivery of your goods, we hope you can provide a phone number.'],
      dtype=object)

In [14]:
df['ConditionID'].value_counts()

3000.0    1938
1000.0    1832
1500.0     183
2500.0     133
7000.0     111
2000.0       7
5000.0       4
4000.0       2
2750.0       1
Name: ConditionID, dtype: int64

In [15]:
import dash

dash.__version__

'1.14.0'

In [16]:


df['ConditionID'].apply(cleanUpCondition)

NameError: name 'cleanUpCondition' is not defined

In [17]:
df.columns

Index(['EndTime', 'ViewItemURLForNaturalSearch', 'ListingType', 'Location',
       'GalleryURL', 'PictureURL', 'PrimaryCategoryID', 'PrimaryCategoryName',
       'BidCount', 'ListingStatus', 'TimeLeft', 'Title', 'Country', 'AutoPay',
       'ConditionID', 'ConditionDisplayName', 'GlobalShipping',
       'ConditionDescription', 'ConvertedCurrentPrice-_currencyID',
       'ConvertedCurrentPrice-value', 'ItemSpecifics', 'ItemSpecifics-Brand',
       'ItemSpecifics-Type', 'ItemSpecifics-Skill Level',
       'ItemSpecifics-Body Finish', 'ItemSpecifics-Body Material',
       'ItemSpecifics-Key Finish', 'ItemSpecifics-Custom Bundle',
       'ItemSpecifics-Modified Item', 'ItemSpecifics-Modified Description',
       'ItemSpecifics-Country/Region of Manufacture', 'QuantityAvailableHint',
       'QuantityThreshold', 'BuyItNowAvailable',
       'ConvertedBuyItNowPrice-_currencyID', 'ConvertedBuyItNowPrice-value',
       'ItemSpecifics-NameValueList-Name', 'ItemSpecifics-NameValueList-Value',
    

In [18]:
import ast
import requests
import numpy as np
# import cv2
url = df['PictureURL'].apply(lambda x: ast.literal_eval(x) if x==x else x).sample(1).values[0][0]
response = requests.get(url)
# response.content
len(response.content)

115061

In [19]:
cond1 = df['PictureURL'].apply(lambda x: len(ast.literal_eval(x)) if x==x else x)>10
cond2 = df['ItemSpecifics-Type'].isin(['Alto', 'Tenor', 'Soprano', 'Baritone/Bass'])
df[(cond1) & (cond2)][['PictureURL', 'ItemSpecifics-Type']]



Unnamed: 0_level_0,PictureURL,ItemSpecifics-Type
ItemID,Unnamed: 1_level_1,Unnamed: 2_level_1
303116830074,['https://i.ebayimg.com/00/s/MTM4MlgxMjk3/z/HU...,Alto
303635927434,['https://i.ebayimg.com/00/s/MTA2NlgxNjAw/z/gl...,Baritone/Bass
254668152053,['https://i.ebayimg.com/00/s/MTYwMFgxMTk5/z/Yd...,Tenor
233663229380,['https://i.ebayimg.com/00/s/NzE5WDEwMjQ=/z/rj...,Alto
233663229383,['https://i.ebayimg.com/00/s/NzE4WDEwMjQ=/z/WA...,Alto
...,...,...
312339546823,['https://i.ebayimg.com/00/s/MTA3NFgxMDAw/z/he...,Alto
312339561349,['https://i.ebayimg.com/00/s/OTMzWDkwMA==/z/I8...,Tenor
324241885097,['https://i.ebayimg.com/00/s/NTk1WDQ4Mw==/z/Np...,Tenor
324241885146,['https://i.ebayimg.com/00/s/NDI3WDY0MA==/z/U4...,Tenor


In [20]:
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit

cond1 = df['PictureURL'].apply(lambda x: len(ast.literal_eval(x)) if x==x else x)>8
cond2 = df['ItemSpecifics-Type'].isin(['Alto', 'Tenor', 'Soprano', 'Baritone/Bass'])

X = df[(cond1) & (cond2)]['PictureURL']
y = df[(cond1) & (cond2)]['ItemSpecifics-Type']


X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify = y, train_size = 0.8)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=42, stratify = y_train, train_size = 0.8)

In [41]:
from PIL import Image
import requests 
from io import BytesIO
import matplotlib.pyplot as plt
import matplotlib.animation as animation

url = 'https://i.ebayimg.com/00/s/MTIwMFgxNjAw/z/bQ0AAOSwUwRfQGNF/$_57.JPG?set_id=8800005007'

def preprocessImage(url, newsize=(128,128)):
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))
    img = img.resize(newsize)
    return np.array(img)

img = preprocessImage(url)

ValueError: Found array with dim 3. the normalize function expected <= 2.

In [22]:
def showImagesHorizontally(list_of_files):
    fig = plt.figure(figsize=(20,10))
    number_of_files = len(list_of_files)
    for i in range(number_of_files):
        a=fig.add_subplot(1,number_of_files,i+1)
        plt.imshow(list_of_files[i],cmap='Greys_r')
        plt.axis('off')

showImagesHorizontally(images)

<Figure size 1440x720 with 0 Axes>

In [30]:
from tensorflow import keras
from tensorflow.keras import layers

def imageInput():
    img_input = keras.Input(shape=(64, 64, 3))
    x = layers.Conv2D(16, 3, activation="relu")(img_input)
    x = layers.Conv2D(32, 3, activation="relu")(x)
    x = layers.MaxPooling2D(3)(x)
    conv_output = layers.Conv2D(32, 3, activation="relu")(x)
    return keras.Model(img_input, conv_output)


inputs = [keras.Input(shape=(64, 64, 3)) for i in range(12)]

x = layers.concatenate([imageInput()(inputs[i]) for i in range(12)])
pred = layers.Dense(4, name="priority")(x)
model = keras.Model(
    inputs=inputs,
    outputs=pred,
)
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[
        keras.losses.CategoricalCrossentropy(from_logits=True),
    ],
)

In [103]:
keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'shopping')['Contents'] if '.csv' in o['Key']]
desc_keys = [o['Key'] for o in s3.list_objects(Bucket = 'ebayfindingdata', Prefix = 'description')['Contents'] if '.csv' in o['Key']]

df1 = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in keys])
desc_df = pd.concat([pd.read_csv(s3.get_object(Bucket = 'ebayfindingdata', Key = k)['Body']) for k in desc_keys])


In [105]:
df1.join(desc_df.set_index('ItemID'), on = 'ItemID')

Unnamed: 0,ItemID,EndTime,ViewItemURLForNaturalSearch,ListingType,Location,GalleryURL,PictureURL,PrimaryCategoryID,PrimaryCategoryName,BidCount,...,ConvertedBuyItNowPrice-value,ItemSpecifics-NameValueList-Name,ItemSpecifics-NameValueList-Value,DiscountPriceInfo-OriginalRetailPrice-_currencyID,DiscountPriceInfo-OriginalRetailPrice-value,DiscountPriceInfo-PricingTreatment,DiscountPriceInfo-SoldOneBay,DiscountPriceInfo-SoldOffeBay,Quantity,Description
0,303116830074,2020-08-04T12:56:22.000Z,https://www.ebay.com/itm/Jupiter-JAS-769GN-sax...,FixedPriceItem,"Saint Louis, Missouri",https://thumbs3.ebaystatic.com/pict/3031168300...,['https://i.ebayimg.com/00/s/MTM4MlgxMjk3/z/HU...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,very nice Jupiter Jas-769GN Saxophone Eb alto ...
0,303116830074,2020-08-04T12:56:22.000Z,https://www.ebay.com/itm/Jupiter-JAS-769GN-sax...,FixedPriceItem,"Saint Louis, Missouri",https://thumbs3.ebaystatic.com/pict/3031168300...,['https://i.ebayimg.com/00/s/MTM4MlgxMjk3/z/HU...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,very nice Jupiter Jas-769GN Saxophone Eb alto ...
1,303635927434,2020-08-04T12:57:58.000Z,https://www.ebay.com/itm/Vintage-Conn-Baritone...,Chinese,"Deep River, Connecticut",https://thumbs3.ebaystatic.com/pict/3036359274...,['https://i.ebayimg.com/00/s/MTA2NlgxNjAw/z/gl...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,35,...,,,,,,,,,,"Fresh from a local estate, you are bidding on ..."
1,303635927434,2020-08-04T12:57:58.000Z,https://www.ebay.com/itm/Vintage-Conn-Baritone...,Chinese,"Deep River, Connecticut",https://thumbs3.ebaystatic.com/pict/3036359274...,['https://i.ebayimg.com/00/s/MTA2NlgxNjAw/z/gl...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,35,...,,,,,,,,,,"Fresh from a local estate, you are bidding on ..."
2,254668152053,2020-08-04T13:00:31.000Z,https://www.ebay.com/itm/LAMONTE-TENOR-SAXOPHO...,Chinese,"Trenton, New Jersey",https://thumbs2.ebaystatic.com/pict/2546681520...,['https://i.ebayimg.com/00/s/MTYwMFgxMTk5/z/Yd...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,This is a Lamonte tenor sax. Made in Italy. Th...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,353153512515,2020-09-29T07:12:08.000Z,https://www.ebay.com/itm/Set-2-Selmer-Paris-Al...,FixedPriceItem,Tokyo,https://thumbs4.ebaystatic.com/pict/3531535125...,['https://i.ebayimg.com/00/s/NTQwWDcyMA==/z/LZ...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,DescriptionBEAUTIFULNo noticeable scratch or d...
182,224061856934,2020-09-29T07:50:46.000Z,https://www.ebay.com/itm/Yamaha-Yas-62-Yas-62S...,FixedPriceItem,Seoul,,['https://i.ebayimg.com/00/s/MTYwMFgxNjAw/z/wh...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,Yamaha Yas-62/Yas-62S 04 Alto Saxophone Plated...
183,114327903428,2020-09-29T08:45:51.000Z,https://www.ebay.com/itm/Yamaha-YAS-200ADII-Ad...,FixedPriceItem,"Lafayette, Louisiana",https://thumbs1.ebaystatic.com/pict/1143279034...,['https://i.ebayimg.com/00/s/MTIwMFgxNjAw/z/6w...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,This Sax was bought for my son approx 8 years ...
184,233633659685,2020-09-29T08:58:23.000Z,https://www.ebay.com/itm/SYLPHIDE-Alto-Sax-A-2...,FixedPriceItem,Yokohama,https://thumbs2.ebaystatic.com/pict/2336336596...,['https://i.ebayimg.com/00/s/ODAwWDEyMDA=/z/wE...,16231,Musical Instruments & Gear:Wind & Woodwind:Ban...,0,...,,,,,,,,,,Item DescriptionSylpheed Alto Sax A-2000 Disas...
