In [1]:
import json
import pandas as pd
import boto3
from decimal import Decimal
import requests
from bs4 import BeautifulSoup
import time
from dateutil import parser

# Shared DynamoDB resource handle, pinned to region us-east-2.
# pullDynamo() looks up its table through this module-level object.
dynamodb = boto3.resource('dynamodb', region_name='us-east-2')

In [2]:
def pullDynamo(table=None):
    """Read every item of the listings table into a DataFrame.

    A single DynamoDB ``scan`` call may return only part of the table; when it
    does, the response carries a ``LastEvaluatedKey`` that has to be passed
    back as ``ExclusiveStartKey`` to fetch the next page. All pages are
    collected here so the caller always sees the complete table.

    Parameters
    ----------
    table : optional
        Object exposing a DynamoDB-style ``scan(**kwargs)`` method. Defaults
        to the ``usedPlates`` table of the module-level ``dynamodb`` resource.

    Returns
    -------
    tuple
        ``(df, n)`` where ``df`` holds one row per stored item (an empty
        DataFrame when the table has no items) and ``n`` is the number of
        rows as a numpy integer.
    """
    # Local import: numpy ships with pandas but is not imported at file level.
    import numpy as np

    if table is None:
        table = dynamodb.Table('usedPlates')

    items = []
    response = table.scan()
    items.extend(response.get('Items', []))
    # Keep scanning until DynamoDB stops handing back a continuation key.
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        items.extend(response.get('Items', []))

    df = pd.DataFrame(items)
    # Row count rather than df.count()[0]: the latter fails on an empty frame
    # and only counts non-null cells of whichever column happens to be first.
    # Kept as a numpy integer because pullData() combines this value with a
    # range() object, which only works for numpy scalars.
    return df, np.int64(len(df))

In [13]:
def _fetchListings(pageUrl, timeout=30):
    """Download one results page and return its listing tags ([] on an HTTP error)."""
    r = requests.get(pageUrl, timeout=timeout)
    if not r.ok:
        print("request for results page failed with status " + str(r.status_code))
        return []
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup.find_all(class_="s-item s-item__pl-on-bottom")


def _textAfter(span):
    """Return the node two steps after ``span`` in document order, or None if missing."""
    if span is None:
        return None
    first = span.next_element
    return None if first is None else first.next_element


def _parseSoldDate(each):
    """Return the listing's sold date as an ISO-8601 string, or None if it cannot be read."""
    rawDate = _textAfter(each.find(class_='s-item__title--tagblock'))
    try:
        # The first five characters are skipped before parsing, as in the
        # original scrape (presumably a "Sold " style prefix — TODO confirm).
        return parser.parse(rawDate[5:]).isoformat()
    except (TypeError, KeyError, ValueError, OverflowError):
        # Missing tag, non-text node or unparseable text: record "no date"
        # instead of reusing the previous listing's date.
        return None


def pullData(maxPages=10):
    """Scrape sold eBay listings that are newer than the newest stored one.

    The existing table is loaded with ``pullDynamo()``. Result pages are then
    requested one at a time and walked top to bottom (the code assumes newer
    listings come first) until a listing whose title equals the most recently
    stored title is met, a page contributes no new listing, or ``maxPages``
    pages have been read.

    Only listings whose price text starts with "£" are kept.

    Parameters
    ----------
    maxPages : int, optional
        Upper bound on the number of result pages requested, so the scrape
        always terminates even if the stored title is never found.

    Returns
    -------
    tuple or int
        ``(newListingsN, df)`` when at least one new listing was found, where
        ``df`` has the columns "Item Title", "Item Price",
        "Listings Collected Number" and "Sold Date", ordered oldest-collected
        first and numbered on from the existing row count. Plain ``0`` when
        nothing new was found (shape kept for existing callers).
    """
    newListingsN = 0
    dynamoOutput = pullDynamo()
    existingDataDF = dynamoOutput[0]
    # int() so the arithmetic below works for numpy and Python integers alike.
    existingListingsN = int(dynamoOutput[1])

    # Title of the most recently collected listing; None when the table is empty.
    lastKnownTitle = None
    if not existingDataDF.empty:
        existingDataDF = existingDataDF.sort_values(
            by=['Listings Collected Number'], ascending=True
        )
        lastKnownTitle = existingDataDF.iloc[-1]['Item Title']

    # dict to hold the data in
    adict = {"Item Title":[], "Item Price":[], "Listings Collected Number":[], "Sold Date":[]}
    # ebay page url, the loop below appends the page number
    ebayUrl = "https://www.ebay.co.uk/sch/i.html?_from=R40&_nkw=20kg+weight+plates&_in_kw=1&_ex_kw=&_sacat=0&LH_Sold=1&_udlo=&_udhi=&LH_ItemCondition=4&_samilow=&_samihi=&_sadis=15&_stpos=BS313AX&_sargn=-1%26saslc%3D1&_fsradio2=%26LH_LocatedIn%3D1&_salic=3&LH_SubLocation=1&_sop=13&_dmd=1&_ipg=60&LH_Complete=3&_pgn="

    reachedKnown = False
    # Pages start at 1: _pgn appears to be 1-based — TODO confirm that the
    # previously requested page 0 was just an alias for the first page.
    for pageN in range(1, maxPages + 1):
        listings = _fetchListings(ebayUrl + str(pageN))
        addedOnPage = 0

        for each in listings:
            each_title = _textAfter(each.find(class_='s-item__title'))
            if each_title is None:
                # Entry without a readable title; nothing to match or store.
                continue
            print(each_title)

            # Matching on title alone can stop early for repeat sellers that
            # reuse a title; a per-sale id would be a more reliable key.
            if lastKnownTitle is not None and each_title == lastKnownTitle:
                reachedKnown = True
                break

            each_price = _textAfter(each.find(class_='s-item__price'))
            # Requiring a leading "£" filters out entries where markup rather
            # than a price was picked up; the whole listing is skipped so
            # titles and prices stay aligned.
            if str(each_price)[:1] != "£":
                continue

            newListingsN += 1
            addedOnPage += 1
            # Stored as plain str so the DataFrame does not hold references
            # into the parsed page.
            adict['Item Title'].append(str(each_title))
            adict['Item Price'].append(str(each_price))
            adict['Listings Collected Number'].append(newListingsN)
            adict['Sold Date'].append(_parseSoldDate(each))

        # Stop once caught up with stored data, or when a page added nothing
        # (failed request, end of results, or markup without usable prices).
        if reachedKnown or addedOnPage == 0:
            break

    if newListingsN == 0:
        print("expecting no new listings to be added")
        return newListingsN

    # Listings were collected newest first; flip so the oldest is on top ...
    df = pd.DataFrame(adict).sort_values(
        by=['Listings Collected Number'], ascending=False
    )
    # ... and renumber so the sequence continues from the stored data
    # (stored numbers run 0..existingListingsN-1, so no +1 is needed).
    df['Listings Collected Number'] = [
        existingListingsN + offset for offset in range(newListingsN)
    ]
    print("expecting "+str(newListingsN)+" new listings to be added")
    return newListingsN, df


In [14]:
# Run the scrape; the cell displays whatever pullData() returns
# (a (count, DataFrame) tuple, or 0 when nothing new was found).
pullData()

Shop on eBay
Cast Iron Weight Plates Used
220KG Cast Iron Weight Plates 25KG 20KG 15KG 10KG..
2x 20kg BodyRip Barbell Weight Plates
2 x 20kg BODYPOWER Olympic Weight Plates
Used TKO Rubber Olympic Plate Disc Various Weights KG Gym Lifting SINGLES
2x Hop Sport 20kg cast iron weight plates
20kg Cast Iron Weight Plates, 2 x 10kg
2 x Marcy 1" x 20kg Weight Plates
150KG 2" Weight Plates Set Olympic Bumper Weights 2x 5kg 10kg 15kg 20kg 25kg
Cast Iron Weight Plate 1-20KG Tri Grip and Gazebo Weight Plate REFRUB GRADE B
1 X 20kg Hardcastle Grey Vinyl Weight Lifting Bar Plate Disc Home Gym #983
150KG 2" Weight Plates Set Olympic Bumper Weights 1.5kg 5kg 10kg 12.5kg 20kg 
Wolverson Fitness 20Kg Bumper Plates x 2
expecting 12 new listings to be added


(12,
                                            Item Title Item Price  \
 11  150KG 2" Weight Plates Set Olympic Bumper Weig...     £80.00   
 10  1 X 20kg Hardcastle Grey Vinyl Weight Lifting ...     £17.99   
 9   Cast Iron Weight Plate 1-20KG Tri Grip and Gaz...      £6.99   
 8   150KG 2" Weight Plates Set Olympic Bumper Weig...    £120.00   
 7                   2 x Marcy 1" x 20kg Weight Plates     £41.00   
 6              20kg Cast Iron Weight Plates, 2 x 10kg     £36.00   
 5           2x Hop Sport 20kg cast iron weight plates     £60.00   
 4   Used TKO Rubber Olympic Plate Disc Various Wei...      £4.99   
 3            2 x 20kg BODYPOWER Olympic Weight Plates     £80.00   
 2               2x 20kg BodyRip Barbell Weight Plates     £56.00   
 1   220KG Cast Iron Weight Plates 25KG 20KG 15KG 1...     £99.00   
 0                        Cast Iron Weight Plates Used     £20.00   
 
     Listings Collected Number            Sold Date  
 11                        225  2022-12-13

In [15]:
# Snapshot of the stored table: pullDynamo() returns a (DataFrame, count) tuple.
pulled = pullDynamo()

In [18]:
# Show the 'Listings Collected Number' column of the pulled frame in ascending order.
pulled[0]['Listings Collected Number'].sort_values()

199      0
5        1
4        2
142      3
132      4
      ... 
143    220
11     221
71     222
223    223
195    224
Name: Listings Collected Number, Length: 225, dtype: object

In [19]:
# Display the whole (DataFrame, count) tuple held in `pulled`.
pulled

(    Item Price            Sold Date Listings Collected Number  \
 0       £16.10  2022-11-30T00:00:00                       196   
 1       £16.10  2022-12-05T00:00:00                       209   
 2       £12.00  2022-10-22T00:00:00                        89   
 3       £12.00  2022-09-25T00:00:00                        24   
 4      £200.00  2022-09-15T00:00:00                         2   
 ..         ...                  ...                       ...   
 220    £120.00  2022-12-03T00:00:00                       201   
 221     £70.00  2022-11-22T00:00:00                       172   
 222     £56.00  2022-10-14T00:00:00                        70   
 223      £6.99  2022-12-12T00:00:00                       223   
 224    £175.00  2022-10-09T00:00:00                        56   
 
                                             Item Title  
 0    1 Inch Cast Iron Weight Plates - Dumbbell / Ba...  
 1    1 Inch Cast Iron Weight Plates - Dumbbell / Ba...  
 2                           8x 

In [11]:
# NOTE(review): in the cells visible here `existingDataDF` is only assigned
# inside pullData(), and this cell's execution count (11) precedes the cells
# above — it appears to rely on leftover kernel state and would fail on a
# fresh Restart & Run All.
existingDataDF.sort_values(by=['Sold Date'])

Unnamed: 0,Item Price,Sold Date,Listings Collected Number,Item Title
164,£65.00,2022-09-12T00:00:00,0,Gold’s Gym 20kg 1 Inch weight plate (tri-grip)
68,£440.00,2022-09-12T00:00:00,1,180kg Olympic Weight Set - 7ft 20kg Barbell wi...
102,£42.00,2022-09-12T00:00:00,2,Body Power 2 x 20kg Black Cast Iron weight dis...
58,£130.00,2022-09-13T00:00:00,3,olympic free weights 230 kg
32,£150.00,2022-09-14T00:00:00,4,2 x 20kg LHF Competition Olympic PU Bumper Wei...
...,...,...,...,...
71,£10.00,2022-12-10T00:00:00,227,*** *** 4x York Barbells with Vinyl Weights 4x...
207,£25.00,2022-12-10T00:00:00,228,Olympic Weight Plates Cast Iron Weights Tri Gr...
148,£0.99,2022-12-10T00:00:00,229,2x Gold’s Gym 20kg 1 Inch weight plate
11,£15.00,2022-12-10T00:00:00,230,15kg York ISO-Grip Steel Composite Rubber Coat...


In [12]:
# NOTE(review): `existingDataDF` is not defined at notebook level in the cells
# visible here (it is a local of pullData()); this display depends on kernel
# state from an earlier, out-of-order run.
existingDataDF

Unnamed: 0,Item Price,Sold Date,Listings Collected Number,Item Title
164,£65.00,2022-09-12T00:00:00,0,Gold’s Gym 20kg 1 Inch weight plate (tri-grip)
68,£440.00,2022-09-12T00:00:00,1,180kg Olympic Weight Set - 7ft 20kg Barbell wi...
102,£42.00,2022-09-12T00:00:00,2,Body Power 2 x 20kg Black Cast Iron weight dis...
58,£130.00,2022-09-13T00:00:00,3,olympic free weights 230 kg
32,£150.00,2022-09-14T00:00:00,4,2 x 20kg LHF Competition Olympic PU Bumper Wei...
...,...,...,...,...
241,£240.00,2022-11-08T00:00:00,556,120kg (6x20kg) Bodymax Olympic Rubber Coated ...
273,£41.00,2022-11-09T00:00:00,557,Olympic weight plates with curl bar: 2 x 10kg ...
263,£20.00,2022-11-09T00:00:00,560,Weights - Olympic 5kg plates x4
220,£50.00,2022-11-10T00:00:00,561,York Fitness 4 X 5kg Standard weight plates Ca...


In [None]:
# NOTE(review): `newDF` is not assigned in any cell visible here; this
# unexecuted scratch cell would raise NameError on a fresh kernel.
newDF.index