In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from scipy import stats
import datetime as dt

pd.set_option('display.max_colwidth', 100)

In [2]:
def scrape_ebay(url,pages):
    titles,dates,prices,shipping,condition,subtitles,links,sellers = [], [], [], [], [], [], [], []
    for page in range(1,pages+1):  #loop thru every page of max results (200)
        
        # Get html page data using BeautifulSoup
        req = requests.get(url.format(page))
        soup = BeautifulSoup(req.content, 'html.parser')
        listings = soup.find_all('li', attrs={'class': 's-item'})

        for listing in listings:  #loop thru each listing on page
            for name in listing.find_all('h3', attrs={'class':"s-item__title"}):  #filter for errors if listing is invalid
                if (str(name.find(text=True, recursive=False))!='None') and (listing!=listings[0]):  #listing[0] is page headers
                    
                    # Get auction title
                    name = listing.find('h3', attrs={'class':"s-item__title"})
                    title = str(name.find(text=True, recursive=False))
                    titles.append(title)
                    
                    # Get ending date
                    date = listing.find('span', attrs={'class':"s-item__ended-date"})
                    date = date.find(text=True)
                    dates.append(date)

                    # Get ending price
                    price = listing.find('span', attrs={'class':"s-item__price"})
                    price = price.find(text=True)
                    price = price.replace('$','')
                    price = price.replace(',','')
                    prices.append(price)

                    # Get shipping info
                    ship = listing.find('span', attrs={'class':"s-item__shipping"})
                    if ship==None: ship = listing.find_all('span', attrs={'class':"s-item__shipping"})
                    else: ship = ship.find(text=True)
                    shipping.append(ship)

                    # Get condition info
                    cond = listing.find('div', attrs={'class':"s-item__subtitle"})
                    #cond2 = cond.find(attrs={'class':"SECONDARY_INFO"})
                    cond = cond.find(text=True)
                    condition.append(cond)

                    # Get auction link to scrape more info
                    link = listing.find_all("a", attrs={'class':'s-item__link'})
                    links.append(str(link))
                    
    # Put all info into dataframe
    scraped = pd.DataFrame({"Date":dates,"Title":titles,"Price":prices,"Shipping":shipping,"Condition":condition,
                            "Link":links})
    scraped.Link = scraped.Link.apply(lambda x: x.split('href="')[1].split('"')[0])  #clean up html link
    # Add year 2020 in front and convert all date values to datetime object
    scraped.Date = scraped.Date.apply(lambda x: '2020-'+x)
    scraped.Date = pd.to_datetime(scraped.Date)
    scraped = scraped.sort_values(by=['Date'], ascending=False)
    return scraped

In [11]:
# Filtering for other colors (Black, Blue, White, Clear, Green, Multicolor, Orange, Pink, Purple, Red, Yellow, Not Specified)
pages = 4
url = "https://www.ebay.com/sch/i.html?_oaa=1&_dcat=139971&Model=Nintendo%2520Switch&Color=Clear%7CGreen%7CMulticolor%7COrange%7CPink%7CPurple%7CRed%7CYellow%7C%21%7CBlack%7CBlue%7CWhite&_fsrp=1&_nkw=nintendo+switch&LH_Complete=1&Region%2520Code=NTSC%252DU%252FC%2520%2528US%252FCanada%2529&LH_Sold=1&_ipg=200&_pgn={}&rt=nc"
df_misc = scrape_ebay(url,pages)

In [45]:
# Filtering for Switch Lite
pages = 6
url = "https://www.ebay.com/sch/i.html?_oaa=1&_dcat=139971&Model=Nintendo%2520Switch%2520Lite&_fsrp=1&_nkw=nintendo+switch&LH_Complete=1&Region%2520Code=NTSC%252DU%252FC%2520%2528US%252FCanada%2529&LH_Sold=1&_ipg=200&_pgn={}&rt=nc"
df_lite = scrape_ebay(url,pages)

In [78]:
df_misc.to_csv('df.csv',index=False)

Unnamed: 0,Date,Title,Price,Shipping,Condition,Link,Location,Seller,Feedback,FBScore
0,2020-05-11 18:46:00,**NINTENDO SWITCH LITE 32GB GRAY ANIMAL CROSSING BUNDLE WITH GAME & CASE! - NEW!,338.95,Free Shipping,Brand New,https://www.ebay.com/itm/NINTENDO-SWITCH-LITE-32GB-GRAY-ANIMAL-CROSSING-BUNDLE-WITH-GAME-CASE-NE...,"Miami, Florida, United States",man.us.3lt7doe,40,100.0
1,2020-05-11 18:41:00,Nintendo Switch Lite Console 32GB HDHSGAZAA - Gray - Brand New - Same Day Ship,229.95,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Lite-Console-32GB-HDHSGAZAA-Gray-Brand-New-Same-Day-Shi...,"Portland, Oregon, United States",thimothyboy,4174,100.0
2,2020-05-11 18:27:00,Nintendo Switch Lite Grey - for parts only in box,152.50,+$9.60 shipping,Parts Only,https://www.ebay.com/itm/Nintendo-Switch-Lite-Grey-for-parts-only-in-box/254588158993?hash=item3...,"Toledo, Ohio, United States",sleekgiant90,12,100.0
3,2020-05-11 17:20:00,Nintendo Switch Lite 32GB Handheld Video Game Console Brand New,230.00,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Lite-32GB-Handheld-Video-Game-Console-Brand-New/1742572...,"Hillsboro, Oregon, United States",raj_pre453,168,100.0
4,2020-05-11 16:57:00,Nintendo Switch Lite - Turquoise - New In Box (Fast Shipping),219.00,+$12.20 shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Lite-Turquoise-New-In-Box-Fast-Shipping/153916184323?ha...,"Portland, Oregon, United States",*redking,57,100.0
...,...,...,...,...,...,...,...,...,...,...
1159,2020-02-12 22:11:00,MINT condition Nintendo Switch Lite 32GB Handheld Video Game Console (Turquoise),179.00,Free Shipping,Pre-Owned,https://www.ebay.com/itm/NINTENDO-SWITCH-LITE-Turquoise-Teal-Brand-New-/372950038725?_trksid=p23...,"New York, New York, United States",vickiewagner,201,100.0
1160,2020-02-12 13:05:00,NINTENDO SWITCH LITE Turquoise Teal Brand New,185.00,Free Shipping,Brand New,https://www.ebay.com/itm/NINTENDO-SWITCH-LITE-Turquoise-Teal-Brand-New/372950038725?hash=item56d...,"Bayonne, New Jersey, United States",sagambin-67,43,100.0
1161,2020-02-12 08:52:00,MINT condition Nintendo Switch Lite 32GB Handheld Video Game Console (Gray) Ext!,250.00,Free Shipping,Pre-Owned,https://www.ebay.com/itm/Nintendo-Switch-Lite-Zacian-and-Zamazenta-Edition-Satisfye-SwitchGrip-/...,"Grosse Pointe, Michigan, United States",pozjoker10,548,100.0
1162,2020-02-12 08:06:00,Nintendo Switch Lite Zacian and Zamazenta Edition & Satisfye SwitchGrip,200.00,+$9.00 shipping,Pre-Owned,https://www.ebay.com/itm/Nintendo-Switch-Lite-Zacian-and-Zamazenta-Edition-Satisfye-SwitchGrip/2...,"Kansas City, Missouri, United States",cjs_2010,264,100.0


In [62]:
# Get additional details: item location, seller username, feedback count and score using direct page link
# Not defined as a function so variables can persist during troubleshooting and tendency for web pages to change/disappear
#i = 0  #index counter
#location, feedback, seller, fbscore = [], [], [], []

for url in df_lite.Link[i:]:

    # Pull additional details from link to auction details
    req = requests.get(url)
    soup = BeautifulSoup(req.content, 'html.parser')
    listing = soup.find_all('tr', attrs={'class': 'vi-ht20'})
        
    if listing==[]:  #filtering for older listings that need to grab another link to get to correct listing page
        listing = soup.find('div', attrs={'class': 'nodestar-item-card-details__view'})
        if listing==None:  #filtering for power seller with active store
            listing = soup.find_all('tr', attrs={'class': 'vi-ht20'})
        else:
            link = listing.a.get('href')
            df_lite.Link[i] = link
            req = requests.get(link)
            soup = BeautifulSoup(req.content, 'html.parser')
            listing = soup.find_all('tr', attrs={'class': 'vi-ht20'})
    
    if listing!=[]:  #filter for different page layouts
            
        # Get item location
        temp = listing[3].find_all('div', attrs={'class': 'u-flL'})
        loc = temp[1].find(text=True)
        if loc=='\n':  #slightly diff page layout
            loc = listing[4].find_all('div', attrs={'class': 'u-flL'})[1].get_text()
        location.append(loc)
        # Get feedback count
        try: feed = listing[4].find_all('a')[1].get_text()  #different page layouts
        except: feed = listing[5].find('div', attrs={'class': 'mbg vi-VR-margBtm3'}).find_all('a')[1].get_text()
        feedback.append(feed)
        # Get seller username
        try: name = listing[4].find('div', attrs={'class': 'mbg vi-VR-margBtm3'}).a.get_text()  #different page layouts
        except: name = listing[5].find('div', attrs={'class': 'mbg vi-VR-margBtm3'}).a.get_text()
        seller.append(name)
        # Get feedback score (need to navigate to seller's page first)
        try: link = listing[4].find('div', attrs={'class':'mbg vi-VR-margBtm3'}).a.get('href')  #different page layouts
        except: link = listing[5].find('div', attrs={'class': 'mbg vi-VR-margBtm3'}).a.get('href')
        req = requests.get(link)
        soup = BeautifulSoup(req.content, 'html.parser')
        listing = soup.find('div', attrs={'class':'perctg'})
        if listing.get_text()=='':  #filter for no feedback in last 12 months which does not report %
            score = 100
        else:
            score = listing.get_text().split('%')[0].split('\t')[2]
        fbscore.append(score)
        
    elif soup.find_all(text='We looked everywhere.') != []:  #filter for dead pages (drop row)
        print(i,'Dead Page!')
        break#df_misc.drop(df_misc.iloc[i:i].index, inplace=True)
        
    else:
        print(i)

        # Get item location
        listing = soup.find_all('div', attrs={'class': 'iti-eu-bld-gry'})
        if listing==[]:  #filter for pages requiring another link to listing
            listing = soup.find_all('div', attrs={'class': 'nodestar-item-card-details__view'})
            temp = soup.find_all('div', attrs={'class': 'app-status-messages'})[0].get_text()
            if listing==[] and temp=="We're sorry, something went wrong. Please try again.":  #filter for dead listing page
                print(i,'Dead Page!!!')
                break#df_misc.drop(df_misc.iloc[i:i].index, inplace=True)
            link = listing[0].a.get('href')
            req = requests.get(link)
            soup = BeautifulSoup(req.content, 'html.parser')
            listing = soup.find_all('div', attrs={'class': 'iti-eu-bld-gry'})
        loc = listing[0].find().get_text()
        location.append(loc)
        # Get seller username
        listing = soup.find('div', attrs={'class': 'mbg vi-VR-margBtm3'})
        name = listing.a.get_text()
        seller.append(name)
        # Get feedback count
        feed = listing.find_all('a')[1].get_text()
        feedback.append(feed)
        # Get feedback score
        listing = soup.find('div', attrs={'id':'si-fb'})
        if listing!=None:  #filter for pages with score displayed otherwise need to navigate to seller's page
            score = listing.get_text().split('%')[0]
        elif feed=='0':  #filter for new accounts (zero feedback) since page won't list %
            score = 100
        elif listing==None:  #navigate to seller's page
            link = soup.find('div', attrs={'class':'mbg vi-VR-margBtm3'}).a.get('href')
            req = requests.get(link)
            soup = BeautifulSoup(req.content, 'html.parser')
            listing = soup.find('div', attrs={'class':'perctg'})
            if listing.get_text()=='':  #filter for no feedback in last 12 months which does not report %
                score = 100
            else:
                score = listing.get_text().split('%')[0].split('\t')[2]
        else: 
            print(i,'!!!')
            break
        fbscore.append(score)
    i+=1


3
5
6
7
8
25
31
34
36
37
41
46
50
54
61
66
73
83
90
94
98
100
108
122
136
137
138
139
140
141
142
148
150
151
152
153
171
176
186
199
205
208
212
215
216
221
224
230
240
242
246
249
260
262
263
264
271
272
284
290
300
301
310
316
322
326
329
345
349


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


357
361
363
371
384
385
388
391
395
397
402
415
416
419
422
423
428
430
461
462
465
466
474
475
477
494
510
512
516
519
523
524
525
529
531
536
541
543
544
554
562
563
573
579
588
608
610
611
618
620
624
634
635
643
644
656
659
663
666
668
671
672
680
685
690
694
695
700
708
714
739
752
753
760
779
790
791
794
795
796
797
799
800
801
802
803
804
805
809
810
818
824
827
834
839
843
845
848
853
863
875
887
889
894
896
897
902
903
911
912
915
917
919
920
921
930
935
936
939
940
941
942
943
944
945
947
948
949
963
965
967
976
978
981
986
995
996
1001
1002
1011
1012
1015
1019
1020
1026
1028
1031
1032
1041
1046
1054
1055
1060
1061
1071
1074
1077
1080
1083
1103
1114
1115
1118
1127
1131
1133
1136
1141
1156
1160
1162


In [64]:
print(len(location),len(seller),len(feedback),len(fbscore), i)
print(location[-3], seller[-3], feedback [-3], fbscore[-3])
print(location[-2], seller[-2], feedback [-2], fbscore[-2])
print(location[-1], seller[-1], feedback [-1], fbscore[-1])

1164 1164 1164 1164 1164
Grosse Pointe, Michigan, United States  pozjoker10 548 100
Kansas City, Missouri, United States  cjs_2010 264 100
Brooklyn, New York, United States  riale12 465 100


In [71]:
df_lite['Location'] = location
df_lite['Seller'] = seller
df_lite['Feedback'] = feedback
df_lite['FBScore'] = fbscore

In [74]:
df_lite.to_csv('df_lite.csv',index=False)

In [69]:
df = df_lite.copy()

In [49]:
df_lite.Link.iloc[1138] == url
#df_lite.Link.iloc[1580] = 'https://www.ebay.com/itm/SHIPS-FREE-Nintendo-Switch-32GB-Console-w-Neon-Red-Blue-Joy-Cons-/274338374961?_trksid=p2349526.m4383.l10137.c10&nordt=true&rt=nc&orig_cvip=true'

True

In [1172]:
# DEAD PAGES - gray
# 252,1105,1493,1851,1955,2185,2393,2416,2579,2764,3059,3094,3147,3269,3298,3570,4148,4185,4387,4665
# 4798,4918,5197,5661,5733,5798,6856,7264,7307,7331,7364,7543,7573,7892,8273,8439,8500,8708,8745,8756,8766
# 8767,8770,8773,8830,8840,8860,8925,9000,9172,9231,9233,9237,9240,9244,9246,9253,9256,9257,9259
# DEAD PAGES - misc
# 10,295?,366,367,804,
# Lite
# 1138

In [36]:
# Retain only newly scraped listings after the last one (1st row)
cutoff = df.Date[0]
df_misc = df_misc[df_misc.Date > cutoff]

# Combine both dataframes
df = pd.concat([df_misc, df])
df = df.reset_index(drop=['index'])
df

Unnamed: 0,Date,Title,Price,Shipping,Condition,Link
0,2020-05-09 10:35:00,Nintendo SwitchÂ Console v2 Blue and Red Joyâ€‘Con **IN HAND** Fast Free Shipping,470.00,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Console-v2-Blue-and-Red-Joy-Con-IN-HAND-Fast-Free-Shipp...
1,2020-05-09 10:18:00,Nintendo switch,750.00,+$9.60 shipping,Pre-Owned,https://www.ebay.com/itm/Nintendo-switch/254592157293?hash=item3b46e0066d:g:--UAAOSwqjJettm7
2,2020-05-09 10:12:00,Nintendo Switch 32GB Console Neon Joy-con - Newest Version - In Hand FREE SHIP,459.99,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-32GB-Console-Neon-Joy-con-Newest-Version-In-Hand-FREE-S...
3,2020-05-09 10:04:00,NINTENDO SWITCH LITE Turquoise Teal Handheld Video Game Console NEW ~ SHIPS FREE,219.00,+$9.60 shipping,Brand New,https://www.ebay.com/itm/NINTENDO-SWITCH-LITE-Turquoise-Teal-Handheld-Video-Game-Console-NEW-SHI...
4,2020-05-09 09:10:00,Nintendo Switch 32GB Console w/ Neon Blue & Neon Red Joy Con *BRAND NEW IN HAND*,450.00,+$5.00 shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-32GB-Console-w-Neon-Blue-Neon-Red-Joy-Con-BRAND-NEW-IN-...
...,...,...,...,...,...,...
3846,2020-01-31 16:41:00,Nintendo Switch Lite Turquoise - Game Console Portable 2019,179.99,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Lite-Turquoise-Game-Console-Portable-2019/274198405623?...
3847,2020-01-31 14:20:00,Nintendo Switch Red Blue Joy Con Mario Kart 8 Deluxe Bundle NEW SEALED 32GB Neon,319.99,Free Shipping,Brand New,https://www.ebay.com/itm/Nintendo-Switch-Red-Blue-Joy-Con-Mario-Kart-8-Deluxe-Bundle-NEW-SEALED-...
3848,2020-01-31 11:47:00,Nintendo Switch MarioKart Deluxe Edition,265,+$10.00 shipping,Pre-Owned,https://www.ebay.com/itm/Nintendo-Switch-MarioKart-Deluxe-Edition/133316469962?epid=2306616128&a...
3849,2020-01-30 23:32:00,Nintendo Switch Pokemon: Let's Go Evee Comes With Everything But GAME & Ball,270,+$20.00 shipping,Pre-Owned,https://www.ebay.com/itm/Nintendo-Switch-Pokemon-Lets-Go-Evee-Comes-With-Everything-But-GAME-Bal...
