In [64]:
import datetime
import json
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import requests
from bs4 import BeautifulSoup

In [79]:
def scrap(url, timeout=10):
    """Download ``url`` and return its first JSON-LD payload.

    The page is fetched with ``requests``, parsed with BeautifulSoup's
    ``lxml`` parser, and the text of the first
    ``<script type="application/ld+json">`` tag is decoded with
    ``json.loads``.

    Args:
        url: Address of the page to download.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Optional; without a timeout the request could block forever.

    Returns:
        The decoded JSON value, or an empty ``dict`` when the page has no
        JSON-LD script tag or the tag's content is not valid JSON.

    Raises:
        requests.RequestException: Network failures and timeouts are not
            caught here and propagate to the caller.
    """
    html = requests.get(url, timeout=timeout).text

    soup = BeautifulSoup(html, "lxml")

    script = soup.find("script", {"type": "application/ld+json"})
    if script is None:
        # No JSON-LD on the page: report "nothing found" explicitly instead of
        # relying on a swallowed AttributeError.
        return dict()

    try:
        return json.loads(script.text.rstrip('\n'))
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; anything else
        # (KeyboardInterrupt, programming errors) is allowed to surface.
        return dict()

def extractProduct(items):
    """Flatten JSON-LD list entries into one flat record per product.

    Args:
        items: Iterable of entries, each holding the product under its
            ``'item'`` key.

    Returns:
        A list of dicts with the keys ``sku``, ``nombre``, ``marca``,
        ``precio`` (converted with ``int``), ``imagen``, ``url`` and ``time``
        (``datetime.datetime.now()`` taken when the record is built).
    """
    def build_record(entry):
        # Key order is kept on purpose: it becomes the column order of any
        # table built from these records.
        product = entry['item']
        return {
            'sku': product['sku'],
            'nombre': product['name'],
            'marca': product['brand'],
            'precio': int(product['offers']['price']),
            'imagen': product['image'],
            'url': product['url'],
            'time': datetime.datetime.now(),
        }

    return [build_record(entry) for entry in items]

def processScrapResults(items):
    """Turn a decoded JSON-LD payload into product records plus a follow-up URL.

    Args:
        items: Value returned by ``scrap``. A usable payload is a dict with a
            ``"url"`` entry and an ``"itemListElement"`` list.

    Returns:
        A ``(item_list, url_redirect)`` tuple. ``item_list`` is what
        ``extractProduct`` builds from ``items["itemListElement"]`` and
        ``url_redirect`` is ``items["url"]``. ``([], "")`` is returned when
        ``items`` is not a dict, is empty, or lacks either of those keys.

    Side effects:
        Prints the number of extracted products for a usable payload.
    """
    # The previous guard compared against type(dict), which is `type` itself,
    # so it never matched a real payload; a JSON-LD list then failed on
    # `.keys()`. Check for "usable dict" directly instead.
    if not isinstance(items, dict) or not items:
        return list(), ""

    # A JSON-LD block of another shape (no item list / no url) means there is
    # nothing to collect from this page; signal "stop" rather than KeyError.
    if "url" not in items or "itemListElement" not in items:
        return list(), ""

    url_redirect = items['url']

    item_list = extractProduct(items["itemListElement"])

    print(len(item_list))

    return item_list, url_redirect

In [80]:


search_term = "comoda"
url = "https://simple.ripley.cl/search/" + search_term


def setPageParam(page_url, page):
    """Return ``page_url`` with its ``page`` query parameter set to ``page``.

    An existing ``page`` parameter is replaced where it stands; otherwise
    ``page`` is appended after the other parameters. Parsing the query string
    keeps this correct for multi-digit page numbers and for URLs that have no
    query string at all.
    """
    parts = urlsplit(page_url)
    params = parse_qsl(parts.query, keep_blank_values=True)

    if any(key == "page" for key, _ in params):
        params = [(key, str(page) if key == "page" else val) for key, val in params]
    else:
        params.append(("page", str(page)))

    return urlunsplit(parts._replace(query=urlencode(params)))


items = list()

# At most 19 requests: `value` runs from 1 to 19.
for value in range(1, 20):

    print("Search: ", url)

    # The URL reported by the fetched page replaces the one we requested;
    # an empty string means the page yielded nothing usable.
    scraped_items, url = processScrapResults(scrap(url))

    # Collect before deciding to stop so no scraped rows are dropped.
    items.extend(scraped_items)

    if len(url) <= 0:
        break

    # `value` pages have been requested so far, so the next one is value + 1.
    url = setPageParam(url, value + 1)
    


Search:  https://simple.ripley.cl/search/comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?source=search&term=comoda&page=2
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=3&source=search&term=comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=4&source=search&term=comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=5&source=search&term=comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=6&source=search&term=comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=7&source=search&term=comoda
24
Search:  http://simple.ripley.cl/dormitorio/muebles-de-dormitorio/comoda-y-tocadores?page=8&source=search&term=comoda


In [81]:
# Build a single table from the scraped records: each dict collected in
# `items` becomes one row.
import pandas as pd
df = pd.DataFrame(items)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,sku,nombre,marca,precio,imagen,url,time
0,2000339001028P,COMODA RIPLEY HOME MIMOSA ALTA,RIPLEY HOME,219990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:49.774012
1,2000341669728P,COMODA RIPLEY HOME MISSION,RIPLEY HOME,199990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:49.774012
2,2000343581226P,TOCADOR RIPLEY HOME MIMOSA CAFE,RIPLEY HOME,199990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:49.774012
3,MPM00001809674,COMODA MISURI 6 CAJONES MOBIKIT,MOBIKIT,79990,//ripleycl.imgix.net/http%3A%2F%2Fs3.amazonaws...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:49.774012
4,2000364239526P,TOCADOR RIPLEY HOME VANITY CON BANQUETA,RIPLEY HOME,69990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:49.774012
...,...,...,...,...,...,...,...
163,2000376466583P,COMODA FAVATEX ESTER 5C BLANCO,FAVATEX,69990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:51.911300
164,2000374085601P,TOCADOR FAVATEX AMAPOLA BLANCO 1C,FAVATEX,129990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:51.911300
165,2000376466903P,COMODA FAVATEX AVILA 4C AMBAR/BLANCO,FAVATEX,79990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:51.911300
166,2000376467269P,COMODA FAVATEX AREA 7C 1P NOGAL,FAVATEX,149990,//home.ripley.cl/store/Attachment/WOP/D360/200...,http://simple.ripley.cl/dormitorio/muebles-de-...,2020-03-14 21:21:51.911300


In [82]:
# Summary statistics of the scraped table; in the recorded output only the
# `precio` column is summarised.
df.describe()

Unnamed: 0,precio
count,168.0
mean,155150.535714
std,105440.410252
min,29990.0
25%,79990.0
50%,120490.0
75%,220990.0
max,780000.0


In [45]:
# Show products that were scraped more than once. Rows are compared on `sku`
# only: every record carries its own scrape timestamp in `time`, so comparing
# whole rows would practically never flag a repeated product.
df[df.duplicated(subset="sku")]

Unnamed: 0,0,1,2,3,4,5
