In [30]:
from bs4 import BeautifulSoup
import requests
import warnings
import random
import string
import time

# Silence every warning for the rest of the session.
# NOTE(review): presumably aimed at the warning raised for the verify=False
# requests made by fetch_webpage, but this blanket filter also hides all other
# warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Per-request timeout handed to requests.get() by fetch_webpage.
TIMEOUT = 5
# Prefix prepended to the hrefs scraped from CNET pages.
CNET_BASE_URL = 'https://www.cnet.com'

def fetch_webpage(url, retry_count=5):
    """Download ``url`` and return the raw response body.

    Makes at most ``retry_count`` GET requests. An attempt fails when
    ``requests`` raises a ``RequestException`` or when the response body is
    empty; the first non-empty body is returned immediately.

    Args:
        url: Address of the page to download.
        retry_count: Maximum number of attempts. Values <= 0 make no request.

    Returns:
        The non-empty response body (``bytes``), or ``None`` when every
        attempt failed.
    """
    attempts_left = retry_count
    while attempts_left > 0:
        try:
            # NOTE(review): verify=False turns off TLS certificate checks.
            content = requests.get(url, verify=False, timeout=TIMEOUT).content
        except requests.exceptions.RequestException:
            print('Could not connect to {}. Retry count: {}'.format(url, attempts_left))
            content = None
        if content:
            return content
        # Every unsuccessful attempt consumes one retry. An empty body used to
        # leave the counter untouched, so a page that kept answering with
        # nothing made the loop spin forever.
        attempts_left -= 1
    return None

def get_review_links(html):
    """Extract the product-review URLs from a CNET listing page.

    Args:
        html: Markup of the listing page, e.g. the value returned by
            ``fetch_webpage``. ``None`` or an empty value is accepted.

    Returns:
        A list of absolute URLs, one per ``section.product a.imageWrap``
        anchor that carries an ``href``. Empty when ``html`` is missing.
    """
    # fetch_webpage returns None after exhausting its retries; treat that as
    # "no links" instead of letting the parser raise on it.
    if not html:
        return []

    from urllib.parse import urljoin

    soup = BeautifulSoup(html, 'lxml')
    anchors = soup.select('section.product a.imageWrap')
    # urljoin yields the same result as plain concatenation for root-relative
    # hrefs, and additionally leaves already-absolute hrefs intact.
    return [urljoin(CNET_BASE_URL, a['href']) for a in anchors if a.get('href')]

def get_review_page(html):
    """Parse one page of a CNET review.

    Args:
        html: Markup of the review page, e.g. the value returned by
            ``fetch_webpage``. ``None`` or an empty value is accepted.

    Returns:
        A ``(text, next_page)`` tuple. ``text`` is the stripped text of the
        ``<p>`` and ``<ul>`` children of ``#editorReview`` joined by blank
        lines, or ``None`` when there are none. ``next_page`` is the absolute
        URL of the following page, or ``None`` when there is no usable
        ``a.nextPage`` link or that link is marked ``rel="prev"``.
    """
    # A failed download arrives here as None; report "nothing found" rather
    # than letting the parser raise on it.
    if not html:
        return None, None

    from urllib.parse import urljoin

    soup = BeautifulSoup(html, 'lxml')
    # Remove scripts and figures so their contents do not leak into the text.
    for unwanted in soup(['script', 'figure']):
        unwanted.extract()

    blocks = soup.select('#editorReview > p, #editorReview > ul')
    text = '\n\n'.join(block.get_text().strip() for block in blocks) if blocks else None

    next_page = None
    pager = soup.select('a.nextPage')
    if pager:
        anchor = pager[0]
        # The anchor may lack rel/href altogether; .get avoids the KeyError
        # that direct indexing would raise.
        rel = anchor.get('rel') or []
        href = anchor.get('href')
        if href and (not rel or rel[0] != 'prev'):
            next_page = urljoin(CNET_BASE_URL, href)
    return text, next_page

def get_full_review(first_page_url):
    """Collect the text of a multi-page review together with its title.

    The first page is downloaded once and used both for the title and as the
    first page of text. Following pages are reached through the ``next_page``
    value reported by ``get_review_page``.

    Args:
        first_page_url: URL of the first page of the review.

    Returns:
        A ``(full_review, review_title)`` tuple. ``full_review`` is the
        concatenation of ``'\\n\\n' + page_text`` for every page that could be
        downloaded (an empty string when the first page could not be fetched).
        ``review_title`` is the stripped text of the first ``.headline``
        element, or ``'NO TITLE [<random id>]'`` when none is available.
    """
    first_html = fetch_webpage(first_page_url)

    review_title = None
    if first_html:
        # Name the parser explicitly, as the other helpers do, instead of
        # letting BeautifulSoup guess one.
        headlines = BeautifulSoup(first_html, 'lxml').select('.headline')
        if headlines:
            review_title = headlines[0].get_text().strip()
    if review_title is None:
        review_title = 'NO TITLE [{}]'.format(uuid())

    parts = []
    visited = set()
    page_url, html = first_page_url, first_html
    # Stop as soon as a page cannot be downloaded; fetch_webpage signals that
    # with None.
    while html:
        visited.add(page_url)
        text, page_url = get_review_page(html)
        parts.append('\n\n' + (text or ''))
        # A pager that points back to an already-seen page would otherwise
        # keep the loop running forever.
        if not page_url or page_url in visited:
            break
        html = fetch_webpage(page_url)
    return ''.join(parts), review_title
        
def uuid():
    """Return a random 5-character tag made of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=5)
    return ''.join(picked)

# Listing pages to crawl: page 1 lives at LISTING_URL itself, pages
# 2..LAST_LISTING_PAGE at LISTING_URL + '<n>/'.
LISTING_URL = 'https://www.cnet.com/topics/smart-home/products/'
LAST_LISTING_PAGE = 157

# Maps review title -> full review text.
reviews = {}
for page in range(1, LAST_LISTING_PAGE + 1):
    print('Page:', page)
    listing_url = LISTING_URL if page == 1 else '{}{}/'.format(LISTING_URL, page)
    html = fetch_webpage(listing_url)
    if not html:
        # fetch_webpage gave up; skip this listing page instead of handing
        # None to the parser and aborting the whole crawl.
        print('Skipping page {}: listing could not be fetched'.format(page))
        continue
    links = get_review_links(html)
    for link in links:
        review, title = get_full_review(link)
        key = title
        # Titles are not unique; add a random tag until the key is free so an
        # earlier review is never overwritten.
        while key in reviews:
            key = '{} ({})'.format(title, uuid())
        reviews[key] = review
        # Pause between reviews to keep the request rate low.
        time.sleep(1)

Page: 1
Page: 2
Page: 3
Could not connect to https://www.cnet.com/products/weber-genesis-ii-e-310/review/. Retry count: 5
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Could not connect to https://www.cnet.com/products/seven-dreamers-laundroid/preview/. Retry count: 5
Page: 9
Page: 10
Page: 11
Could not connect to https://www.cnet.com/products/kerastase-hair-coach-powered-by-withings/preview/. Retry count: 5
Page: 12
Page: 13
Page: 14
Page: 15
Page: 16
Page: 17
Page: 18
Page: 19
Page: 20
Page: 21
Page: 22
Page: 23
Page: 24
Page: 25
Page: 26
Page: 27
Page: 28
Page: 29
Page: 30
Could not connect to https://www.cnet.com/products/united-computer-intelligence-corporation-ubi/preview/. Retry count: 5
Page: 31
Page: 32
Page: 33
Page: 34
Page: 35
Page: 36
Page: 37
Page: 38
Page: 39
Page: 40
Could not connect to https://www.cnet.com/products/hikvision-ds-2cd2642fwd-izs-network-surveillance-camera/. Retry count: 5
Page: 41
Page: 42
Page: 43
Page: 44
Page: 45
Page: 46
Page: 47
Page: 48
Page: 49
Page: 5

In [31]:
import pickle

# Persist the scraped reviews so a later session can reload them.
with open('cnet-reviews.pickle', 'wb') as pickle_file:
    pickle.dump(reviews, pickle_file)

In [1]:
import pickle

# Reload the reviews written by the pickle.dump cell.
# NOTE(review): pickle.load can execute arbitrary code; only load a file this
# notebook produced itself.
with open('cnet-reviews.pickle', 'rb') as pickle_file:
    reviews = pickle.load(pickle_file)

In [39]:
import csv

# Export the reviews as '|'-delimited rows of (title, text).
# newline='' hands line-ending control to the csv module, which matters here
# because the review text contains embedded newlines; the explicit encoding
# keeps the output independent of the platform default.
with open('cnet.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter='|')
    writer.writerows(reviews.items())

In [38]:
# Take every 13th review among the first 628 entries.
# NOTE(review): 628 is presumably the review count at the time of the original
# run — confirm. Slicing picks the same indices (0, 13, ..., 624) but no longer
# raises IndexError when fewer reviews are loaded.
keys = list(reviews.keys())
sample_reviews = {key: reviews[key] for key in keys[:628:13]}

In [43]:
# Best Buy

from bs4 import BeautifulSoup
import requests
import warnings
import random
import string
import time

# Silence every warning for the rest of the session.
# NOTE(review): presumably aimed at the warning raised for the verify=False
# requests made by fetch_webpage, but this blanket filter also hides all other
# warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Per-request timeout handed to requests.get() by fetch_webpage.
TIMEOUT = 5
# Prefix prepended to the hrefs scraped by get_review_links in this cell.
BB_BASE_URL = 'https://www.bestbuy.com'

def fetch_webpage(url, retry_count=5):
    """Download ``url`` and return the raw response body.

    Makes at most ``retry_count`` GET requests. An attempt fails when
    ``requests`` raises a ``RequestException`` or when the response body is
    empty; the first non-empty body is returned immediately.

    Args:
        url: Address of the page to download.
        retry_count: Maximum number of attempts. Values <= 0 make no request.

    Returns:
        The non-empty response body (``bytes``), or ``None`` when every
        attempt failed.
    """
    attempts_left = retry_count
    while attempts_left > 0:
        try:
            # NOTE(review): verify=False turns off TLS certificate checks.
            content = requests.get(url, verify=False, timeout=TIMEOUT).content
        except requests.exceptions.RequestException:
            print('Could not connect to {}. Retry count: {}'.format(url, attempts_left))
            content = None
        if content:
            return content
        # Every unsuccessful attempt consumes one retry. An empty body used to
        # leave the counter untouched, so a page that kept answering with
        # nothing made the loop spin forever.
        attempts_left -= 1
    return None

def get_review_links(html):
    """Extract the product URLs from a Best Buy listing page.

    Args:
        html: Markup of the listing page, e.g. the value returned by
            ``fetch_webpage``. ``None`` or an empty value is accepted.

    Returns:
        A list of absolute URLs, one per
        ``div.list-items div.sku-title > h4 > a`` anchor that carries an
        ``href``. Empty when ``html`` is missing.
    """
    # fetch_webpage returns None after exhausting its retries; treat that as
    # "no links" instead of letting the parser raise on it.
    if not html:
        return []

    from urllib.parse import urljoin

    soup = BeautifulSoup(html, 'lxml')
    anchors = soup.select('div.list-items div.sku-title > h4 > a')
    # urljoin yields the same result as plain concatenation for root-relative
    # hrefs, and additionally leaves already-absolute hrefs intact.
    return [urljoin(BB_BASE_URL, a['href']) for a in anchors if a.get('href')]

def get_review_page(html):
    """Parse one page of a product write-up.

    Args:
        html: Markup of the page, e.g. the value returned by
            ``fetch_webpage``. ``None`` or an empty value is accepted.

    Returns:
        A ``(text, next_page)`` tuple. ``text`` is the stripped text of the
        first ``#editorReview`` element, or ``None`` when there is none.
        ``next_page`` is the absolute URL of the following page, or ``None``
        when there is no usable ``a.nextPage`` link or that link is marked
        ``rel="prev"``.
    """
    # A failed download arrives here as None; report "nothing found" rather
    # than letting the parser raise on it.
    if not html:
        return None, None

    from urllib.parse import urljoin

    soup = BeautifulSoup(html, 'lxml')
    # Remove scripts and figures so their contents do not leak into the text.
    for unwanted in soup(['script', 'figure']):
        unwanted.extract()

    # NOTE(review): '#editorReview' and 'a.nextPage' are the selectors used by
    # the CNET scraper; confirm they match Best Buy's markup.
    containers = soup.select('#editorReview')
    text = containers[0].get_text().strip() if containers else None

    next_page = None
    pager = soup.select('a.nextPage')
    if pager:
        anchor = pager[0]
        # The anchor may lack rel/href altogether; .get avoids the KeyError
        # that direct indexing would raise.
        rel = anchor.get('rel') or []
        href = anchor.get('href')
        if href and (not rel or rel[0] != 'prev'):
            # Use this cell's own base URL; CNET_BASE_URL is not defined in
            # this cell and belongs to a different site.
            next_page = urljoin(BB_BASE_URL, href)
    return text, next_page

def get_full_review(first_page_url):
    """Collect the text of a multi-page write-up together with its title.

    The first page is downloaded once and used both for the title and as the
    first page of text. Following pages are reached through the ``next_page``
    value reported by ``get_review_page``.

    Args:
        first_page_url: URL of the first page.

    Returns:
        A ``(full_review, review_title)`` tuple. ``full_review`` is the
        concatenation of ``'\\n\\n' + page_text`` for every page that could be
        downloaded (an empty string when the first page could not be fetched).
        ``review_title`` is the stripped text of the first ``.headline``
        element, or ``'NO TITLE [<random id>]'`` when none is available.
    """
    first_html = fetch_webpage(first_page_url)

    review_title = None
    if first_html:
        # Name the parser explicitly, as the other helpers do, instead of
        # letting BeautifulSoup guess one.
        headlines = BeautifulSoup(first_html, 'lxml').select('.headline')
        if headlines:
            review_title = headlines[0].get_text().strip()
    if review_title is None:
        review_title = 'NO TITLE [{}]'.format(uuid())

    parts = []
    visited = set()
    page_url, html = first_page_url, first_html
    # Stop as soon as a page cannot be downloaded; fetch_webpage signals that
    # with None.
    while html:
        visited.add(page_url)
        text, page_url = get_review_page(html)
        parts.append('\n\n' + (text or ''))
        # A pager that points back to an already-seen page would otherwise
        # keep the loop running forever.
        if not page_url or page_url in visited:
            break
        html = fetch_webpage(page_url)
    return ''.join(parts), review_title
        
def uuid():
    """Return a random 5-character tag made of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=5)
    return ''.join(picked)

# Listing pages to crawl: page 1 lives at LISTING_URL itself, pages
# 2..LAST_LISTING_PAGE at LISTING_URL + '<n>/'.
# NOTE(review): this cell is labelled "Best Buy" and get_review_links uses
# Best Buy selectors, yet the listing URL still points at CNET — confirm the
# intended URL.
LISTING_URL = 'https://www.cnet.com/topics/smart-home/products/'
LAST_LISTING_PAGE = 157

# Maps title -> full text.
reviews = {}
for page in range(1, LAST_LISTING_PAGE + 1):
    listing_url = LISTING_URL if page == 1 else '{}{}/'.format(LISTING_URL, page)
    html = fetch_webpage(listing_url)
    if not html:
        # fetch_webpage gave up; skip this listing page instead of handing
        # None to the parser and aborting the whole crawl.
        print('Skipping page {}: listing could not be fetched'.format(page))
        continue
    links = get_review_links(html)
    for link in links:
        review, title = get_full_review(link)
        key = title
        # Titles are not unique; add a random tag until the key is free so an
        # earlier entry is never overwritten.
        while key in reviews:
            key = '{} ({})'.format(title, uuid())
        reviews[key] = review
        # Pause between items to keep the request rate low.
        time.sleep(1)

Could not connect to https://www.cnet.com/products/hestan-cue/review/. Retry count: 5
Could not connect to https://www.cnet.com/products/mayfield-robotics-kuri/preview/. Retry count: 5
Could not connect to https://www.cnet.com/products/first-alert-onelink-wi-fi-smoke-and-co-alarm/review/. Retry count: 5
Could not connect to https://www.cnet.com/topics/smart-home/products/35/. Retry count: 5
Could not connect to https://www.cnet.com/topics/smart-home/products/41/. Retry count: 5
Could not connect to https://www.cnet.com/products/pelco-ip110-series-camclosure-ip111-cwv22-network-surveillance-camera/. Retry count: 5


In [5]:
# Hand-entered sample: product title -> description text. Each value is a
# summary paragraph, a blank line, then feature blurbs separated by '.. '.
# NOTE(review): presumably copied from Best Buy product pages — confirm.
# The last value used to be split across two physical lines in the middle of
# the string literal; it is joined here so the literal is well-formed.
x = {
    'Ring - Video Doorbell 2 - Satin Nickel/Venetian Bronze': "Smart security you can trust. See who’s at the door and monitor your home when you’re not there with this video doorbell. The 2-way audio lets you listen and talk to anyone on your property, right from your mobile device. Plus, you can easily recharge the battery with the included power pack.\n\nDownload the free Ring app. See, hear and speak to visitors from anywhere. The app gives you instant mobile alerts and lets you answer the door even when you’re not at home. It works with iOS, Android and Windows 10 devices. 1080HD wide-angle video. See more of your space. Live View gives you an on-demand look at your property, plus the built-in microphone and speakers let you listen and talk with 2-way audio.. Adjustable motion sensors and infrared night vision. You’ll be notified when motion is detected, day or night. Plus, you can adjust the sensors to find the ideal setting for your home.. Install in any home. The Ring Video Doorbell 2 works in small and large homes alike, and can be hardwired or battery-operated. It includes 2 faceplates for your convenience.. Quick-release rechargeable battery. Charge the doorbell without removing it. The device comes with one rechargeable battery pack. Purchase an additional one and you’ll always be powered.",
    'Ring - Floodlight Cam - White': "Protect your property. When motion is detected, the Ring Floodlight Cam activates the floodlights and alerts you instantly. You’ll be able to watch and hear a live HD feed of what’s happening, plus you can talk through the speakers right from your phone or tablet.\n\nDownload the free Ring app. See, hear and speak to people or pets. The app lets you use Live View for on-demand HD video and 2-way audio and works with any iOS or Android device.. Ultrabright floodlights and siren alarm. When motion is detected, 2 floodlights illuminate. Control them and the 110-decibel alarm manually with the app.. Custom motion sensors and infrared night vision. Get instant alerts when motion is detected, day or night. Plus, you can create and customize your own motion zones.. Simple installation. Install it yourself. The cam includes all the tools you need to do the job, and it easily replaces existing wired floodlights.. Lifetime purchase protection. Ring replaces stolen cameras, so you’ll always be covered. You also receive a 30-day trial of Ring Video recording so you can save, share and review your footage.",
    'Arlo - Pro Add-On Indoor/Outdoor HD Wire Free Security Camera - White': "Expand the view of your security camera system with this Arlo Pro add-on HD security camera. The wire-free configuration allows for flexible placement options, and the included wall mount and hardware make it easy to set this camera up in your home. Featuring night vision, this Arlo Pro add-on HD security camera delivers a clear view even in low lighting conditions.\n\nAccess the system remotely. Personalize notifications, watch video, or make changes from your mobile device or computer.. Weatherproof wire-free construction. Let\'s you use the cameras both indoors and outdoors.. Rechargeable batteries. Allow you to keep your system running. Additional batteries available (sold separately).. 2-way communication. Built-in microphone and speaker let you hear what happens around the camera and talk back through your phone, no matter where you are.. Infrared night vision lets you see in the dark. Keep an eye on things up to 25' away when there's little to no light.. Viewing angle up to 130°. Expertly captures a wide angle of your property for exceptional security at all times.. Voice Control of Your Smart Home Security. Live Stream your Arlo cameras on the Amazon Echo Show using simple voice commands. All you have to do is ask!. Advanced motion detection. Automatically records once motion is detected, then sends you e-mail or app notifications. Customize motion sensitivity according to your needs.. Zoom in on details. The 8x digital zoom magnification lets you focus on specific parts of your home.. Base station with built-in siren. To alert you to trouble or unwanted visitors, with a 100+ decibel siren that can be controlled remotely, or triggered via motion or audio.. FREE Cloud storage. Let\'s you save recent activity to view at a later time. Keep 7 days of motion and/or audio triggered recordings for free.. Local backup storage. Connect a USB drive to the Arlo Pro base station for optional local backup storage.. Mountable design. For optimal placement on a wall."
}
x

{'Arlo - Pro Add-On Indoor/Outdoor HD Wire Free Security Camera - White': "Expand the view of your security camera system with this Arlo Pro add-on HD security camera. The wire-free configuration allows for flexible placement options, and the included wall mount and hardware make it easy to set this camera up in your home. Featuring night vision, this Arlo Pro add-on HD security camera delivers a clear view even in low lighting conditions.\n\nAccess the system remotely. Personalize notifications, watch video, or make changes from your mobile device or computer.. Weatherproof wire-free construction. Let's you use the cameras both indoors and outdoors.. Rechargeable batteries. Allow you to keep your system running. Additional batteries available (sold separately).. 2-way communication. Built-in microphone and speaker let you hear what happens around the camera and talk back through your phone, no matter where you are.. Infrared night vision lets you see in the dark. Keep an eye on things