# Automated Web Scrape Practice

In [15]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd

In [16]:
# Start a Splinter-controlled Chrome session used for all page visits below
browser = Browser(driver_name='chrome')

In [17]:
# Visit the SSENSE listing page for women's MM6 Maison Margiela t-shirts
url = 'https://www.ssense.com/en-us/women/designers/mm6-maison-margiela/t-shirts'
browser.visit(url)

In [19]:
# Take the page source from the browser and parse it with BeautifulSoup
html = browser.html
html_soup = soup(html, 'html.parser')

# Price spans are looked up by a numbered data-test attribute
# (productCurrentPrice0 ... productCurrentPrice16); a missing index is
# reported and skipped.
mm_tshirt_prices = []
for idx in range(17):
    price_tag = html_soup.find(
        'span',
        class_='s-text',
        attrs={'data-test': f'productCurrentPrice{idx}'},
    )
    if not price_tag:
        print(f'Error: price {idx} not found.')
        continue
    mm_tshirt_prices.append(price_tag.text.strip())

print(mm_tshirt_prices)

['$545', '$390', '$390', '$390', '$190', '$185', '$325', '$355', '$325', '$210', '$210', '$185', '$309', '$266', '$207', '$156', '$153']


In [20]:
# Build the price table from the scraped MM6 price strings (e.g. '$545').
tshirt_df = pd.DataFrame(mm_tshirt_prices, columns=["MM6"])

# Convert the price strings to integers.
# regex=False makes '$' a literal character instead of a regex end-of-string
# anchor, independent of the pandas default for `regex`. Thousands separators
# are removed as well so a price such as '$1,050' still converts.
tshirt_df['MM6'] = (
    tshirt_df['MM6']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)
tshirt_df

  tshirt_df['MM6'] = tshirt_df['MM6'].str.replace('$','').astype(int)


Unnamed: 0,MM6
0,545
1,390
2,390
3,390
4,190
5,185
6,325
7,355
8,325
9,210


In [21]:
# Summary statistics for the MM6 price column
mm_tshirt_min = tshirt_df['MM6'].min()
mm_tshirt_max = tshirt_df['MM6'].max()
mm_tshirt_avg = tshirt_df['MM6'].mean()

print(f'The minimum price for a MM6 tshirt is ${mm_tshirt_min}.')
print(f'The maximum price for a MM6 tshirt is ${mm_tshirt_max}.')
# The mean is a float, so round it to cents for display
print(f'The average price for a MM6 tshirt is ${round(mm_tshirt_avg, 2)}.')

The minimum price for a MM6 tshirt is $153.
The maxiumum price for a MM6 tshirt is $545.
The average price for a MM6 tshirt is $281.82.


In [22]:
# Next listing: women's Diesel t-shirts on SSENSE
url_2 = 'https://www.ssense.com/en-us/women/designers/diesel/t-shirts'

# Close the current Chrome session and open a new one before loading the page
browser.quit()
browser = Browser('chrome')
browser.visit(url_2)

In [23]:
# Take the page source from the browser and parse it with BeautifulSoup
html = browser.html
html_soup = soup(html, 'html.parser')

# Price spans are looked up by a numbered data-test attribute
# (productCurrentPrice0 ... productCurrentPrice16); a missing index is
# reported and skipped.
diesel_tshirt_prices = []
for idx in range(17):
    price_tag = html_soup.find(
        'span',
        class_='s-text',
        attrs={'data-test': f'productCurrentPrice{idx}'},
    )
    if not price_tag:
        print(f'Error: price {idx} not found.')
        continue
    diesel_tshirt_prices.append(price_tag.text.strip())

print(diesel_tshirt_prices)

['$125', '$125', '$105', '$125', '$125', '$175', '$150', '$174', '$100', '$90', '$99', '$87', '$59', '$214', '$134', '$78', '$84']


In [24]:
# Add the scraped Diesel price strings as a new column. Assigning a list
# requires it to have exactly as many entries as the table has rows.
tshirt_df['diesel'] = diesel_tshirt_prices

# Convert the price strings to integers.
# regex=False makes '$' a literal character instead of a regex end-of-string
# anchor, independent of the pandas default for `regex`. Thousands separators
# are removed as well so a price such as '$1,050' still converts.
# astype(int) raises on a missing or non-numeric value, so the resulting
# integer column never contains NaN.
tshirt_df['diesel'] = (
    tshirt_df['diesel']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)
tshirt_df

  tshirt_df['diesel'] = tshirt_df['diesel'].str.replace('$','').astype(int).fillna(0)


Unnamed: 0,MM6,diesel
0,545,125
1,390,125
2,390,105
3,390,125
4,190,125
5,185,175
6,325,150
7,355,174
8,325,100
9,210,90


In [26]:
# Summary statistics for the Diesel price column
diesel_tshirt_min = tshirt_df['diesel'].min()
diesel_tshirt_max = tshirt_df['diesel'].max()
diesel_tshirt_avg = tshirt_df['diesel'].mean()

print(f'The minimum price for a Diesel tshirt is ${diesel_tshirt_min}.')
print(f'The maximum price for a Diesel tshirt is ${diesel_tshirt_max}.')
# The mean is a float, so round it to cents for display
print(f'The average price for a Diesel tshirt is ${round(diesel_tshirt_avg, 2)}.')

The minimum price for a Diesel tshirt is $59.
The maxiumum price for a Diesel tshirt is $214.
The average price for a Diesel tshirt is $120.53.


In [27]:
# Next listing: women's Blumarine t-shirts on SSENSE
url_3 = 'https://www.ssense.com/en-us/women/designers/blumarine/t-shirts'

# Close the current Chrome session and open a new one before loading the page
browser.quit()
browser = Browser('chrome')
browser.visit(url_3)

In [28]:
# Take the page source from the browser and parse it with BeautifulSoup
html = browser.html
html_soup = soup(html, 'html.parser')

# Price spans are looked up by a numbered data-test attribute
# (productCurrentPrice0 ... productCurrentPrice16); a missing index is
# reported and skipped.
blu_tshirt_prices = []
for idx in range(17):
    price_tag = html_soup.find(
        'span',
        class_='s-text',
        attrs={'data-test': f'productCurrentPrice{idx}'},
    )
    if not price_tag:
        print(f'Error: price {idx} not found.')
        continue
    blu_tshirt_prices.append(price_tag.text.strip())

print(blu_tshirt_prices)

['$550', '$168', '$924', '$117', '$176', '$114', '$119', '$122', '$119', '$114', '$114', '$130', '$172', '$102', '$231', '$90', '$103']


In [34]:
# Add the scraped Blumarine price strings as a new column. Assigning a list
# requires it to have exactly as many entries as the table has rows.
tshirt_df['blumarine'] = blu_tshirt_prices

# Convert the price strings to integers.
# regex=False makes '$' a literal character instead of a regex end-of-string
# anchor, independent of the pandas default for `regex`. Thousands separators
# are removed as well so a price such as '$1,050' still converts.
tshirt_df['blumarine'] = (
    tshirt_df['blumarine']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)
tshirt_df

  tshirt_df['blumarine'] = tshirt_df['blumarine'].str.replace('$','').astype(int)


Unnamed: 0,MM6,diesel,blumarine
0,545,125,550
1,390,125,168
2,390,105,924
3,390,125,117
4,190,125,176
5,185,175,114
6,325,150,119
7,355,174,122
8,325,100,119
9,210,90,114


In [32]:
# Summary statistics for the Blumarine price column
blu_tshirt_min = tshirt_df['blumarine'].min()
blu_tshirt_max = tshirt_df['blumarine'].max()
blu_tshirt_avg = tshirt_df['blumarine'].mean()

print(f'The minimum price for a blumarine tshirt is ${blu_tshirt_min}.')
print(f'The maximum price for a blumarine tshirt is ${blu_tshirt_max}.')
# The mean is a float, so round it to cents for display
print(f'The average price for a blumarine tshirt is ${round(blu_tshirt_avg, 2)}.')

The minimum price for a blumarine tshirt is $90.
The maxiumum price for a blumarine tshirt is $924.
The average price for a blumarine tshirt is $203.82.


In [33]:
# End the automated browsing session by quitting the Splinter browser
browser.quit()