# Scrape Amazon Cart Item Details

- Technologies: Requests, BeautifulSoup, Pandas, ExcelWriter, sqlite3, browser_cookie3

In [64]:
import browser_cookie3
import requests
from pandas import ExcelWriter
import pandas as pd
import sqlite3
from bs4 import BeautifulSoup

# Load the cookies Chrome has stored for .amazon.com so the cart request can
# be sent with them.
cookies = browser_cookie3.chrome(domain_name='.amazon.com')
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
}

with requests.session() as s:
    s.cookies = cookies
    # update() overrides the User-Agent while keeping the session's other
    # default headers, instead of replacing the whole header mapping.
    s.headers.update(headers)
    url = 'https://www.amazon.com/gp/cart/view.html?ref_=nav_cart'

    # Bound the wait and stop on an HTTP error status, so the message below
    # is only printed for a successful response.
    resp = s.get(url, timeout=30)
    resp.raise_for_status()
    print('resp text contains shopping cart data')

resp text contains shopping cart data


In [65]:
soup = BeautifulSoup(resp.content,'html.parser')
# Cart rows are matched by their exact class string. Requiring the data-asin
# attribute as well means a matching row without an ASIN is skipped instead of
# raising KeyError in the comprehension below.
data_asins = soup.find_all(
    'div',
    {'class': 'a-row sc-list-item sc-java-remote-feature', 'data-asin': True},
)
baseurl = 'https://www.amazon.com/gp/product/'
# Skip rows whose data-asin is present but empty; they would yield a URL with
# no product id.
asin_list=[baseurl+ asin['data-asin'] for asin in data_asins if asin['data-asin']]
asin_list

['https://www.amazon.com/gp/product/1491957662',
 'https://www.amazon.com/gp/product/1491920513',
 'https://www.amazon.com/gp/product/1484265750',
 'https://www.amazon.com/gp/product/1492055026',
 'https://www.amazon.com/gp/product/1449355730',
 'https://www.amazon.com/gp/product/1593279280',
 'https://www.amazon.com/gp/product/1491991739',
 'https://www.amazon.com/gp/product/1789951291',
 'https://www.amazon.com/gp/product/1787285219',
 'https://www.amazon.com/gp/product/1491985046',
 'https://www.amazon.com/gp/product/1786462583']

In [66]:
def get_items_webpage(url, timeout=30):
    """Download a page and return it parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A BeautifulSoup document built from the response body with the
        "lxml" parser.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5',
    }
    webpage = requests.get(url, headers=HEADERS, timeout=timeout)
    # Fail here on an error status rather than handing an error page to the
    # parser, where the failure would surface later as a missing element.
    webpage.raise_for_status()
    return BeautifulSoup(webpage.content, "lxml")

In [67]:
def get_item_detail(item_url):
    """Scrape title, price and main image URL from a product page.

    Args:
        item_url: Address of the product page.

    Returns:
        A tuple ``(title, price, imgurl)``. Title and price are the stripped
        text of their elements; imgurl is the ``src`` of the first image in
        the main image container. A field is ``None`` when its element is
        not found on the page.
    """
    soup = get_items_webpage(item_url)

    # Each lookup may return None if the page lacks the element, so check
    # before dereferencing; one incomplete page should not abort the scrape.
    title_tag = soup.find('span', {'id': 'productTitle'})
    title = title_tag.text.strip() if title_tag is not None else None

    price_tag = soup.find('div', {'class': 'a-column a-span6 a-text-right a-span-last'})
    price = price_tag.text.strip() if price_tag is not None else None

    image_box = soup.find('div', {'id': 'mainImageContainer'})
    image_tag = image_box.img if image_box is not None else None
    imgurl = image_tag.get('src') if image_tag is not None else None

    return title, price, imgurl

In [70]:
# Spot-check the scraper on a single entry of asin_list (index 9).
get_item_detail(asin_list[9])

('Mining the Social Web: Data Mining Facebook, Twitter, LinkedIn, Instagram, GitHub, and More',
 '$32.99',
 'https://images-na.ssl-images-amazon.com/images/I/51HTwPZsE3L._SX218_BO1,204,203,200_QL40_ML2_.jpg')

In [73]:
def get_allitems_detail(urls=None):
    """Scrape every product URL and collect the results in a DataFrame.

    Args:
        urls: Iterable of product page URLs. Defaults to the notebook-level
            ``asin_list`` when omitted.

    Returns:
        A DataFrame with one row per URL and the columns ``ProductLinks``,
        ``Title``, ``Price`` and ``ImageLinks``, in that order.
    """
    if urls is None:
        urls = asin_list

    columns = ['ProductLinks', 'Title', 'Price', 'ImageLinks']

    # Build one row per URL so each link stays paired with its own details,
    # and the frame is created from fresh data rather than the shared list.
    rows = []
    for url in urls:
        title, price, image_url = get_item_detail(url)
        rows.append((url, title, price, image_url))

    return pd.DataFrame(rows, columns=columns)

In [74]:

# Scrape every URL in asin_list and display the resulting table.
get_allitems_detail()
    

Unnamed: 0,ProductLinks,Title,Price,ImageLinks
0,https://www.amazon.com/gp/product/1491957662,Python for Data Analysis: Data Wrangling with ...,$28.01,https://images-na.ssl-images-amazon.com/images...
1,https://www.amazon.com/gp/product/1491920513,Data Visualization with Python and JavaScript:...,$23.02,https://images-na.ssl-images-amazon.com/images...
2,https://www.amazon.com/gp/product/1484265750,Getting Structured Data from the Internet: Run...,$44.99,https://images-na.ssl-images-amazon.com/images...
3,https://www.amazon.com/gp/product/1492055026,High Performance Python: Practical Performant ...,$23.69,https://images-na.ssl-images-amazon.com/images...
4,https://www.amazon.com/gp/product/1449355730,"Learning Python, 5th Edition",$33.86,https://images-na.ssl-images-amazon.com/images...
5,https://www.amazon.com/gp/product/1593279280,"Python Crash Course, 2Nd Edition: A Hands-On, ...",$21.00,https://images-na.ssl-images-amazon.com/images...
6,https://www.amazon.com/gp/product/1491991739,Flask Web Development: Developing Web Applicat...,$28.89,https://images-na.ssl-images-amazon.com/images...
7,https://www.amazon.com/gp/product/1789951291,Flask Framework Cookbook: Over 80 proven recip...,$25.49,https://images-na.ssl-images-amazon.com/images...
8,https://www.amazon.com/gp/product/1787285219,Python Web Scraping Cookbook: Over 90 proven r...,$31.00,https://images-na.ssl-images-amazon.com/images...
9,https://www.amazon.com/gp/product/1491985046,"Mining the Social Web: Data Mining Facebook, T...",$32.99,https://images-na.ssl-images-amazon.com/images...


## Saving to an Excel Sheet

In [76]:
# The context manager saves and closes the workbook on exit, even on error;
# ExcelWriter.save() is not available in current pandas releases.
with ExcelWriter('AmazonCartItems.xlsx') as writer:
    # sheet_name is passed by keyword because the positional form is deprecated.
    get_allitems_detail().to_excel(writer, sheet_name='Sheet5')

## Saving to a Database

In [77]:
data = get_allitems_detail()

conn = sqlite3.connect('CartDb.db')
try:
    # Write to the same table that is read back below. if_exists='replace'
    # lets to_sql drop and recreate the table, so re-running the cell does not
    # fail on an already existing table and no separate CREATE TABLE is needed.
    data.to_sql('Mobiles', conn, if_exists='replace', index=False)
    conn.commit()
    saved_items = pd.read_sql('select * from Mobiles', conn)
finally:
    # Release the database file even if writing or reading fails.
    conn.close()

# Last expression of the cell: display what was stored.
saved_items