# 1. Import Modules and Data

In [12]:
# Auto-reload custom modules
%load_ext autoreload
%autoreload 2

# Import modules
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select

import configparser
import pandas as pd
import time
from tqdm import tqdm
import os
import json
import io

# Import custom modules
from src import process_jsons
from src import chrome_driver

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# setup config parser

config = configparser.ConfigParser()
config.read('config.ini')
    

[]

In [14]:
# Extract lists of dictionaries from .json files. 
# Each json file is comprised of one order. 

path_to_json = '../data/sample_json_orders' 
orders_dict_list = process_jsons.extract_jsons_from_directory(path_to_json)

In [17]:
# Filters each of the 

country_filter_list = ['US']
filtered_orders = process_jsons.filter_json(orders_dict_list, country_filter_list)

In [18]:
len(filtered_orders)

4

In [19]:
filtered_orders[0]


{'idempotency_key': '22ds4iQ423ZlvhR',
 'shipping_address': {'first_name': 'Ohshop',
  'last_name': 'Amazon Order',
  'address_line1': '498 Centerpoint Blvd',
  'address_line2': 'SOJEONGBO - KR0136926615',
  'zip_code': '19720',
  'city': 'New Castle',
  'state': 'DE',
  'country': 'US',
  'phone_number': '3023228792'},
 'client_notes': {'order_code': 'AFMAJO', 'retailer': 'amazon'},
 'payment_method': {'name_on_card': 'Jason Kim',
  'expiration_year': '2022',
  'expiration_month': '4',
  'use_gift': False},
 'retailer': 'amazon',
 'affiliate_info': {'tag': 'ohmyzip19-20'},
 'products': [{'product_id': 'B07CMKX3C7',
   'quantity': 2,
   'seller_selection_criteria': {'buy_box': True},
   'variants': []}],
 'shipping_method': 'cheapest',
 'billing_address': {'first_name': 'Jason M',
  'last_name': 'Kim',
  'address_line1': '21 WOODCREST CT',
  'address_line2': 'NEWARK DE',
  'zip_code': '19702',
  'city': 'New castle',
  'state': 'DE',
  'country': 'US',
  'phone_number': '7143807008'},


### 2.2 Function to Log into Amazon.com

In [9]:
def login_to_amazon(driver):
    logged_in = False
    
    while logged_in == False:
        # Open up amazon.com
        driver.get("https://www.amazon.com/")

        # Sign into amazon
        driver.find_element_by_xpath("//span[contains(text(),'Hello, Sign in')]").click()

        # Enter email address
        login_email_ele = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@id='ap_email']")))
        login_email_ele.click()
        WebDriverWait(driver, 5).until(lambda browser: login_email_ele.get_attribute('value') == '')

        login_email_ele.send_keys(config['amazon_login']['email'])

        driver.find_element_by_xpath("//input[@id='continue']").click()

        # Enter password
        login_pw_ele = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@id='ap_password']")))
        login_pw_ele.click()
        WebDriverWait(driver, 5).until(lambda browser: login_pw_ele.get_attribute('value') == '')

        login_pw_ele.send_keys(config['amazon_login']['password'])
        driver.find_element_by_xpath("//input[@name='rememberMe']").click()
        driver.find_element_by_xpath("//input[@id='signInSubmit']").click()
        logged_in = True

## 2.3 Function to Add Orders to Amazon Cart

In [8]:
def extract_search_results(orders_dict_list, driver, order_index, results_df):
    return_df = pd.DataFrame()
    
    # Iterates over each product for a given order
    for product in orders_dict_list[order_index]['products']:
        # Initialize order specific tracking variables
        product_entered_to_search = False
        went_to_link = False
        product_id_matches = False
        discovery_method = ""
        product_id = product['product_id']
        product_quant = product['quantity']
        json_dict = orders_dict_list[order_index]
        href_one = ""
        num_products = len(orders_dict_list[order_index]['products'])
        
        
        # Gets product id related information.
        product_id_bh = "bh/" + product_id
        product_id_dp = "dp/" + product_id
        
    
        # Enters product id into the search bar. 
        driver.get("https://www.amazon.com")
        search_bar = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@id='twotabsearchtextbox']")))
        search_bar.click()
        search_bar.clear()
        WebDriverWait(driver, 5).until(lambda browser: search_bar.get_attribute('value') == '')
        while product_entered_to_search == False:
            search_bar.click()
            search_bar.send_keys(product_id)
            if len(search_bar.get_attribute('value')) == len(product_id):
                product_entered_to_search = True
        search_bar.send_keys(u'\ue007') 

        # Collects all the results from the search using a variety of different methods.
        # Ensures that the search results load.
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//div[@class='s-result-list s-search-results sg-row']")))

        # Method 1: Span
        search_results = driver.find_elements_by_xpath("//span[@class='a-size-medium a-color-base a-text-normal']")
        if len(search_results) > 0:
            discovery_method = "span"

        # Method 2: Div 
        elif len(search_results) == 0:
            search_results = driver.find_elements_by_xpath("//div[@class='a-section a-spacing-medium']//a[@class='a-link-normal a-text-normal']")
            if len(search_results) > 0:
                discovery_method = "div"

        time.sleep(.5)

        # If there is only 1 result, make sure the product id is in the link url. Then, extract the href. 
        if (len(search_results) == 1):
            
            # Extracts the url for the search result
            prev_url = driver.current_url
            search_results[0].click()
            while(prev_url == driver.current_url):
                pass
            
            href_one = driver.current_url
            
            if(product_id_bh in href_one or product_id_dp in href_one):
                product_id_matches = True
            went_to_link = True

        elif len(search_results) > 1:
            multiple_hrefs = []

            for search_result in search_results:
                if discovery_method == "span":
                    result_href = search_result.find_element_by_xpath('..').get_attribute("href")
                    multiple_hrefs.append(result_href)
                    if(product_id_bh in result_href or product_id_dp in result_href):
                        product_id_matches = True
                        href_one = result_href
                        driver.get(result_href)
                        went_to_link = True
                        break
        
        # Select the quantity
        if(went_to_link == True and product_id_matches == True):
            if product_quant > 1:
                try:
                    select = Select(driver.find_element_by_xpath("//select[@id='quantity']"))
                    select.select_by_visible_text(f"{product_quant}")
                    
                except:
                    "Something strange happened"
                    
            driver.find_element_by_xpath("//input[@id='add-to-cart-button']").click() 
            time.sleep(3)

        # Append the result of the order attempt to a dictionary/DataFrame
        result_row_dict = {'order_index':order_index, 
                           'json_dict': json_dict,
                           'product_id': product_id,
                           'product_quant': product_quant, 
                           'num_products': num_products,
                           'href_link':href_one,
                           'went_to_link':went_to_link, 
                           'product_id_matches':product_id_matches,
                           'discovery_method':discovery_method,
                           'num_reults': len(search_results)}
        return_df = return_df.append(result_row_dict, ignore_index=True)
        
    results_df = results_df.append(return_df, ignore_index=True)
    return(results_df)

            
    

# 3. Running the Script

In [9]:
driver = chrome_driver.setup_chrome_driver()
login_to_amazon(driver)

# Creates a dataframe to track how each order interacts with amazon. 
orders_results_df = pd.DataFrame()

for i in tqdm(range(len(USA_orders_dict_list))):
    
    orders_results_df = extract_search_results(USA_orders_dict_list, driver, i, orders_results_df)
                                         

  0%|          | 0/4 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [15]:
orders_results_df

Unnamed: 0,discovery_method,href_link,json_dict,num_products,num_reults,order_index,product_id,product_id_matches,product_quant,went_to_link
0,span,https://www.amazon.com/Tempur-Pedic-Adaptable-...,"{'idempotency_key': '22ds4iQ423ZlvhR', 'shippi...",1.0,1.0,0.0,B07CMKX3C7,1.0,2.0,1.0
1,span,https://www.amazon.com/Christmas-Goodies-Draws...,"{'idempotency_key': 'nttfkg3oiSJ7Yjd', 'shippi...",1.0,1.0,1.0,B07YTX2VLP,1.0,1.0,1.0
2,span,https://www.amazon.com/Apple-Watch-GPS-38mm-Sp...,"{'idempotency_key': 'Zr2zqnNJCM9UrT', 'shippin...",1.0,1.0,2.0,B07K39FRSL,1.0,1.0,1.0


In [11]:
orders_results_df.iloc[4]['href_link']

'https://www.amazon.com/s?k=B0013L3XMM&ref=nb_sb_noss'

In [13]:
orders_results_df.iloc[0]['json_dict']

{'idempotency_key': '22ds4iQ423ZlvhR',
 'shipping_address': {'first_name': 'Ohshop',
  'last_name': 'Amazon Order',
  'address_line1': '498 Centerpoint Blvd',
  'address_line2': 'SOJEONGBO - KR0136926615',
  'zip_code': '19720',
  'city': 'New Castle',
  'state': 'DE',
  'country': 'US',
  'phone_number': '3023228792'},
 'client_notes': {'order_code': 'AFMAJO', 'retailer': 'amazon'},
 'payment_method': {'name_on_card': 'Jason Kim',
  'expiration_year': '2022',
  'expiration_month': '4',
  'use_gift': False},
 'retailer': 'amazon',
 'affiliate_info': {'tag': 'ohmyzip19-20'},
 'products': [{'product_id': 'B07CMKX3C7',
   'quantity': 2,
   'seller_selection_criteria': {'buy_box': True},
   'variants': []}],
 'shipping_method': 'cheapest',
 'billing_address': {'first_name': 'Jason M',
  'last_name': 'Kim',
  'address_line1': '21 WOODCREST CT',
  'address_line2': 'NEWARK DE',
  'zip_code': '19702',
  'city': 'New castle',
  'state': 'DE',
  'country': 'US',
  'phone_number': '7143807008'},
