# Part 1 - ipstack and MongoDB

In [55]:
from pymongo import MongoClient ## For interaction with MongoDB

In [56]:
## Connect to the MongoDB server on localhost:27017.
## Only one client is created: an extra bare MongoClient() that is immediately
## overwritten would just leave an unused, never-closed client behind.
client = MongoClient("mongodb://localhost:27017/")

In [57]:
## Handles to the msba database and its ip_addresses collection
db = client.msba
collection = db.ip_addresses

## Sample document to insert
doc = dict(ip="192.168.1.1", city="Davis", zip="95616")

## Insert the single document; insert1 holds the result returned by insert_one
insert1 = collection.insert_one(doc)

## Read back every document in the collection to confirm the insert
cursor = collection.find({})
for document in cursor:
    print(document)

{'_id': ObjectId('65f0db18cb5a9aeb78cccb83'), 'ip': '192.168.1.1', 'city': 'Davis', 'zip': '95616'}


In [49]:
import getpass
import json
import os

import requests
from bs4 import BeautifulSoup

## The ipstack access key is a credential, so it is not stored in the notebook.
## It is read from the IPSTACK_ACCESS_KEY environment variable, with an
## interactive prompt as a fallback when the variable is not set.
access_key = os.environ.get("IPSTACK_ACCESS_KEY") or getpass.getpass("ipstack access key: ")

data_dict = [] ## List of parsed JSON responses, one per looked-up IP address

ip = ["8.8.8.8", "128.120.0.25", "128.32.12.14", "64.165.72.144", "209.129.88.187"] ## last one is UC Law's public IP

for i in ip:
    ## Endpoint for this IP; the query string is built by requests from `params`
    url_api = f"http://api.ipstack.com/{i}"

    page = requests.get(
        url_api,
        params={"access_key": access_key, "output": "json", "fields": "main"},
        timeout=10,  ## do not hang forever on a stalled connection
    )
    page.raise_for_status()  ## fail loudly on HTTP-level errors

    ## Decode the JSON body directly. Passing it through an HTML parser first
    ## is unnecessary and can alter the payload before it is decoded.
    json_dict = page.json()

    ## NOTE(review): ipstack appears to report API-level failures (bad key, quota)
    ## as a JSON body containing "success": false — confirm against the API docs.
    if json_dict.get("success") is False:
        raise RuntimeError(f"ipstack lookup failed for {i}: {json_dict.get('error')}")

    data_dict.append(json_dict)  ## Collect the parsed response

In [58]:
## Show the city and zip code of every collected ipstack response,
## followed by two blank lines between entries
for entry in data_dict:
    print(f"City: {entry['city']}\nZip: {entry['zip']}\n\n")

City: Glenmont
Zip: 44628


City: Davis
Zip: 95616


City: Berkeley
Zip: 94705


City: Florin
Zip: 95819


City: San Francisco
Zip: 94132




In [59]:
## Reduce each API response to the stored fields and push them to MongoDB

keys = ['ip', 'city', 'zip']  ## Fields copied from every response

## One document per response, containing only the fields listed in `keys`
data = [{key: response[key] for key in keys} for response in data_dict]

## Show the documents before inserting them
print("Check data:", data, "\n", sep="\n")

insert2 = collection.insert_many(data) ## Batch insert into MongoDB

## Print every document now in the collection to verify the insert
cursor = collection.find({})
for document in cursor:
    print(document)

Check data:
[{'ip': '8.8.8.8', 'city': 'Glenmont', 'zip': '44628'}, {'ip': '128.120.0.25', 'city': 'Davis', 'zip': '95616'}, {'ip': '128.32.12.14', 'city': 'Berkeley', 'zip': '94705'}, {'ip': '64.165.72.144', 'city': 'Florin', 'zip': '95819'}, {'ip': '209.129.88.187', 'city': 'San Francisco', 'zip': '94132'}]


{'_id': ObjectId('65f0db18cb5a9aeb78cccb83'), 'ip': '192.168.1.1', 'city': 'Davis', 'zip': '95616'}
{'_id': ObjectId('65f0db25cb5a9aeb78cccb84'), 'ip': '8.8.8.8', 'city': 'Glenmont', 'zip': '44628'}
{'_id': ObjectId('65f0db25cb5a9aeb78cccb85'), 'ip': '128.120.0.25', 'city': 'Davis', 'zip': '95616'}
{'_id': ObjectId('65f0db25cb5a9aeb78cccb86'), 'ip': '128.32.12.14', 'city': 'Berkeley', 'zip': '94705'}
{'_id': ObjectId('65f0db25cb5a9aeb78cccb87'), 'ip': '64.165.72.144', 'city': 'Florin', 'zip': '95819'}
{'_id': ObjectId('65f0db25cb5a9aeb78cccb88'), 'ip': '209.129.88.187', 'city': 'San Francisco', 'zip': '94132'}


# Part 2 - eBay + Selenium

In [3]:
## Load necessary libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup
import re
import os

In [21]:
PAUSE_SECONDS = 5  ## Fixed wait after every browser action before the next one

url = 'https://www.ebay.com/'

## Ordered steps applied after the search, each followed by a pause:
##   ("click", xpath)  -> click the filter checkbox located by the XPath
##   ("scroll", y)     -> scroll the window to vertical offset y
filter_steps = [
    ("click", '//li[@name="Network"]//input[@type="checkbox"][@aria-label="Unlocked"]'),  ## Network: Unlocked
    ("click", '//li[@name="Brand"]//input[@type="checkbox"][@aria-label="LG"]'),  ## Brand: LG
    ("scroll", 500),
    ("click", '//li[@name="Screen%2520Size"]//input[@type="checkbox"][@aria-label="6 in or More"]'),  ## Screen size
    ("scroll", 1000),
    ("click", '//li[@name="Storage%2520Capacity"]//input[@type="checkbox"][@aria-label="128 GB"]'),  ## Storage capacity
    ("scroll", 1000),
    ("click", '//li[@name="Lock%2520Status"]//input[@type="checkbox"][@aria-label="Factory Unlocked"]'),  ## Lock status
    ("scroll", 1500),
    ("click", '//input[@type="checkbox"][@aria-label="Used"]'),  ## Condition: Used
]

driver = webdriver.Chrome()  ## Open chrome session

try:
    driver.get(url) ## go to url
    time.sleep(PAUSE_SECONDS)

    ## Type the query into the search box; the trailing newline submits it
    search_box = driver.find_element(By.CSS_SELECTOR, 'input[aria-label = "Search for anything"]')
    search_box.send_keys('Cell Phones\n')
    time.sleep(PAUSE_SECONDS)

    ## Apply the filters one by one
    for action, target in filter_steps:
        if action == "scroll":
            driver.execute_script(f"window.scrollTo(0,{target})")
        else:
            driver.find_element(By.XPATH, target).click()
        time.sleep(PAUSE_SECONDS)

    ## Create BeautifulSoup instance of the filtered results page
    web_page = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    ## Always close the browser, even when an element lookup above fails,
    ## so a failed run does not leave a Chrome process behind
    driver.quit()

In [22]:
## Write the scraped results page to disk as pretty-printed, UTF-8 encoded HTML
with open('unlocked-lg-128gb-used.html', 'w', encoding='utf-8') as html_out:
    html_out.write(web_page.prettify())

In [4]:
## Read the saved results page back from disk
with open('unlocked-lg-128gb-used.html', 'r', encoding='utf-8') as html_in:
    page = html_in.read()

## Parse the saved HTML for the extraction cells
soup = BeautifulSoup(page, 'html.parser')

In [10]:
## Get all titles
headings = soup.find_all('span', {'role': 'heading'})
titles = [heading.text.strip() for heading in headings]

## Get seller name and seller rating
## The patterns are raw strings so that escapes such as \( and \- reach the regex
## engine unchanged instead of being flagged as invalid string escape sequences.
sellers = soup.find_all('span', {'class': 's-item__seller-info-text'})
seller = []
rating = []

for i in sellers:
    details = i.text.strip()
    ## re.findall returns a list, so each entry here is a list of all matches
    seller.append(re.findall(r"([a-zA-Z0-9\-\_]*) \(", details))
    rating.append(re.findall(r"\) ([0-9\.\%]*)", details))

## Get price details
## NOTE(review): the [1:61] slice presumably skips a leading non-listing price
## element and caps the result at 60 listings — confirm against the saved page.
prices = soup.find_all('span', {'class': 's-item__price'})[1:61]
## Drop all whitespace, then restore spacing around "to" in price ranges
price = [''.join(rate.text.strip().split()).replace('to', ' to ') for rate in prices]

## Get shipping costs (first 60 matching elements)
shipping_costs = soup.find_all('span', {'class': 's-item__shipping s-item__logisticsCost'})[:60]
shipping = [cost.text.strip() for cost in shipping_costs]

In [111]:
pattern = re.compile(r'item[0-9A-Za-z]+')
phones = soup.find_all(id=pattern) ## All phones are contained in this

## Seller text patterns, compiled once. Raw strings keep the regex escapes intact
## instead of relying on invalid string escape sequences.
seller_name_re = re.compile(r"([a-zA-Z0-9\-\_]*) \(")   ## text right before " ("
seller_rating_re = re.compile(r"\) ([0-9\.\%]*)")       ## text right after ") "

## Parallel lists: index k of every list describes the k-th listing
title, seller, rating, price, shipping, returns, bids = [], [], [], [], [], [], []


for phone in phones:

    ## Get item description/title (placeholder keeps the lists aligned if it is missing)
    heading = phone.find('span', {'role': 'heading'})
    heading_text = heading.text.strip() if heading is not None else 'Title not available'
    title.append(heading_text)
    print(f'Title: {heading_text}')

    ## Get seller and seller rating; each pattern is evaluated once per listing
    seller_tag = phone.find('span', {'class': 's-item__seller-info-text'})
    details = seller_tag.text.strip() if seller_tag is not None else ''
    name_match = seller_name_re.search(details)
    rating_match = seller_rating_re.search(details)
    seller_name = name_match.group(1) if name_match else 'Seller not available'
    seller_rating = rating_match.group(1) if rating_match else 'Rating not available'
    seller.append(seller_name)
    print('Seller: ', seller_name)
    rating.append(seller_rating)
    print('Seller rating: ', seller_rating)

    ## Get price: drop all whitespace, then restore spacing around "to" in ranges
    price_tag = phone.find('span', {'class': 's-item__price'})
    if price_tag is None:
        cost = 'Price not available'
    else:
        cost = ''.join(price_tag.text.split()).replace('to', ' to ')
    price.append(cost)
    print(f'Price: {cost}')

    ## Get shipping cost
    shipping_info = phone.find('span', {'class': 's-item__shipping s-item__logisticsCost'})
    if shipping_info is None:
        shipping.append('Shipping information not available')
        print('Shipping information: No information')
    else:
        shipping_text = shipping_info.text.strip()
        shipping.append(shipping_text)
        print(f'Shipping information: {shipping_text}')

    ## Get returns info
    return_info = phone.find('span', {'class': 's-item__free-returns s-item__freeReturnsNoFee'})
    if return_info is None:
        returns.append('No return info')
        print('Return information: No return info')
    else:
        return_text = return_info.text.strip()
        returns.append(return_text)
        print(f'Return information: {return_text}')

    ## Get bids info (only present on some listings)
    bids_info = phone.find('span', {'class': 's-item__bids s-item__bidCount'})
    if bids_info is None:
        bids.append('Bids not applicable')
        print('Number of bids : Bids not applicable')
    else:
        ## Replace whitespace runs, dots and apostrophes with single spaces
        string = re.sub(r'\s+|[.\']', " ", bids_info.text.strip())
        bids.append(string)
        print(f'Number of bids : {string}')
    print("\n")

Title: LG Velvet 5G 128GB Grey LM-G900TM (T-Mobile Unlocked) Reduced Price zW7656
Seller:  soonersoft
Seller rating:  98.8%
Price: $104.19
Shipping information: Free shipping
Return information: Free returns
Number of bids : Bids not applicable


Title: LG Velvet 5G - LM-G900VMP 128GB Gray Verizon + GSM Unlocked LTE Smartphone GREAT
Seller:  discountphonedepot
Seller rating:  97.5%
Price: $129.70
Shipping information: Free shipping
Return information: Free returns
Number of bids : Bids not applicable


Title: LG K61 LG-Q630UM 128GB Unlocked White Android Smartphone
Seller:  monkeytote
Seller rating:  98.5%
Price: $89.99
Shipping information: Free shipping
Return information: No return info
Number of bids : Bids not applicable


Title: LG Velvet 5G 128GB Gray LM-G900TM (Unlocked) - GSM World Phone - DV8150
Seller:  soonersoft
Seller rating:  98.8%
Price: $125.98
Shipping information: Free shipping
Return information: Free returns
Number of bids : Bids not applicable


Title: LG Velvet 5

In [112]:
## Alternatively present the scraped listings as a DataFrame
import pandas as pd

## Column label -> list of per-listing values collected by the extraction loop
column_names = [
    "Title",
    "Seller",
    "Seller rating (percentage)",
    "Price",
    "Shipping information (if available)",
    "Return information (if available)",
    "Number of bids (if applicable)",
]
column_values = [title, seller, rating, price, shipping, returns, bids]

data_dict = dict(zip(column_names, column_values))

pd.DataFrame(data_dict)

Unnamed: 0,Title,Seller,Seller rating (percentage),Price,Shipping information (if available),Return information (if available),Number of bids (if applicable)
0,LG Velvet 5G 128GB Grey LM-G900TM (T-Mobile Un...,soonersoft,98.8%,$104.19,Free shipping,Free returns,Bids not applicable
1,LG Velvet 5G - LM-G900VMP 128GB Gray Verizon +...,discountphonedepot,97.5%,$129.70,Free shipping,Free returns,Bids not applicable
2,LG K61 LG-Q630UM 128GB Unlocked White Android ...,monkeytote,98.5%,$89.99,Free shipping,No return info,Bids not applicable
3,LG Velvet 5G 128GB Gray LM-G900TM (Unlocked) -...,soonersoft,98.8%,$125.98,Free shipping,Free returns,Bids not applicable
4,LG Velvet 5G LM-G900UM G900 (Unlocked) 128GB 6...,wikiwoo,98.9%,$124.95,Free shipping,Free returns,Bids not applicable
5,LG G8 Thinq - LM-G820 - 128GB - Black (T-Mobil...,katiesriches,97.1%,$30.00,+$6.15 shipping,No return info,0 bids ·
6,LG G8 ThinQ 4G 5G LM-G820QM 128GB Smartphone -...,electronicsradar,96.9%,$48.00,Free shipping,No return info,30 bids ·
7,LG G8 ThinQ - 128GB - Aurora Black (Unlocked) ...,bstco,99.5%,$85.99,Free shipping,Free returns,Bids not applicable
8,LG RARE FLIP PHONE Ux280-WINE RED VINTAGE RETR...,theworldofstephjones,100%,$56.99,+$6.05 shipping,No return info,Bids not applicable
9,"LG Velvet 5G G900UM 128GB Aurora Silver 6.8"" S...",cucoins88,99.6%,$92.75,+$6.15 shipping,Free returns,Bids not applicable
