## Scraping Divine Orb Price in Chaos Orbs (Buy and Sell) from Poe Ninja

**Steps**
1. Import necessary packages
2. Set up the web driver and the soup (parsed with lxml)
3. Extract the transaction types
4. Get symbols for each transaction
5. Get the values of each symbol
6. Make a dictionary of transaction types, symbols and their corresponding values
7. Cache in a DB

**Import necessary packages**

In [44]:
import pymongo
from bs4 import BeautifulSoup
from datetime import datetime
import time
from selenium import webdriver
# from selenium.webdriver.chrome.service import Service

**Set up the web driver and the soup (parsed with lxml)**

In [48]:
class Scraper:
    """Scrape Divine Orb buy/sell prices from a poe.ninja currency page.

    The page at ``divine_url`` is rendered with headless Chrome (Selenium),
    parsed with BeautifulSoup/lxml, reduced to a price dictionary and can
    then be stored in a local MongoDB collection.
    """

    def __init__(self, divine_url):
        """Remember the URL of the currency page to scrape."""
        self.divine_url = divine_url

    # Set up the web driver, and soup (parsed with lxml)
    def setup_driver_soup(self):
        """Load ``self.divine_url`` in headless Chrome and return the parsed page.

        Returns:
            A ``BeautifulSoup`` object built from the rendered page source.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(self.divine_url)
            return BeautifulSoup(driver.page_source, "lxml")
        finally:
            # Close the browser session even when loading or parsing fails,
            # otherwise a headless Chrome process is left behind.
            driver.quit()

    # find all "div" tags (with "layout-stack" as class attribute) in the "main" tag
    def get_div_tag(self):
        """Fetch the page and return one "layout-stack" div per ``<main>`` tag.

        An entry is ``None`` when a ``<main>`` tag contains no such div.
        """
        main_tags = self.setup_driver_soup().find_all("main")
        return [tag.find("div", class_="layout-stack") for tag in main_tags]

    def _collect_texts(self, method_name, find_elements, div_tags=None):
        """Gather the ``.text`` of every element ``find_elements`` yields per div.

        Args:
            method_name: Name of the public method, used in the "None found" message.
            find_elements: Callable taking a div and returning the matching elements.
            div_tags: Divs to search; the page is fetched when this is ``None``.

        Returns:
            The collected texts, in document order. Collection stops at the
            first ``None`` div.
        """
        if div_tags is None:
            div_tags = self.get_div_tag()

        texts = []
        for div in div_tags:
            if div is None:
                print(f"None found in '{method_name}' method...")
                break
            texts.extend(element.text for element in find_elements(div))
            print("Done!\n-------")
        return texts

    # Extract the transaction types
    def get_transaction_types(self, div_tags=None):
        """Return the text of every ``<h2>`` heading inside the divs.

        Args:
            div_tags: Already-fetched divs to search. When omitted, the page
                is fetched via ``get_div_tag()``.
        """
        print("Searching div for transaction types...")
        return self._collect_texts(
            "get_transaction_types",
            lambda div: div.find_all("h2"),
            div_tags,
        )

    # Get symbols for each transaction
    def get_symbols(self, div_tags=None):
        """Return the text of every ``<span data-variant="subdued">`` in the divs.

        Args:
            div_tags: Already-fetched divs to search. When omitted, the page
                is fetched via ``get_div_tag()``.
        """
        print("Searching div for symbols...")
        return self._collect_texts(
            "get_symbols",
            lambda div: div.find_all("span", {"data-variant": "subdued"}),
            div_tags,
        )

    # Get the values of each symbol
    def get_symbol_values(self, div_tags=None):
        """Return the text of every ``<div class="justify-center">`` in the divs.

        Args:
            div_tags: Already-fetched divs to search. When omitted, the page
                is fetched via ``get_div_tag()``.
        """
        print("Searching div for symbols values...")
        return self._collect_texts(
            "get_symbol_values",
            lambda div: div.find_all("div", class_="justify-center"),
            div_tags,
        )

    # date and time for current rates
    def get_timestamp(self):
        """Return the current local time as ``["YYYY-MM-DD", "HH:MM:SS"]``."""
        return datetime.today().strftime("%Y-%m-%d %H:%M:%S").split()

    # Make a dictionary of transaction types, symbols and their corresponding values
    def to_dict(self):
        """Build the price dictionary from a single fetch of the page.

        Returns:
            ``{type_0: {sym_0: v0, sym_1: v1}, type_1: {sym_2: v2, sym_3: v3},
            "date": ..., "time": ...}``, or an empty dict when fewer than two
            transaction types, four symbols or four values were found.

        Raises:
            ValueError: If one of the first four value texts is not a valid float.
        """
        # Fetch once and share the result, so all three lists describe the
        # same snapshot of the page and the browser is only started once.
        div_tags = self.get_div_tag()
        transaction_types = self.get_transaction_types(div_tags)
        symbols = self.get_symbols(div_tags)
        symbol_values = self.get_symbol_values(div_tags)

        if len(transaction_types) < 2 or len(symbols) < 4 or len(symbol_values) < 4:
            return {}

        price_dict = {
            transaction_types[0]: {
                symbols[0]: float(symbol_values[0]),
                symbols[1]: float(symbol_values[1]),
            },
        }
        price_dict[transaction_types[1]] = {
            symbols[2]: float(symbol_values[2]),
            symbols[3]: float(symbol_values[3]),
        }

        # Take a single timestamp so date and time belong to the same instant.
        date_part, time_part = self.get_timestamp()
        price_dict["date"] = date_part
        price_dict["time"] = time_part
        return price_dict

    # Cache in a DB
    def save_to_db(self, price_dict_):
        """Insert ``price_dict_`` into the ``poe_ninja.divine_orb`` collection.

        Connects to the MongoDB server at ``mongodb://localhost:27017/``.

        Args:
            price_dict_: Document to insert. pymongo adds an ``_id`` key to
                this dict in place when it has none.

        Returns:
            The ``divine_orb`` collection object the document was inserted into.
        """
        # NOTE(review): the client is left open because the returned collection
        # still depends on it; a new client is created on every call.
        divine_client = pymongo.MongoClient('mongodb://localhost:27017/')
        divine_db = divine_client['poe_ninja']
        orb_collection = divine_db['divine_orb']
        print("Caching data to database...")
        orb_collection.insert_one(price_dict_)
        return orb_collection
    


divine_url = "https://poe.ninja/challenge/currency/divine-orb"

scraper = Scraper(divine_url)


# Try up to three times; an empty dictionary from to_dict() counts as a
# failed attempt and triggers a retry after a one-second pause.
counter = 0
while counter < 3:
    price_dict = scraper.to_dict()

    if not price_dict:
        print(f"Attempt {counter+1}: 'scraper.to_dict()' returned an empty dictionary.\n----------------------------------------------")
        counter += 1
        time.sleep(1)
        continue

    # Non-empty result: store it, report the storage time and stop retrying.
    scraper.save_to_db(price_dict)
    now = datetime.now().strftime("%H:%M:%S")
    time.sleep(1)
    print(f"{divine_url} successfully scraped.\nData stored in database time: {now}")
    break

Searching div for transaction types...
Done!
-------
Searching div for symbols...
Done!
-------
Searching div for symbols values...
Done!
-------
Caching data to database...
https://poe.ninja/challenge/currency/divine-orb successfully scraped.
Data stored in database time: 12:38:21


In [1]:
import pandas as pd

# Load the exported project-timeline JSON into a DataFrame and display it.
# NOTE(review): absolute, machine-specific path; the file must exist locally.
timeline_json_path = r"C:\Users\abume\OneDrive - University of Greenwich\Term2\Project\Documents\ProjectProposedProjectTimeline.json"
see = pd.read_json(timeline_json_path)
see

Unnamed: 0,config,data,mark,encoding,title,$schema,datasets
view,"{'continuousWidth': 400, 'continuousHeight': 300}",,bar,,Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,
name,,data-19434af95ebcff3c1b5536d87d685874,bar,,Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,
x,,,bar,"{'field': 'start', 'type': 'quantitative'}",Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,
x2,,,bar,{'field': 'end'},Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,
y,,,bar,"{'field': 'Activities', 'type': 'nominal'}",Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,
data-19434af95ebcff3c1b5536d87d685874,,,bar,,Proposed Project Timeline (in weeks),https://vega.github.io/schema/vega-lite/v4.17....,"[{'Activities': 'Researching materials', 'star..."
