In [None]:
# Create a venv.
# Start with requests to fetch the page.
# Host a Flask site on the intranet with a listener.
# The listener fetches (curls) the page from BJJHQ.
# The listener writes the fetched page to a Redis DB (or should it be MongoDB?) on the Flask site.
# Do some processing using Beautiful Soup.
# API for creating custom rules? Use FastAPI and uvicorn.
# Send email according to the custom rules using the email and smtplib packages.

# Before continuing, create a venv and install the following packages:
# * dotenv (python-dotenv)
# * bs4 (BeautifulSoup4)
# * requests
# * ipykernel
# * redis
# * flask


# Don't forget to run the following:
# >ipython kernel install --user --name=bjj-hq-product-analyzer

In [2]:
# TODO: add an extended venv environment preparer here

In [15]:
# Import dependencies and load environment variables in
import os, venv, requests, bs4, datetime
from bs4 import BeautifulSoup as soup
from dotenv import load_dotenv
from datetime import datetime

# Populate the process environment via python-dotenv before reading DIR.
load_dotenv()

# DIR is the base directory this notebook works in. os.getenv returns None
# when the variable is missing, which would otherwise surface as an opaque
# TypeError inside os.path.join below -- fail fast with a clear message.
dir = os.getenv("DIR")
if dir is None:
    raise RuntimeError(
        "Environment variable DIR is not set; define it in your .env file "
        "or shell before running this notebook."
    )

# Downloaded HTML snapshots are stored under <DIR>/html-files.
dl_dir = os.path.join(dir, 'html-files')

In [16]:
def format_current_time(when=None):
    """Format a timestamp as ``MM-DD-YYYY_HHMM`` for use in file names.

    Args:
        when: Optional ``datetime`` to format. Defaults to the current local
            time (``datetime.now()``), which is the original behaviour.

    Returns:
        str: The formatted timestamp, e.g. ``'01-05-2023_0907'``. Resolution
        is one minute, so two calls within the same minute yield the same
        string.
    """
    if when is None:
        when = datetime.now()
    return when.strftime('%m-%d-%Y_%H%M')

In [17]:
# Only run this once a day for now. We don't want to spam them and get IP banned.
url = 'http://bjjhq.com'

# Error handling per
# https://stackoverflow.com/questions/16511337/correct-way-to-try-except-using-python-requests-module
# RequestException is the base class of HTTPError, Timeout and
# TooManyRedirects, so this single handler covers all of them. Split it into
# specific handlers if retries or friendlier messages are needed later.
try:
    # requests applies no timeout by default; without one a stalled server
    # would hang this cell indefinitely. The value is in seconds.
    response = requests.get(url, timeout=30)
    # Turn 4xx/5xx responses into an HTTPError instead of parsing an error page.
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    raise SystemExit(e)

response_text = response.text
# Name the parser explicitly: otherwise BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the tree could differ between
# environments. 'html.parser' matches the parser used when re-reading the
# saved snapshot later in this notebook.
bjj_soup = soup(response_text, 'html.parser')

In [18]:
def write_textfile(dir, fname, text, encoding=None):
    """Create a new text file containing ``text``, never overwriting.

    If the target file already exists, nothing is written and a warning is
    printed instead of raising.

    Args:
        dir: Directory in which to create the file. It must already exist.
        fname: Name of the file to create inside ``dir``.
        text: String content to write.
        encoding: Optional text encoding passed to ``open``. The default
            (``None``) keeps the platform default, as before.

    Returns:
        None

    Raises:
        FileNotFoundError: If ``dir`` does not exist.
    """
    file_loc = os.path.join(dir, fname)
    try:
        # Mode 'x' creates the file exclusively: the existence check and the
        # creation happen in a single step, so a file that appears between a
        # separate isfile() check and open(..., 'w') can no longer be
        # silently truncated.
        with open(file_loc, 'x', encoding=encoding) as f:
            f.write(text)
    except FileExistsError:
        print(f"WARNING: Could not create file {fname} because it already exists.")

In [19]:
# The snapshot file name is stamped with the current time.
dl_fname = f'soup_{format_current_time()}.txt'
dl_fname_loc = os.path.join(dl_dir, dl_fname)

# makedirs(..., exist_ok=True) also creates missing parent directories and
# does not fail if the directory is created between a separate isdir() check
# and the create call.
# TODO: code something here for permissions error if can't create the dir
os.makedirs(dl_dir, exist_ok=True)
write_textfile(dl_dir, dl_fname, response_text)

# Re-parse from the saved file so later cells work from what is on disk.
with open(dl_fname_loc) as f:
    raw_soup = soup(f, 'html.parser')

In [20]:
class BJJHQProduct:
    """Product information scraped from the parsed HTML of bjjhq.com.

    All three attributes are ``None`` until a page has been supplied:

    * ``data`` -- the parsed page; it must expose ``.h1`` and
      ``.find_all(name)`` (e.g. a BeautifulSoup object).
    * ``product_name`` -- text of the page's ``<h1>`` element.
    * ``price`` -- text of the first ``<em>`` element whose stripped text
      starts with ``$``.
    """

    def __init__(self, site_soup):
        """Store ``site_soup`` and extract the product fields from it.

        Args:
            site_soup: Parsed HTML of bjjhq.com, or ``None`` to create an
                empty instance that is filled in later via ``set_data``.
        """
        # Always define the attributes so the getters work (returning None)
        # even when no page was supplied at construction time.
        self.data = None
        self.product_name = None
        self.price = None
        if site_soup is not None:
            self.data = site_soup
            self.extract_data()

    def set_data(self, data):
        """Assign the parsed page after construction and refresh the fields.

        Args:
            data: Parsed HTML of bjjhq.com, or ``None`` to clear the instance.
        """
        self.data = data
        if data is None:
            # No page: do not keep values extracted from a previous page.
            self.product_name = None
            self.price = None
        else:
            self.extract_data()

    def extract_data(self):
        """Populate ``product_name`` and ``price`` from ``self.data``."""
        self.product_name = self.extract_product_name()
        self.price = self.extract_price()

    def extract_product_name(self):
        """Return the text of the page's ``<h1>`` element.

        Raises:
            AttributeError: If the page has no ``<h1>`` element (or no page
                has been set).
        """
        heading = self.data.h1
        if heading is None:
            raise AttributeError(
                "page has no <h1> element to read the product name from"
            )
        return str(heading.get_text())

    def extract_price(self):
        """Return the text of the first ``<em>`` whose content starts with ``$``.

        Raises:
            IndexError: If no such ``<em>`` element exists on the page.
        """
        for elem in self.data.find_all('em'):
            # startswith() is safe on empty text, whereas indexing [0] would
            # raise IndexError on an empty <em> before a price is reached.
            if elem.text.strip().startswith('$'):
                return str(elem.text)
        raise IndexError("no <em> element containing a '$' price was found")

    def get_product_name(self):
        """Return the extracted product name, or ``None`` if not extracted."""
        return self.product_name

    def get_price(self):
        """Return the extracted price text, or ``None`` if not extracted."""
        return self.price

In [21]:
# Build the product from the snapshot parsed earlier and show its key fields.
product = BJJHQProduct(raw_soup)
name_and_price = (product.product_name, product.price)
print(*name_and_price)

Kitsune "Barrage" Gear Bag - Brown $25
