In [None]:
# the Python Requests package will allow us to send HTTP requests to get HTML files
import requests

# the GET method indicates that you’re trying to get or retrieve data from a specified resource. 
# to make a GET request, invoke requests.get()
from requests import get

# Beautiful Soup is a Python library for pulling data out of HTML and XML files
from bs4 import BeautifulSoup

# pandas is a Python data analysis library
import pandas as pd

# NumPy is a Python library used for working with large, multi-dimensional arrays and matrices
import numpy as np

from datetime import datetime

import re

import os

import os.path as path

In [None]:
# Pick up the history saved by an earlier run when the CSV is present;
# otherwise start from an empty table with the same four columns.
csv_found = path.exists('canadacannabisdispensary.csv')
existing_data = (
    pd.read_csv('canadacannabisdispensary.csv')
    if csv_found
    else pd.DataFrame(columns=['date', 'product', 'price', 'quantity'])
)

In [None]:
# canadacannabisdispensary has been taken down as of March 2022, and is currently not accessible by this URL
# this block of code will throw a connection error unless the website returns
url = 'https://www.canadacannabisdispensary.co/product-category/aaa/'

# seconds to wait on each HTTP request; requests.get() has no default timeout
# and would otherwise hang the notebook if the server stops responding
REQUEST_TIMEOUT = 30

# parallel lists: index i of every list describes the same product, so all
# four appends happen together at the end of a loop iteration
all_names = []

all_prices = []

all_quantities = []

date = []

# take the timestamp once so every row from this run gets the same month/year
# label (two separate datetime.now() calls could straddle a month boundary)
scrape_time = datetime.now()
scrape_date = str(scrape_time.month) + '/' + str(scrape_time.year)

results = requests.get(url, timeout=REQUEST_TIMEOUT)
# fail loudly on a 4xx/5xx response instead of parsing an error page and
# silently recording zero products
results.raise_for_status()

soup = BeautifulSoup(results.content, "html.parser")

products = soup.find_all('a', class_="woocommerce-LoopProduct-link woocommerce-loop-product__link")

for product in products:
    title_tag = product.find('h2', class_='woocommerce-loop-product__title')
    product_page = product.get('href')

    # find() returns None when the element is absent; skip the entry rather
    # than crash on `.text` and lose everything scraped so far
    if title_tag is None or not product_page:
        print('Skipping a product link with no title or href')
        continue
    name = title_tag.text

    product_page_results = requests.get(product_page, timeout=REQUEST_TIMEOUT)
    if not product_page_results.ok:
        print('Skipping ' + name + ': product page returned HTTP ' + str(product_page_results.status_code))
        continue
    product_page_soup = BeautifulSoup(product_page_results.content, "html.parser")

    # first price element on the product page
    price_tag = product_page_soup.find('span', class_='woocommerce-Price-amount amount')
    if price_tag is None:
        print('Skipping ' + name + ': no price found on product page')
        continue
    price = price_tag.text

    quantities = product_page_soup.find_all('option')

    # pull the first number out of each <option> label; labels that contain
    # no number are ignored
    stripped_quantities = []
    for quantity in quantities:
        strip = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", quantity.text)
        if strip:
            stripped_quantities.append(float(strip[0]))

    print(stripped_quantities)

    # record the smallest quantity offered, or a placeholder string when the
    # page lists no numeric options
    if stripped_quantities:
        all_quantities.append(min(stripped_quantities))
    else:
        all_quantities.append('No quantity options')

    all_names.append(name)
    all_prices.append(price)
    date.append(scrape_date)

In [None]:
# quick sanity check: show each collected list on its own line
for collected in (all_names, all_prices, all_quantities):
    print(collected)

In [None]:
# assemble this run's rows into a frame; keyword order sets the column order
new_data = pd.DataFrame(dict(
    date=date,
    product=all_names,
    price=all_prices,
    quantity=all_quantities,
))

In [None]:
# bare expression as the cell's last line: the notebook displays the new_data frame
new_data

In [None]:
# append this run's rows to the history loaded from disk; ignore_index=True
# renumbers the rows 0..n-1 so the combined frame has no duplicate index
# labels (both inputs otherwise start counting from 0)
joined_data = pd.concat([existing_data, new_data], ignore_index=True)

In [None]:
output_path = 'canadacannabisdispensary.csv'

# persist the combined history (previous rows plus this run's rows);
# `joined_data` is the concatenated frame, and index=False keeps the row index out of the file
joined_data.to_csv(output_path, index = False)