In [1]:
# the Python Requests package will allow us to send HTTP requests to get HTML files
import requests

# the GET method indicates that you’re trying to get or retrieve data from a specified resource. 
# to make a GET request, invoke requests.get()
from requests import get

# Beautiful Soup is a Python library for pulling data out of HTML and XML files
from bs4 import BeautifulSoup

# pandas is a Python data analysis library
import pandas as pd

# NumPy is a Python library used for working with large, multi-dimensional arrays and matrices
import numpy as np

from datetime import datetime

import re

import os

import os.path as path

In [None]:
# Load the rows saved by earlier runs; when the CSV has not been created yet,
# start from an empty frame that already carries the expected column names.
if not path.exists('canadacannabisdispensary.csv'):
    columns = ['date', 'product', 'price', 'quantity']
    existing_data = pd.DataFrame(columns=columns)
else:
    existing_data = pd.read_csv('canadacannabisdispensary.csv')

In [2]:
# canadacannabisdispensary has been taken down as of March 2022, and is currently not accessible by this URL
# this block of code will throw a connection error unless the website returns
url = 'https://www.canadacannabisdispensary.co/product-category/aaa/'

# Seconds to wait on any single HTTP request before giving up; without a
# timeout, requests.get() can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Parallel lists with one entry per scraped product; a later cell combines
# them column-wise into a DataFrame.
all_names = []
all_prices = []
all_quantities = []
date = []

# Read the clock once so every row of this run carries the same "month/year"
# label and the month and year are taken from the same instant.
scrape_time = datetime.now()
scrape_label = str(scrape_time.month) + '/' + str(scrape_time.year)

# Matches the first (optionally signed) integer or decimal number in a string.
number_pattern = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

results = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
results.raise_for_status()

soup = BeautifulSoup(results.content, "html.parser")

# Each product tile on the category page is an <a> linking to its detail page.
products = soup.find_all('a', class_="woocommerce-LoopProduct-link woocommerce-loop-product__link")

for product in products:
    name = product.find('h2', class_='woocommerce-loop-product__title').text
    product_page = product.get('href')

    # The price and the quantity options are read from the product's own page.
    product_page_results = requests.get(product_page, timeout=REQUEST_TIMEOUT)
    product_page_results.raise_for_status()
    product_page_soup = BeautifulSoup(product_page_results.content, "html.parser")

    price = product_page_soup.find('span', class_='woocommerce-Price-amount amount').text
    quantities = product_page_soup.find_all('option')

    # Keep the first number found in each <option> label; labels without any
    # number (e.g. a placeholder entry) are skipped.
    stripped_quantities = []
    for quantity in quantities:
        number_match = number_pattern.search(quantity.text)
        if number_match:
            stripped_quantities.append(float(number_match.group(0)))

    print(stripped_quantities)

    # Record the smallest quantity offered, or a sentinel string when the page
    # lists no numeric options.
    if stripped_quantities:
        all_quantities.append(min(stripped_quantities))
    else:
        all_quantities.append('No quantity options')

    all_names.append(name)
    all_prices.append(price)
    date.append(scrape_label)

[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]
[3.5, 7.0, 14.0, 28.0]


In [3]:
# Show the three scraped lists, one per line, as a quick sanity check.
print(all_names, all_prices, all_quantities, sep='\n')

['9 Pound Hammer (AAA)', 'Alaskan Thunder Fuck (AAA)', 'Blueberry (AAA)', 'Cherry Cough (AAA)', 'Donkey Butter (AAA)', 'Godzilla Glue (AAA)', 'Humble Pie (AAA)', 'Meat Breath (AAA)', 'MK Ultra (AAA)', 'Pie Face (AAA)', 'Pineapple Express (AAA)', 'Platinum Girl Scout Cookies (AAA)', 'Pot of Gold (AAA)', 'Purple Pineapple (AAA)', 'Raspberry Cough (AAA)', 'Space Grape (AAA)', 'Tom Ford Pink Kush (AAA)', 'Tutankhamon / King Tut (AAA)']
['$26.00', '$28.00', '$23.00', '$19.00', '$26.00', '$22.00', '$28.00', '$23.00', '$22.00', '$23.00', '$29.00', '$29.00', '$23.00', '$24.00', '$23.00', '$24.00', '$29.00', '$29.00']
[3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5]


In [4]:
# Pair each column label with its scraped list and build this run's frame;
# the column order is date, product, price, quantity.
new_data = pd.DataFrame(dict(zip(
    ('date', 'product', 'price', 'quantity'),
    (date, all_names, all_prices, all_quantities),
)))

In [5]:
# Display the frame built from this run's scrape (bare last expression -> rich output).
# NOTE(review): the saved output under this cell shows capitalized headers and an
# 'Unnamed: 0' column, while the frame is built with lowercase 'date', 'product',
# 'price', 'quantity' columns -- the saved output looks stale; re-run to refresh it.
new_data

Unnamed: 0,Date,Product,Price,Quantity
0,3/2022,9 Pound Hammer (AAA),$26.00,3.5
1,3/2022,Alaskan Thunder Fuck (AAA),$28.00,3.5
2,3/2022,Blueberry (AAA),$23.00,3.5
3,3/2022,Cherry Cough (AAA),$19.00,3.5
4,3/2022,Donkey Butter (AAA),$26.00,3.5
5,3/2022,Godzilla Glue (AAA),$22.00,3.5
6,3/2022,Humble Pie (AAA),$28.00,3.5
7,3/2022,Meat Breath (AAA),$23.00,3.5
8,3/2022,MK Ultra (AAA),$22.00,3.5
9,3/2022,Pie Face (AAA),$23.00,3.5


In [None]:
# Stack the previously saved rows on top of the rows scraped in this run.
joined_data = pd.concat(objs=[existing_data, new_data])

In [7]:
output_path = 'canadacannabisdispensary.csv'

# Append only the rows scraped in this run. The original call referenced `df`,
# which is never defined in this notebook and raised NameError. Because the
# file is opened in append mode, the frame to write is `new_data`: appending a
# frame that already contains the previously saved rows would duplicate them.
# The header row is written only when the file does not exist yet.
new_data.to_csv(output_path, mode='a', index=False, header=not os.path.exists(output_path))