In [43]:
# the Python Requests package will allow us to send HTTP requests to get HTML files
import requests

# the GET method indicates that you’re trying to get or retrieve data from a specified resource. 
# to make a GET request, invoke requests.get()
from requests import get

# Beautiful Soup is a Python library for pulling data out of HTML and XML files
from bs4 import BeautifulSoup

# pandas is a Python data analysis library
import pandas as pd

# NumPy is a Python library used for working with large, multi-dimensional arrays and matrices
import numpy as np

# Datetime is a module that supplies classes for manipulating dates and times
from datetime import datetime

# The os module provides a portable way of using operating system dependent functionality
import os

# The os.path module implements some useful functions on pathnames
import os.path as path

In [44]:
# Load previously saved records from 'cheebas.csv' in the working directory.
# When no such file is present yet, begin with an empty dataframe that already
# carries the columns every scrape fills in.
if not path.exists('cheebas.csv'):
   columns = ['date', 'product', 'category', 'price']
   existing_data = pd.DataFrame(columns=columns)
else:
   existing_data = pd.read_csv('cheebas.csv')

In [45]:
# display the data loaded from the CSV (or the empty placeholder dataframe)
existing_data

Unnamed: 0,date,product,category,price
0,2022-03-15,Death Bubba Hash by Tegridy Farms,premium-domestic-hash,$16.00/g
1,2022-03-15,Master Kush Hash by Tegridy Farms,premium-domestic-hash,$15.00/g
2,2022-03-15,Gorilla Glue Hash By Tegridy Farms,premium-domestic-hash,$16.00/g
3,2022-03-15,Tuna Rockstar HASH by TF,premium-domestic-hash,$16.00/g
4,2022-03-15,Chernobyl Cookies by Tegridy Farms,premium-domestic-hash,$16.00/g
5,2022-03-15,Malani Style Elephant Hash,premium-domestic-hash,$13.00/g
6,2022-03-15,Gods Green Crack by Tegridy Farms,premium-domestic-hash,$17.00/g
7,2022-03-15,Dragon Full Melt Hash,premium-domestic-hash,$18.00/g
8,2022-03-15,Tegridy Farms - Hindu Kush,premium-domestic-hash,$14.00/g
9,2022-03-15,Blue Dream by Tegridy Farms,premium-domestic-hash,$16.50/g


To collect new data, run the cells below: they scrape the current cannabis prices from the site and tag each record with the date on which the script is run.

In [46]:
# the base URL and the category pages (appended to the base URL) we want to obtain data from
url = 'https://www.cheebas.co/'
keys = ['premium-domestic-hash', 'buy-sativa-online']

# store scraped data in parallel lists (one entry per product in each list)
all_names = []
all_prices = []
all_categories = []
date = []

# obtain the date the script is being run ONCE, before scraping, so every row
# collected in this run carries the same date even if the run crosses midnight
current_date = datetime.today().strftime('%Y-%m-%d')

# iterate through each category page
for key in keys:
    # a timeout keeps a stalled connection from hanging the notebook indefinitely
    results = requests.get(url + key, timeout=30)

    # fail loudly on HTTP errors (4xx/5xx) instead of parsing an error page
    # and silently recording zero products
    results.raise_for_status()

    # parse web content into Python-readable format
    soup = BeautifulSoup(results.content, "html.parser")

    # from inspecting the HTML, information we need is stored here for each product
    # the find_all() function will find every instance of matching tags and filters from the soup
    products = soup.find_all('div', class_="product-thumb")

    # iterate through each product and obtain data from each
    for product in products:
        # locate where the name and the price per unit of each product are stored
        product_link = product.find('a')
        price_block = product.find('h4', class_='thumb-price')
        price_tag = price_block.find('b') if price_block is not None else None

        # a card missing its link or price element would otherwise raise
        # AttributeError and abort the whole scrape; skip it and say so
        if product_link is None or price_tag is None:
            print('skipping a product in ' + key + ' with a missing name link or price')
            continue

        name = product_link.get('title')
        price_per_quantity = price_tag.text

        # add data to lists
        all_names.append(name)
        all_prices.append(price_per_quantity)
        all_categories.append(key)
        date.append(current_date)

        print(name)
        print(price_per_quantity)
        print(current_date)

Master Kush Hash by Tegridy Farms
$15.00/g
2022-04-14
Tuna Rockstar HASH by Tegridy Farms
$15.00/g
2022-04-14
Tegridy Farms - Hindu Kush
$14.00/g
2022-04-14
Pink Goo Hash by Tegridy Farms
$15.00/g
2022-04-14
Chernobyl Cookies by Tegridy Farms
$14.00/g
2022-04-14
Gorilla Glue Hash By Tegridy Farms
$16.00/g
2022-04-14
Kings Crown
$19.00/g
2022-04-14
Full Melt Hash Banana Cookies Strain
$17.00/g
2022-04-14
Death Star by Hash Assassins
$15.00/g
2022-04-14
Kush Mint Full Melt Hash
$18.00/g
2022-04-14
Tom Ford Full Melt Hash
$17.00/g
2022-04-14
Malani Style Elephant Hash
$13.00/g
2022-04-14
MK Ultra Hash by Tegridy Farms
$14.00/g
2022-04-14
Full Melt Wildberry Runtz
$17.00/g
2022-04-14
Phoenix Stamp Banana Kush Hash
$13.00/g
2022-04-14
Sour Dream Sativa Hash by Hash Assassins
$15.00/g
2022-04-14
Gods Green Crack by Tegridy Farms
$15.00/g
2022-04-14
Death Bubba Hash by Tegridy Farms
$15.50/g
2022-04-14
Holy Bible Stamp Lindsay OG Hash
$14.00/g
2022-04-14
GDP Fresh Frozen Full Melt
$19.00/g
20

In [47]:
# assemble the freshly scraped lists into a dataframe, one column per list
new_data = pd.DataFrame(dict(
    date=date,
    product=all_names,
    category=all_categories,
    price=all_prices,
))

In [48]:
# display the data scraped in this run
new_data

Unnamed: 0,date,product,category,price
0,2022-04-14,Master Kush Hash by Tegridy Farms,premium-domestic-hash,$15.00/g
1,2022-04-14,Tuna Rockstar HASH by Tegridy Farms,premium-domestic-hash,$15.00/g
2,2022-04-14,Tegridy Farms - Hindu Kush,premium-domestic-hash,$14.00/g
3,2022-04-14,Pink Goo Hash by Tegridy Farms,premium-domestic-hash,$15.00/g
4,2022-04-14,Chernobyl Cookies by Tegridy Farms,premium-domestic-hash,$14.00/g
5,2022-04-14,Gorilla Glue Hash By Tegridy Farms,premium-domestic-hash,$16.00/g
6,2022-04-14,Kings Crown,premium-domestic-hash,$19.00/g
7,2022-04-14,Full Melt Hash Banana Cookies Strain,premium-domestic-hash,$17.00/g
8,2022-04-14,Death Star by Hash Assassins,premium-domestic-hash,$15.00/g
9,2022-04-14,Kush Mint Full Melt Hash,premium-domestic-hash,$18.00/g


In [49]:
# stack the new rows beneath the existing rows (row-wise concatenation);
# each dataframe keeps its own index labels, so labels may repeat in the result
joined_data = pd.concat([existing_data, new_data], axis=0)

In [50]:
# display the existing and new data combined
joined_data

Unnamed: 0,date,product,category,price
0,2022-03-15,Death Bubba Hash by Tegridy Farms,premium-domestic-hash,$16.00/g
1,2022-03-15,Master Kush Hash by Tegridy Farms,premium-domestic-hash,$15.00/g
2,2022-03-15,Gorilla Glue Hash By Tegridy Farms,premium-domestic-hash,$16.00/g
3,2022-03-15,Tuna Rockstar HASH by TF,premium-domestic-hash,$16.00/g
4,2022-03-15,Chernobyl Cookies by Tegridy Farms,premium-domestic-hash,$16.00/g
...,...,...,...,...
24,2022-04-14,Five Alive,buy-sativa-online,$6.50/g
25,2022-04-14,Orange Aid 7g by Tegridy Farms,buy-sativa-online,$69.00
26,2022-04-14,Mimosa,buy-sativa-online,$6.75/g
27,2022-04-14,Mandarin Sunset,buy-sativa-online,$8.25/g


In [51]:
# guard against the same day's data being added twice: for each repeated
# (date, product) pair keep only the last row, returning a new dataframe
final_data = joined_data.drop_duplicates(
    subset=['date', 'product'],
    keep='last',
)

In [52]:
# display the combined data after duplicate (date, product) rows are removed
final_data

Unnamed: 0,date,product,category,price
0,2022-03-15,Death Bubba Hash by Tegridy Farms,premium-domestic-hash,$16.00/g
1,2022-03-15,Master Kush Hash by Tegridy Farms,premium-domestic-hash,$15.00/g
2,2022-03-15,Gorilla Glue Hash By Tegridy Farms,premium-domestic-hash,$16.00/g
3,2022-03-15,Tuna Rockstar HASH by TF,premium-domestic-hash,$16.00/g
4,2022-03-15,Chernobyl Cookies by Tegridy Farms,premium-domestic-hash,$16.00/g
5,2022-03-15,Malani Style Elephant Hash,premium-domestic-hash,$13.00/g
6,2022-03-15,Gods Green Crack by Tegridy Farms,premium-domestic-hash,$17.00/g
7,2022-03-15,Dragon Full Melt Hash,premium-domestic-hash,$18.00/g
8,2022-03-15,Tegridy Farms - Hindu Kush,premium-domestic-hash,$14.00/g
9,2022-03-15,Blue Dream by Tegridy Farms,premium-domestic-hash,$16.50/g


In [53]:
# define filename for data
output_path = 'cheebas.csv'

# overwrite previous file with new file containing previous and new data;
# write the de-duplicated dataframe (final_data), not the raw concatenation,
# so re-running the notebook on the same day does not persist duplicate rows.
# index=False keeps the dataframe index out of the file.
final_data.to_csv(output_path, index = False)