In [92]:
import pandas as pd
import numpy as np

## 0. Load and Clean Data

In [93]:
# Raw strain table; kept untouched so later cells can derive other subsets from it.
cannabis_full_df = pd.read_csv("https://www.dropbox.com/scl/fi/ryzkzqwbxzod2yie59ma3/cannabis_full.csv?rlkey=odvjjdivybzli8po2e0w390kq&dl=1")

# Modelling frame, built in a single chain so the cell is idempotent and never
# assigns a column onto a filtered slice (which triggers SettingWithCopyWarning):
#   1. keep the label, the rating, and every column from "Creative" through "Pear"
#   2. drop hybrid rows
#   3. add the binary target: 1 where Type is "sativa", 0 otherwise
#   4. drop rows with any missing value
cannabis_df = (
    pd.concat(
        [
            cannabis_full_df["Type"],
            cannabis_full_df["Rating"],
            cannabis_full_df.loc[:, "Creative":"Pear"],
        ],
        axis=1,
    )
    .loc[lambda df: df["Type"] != "hybrid"]
    .assign(Type_is_sativa=lambda df: np.where(df["Type"] == "sativa", 1, 0))
    .dropna()
)

In [94]:
# Number of columns, followed by their names, to confirm the layout used for slicing below.
cannabis_df.shape[1], cannabis_df.columns

(67,
 Index(['Type', 'Rating', 'Creative', 'Energetic', 'Tingly', 'Euphoric',
        'Relaxed', 'Aroused', 'Happy', 'Uplifted', 'Hungry', 'Talkative',
        'Giggly', 'Focused', 'Sleepy', 'Dry', 'Mouth', 'Earthy', 'Sweet',
        'Citrus', 'Flowery', 'Violet', 'Diesel', 'Spicy/Herbal', 'Sage',
        'Woody', 'Apricot', 'Grapefruit', 'Orange', 'Pungent', 'Grape', 'Pine',
        'Skunk', 'Berry', 'Pepper', 'Menthol', 'Blue', 'Cheese', 'Chemical',
        'Mango', 'Lemon', 'Peach', 'Vanilla', 'Nutty', 'Chestnut', 'Tea',
        'Tobacco', 'Tropical', 'Strawberry', 'Blueberry', 'Mint', 'Apple',
        'Honey', 'Lavender', 'Lime', 'Coffee', 'Ammonia', 'Minty', 'Tree',
        'Fruit', 'Butter', 'Pineapple', 'Tar', 'Rose', 'Plum', 'Pear',
        'Type_is_sativa'],
       dtype='object'))

## 1. Get Regression Coefficients

Fit three separate logistic regression models, each predicting `Type` (encoded as the binary target `Type_is_sativa`) from a different group of predictors.

In [95]:
import logistic_regression as lr

Effect predictors [`"Creative"` : `"Mouth"`]

In [96]:
# Column layout every weight vector must follow: "Rating" through "Pear".
all_features = cannabis_df.loc[:, "Rating":"Pear"].columns
# Predictors used by this model: "Creative" through "Mouth".
effect_features = cannabis_df.loc[:, "Creative":"Mouth"].columns

X = cannabis_df[effect_features].values
y = cannabis_df["Type_is_sativa"].values

effect_betas = lr.fit(X, y, lam=1)

# Place each fitted coefficient at the position of its own column and give every
# other feature a weight of 0. Aligning by label replaces the hand-counted padding
# (one leading zero, 49 trailing zeros), so the vector stays correct if columns are
# added or removed, and a coefficient/column count mismatch raises instead of
# silently shifting weights.
lr_betas_effects = (
    pd.Series(np.ravel(effect_betas), index=effect_features)
    .reindex(all_features, fill_value=0.0)
    .to_numpy()
)

print(len(lr_betas_effects))
print(lr_betas_effects)

65
[ 0.          1.3810244   2.67177003 -0.25179967  0.75454563 -0.44206622
  0.12697881  1.30061358  2.11409624 -0.52269947  0.92790292 -0.04831339
  1.43233131 -1.8970674   0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.        ]


Flavor predictors [`"Earthy"` : `"Pear"`]

In [97]:
# Column layout every weight vector must follow: "Rating" through "Pear".
all_features = cannabis_df.loc[:, "Rating":"Pear"].columns
# Predictors used by this model: "Earthy" through "Pear".
flavor_features = cannabis_df.loc[:, "Earthy":"Pear"].columns

X = cannabis_df[flavor_features].values
y = cannabis_df["Type_is_sativa"].values

flavor_betas = lr.fit(X, y, lam=1)

# Place each fitted coefficient at the position of its own column and give every
# other feature a weight of 0. Aligning by label replaces the hand-counted padding
# (16 leading zeros), so the vector stays correct if columns are added or removed,
# and a coefficient/column count mismatch raises instead of silently shifting weights.
lr_betas_flavors = (
    pd.Series(np.ravel(flavor_betas), index=flavor_features)
    .reindex(all_features, fill_value=0.0)
    .to_numpy()
)

print(len(lr_betas_flavors))
print(lr_betas_flavors)

65
[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -0.31618916  0.19096436
  0.73784297 -0.23794156  0.00239725  0.05788641  0.01883925 -0.00142675
 -0.18662514  0.04341806  0.03437673  0.14624831 -0.33060471 -0.33530607
  0.01905059 -0.05801883 -0.46020919 -0.05524014 -0.01364452 -0.19756487
 -0.09592525 -0.02975349  0.07512575  0.20463248  0.00088819 -0.00350622
 -0.03060104 -0.04016233  0.02369999 -0.00937342  0.360665    0.11431516
 -0.1951864  -0.02164033 -0.00269246 -0.05215031 -0.05652542  0.04772197
 -0.00939268 -0.02511319 -0.00202946  0.03244368  0.03244368 -0.09075152
  0.15054162  0.0142764  -0.0484407   0.00294664 -0.00823416]


Rating as the only predictor [`"Rating"`]

In [98]:
# Column layout every weight vector must follow: "Rating" through "Pear".
all_features = cannabis_df.loc[:, "Rating":"Pear"].columns
# This model uses a single predictor.
rating_features = ["Rating"]

X = cannabis_df[rating_features].values
y = cannabis_df["Type_is_sativa"].values

rating_betas = lr.fit(X, y, lam=1)

# Place the fitted coefficient at the "Rating" position and give every other
# feature a weight of 0. Aligning by label replaces the hand-counted padding
# (64 trailing zeros), so the vector stays correct if columns are added or removed.
lr_betas_rating = (
    pd.Series(np.ravel(rating_betas), index=rating_features)
    .reindex(all_features, fill_value=0.0)
    .to_numpy()
)

print(len(lr_betas_rating))
print(lr_betas_rating)

65
[11.18409276  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.        ]


## 2. Weight sets

In [99]:
# Short aliases for the three zero-padded coefficient vectors
# (effects, flavors, rating), in the order they are fed to the perceptron.
w1, w2, w3 = lr_betas_effects, lr_betas_flavors, lr_betas_rating

## 3. Perceptron

In [100]:
# Binary target column (1 where Type is "sativa", 0 otherwise).
y = cannabis_df["Type_is_sativa"]
# Full feature matrix: every column from "Rating" through "Pear", the same
# layout the padded weight vectors follow.
X_inputs = cannabis_df.loc[:, slice("Rating", "Pear")]

X_inputs.head(5)

Unnamed: 0,Rating,Creative,Energetic,Tingly,Euphoric,Relaxed,Aroused,Happy,Uplifted,Hungry,...,Ammonia,Minty,Tree,Fruit,Butter,Pineapple,Tar,Rose,Plum,Pear
2,4.4,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,4.2,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,4.6,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,4.4,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [101]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Negative entries become 0; non-negative entries pass through unchanged.
    """
    floor = 0
    return np.maximum(floor, x)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The direct formula computes ``exp(-x)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) once ``x`` is a large negative number. This version
    only ever exponentiates ``-|x|``, which lies in ``(0, 1]`` for finite ``x``:

    * ``x >= 0``: ``1 / (1 + exp(-x))``  -- identical to the direct formula
    * ``x <  0``: ``exp(x) / (1 + exp(x))`` -- the same function, rearranged

    Parameters
    ----------
    x : scalar or array-like supporting numpy arithmetic and comparison
        Input value(s).

    Returns
    -------
    Values in ``[0, 1]`` with the same shape as ``x``; ``sigmoid(0)`` is
    exactly ``0.5`` and NaN inputs propagate as NaN.
    """
    decay = np.exp(-np.abs(x))
    # Numerator is 1 on the non-negative side and exp(x) on the negative side;
    # the denominator 1 + exp(-|x|) is shared by both branches.
    numerator = np.where(x >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: keep positive entries of ``x``, scale the rest by ``alpha``.

    Parameters
    ----------
    x : array-like supporting ``>`` and ``*`` (e.g. a numpy array)
        Input value(s).
    alpha : float, default 0.01
        Slope applied to entries that are not strictly positive.
    """
    is_positive = x > 0
    damped = alpha * x
    return np.where(is_positive, x, damped)

In [108]:
def calculate_activations(X, weight_sets, activation_f):
    """Apply ``activation_f`` to each row of ``X · weight_sets``.

    Parameters
    ----------
    X : 2-D array-like
        Input rows.
    weight_sets : 2-D array-like
        Weights; the product ``np.dot(X, weight_sets)`` must be 2-D.
    activation_f : callable
        Called once per row of the product with a 1-D array.

    Returns
    -------
    numpy.ndarray
        The per-row results of ``activation_f`` stacked back together.
    """
    pre_activations = np.dot(X, weight_sets)
    # axis=1 hands each row to the activation function in turn, so functions that
    # need to see a whole row at once also work here.
    return np.apply_along_axis(activation_f, 1, pre_activations)


def perceptron(X, weight_sets, decision_boundary=0.5):
    """Classify each row of ``X`` by averaging sigmoid units, one per weight set.

    Parameters
    ----------
    X : 2-D array-like
        One row per sample, one column per feature.
    weight_sets : sequence of 1-D arrays
        Each entry is a weight vector with one weight per feature of ``X``.
    decision_boundary : float, default 0.5
        Rows whose averaged activation is at least this value are labelled 1.

    Returns
    -------
    numpy.ndarray
        One 0/1 label per row of ``X``.
    """
    # Stack the weight vectors as columns so the dot product yields one score
    # per (row, weight set) pair.
    weight_matrix = np.transpose(np.array(weight_sets))

    hidden_activations = calculate_activations(X, weight_matrix, sigmoid)
    # Second layer: plain average of the hidden units for each row.
    averaged_activation = np.mean(hidden_activations, axis=1)

    is_positive = averaged_activation >= decision_boundary
    return np.where(is_positive, 1, 0)

print("Testing with non-hybrid set")
# minlength=2 guarantees a [count_of_0, count_of_1] pair even when one class is
# never predicted; without it bincount returns a single-element array whenever
# label 1 is absent.
value_counts = np.bincount(perceptron(X_inputs, [w1, w2, w3]), minlength=2)
value_counts

Testing with non-hybrid set


array([  14, 1104])

## 4. Prediction

In [103]:
# Hybrid-only frame with the same column layout as the modelling frame, built in
# one chain so no column is assigned onto a filtered slice (which triggers
# SettingWithCopyWarning).
# NOTE: every row here has Type == "hybrid", so `Type_is_sativa` is 0 for all rows;
# it is kept only so the columns match the non-hybrid frame and must not be read
# as a ground-truth label.
hybrid_cannabis_df = (
    pd.concat(
        [
            cannabis_full_df["Type"],
            cannabis_full_df["Rating"],
            cannabis_full_df.loc[:, "Creative":"Pear"],
        ],
        axis=1,
    )
    .loc[lambda df: df["Type"] == "hybrid"]
    .assign(Type_is_sativa=lambda df: np.where(df["Type"] == "sativa", 1, 0))
    .dropna()
)

In [104]:
# Feature matrix for the hybrid strains: columns "Rating" through "Pear".
X_hybrid = hybrid_cannabis_df.loc[:, slice("Rating", "Pear")]

In [105]:
print("Testing with hybrid set")
# minlength=2 guarantees a [count_of_0, count_of_1] pair even when one class is
# never predicted; without it bincount returns a single-element array whenever
# label 1 is absent.
value_counts = np.bincount(perceptron(X_hybrid, [w1, w2, w3]), minlength=2)
value_counts

Testing with hybrid set


array([   3, 1184])

In [None]:
# Preview the first five rows of the non-hybrid modelling frame.
cannabis_df.head(n=5)