## Is it possible to predict a semi-accurate price for a tab of lsd, based on its dose measurement, rating, and origin?

In [547]:
darknet_data_path = './datasets/agora_fixed.csv'
#Import everything needed for the project.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [548]:
# Clean it up for only LSD items
darkDf = pd.read_csv(darknet_data_path)
darkDf
drugsDf = darkDf[darkDf[" Category"].str.contains("Drugs")]
acidDf = drugsDf[drugsDf[" Category"].str.contains("LSD")]
acidDf = acidDf[acidDf[" Item"].str.contains("LSD|lsd|Lsd|Acid|acid|ACID|ug|UG|mcg")]
acidDf = acidDf[~acidDf[" Item"].str.contains("LAD|Bottle")]
acidDf

Unnamed: 0,Vendor,Category,Item,Item Description,Price,Origin,Destination,Rating,Remarks
14426,FelixUK,Drugs/Psychedelics/LSD,FelixUK (10) - 110ug Hofmann Blotter Dutch LSD...,10 - 110ug Hofmann Blotters Dutch LSD We ship...,0.14345433142857145 BTC,UK,Worldwide,~5/5,
14427,MEGA_PHARMA,Drugs/Psychedelics/LSD,LSD acid blotter (130+ ug) 2 blotters,-----------------------LSD BLOTTERS-----------...,0.04109156 BTC,World,Worldwide,4.7/5,
14428,MEGA_PHARMA,Drugs/Psychedelics/LSD,LSD acid blotter (130+ ug) 1 blotter,-----------------------LSD BLOTTERS-----------...,0.021238328181818183 BTC,World,Worldwide,4.7/5,
14429,Dekay,Drugs/Psychedelics/LSD,2 LSD Blotters,PLEASE NOTE - ALL ORDERS MUST FE / FINALIZE EA...,0.06896385 BTC,Australia,Australia,~5/5,
14430,value.lucid,Drugs/Psychedelics/LSD,25 x Alex Grey's Hofmann LSD Acid (25 x 100 - ...,I offer you 25 Pieces of Alex Grey's Albert Ho...,0.4764299111111112 BTC,Germany,Everywhere Worldwide any Destination,5.0/5,
14431,value.lucid,Drugs/Psychedelics/LSD,1 x Alex Grey's Hofmann LSD Acid (1 x 100 - 12...,I offer you 1 single Piece of Alex Grey's Albe...,0.02418582220588236 BTC,Germany,Everywhere Worldwide any Destination,5.0/5,
14433,MrSunshine,Drugs/Psychedelics/LSD,FREE DRUGS buy 2 hits of LSD get 1 of Mescalin...,"2 hits of Fresh Laid ""POTENT"" Needlepoint 115-...",0.04278990590909092 BTC,USA,,4.7/5,
14434,HigherGroundsNL,Drugs/Psychedelics/LSD,10 pcs High Quality LSD Blotters,Here we got some nice LSD blotters from Hollan...,0.13974562 BTC,Netherlands,,~5/5,
14435,value.lucid,Drugs/Psychedelics/LSD,15 x Alex Grey's Hofmann LSD Acid (15 x 100 - ...,I offer you 15 Pieces of Alex Grey's Albert Ho...,0.28554621927835044 BTC,Germany,Everywhere Worldwide any Destination,5.0/5,
14436,SpiceGirl,Drugs/Psychedelics/LSD,100 tab 120ug LSD,Yummy clean LSD. Blotter art differs from pict...,1.778981541764706 BTC,Australia,Australia,~5/5,Average price may be skewed outliar > .5 BTC f...


In [549]:
# Get rid of unnecessary Columns
acidDf = acidDf.drop(["Vendor", " Category", " Item Description", " Destination", " Remarks"], axis=1)


# Extract Dose From Item Name
import re
def getDose(row):
    itemName = row[" Item"]
    regex = "\d*(?=(ug|Âµg|mcg))"
    dosage = re.search(regex, itemName)
    if dosage and dosage.group().isdigit():
        return dosage.group()

def getQuantity(row):
    itemName = row[" Item"]
    regex = "\d*(?=(x|X| x|  x| X| tab| Tab| hits| hit| pcs| doses| blotters| blotter| Blots| stamps))"
    dosage = re.search(regex, itemName)
    if dosage and dosage.group().isdigit() and not dosage.group() == "000":
        return dosage.group()
    else:
        regex = "\d*(?!(x|x |X|X ))"
        dosage = re.search(regex, itemName)
        if dosage and dosage.group().isdigit():
            return dosage.group()


acidDf["Dose"] = acidDf.apply(lambda row: getDose(row), axis=1)
acidDf["Quantity"] = acidDf.apply(lambda row: getQuantity(row), axis=1)

acidDf.dropna(subset=['Quantity'], inplace=True)
acidDf.dropna(subset=['Dose'], inplace=True)
acidDf


Unnamed: 0,Item,Price,Origin,Rating,Dose,Quantity
14430,25 x Alex Grey's Hofmann LSD Acid (25 x 100 - ...,0.4764299111111112 BTC,Germany,5.0/5,125,25
14431,1 x Alex Grey's Hofmann LSD Acid (1 x 100 - 12...,0.02418582220588236 BTC,Germany,5.0/5,125,1
14435,15 x Alex Grey's Hofmann LSD Acid (15 x 100 - ...,0.28554621927835044 BTC,Germany,5.0/5,125,15
14436,100 tab 120ug LSD,1.778981541764706 BTC,Australia,~5/5,120,100
14437,5 tab 120ug LSD,0.1497232238461538 BTC,Australia,~5/5,120,5
14439,10 x Dutch LSD Dalai Lama 225ug LAB TESTED!,0.27969047500000005 BTC,Germany,4.8/5,225,10
14442,5 x 225ug LSDJubilee Bunny Hofmanns Needlepoin...,0.13396833666666663 BTC,Germany,5.0/5,225,5
14444,100 x 225ug LSDJubilee Bunny Hofmanns Needlepo...,2.023297351333333 BTC,Germany,5.0/5,225,100
14445,10 x Alex Grey's Hofmann LSD Acid (10 x 100 - ...,0.18924345839080461 BTC,Germany,5.0/5,125,10
14446,25 x Dutch LSD Dalai Lama 225ug LAB TESTED!,0.7128599865957449 BTC,Germany,4.8/5,225,25


In [550]:
#convert BTC to USD and Bring Quantity to just 1 tab
avgBtcPrice = 300

def convertPrice(row):
    regex = "\*(?=( BTC))"
    btcPrice = row[" Price"][0:row[" Price"].index("B")]
    if float(row["Quantity"]) == 0:
        print(row)
    return ((float(btcPrice) * avgBtcPrice) / float(row["Quantity"]))

acidDf[" Price"] = acidDf.apply(lambda row: convertPrice(row), axis=1)
acidDf["Quantity"] = 1


In [551]:
# Without Taking Dosage into Consideration
# A 100ug tab costs less than a 300ug tab
acidDf


Unnamed: 0,Item,Price,Origin,Rating,Dose,Quantity
14430,25 x Alex Grey's Hofmann LSD Acid (25 x 100 - ...,5.717159,Germany,5.0/5,125,1
14431,1 x Alex Grey's Hofmann LSD Acid (1 x 100 - 12...,7.255747,Germany,5.0/5,125,1
14435,15 x Alex Grey's Hofmann LSD Acid (15 x 100 - ...,5.710924,Germany,5.0/5,125,1
14436,100 tab 120ug LSD,5.336945,Australia,~5/5,120,1
14437,5 tab 120ug LSD,8.983393,Australia,~5/5,120,1
14439,10 x Dutch LSD Dalai Lama 225ug LAB TESTED!,8.390714,Germany,4.8/5,225,1
14442,5 x 225ug LSDJubilee Bunny Hofmanns Needlepoin...,8.038100,Germany,5.0/5,225,1
14444,100 x 225ug LSDJubilee Bunny Hofmanns Needlepo...,6.069892,Germany,5.0/5,225,1
14445,10 x Alex Grey's Hofmann LSD Acid (10 x 100 - ...,5.677304,Germany,5.0/5,125,1
14446,25 x Dutch LSD Dalai Lama 225ug LAB TESTED!,8.554320,Germany,4.8/5,225,1


In [552]:
# Normalizing the Price to Dosage
standardTabDose = 120

def normalizePrice(row):
    ratio = standardTabDose / float(row["Dose"])
    return round((float(row[" Price"]) * ratio), 2)

normalizedDf = acidDf
normalizedDf[" Price"] = normalizedDf.apply(lambda row: normalizePrice(row), axis=1)
normalizedDf["Dose"] = standardTabDose

#normalizedDf
normalizedDf.describe()

Unnamed: 0,Price,Dose,Quantity
count,1168.0,1168.0,1168.0
mean,6.379443,120.0,1.0
std,6.56675,0.0,0.0
min,0.01,120.0,1.0
25%,3.58,120.0,1.0
50%,5.155,120.0,1.0
75%,7.2625,120.0,1.0
max,121.61,120.0,1.0


In [553]:
acidDf = acidDf.drop([" Item", "Quantity"], axis=1)
acidDf[' Price'] = acidDf[' Price'].apply(lambda x: round(x,0))
acidDf = pd.get_dummies(acidDf, prefix="is")

In [554]:
# LETS GET TO PREDICTING
#DECISION TREE
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import f1_score

labels = acidDf[" Price"]
acidDf = acidDf.drop(" Price", axis=1)

# acidDf = acidDf
# labels = labels

X_train, X_test, y_train, y_test = train_test_split(acidDf, labels)

clf = tree.DecisionTreeRegressor()
# Call clf.fit(), and pass in X_train and y_train as parameters
clf.fit(X_train, y_train)

test_pred = clf.predict(X_test.values.reshape(1,-1)) # Use the .predict() method to have our model create predictions on the X_test variable

# Finally, pass in test_pred and y_test to the f1_score() object to get an f1_score!
#f1_score(test_pred, y_test, average="weighted")
clf.score(test_pred, y_test)


ValueError: Number of features of the model must match the input. Model n_features is 98 and input n_features is 28616 

In [None]:
#KNN
from sklearn.neighbors import KNeighborsClassifier
KNN = KNeighborsClassifier(n_neighbors=25)
KNN.fit(X_train, y_train)
predictions = KNN.predict(X_test)
from sklearn.metrics import accuracy_score

acc = accuracy_score(predictions, y_test)
acc