In [269]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import compress

from datetime import datetime
from dateutil.parser import parse

import math
import os
import copy
import pickle

Create the `conversion_dict` dictionary, which maps each unit of measure found in the data to its equivalent in grams.

In [270]:
# Multiplier that turns one of the given unit into grams.
conversion_dict = {
    # metric mass
    'g': 1,
    'kg': 1000,
    'kilo': 1000,
    # imperial mass
    'lb': 453.592,
    # micrograms / milligrams
    'mcg': 1e-06,
    'mg': 0.001,
    'ounce': 28.35,
    'oz': 28.35,
    'pound': 453.592,
    # Microgram spellings. The '¬'-prefixed keys look like mis-encoded
    # variants of 'µg' -- presumably they occur that way in the raw data;
    # TODO confirm before removing them.
    'ug': 1e-06,
    '¬ug': 1e-06,
    '¬µg': 1e-06,
    'µg': 1e-06,
    # volume: 1 ml is counted as 1 g
    'ml': 1,
    # the integer 0 stands for "no unit" and converts to a mass of 0
    0: 0,
}

In [271]:
#Load the raw listings; the path is relative, so the CSV must be in the notebook's working directory
data = pd.read_csv("marijuana_unique.csv")

In [272]:
#Replace every missing value with 0, in all columns (text columns included).
#Later cells rely on this: they compare product_description and Unit against the integer 0.
data = data.fillna(0)

In [273]:
#Preview the first rows to check the columns and the effect of fillna
data.head()

Unnamed: 0,vendor_name,bitcoin_amt,USD,date,product_description,Count,can_be_vaped,is_edible,THC purity (%),Quantity/mass,Unit,Unit mass in grams,Total mass in grams
0,Cannabuds,-1.0,51.5,2018-11-01 0:00:00,4g Shatter! Lemon Tree! New Vendor Special!,0.0,0,0.0,0.0,0.0,0,4.0,0.0
1,Cannabuds,-1.0,11.67,2018-07-04 0:00:00,1g Rosin! Lemon Tree! (Nug run) New Vendor Spe...,0.0,0,0.0,0.0,0.0,0,1.0,0.0
2,Cannabuds,-1.0,138.0,2018-12-08 0:00:00,1 oz Master Kush! New Vendor Special,0.0,0,0.0,0.0,1.0,oz,0.0,0.0
3,Cannabuds,-1.0,108.0,2018-08-07 0:00:00,1 oz Fruity Pebbles OG! New Vendor Special,0.0,0,0.0,0.0,1.0,oz,0.0,0.0
4,Cannabuds,-1.0,60.0,2018-12-23 0:00:00,5g Shatter! Lemon Tree! New Vendor Special!,0.0,0,0.0,0.0,0.0,0,5.0,0.0


In [274]:
#Drop deleted listings: rows whose product_description equals the integer 0
#(a description that was missing in the CSV becomes 0 after fillna(0))
data = data.loc[data['product_description'] != 0]

In [275]:
#Make a list of all the wanted units
#factorize returns (codes, uniques); only the distinct Unit values are needed here
_, unique_units = pd.factorize(data['Unit'])

print ('Unique units: ', unique_units)

#Units to keep. The integer 0 is the fillna placeholder for "no unit";
#the string '0' that can also appear in the data is not included.
units_list = [0, 'oz', 'ml', 'lb', 'mg', 'kg', 'g']

print ('Unit list: ', units_list)

Unique units:  Index([        0,      'oz',      'ml',      'lb',     'bar',      'mg',
       'capsule',      'kg',     'ltr',       'g',    'seed',       '0',
          'pill'],
      dtype='object')
Unit list:  [0, 'oz', 'ml', 'lb', 'mg', 'kg', 'g']


In [276]:
#Keep only the rows whose Unit is one of the wanted units
data = data.loc[data['Unit'].isin(units_list)]

In [277]:
#Rename the columns to identifier-friendly names.
#Names are assigned by position, so the frame must have exactly these 13 columns in this order.
data.columns = [
    'vendor_name',
    'bitcoin_amt',
    'USD',
    'date',
    'product_description',
    'Count',
    'can_be_vaped',
    'is_edible',
    'THC_purity',           # shown as 'THC purity (%)' in the head() preview
    'Quantity',             # shown as 'Quantity/mass' in the head() preview
    'Unit',
    'Unit_mass_in_grams',   # shown as 'Unit mass in grams' in the head() preview
    'Total_mass_in_grams',  # shown as 'Total mass in grams' in the head() preview
]

In [278]:
#Cast the unit-mass column to float so it can be compared with 0 and multiplied
data['Unit_mass_in_grams'] = data['Unit_mass_in_grams'].astype(float)

In [279]:
#Total mass in grams of one listing, computed from its count, quantity and unit
def getTotalMassWithUnit(count,quantity,unit):
    """Return the listing's total mass in grams.

    A value of 0 for ``count`` or ``quantity`` means "not given", and that
    factor is left out of the product.

    Parameters:
        count: number of items in the listing (0 if not given).
        quantity: amount per item, expressed in ``unit`` (0 if not given).
        unit: key into ``conversion_dict`` giving the grams-per-unit factor.

    Returns:
        quantity * factor        when count is 0,
        count * factor           when only quantity is 0,
        count * quantity * factor otherwise.

    Raises:
        KeyError: if ``unit`` is not a key of ``conversion_dict``.
    """
    grams_per_unit = conversion_dict[unit]

    if count == 0:
        return quantity * grams_per_unit
    if quantity == 0:
        return count * grams_per_unit
    return (count * quantity) * grams_per_unit

In [280]:
#Total mass in grams of one listing, computed from a per-unit mass that is already in grams
def getTotalMassWithGram(count,quantity,gram):
    """Return the listing's total mass in grams, given a per-unit gram mass.

    A value of 0 for ``count`` or ``quantity`` means "not given", and that
    factor is left out of the product.

    Parameters:
        count: number of items in the listing (0 if not given).
        quantity: amount per item (0 if not given).
        gram: mass of one unit, in grams.

    Returns:
        gram                     when count and quantity are both 0,
        count * gram             when only quantity is 0,
        quantity * gram          when only count is 0,
        count * quantity * gram  when both are non-zero.
    """
    if count == 0 and quantity == 0:
        return gram
    if quantity == 0:
        return count * gram
    if count == 0:
        return quantity * gram
    # Both multipliers are present. Previously no branch handled this case,
    # so the function raised UnboundLocalError on `return totalMass`.
    return (count * quantity) * gram

In [281]:
#Fill in Total_mass_in_grams from Count/Quantity/Unit for every row that does not have a total yet
for index, row in data.iterrows():
    if row['Total_mass_in_grams'] != 0:
        continue  # a total is already present; leave it untouched
    if row['Unit'] not in units_list:
        continue  # unit is not one of the wanted units
    data.at[index, 'Total_mass_in_grams'] = getTotalMassWithUnit(
        row['Count'], row['Quantity'], row['Unit'])

In [282]:
#Where a per-unit gram mass is recorded, recompute the total from it.
#This overwrites whatever total the row already had.
for index, row in data.iterrows():
    if row['Unit_mass_in_grams'] == 0:
        continue  # no per-unit gram mass for this row
    data.at[index, 'Total_mass_in_grams'] = getTotalMassWithGram(
        row['Count'], row['Quantity'], row['Unit_mass_in_grams'])

In [284]:
# len (data[data['Total_mass_in_grams'] == 0])

277

In [263]:
#If Count, can_be_vaped, is_edible, THC_purity, Quantity and Unit (columns 5-10) are all 0, set 'Total_mass_in_grams' = 'Unit_mass_in_grams'

#FOR EVERY ROW IN DATA
# for index, row in data.iterrows():
#     #IF ROWS 5 - 10 ARE ZEROS, SET 'TOTAL' = 'UNIT'
#     if row['Count'] == 0 and row['can_be_vaped'] == 0 and row['is_edible'] == 0 and row ['THC_purity'] == 0 and row['Quantity'] == 0 and row['Unit'] == 0:
#         data.at[index, 'Total_mass_in_grams'] = data.at[index, 'Unit_mass_in_grams']
#     #ELSE IF ROW DOESNT EQUAL ZERO GET MASS OF THAT ROW
#     elif row['Unit_mass_in_grams'] != 0:
#         data.at[index, 'Total_mass_in_grams'] = getTotalMassWithGram(row['Count'], row['Quantity'],row['Unit_mass_in_grams'])

In [215]:
#For the remaining listings where Total Mass = 0:
    #if unit mass != 0, get total mass
# for index, row in subset_data.iterrows():
#     if (row['Unit_mass_in_grams'] != 0):
#         data.at[index, 'Total_mass_in_grams'] = row['Count'] * row['Quantity'] * row ['Unit_mass_in_grams']

In [285]:
#Export dataframe as csv
#NOTE(review): to_csv writes the row index as an extra leading column by default; pass index=False if that is not wanted
data.to_csv(r'Total_Marijuana_Mass.csv')