Lambda School Data Science

*Unit 3, Med Cabinet Build*

---

In [1]:
import pandas as pd
import numpy as np


pd.set_option('display.max_rows', 500)
df = pd.read_csv('cannabis.csv')
df = df.replace('None', np.NaN)
df = df.replace('Dry', np.NaN)
df = df.dropna()

df.head()

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Floral,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy,Herbal,Sage,Wood",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."


In [2]:
# df.loc[df['SEARCH_HERE'].isnull() == True]

df.isnull().sum()

Strain         0
Type           0
Rating         0
Effects        0
Flavor         0
Description    0
dtype: int64

## Step 1 - Effects/Flavor Search Preparation

We need to break out the effects for each strain into something searchable. To do this we're going to grab the number of unique entries, this way we can know how any new columns we'll have for encoding.

In [3]:
# This is what one row looks like on efects

print(df['Effects'][0])

# Python considers this a string

print(type(df['Effects'][0]))

Creative,Energetic,Tingly,Euphoric,Relaxed
<class 'str'>


In [4]:
# Pandas has an option to turn strings in a series into lists through the split method.
# Since methods run across the whole series we need to tell it to focus on the strings for each and split that,
# otherwise it thinks we're trying to split the series, which makes no sense.

df['Effects_List'] = df['Effects'].str.split(',')
df['Flavor_List']  = df['Flavor'].str.split(',')

df['Effects_List']

0       [Creative, Energetic, Tingly, Euphoric, Relaxed]
1         [Relaxed, Aroused, Creative, Happy, Energetic]
2        [Uplifted, Happy, Relaxed, Energetic, Creative]
3          [Tingly, Creative, Hungry, Relaxed, Uplifted]
4        [Happy, Relaxed, Euphoric, Uplifted, Talkative]
                              ...                       
2346     [Happy, Uplifted, Relaxed, Euphoric, Energetic]
2347        [Relaxed, Happy, Euphoric, Uplifted, Sleepy]
2348       [Relaxed, Sleepy, Talkative, Euphoric, Happy]
2349          [Relaxed, Sleepy, Euphoric, Happy, Hungry]
2350          [Hungry, Relaxed, Uplifted, Happy, Sleepy]
Name: Effects_List, Length: 2187, dtype: object

In [5]:
# Now python sees the field as a list.

print(type(df['Effects_List'][0]))
print(df['Effects_List'][0])

<class 'list'>
['Creative', 'Energetic', 'Tingly', 'Euphoric', 'Relaxed']


In [6]:
# From here we can see that while some are below 5, none surpass it.
# We might need to do something about that, but for now we can ignore it.

df['Effects_List'].str.len()

0       5
1       5
2       5
3       5
4       5
       ..
2346    5
2347    5
2348    5
2349    5
2350    5
Name: Effects_List, Length: 2187, dtype: int64

In [7]:
# So we can see we have 15 unique values. Now we need to encode this.

print(len(df['Effects_List'].apply(pd.Series).stack().value_counts())
     ,df['Effects_List'].apply(pd.Series).stack().value_counts()
     ,len(df['Flavor_List'].apply(pd.Series).stack().value_counts())
     ,df['Flavor_List'].apply(pd.Series).stack().value_counts()
     )

13 Happy        1821
Relaxed      1682
Euphoric     1594
Uplifted     1468
Creative      722
Sleepy        721
Energetic     620
Focused       581
Hungry        459
Talkative     348
Tingly        331
Giggly        277
Aroused       191
dtype: int64 53 Earthy        1096
Sweet         1047
Citrus         521
Pungent        445
Berry          353
Pine           298
Wood           285
Floral         267
Diesel         240
Spicy          227
Herbal         227
Lemon          190
Skunk          169
Tropical       153
Blueberry      153
Grape          127
Orange          76
Cheese          67
Pepper          59
Lime            52
Strawberry      47
Minty           41
Pineapple       41
Sage            39
Chemical        38
Grapefruit      38
Lavender        37
Vanilla         34
Fruity          33
Mango           33
Honey           32
Ammonia         28
Nutty           24
Coffee          24
Menthol         22
Butter          19
Tea             18
Mint            18
Rose            16
Apple 

In [8]:
df['Flavor_List'].apply(pd.Series).stack().value_counts()

Earthy        1096
Sweet         1047
Citrus         521
Pungent        445
Berry          353
Pine           298
Wood           285
Floral         267
Diesel         240
Spicy          227
Herbal         227
Lemon          190
Skunk          169
Tropical       153
Blueberry      153
Grape          127
Orange          76
Cheese          67
Pepper          59
Lime            52
Strawberry      47
Minty           41
Pineapple       41
Sage            39
Chemical        38
Grapefruit      38
Lavender        37
Vanilla         34
Fruity          33
Mango           33
Honey           32
Ammonia         28
Nutty           24
Coffee          24
Menthol         22
Butter          19
Tea             18
Mint            18
Rose            16
Apple           16
Apricot          8
Tobacco          8
Violet           7
Tar              7
Chestnut         6
Peach            5
Sour             4
Pear             3
Plum             2
Tangy            1
Candy            1
Tart             1
Melon       

## Step 2 - Encoding

In [9]:
# These two do the same thing, but they don't work with lists.
# They also only work with EXACT MATCHES.

df.loc[df['Effects'] == 'Creative']

df.loc[df['Effects'].isin(['Creative'])]

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Effects_List,Flavor_List
369,Brain-Candy,hybrid,5.0,Creative,Sweet,Brain Candy by Insanity Strains is a handy hyb...,[Creative],[Sweet]


### MultiLabelBinarizer

In [10]:
from sklearn.preprocessing import MultiLabelBinarizer


mlb = MultiLabelBinarizer()

print(
pd.DataFrame(mlb.fit_transform(df['Effects_List'])
            ,columns = mlb.classes_
            ,index   = df.index
            )
     )

      Aroused  Creative  Energetic  Euphoric  Focused  Giggly  Happy  Hungry  \
0           0         1          1         1        0       0      0       0   
1           1         1          1         0        0       0      1       0   
2           0         1          1         0        0       0      1       0   
3           0         1          0         0        0       0      0       1   
4           0         0          0         1        0       0      1       0   
...       ...       ...        ...       ...      ...     ...    ...     ...   
2346        0         0          1         1        0       0      1       0   
2347        0         0          0         1        0       0      1       0   
2348        0         0          0         1        0       0      1       0   
2349        0         0          0         1        0       0      1       1   
2350        0         0          0         0        0       0      1       1   

      Relaxed  Sleepy  Talkative  Tingl

In [11]:
mlb_effects_df = pd.DataFrame(mlb.fit_transform(df['Effects_List'])
                              ,columns = mlb.classes_
                              ,index   = df.index
                              )

mlb_flavor_df = pd.DataFrame(mlb.fit_transform(df['Flavor_List'])
                            ,columns = mlb.classes_
                            ,index   = df.index
                            )
print(mlb_effects_df.head()
     ,mlb_flavor_df.head()
     )

   Aroused  Creative  Energetic  Euphoric  Focused  Giggly  Happy  Hungry  \
0        0         1          1         1        0       0      0       0   
1        1         1          1         0        0       0      1       0   
2        0         1          1         0        0       0      1       0   
3        0         1          0         0        0       0      0       1   
4        0         0          0         1        0       0      1       0   

   Relaxed  Sleepy  Talkative  Tingly  Uplifted  
0        1       0          0       1         0  
1        1       0          0       0         0  
2        1       0          0       0         1  
3        1       0          0       1         1  
4        1       0          1       0         1      Ammonia  Apple  Apricot  Berry  Blueberry  Butter  Candy  Cheese  Chemical  \
0        0      0        0      0          0       0      0       0         0   
1        0      0        0      0          0       0      0       0        

## Step 3 - Merge

In [12]:
mlb_effects_df = mlb_effects_df.merge(mlb_flavor_df
                                     ,left_index = True
                                     ,right_index = True
                                     )
mlb_df         = df.merge(mlb_effects_df
                         ,left_index = True
                         ,right_index = True
                         )

mlb_df         = mlb_df.drop(['Effects_List', 'Flavor_List'], axis = 1)

## Step 4 - SQL

In [13]:
import sqlite3
import SQL

mlb_df.to_sql('cannabis'
             ,con       = sqlite3.Connection('cannabis.sqlite3')
             ,if_exists = 'replace'
             )

sql_df = SQL.SQL('cannabis.sqlite3')

## Part 5 - Search Bar

In [14]:
dict_converter = {'indica': 0
                 ,'sativa': 1
                 ,'hybrid': 2
                 }

mlb_df['Type'] = mlb_df['Type'].replace(dict_converter)
mlb_df         = mlb_df.drop(['Description', 'Rating', 'Effects', 'Flavor'], axis = 1)


# For printing out the new dataframe
mlb_df.to_csv('cannabis_slim.csv', index = False)

mlb_df.head()

Unnamed: 0,Strain,Type,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,Hungry,...,Sweet,Tangy,Tar,Tart,Tea,Tobacco,Tropical,Vanilla,Violet,Wood
0,100-Og,2,0,1,1,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,98-White-Widow,2,1,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2,1024,1,0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
3,13-Dawgs,2,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,24K-Gold,2,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
import SQL


sqldf = SQL.SQL('cannabis.sqlite3')

# sqldf.query('''
# SELECT
#     *
# FROM
#     cannabis
# ''')

In [16]:
# Search strain
input_value = 'Wonder-Woman'

mlb_df.loc[mlb_df['Strain'] == input_value]

Unnamed: 0,Strain,Type,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,Hungry,...,Sweet,Tangy,Tar,Tart,Tea,Tobacco,Tropical,Vanilla,Violet,Wood
2320,Wonder-Woman,2,0,1,0,1,0,0,1,0,...,1,0,0,0,0,0,1,0,0,0


In [17]:
# Search type
input_type = 'hybrid'

mlb_df.loc[mlb_df['Type'] == input_type]

  result = method(y)


Unnamed: 0,Strain,Type,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,Hungry,...,Sweet,Tangy,Tar,Tart,Tea,Tobacco,Tropical,Vanilla,Violet,Wood


In [18]:
# Search effects/flavor
input_value = 'Sweet'

mlb_df.loc[mlb_df[input_value] == 1]

Unnamed: 0,Strain,Type,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,Hungry,...,Sweet,Tangy,Tar,Tart,Tea,Tobacco,Tropical,Vanilla,Violet,Wood
0,100-Og,2,0,1,1,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
6,3-Kings,2,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
9,3X-Crazy,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
17,9-Pound-Hammer,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
19,A-10,0,0,0,0,0,1,0,1,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2343,Zellys-Gift,1,0,0,1,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2344,Zen,2,0,0,1,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2347,Zkittlez,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2348,Zombie-Kush,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0


### Multi search

In [19]:
input_value = 'Wonder-Woman'
input_type  = 'hybrid'
input_effects_and_flavor = ['Sweet', 'Sour']


ans_df      = mlb_df

# Search strain
ans_df      = ans_df.loc[ans_df['Strain'] == input_value]

# Search type
ans_df      = ans_df.loc[ans_df['Type'] == input_type]

# Search effects/flavor
for thing in input_effects_and_flavor:
    ans_df = ans_df.loc[ans_df[thing] == 1]



ans_df

Unnamed: 0,Strain,Type,Aroused,Creative,Energetic,Euphoric,Focused,Giggly,Happy,Hungry,...,Sweet,Tangy,Tar,Tart,Tea,Tobacco,Tropical,Vanilla,Violet,Wood


In [20]:
# Search function


def search(input_strain = '', input_type = '', input_effects_and_flavor = ''):

    import pandas as pd


    df = pd.read_csv('cannabis_slim.csv')
    # Search strain
    if input_strain != '':
        df      = df.loc[df['Strain'] == input_value]

    # Search type
    if input_type != '':
        df      = df.loc[df['Type'] == input_type]

    # Search effects/flavor
    if input_effects_and_flavor != '':
        for thing in input_effects_and_flavor:
            df  = df.loc[df[thing] == 1]
    
    if len(df) == 0:
        return 'No results'
    else:
        return df

In [21]:
search(input_strain = 'Wonder-Woman'
      ,input_type   = 'hybrid'
      ,input_effects_and_flavor = ['Sweet', 'Sour']
      )

'No results'

In [22]:
import function


# function.search(input_type = 'hybrid')

In [34]:
data    = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    
    
input_feelings = [34, 6]
input_taste    = [3, 4]


for feeling in input_feelings:
    data[0][feeling] = 1

for taste in input_taste:
    data[0][taste] = 1

print(data)

[[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


In [48]:
a = zip(mlb_df.columns, data[0])

a = set(a)

In [49]:
a

{('Ammonia', 0),
 ('Apple', 0),
 ('Apricot', 0),
 ('Aroused', 0),
 ('Berry', 0),
 ('Blueberry', 0),
 ('Butter', 0),
 ('Candy', 0),
 ('Cheese', 0),
 ('Chemical', 0),
 ('Chestnut', 0),
 ('Citrus', 0),
 ('Coffee', 0),
 ('Creative', 1),
 ('Diesel', 0),
 ('Earthy', 0),
 ('Energetic', 1),
 ('Euphoric', 0),
 ('Floral', 0),
 ('Focused', 1),
 ('Fruity', 0),
 ('Giggly', 0),
 ('Grape', 0),
 ('Grapefruit', 0),
 ('Happy', 0),
 ('Herbal', 0),
 ('Honey', 1),
 ('Hungry', 0),
 ('Lavender', 0),
 ('Lemon', 0),
 ('Lime', 0),
 ('Mango', 0),
 ('Melon', 0),
 ('Menthol', 0),
 ('Mint', 0),
 ('Minty', 0),
 ('Nutty', 0),
 ('Orange', 0),
 ('Peach', 0),
 ('Pear', 0),
 ('Pepper', 0),
 ('Pine', 0),
 ('Pineapple', 0),
 ('Plum', 0),
 ('Pungent', 0),
 ('Relaxed', 0),
 ('Rose', 0),
 ('Sage', 0),
 ('Skunk', 0),
 ('Sleepy', 0),
 ('Sour', 0),
 ('Spicy', 0),
 ('Strain', 0),
 ('Strawberry', 0),
 ('Sweet', 0),
 ('Talkative', 0),
 ('Tangy', 0),
 ('Tar', 0),
 ('Tart', 0),
 ('Tea', 0),
 ('Tingly', 0),
 ('Tobacco', 0),
 ('Tropica