In [90]:
import requests
import json
import pandas as pd

# Get all strains
API_KEY = 'uEH5NtA'

request_url = f"http://strainapi.evanbusse.com/{API_KEY}/strains/search/all"
response = requests.get(request_url)
data = json.loads(response.text)


In [91]:
# Get all strains
strain_names = pd.DataFrame(list(data.keys()), columns =['name'])

# Get strain characteristics
strain_values = list(data.values())
strain_values = pd.DataFrame.from_dict(strain_values)

# Add strain name to the dataframe
strains_raw = pd.concat([strain_names, strain_values], axis=1)

# Take a look at the data
strains_raw.head()

Unnamed: 0,name,id,race,flavors,effects
0,Afpak,1,hybrid,"[Earthy, Chemical, Pine]","{'positive': ['Relaxed', 'Hungry', 'Happy', 'S..."
1,African,2,sativa,"[Spicy/Herbal, Pungent, Earthy]","{'positive': ['Euphoric', 'Happy', 'Creative',..."
2,Afternoon Delight,3,hybrid,"[Pepper, Flowery, Pine]","{'positive': ['Relaxed', 'Hungry', 'Euphoric',..."
3,Afwreck,4,hybrid,"[Pine, Earthy, Flowery]","{'positive': ['Relaxed', 'Happy', 'Creative', ..."
4,Agent Orange,5,hybrid,"[Citrus, Orange, Sweet]","{'positive': ['Relaxed', 'Euphoric', 'Happy', ..."


In [92]:
# Get a line for each type of effect
strains = strains_raw.effects.apply(pd.Series) \
    .merge(strains_raw, right_index = True, left_index = True) \
    .drop(["effects"], axis = 1)
strains

Unnamed: 0,positive,negative,medical,name,id,race,flavors
0,"[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...",Afpak,1,hybrid,"[Earthy, Chemical, Pine]"
1,"[Euphoric, Happy, Creative, Energetic, Talkative]",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N...",African,2,sativa,"[Spicy/Herbal, Pungent, Earthy]"
2,"[Relaxed, Hungry, Euphoric, Uplifted, Tingly]","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H...",Afternoon Delight,3,hybrid,"[Pepper, Flowery, Pine]"
3,"[Relaxed, Happy, Creative, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Pain, Stress, Headache, Fatigue, Headaches, M...",Afwreck,4,hybrid,"[Pine, Earthy, Flowery]"
4,"[Relaxed, Euphoric, Happy, Energetic, Uplifted]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Pain, Stress, Nausea, Headache, H...",Agent Orange,5,hybrid,"[Citrus, Orange, Sweet]"
...,...,...,...,...,...,...,...
1965,"[Relaxed, Happy, Energetic, Uplifted, Focused]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Fatigue]",Zeus OG,2158,hybrid,"[Earthy, Woody, Pine]"
1966,"[Relaxed, Euphoric, Happy, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Nausea, H...",Zkittlez,2159,indica,"[Sweet, Berry, Grape]"
1967,"[Relaxed, Euphoric, Happy, Sleepy, Giggly]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Lack of A...",Zombie OG,2160,indica,"[Earthy, Pungent, Sweet]"
1968,"[Relaxed, Hungry, Happy, Uplifted, Giggly]",[],"[Insomnia, Pain, Cramps, Lack of Appetite, Mus...",Zoom Pie,2161,indica,"[Berry, Earthy, Pungent]"


In [93]:
# Get the description for every strain_id
unique_ids = strains['id'].unique()
descriptions = []
for id in unique_ids:
    request_url = f"http://strainapi.evanbusse.com/{API_KEY}/strains/data/desc/{id}"
    response = requests.get(request_url)
    data = json.loads(response.text)
    desc = list(data.values())
    descriptions.append({'id':id, 'description':desc})
descriptions = pd.DataFrame(descriptions)
descriptions.head()

Unnamed: 0,id,description
0,1,"[Afpak, named for its direct Afghani and Pakis..."
1,2,[African refers to the indigenous varieties of...
2,3,"[Afternoon Delight, created by Colorado Seed I..."
3,4,[Afwreck is a hybrid cross of Afghani and Trai...
4,5,[Don’t let the name scare you! The only herbic...


In [94]:
# Add description to the df
strains = strains.merge(descriptions, on='id')
strains

Unnamed: 0,positive,negative,medical,name,id,race,flavors,description
0,"[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...",Afpak,1,hybrid,"[Earthy, Chemical, Pine]","[Afpak, named for its direct Afghani and Pakis..."
1,"[Euphoric, Happy, Creative, Energetic, Talkative]",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N...",African,2,sativa,"[Spicy/Herbal, Pungent, Earthy]",[African refers to the indigenous varieties of...
2,"[Relaxed, Hungry, Euphoric, Uplifted, Tingly]","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H...",Afternoon Delight,3,hybrid,"[Pepper, Flowery, Pine]","[Afternoon Delight, created by Colorado Seed I..."
3,"[Relaxed, Happy, Creative, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Pain, Stress, Headache, Fatigue, Headaches, M...",Afwreck,4,hybrid,"[Pine, Earthy, Flowery]",[Afwreck is a hybrid cross of Afghani and Trai...
4,"[Relaxed, Euphoric, Happy, Energetic, Uplifted]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Pain, Stress, Nausea, Headache, H...",Agent Orange,5,hybrid,"[Citrus, Orange, Sweet]",[Don’t let the name scare you! The only herbic...
...,...,...,...,...,...,...,...,...
1965,"[Relaxed, Happy, Energetic, Uplifted, Focused]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Fatigue]",Zeus OG,2158,hybrid,"[Earthy, Woody, Pine]",[Zeus OG is a hybrid cross between Pineapple O...
1966,"[Relaxed, Euphoric, Happy, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Nausea, H...",Zkittlez,2159,indica,"[Sweet, Berry, Grape]",[Zkittlez is an indica-dominant mix of Grape A...
1967,"[Relaxed, Euphoric, Happy, Sleepy, Giggly]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Lack of A...",Zombie OG,2160,indica,"[Earthy, Pungent, Sweet]",[If you’re looking to transform into a flesh-e...
1968,"[Relaxed, Hungry, Happy, Uplifted, Giggly]",[],"[Insomnia, Pain, Cramps, Lack of Appetite, Mus...",Zoom Pie,2161,indica,"[Berry, Earthy, Pungent]",[Zoom Pie (also known as Zombie Pie) is a heav...


In [95]:
# Change column order
cols = ['id', 'name', 'race', 'flavors', 'positive', 'negative', 'medical', 'description']
strains = strains[cols]
strains.head(1)

Unnamed: 0,id,name,race,flavors,positive,negative,medical,description
0,1,Afpak,hybrid,"[Earthy, Chemical, Pine]","[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...","[Afpak, named for its direct Afghani and Pakis..."


In [96]:
# Combine everything in one column
strains['Combined'] = strains[strains.columns[3:]].apply(
    lambda x: ','.join(x.dropna().astype(str)),
    axis=1
)
strains

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,id,name,race,flavors,positive,negative,medical,description,Combined
0,1,Afpak,hybrid,"[Earthy, Chemical, Pine]","[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...","[Afpak, named for its direct Afghani and Pakis...","['Earthy', 'Chemical', 'Pine'],['Relaxed', 'Hu..."
1,2,African,sativa,"[Spicy/Herbal, Pungent, Earthy]","[Euphoric, Happy, Creative, Energetic, Talkative]",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N...",[African refers to the indigenous varieties of...,"['Spicy/Herbal', 'Pungent', 'Earthy'],['Euphor..."
2,3,Afternoon Delight,hybrid,"[Pepper, Flowery, Pine]","[Relaxed, Hungry, Euphoric, Uplifted, Tingly]","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H...","[Afternoon Delight, created by Colorado Seed I...","['Pepper', 'Flowery', 'Pine'],['Relaxed', 'Hun..."
3,4,Afwreck,hybrid,"[Pine, Earthy, Flowery]","[Relaxed, Happy, Creative, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Pain, Stress, Headache, Fatigue, Headaches, M...",[Afwreck is a hybrid cross of Afghani and Trai...,"['Pine', 'Earthy', 'Flowery'],['Relaxed', 'Hap..."
4,5,Agent Orange,hybrid,"[Citrus, Orange, Sweet]","[Relaxed, Euphoric, Happy, Energetic, Uplifted]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Pain, Stress, Nausea, Headache, H...",[Don’t let the name scare you! The only herbic...,"['Citrus', 'Orange', 'Sweet'],['Relaxed', 'Eup..."
...,...,...,...,...,...,...,...,...,...
1965,2158,Zeus OG,hybrid,"[Earthy, Woody, Pine]","[Relaxed, Happy, Energetic, Uplifted, Focused]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Fatigue]",[Zeus OG is a hybrid cross between Pineapple O...,"['Earthy', 'Woody', 'Pine'],['Relaxed', 'Happy..."
1966,2159,Zkittlez,indica,"[Sweet, Berry, Grape]","[Relaxed, Euphoric, Happy, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Nausea, H...",[Zkittlez is an indica-dominant mix of Grape A...,"['Sweet', 'Berry', 'Grape'],['Relaxed', 'Eupho..."
1967,2160,Zombie OG,indica,"[Earthy, Pungent, Sweet]","[Relaxed, Euphoric, Happy, Sleepy, Giggly]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Lack of A...",[If you’re looking to transform into a flesh-e...,"['Earthy', 'Pungent', 'Sweet'],['Relaxed', 'Eu..."
1968,2161,Zoom Pie,indica,"[Berry, Earthy, Pungent]","[Relaxed, Hungry, Happy, Uplifted, Giggly]",[],"[Insomnia, Pain, Cramps, Lack of Appetite, Mus...",[Zoom Pie (also known as Zombie Pie) is a heav...,"['Berry', 'Earthy', 'Pungent'],['Relaxed', 'Hu..."


In [131]:
strains.tail()

Unnamed: 0,id,name,race,flavors,positive,negative,medical,description,Combined
1965,2158,Zeus OG,hybrid,"[Earthy, Woody, Pine]","[Relaxed, Happy, Energetic, Uplifted, Focused]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Fatigue]",[Zeus OG is a hybrid cross between Pineapple O...,"['Earthy', 'Woody', 'Pine'],['Relaxed', 'Happy..."
1966,2159,Zkittlez,indica,"[Sweet, Berry, Grape]","[Relaxed, Euphoric, Happy, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Nausea, H...",[Zkittlez is an indica-dominant mix of Grape A...,"['Sweet', 'Berry', 'Grape'],['Relaxed', 'Eupho..."
1967,2160,Zombie OG,indica,"[Earthy, Pungent, Sweet]","[Relaxed, Euphoric, Happy, Sleepy, Giggly]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Insomnia, Pain, Stress, Lack of A...",[If you’re looking to transform into a flesh-e...,"['Earthy', 'Pungent', 'Sweet'],['Relaxed', 'Eu..."
1968,2161,Zoom Pie,indica,"[Berry, Earthy, Pungent]","[Relaxed, Hungry, Happy, Uplifted, Giggly]",[],"[Insomnia, Pain, Cramps, Lack of Appetite, Mus...",[Zoom Pie (also known as Zombie Pie) is a heav...,"['Berry', 'Earthy', 'Pungent'],['Relaxed', 'Hu..."
1969,2162,ʻAlenuihāhā,hybrid,"[Sweet, Tropical, Pineapple]","[Euphoric, Happy, Creative, Energetic, Aroused]","[Dizzy, Dry Mouth, Paranoid, Anxious]","[Depression, Stress, Fatigue]",[ʻAlenuihāhā is a sativa-dominant hybrid by Pu...,"['Sweet', 'Tropical', 'Pineapple'],['Euphoric'..."


In [98]:
!python -m spacy download en_core_web_lg

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_lg')


In [99]:
import pandas as pd
from flask import request
import requests
import json
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import spacy
import seaborn as sns
nlp = spacy.load('en_core_web_lg')

In [100]:
strains['Combined'][0]

"['Earthy', 'Chemical', 'Pine'],['Relaxed', 'Hungry', 'Happy', 'Sleepy'],['Dizzy'],['Depression', 'Insomnia', 'Pain', 'Stress', 'Lack of Appetite'],['Afpak, named for its direct Afghani and Pakistani landrace heritage, is a beautiful indica-dominant hybrid with light green and deep bluish purple leaves. The taste and aroma are floral with a touch of lemon, making the inhale light and smooth. Its effects start in the stomach by activating the appetite. There is also a potent relaxation that starts in the head and face, and gradually sinks down into the body. Enjoy this strain if you’re suffering from stress, mild physical discomfort, or having difficulty eating. \\xa0']"

In [101]:
type(strains['Combined'][0])

str

In [102]:
# vect = CountVectorizer()
# vect.fit(strains['Combined'])

In [104]:
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)

In [105]:
dtm = tfidf.fit_transform(strains['Combined'])

In [106]:
# dtm = vect.transform(strains['Combined'])
# print(vect.get_feature_names())

In [132]:
dump(tfidf, 'tfidf.joblib', compress=1)

['tfidf.joblib']

In [108]:
print(dtm)

  (0, 4729)	0.06614190116404875
  (0, 1346)	0.20056750901749404
  (0, 1201)	0.20056750901749404
  (0, 1964)	0.1681376414091121
  (0, 1212)	0.15305861169082652
  (0, 3156)	0.09967984827087434
  (0, 2740)	0.13918460388195986
  (0, 4111)	0.1438376428516664
  (0, 4054)	0.03999843507304799
  (0, 1421)	0.11189170309126688
  (0, 501)	0.0659869457551453
  (0, 3816)	0.19601714471690837
  (0, 1857)	0.18267502404782712
  (0, 1546)	0.15616874217924157
  (0, 1975)	0.11391087891862153
  (0, 4008)	0.20056750901749404
  (0, 3463)	0.09000212024244383
  (0, 3234)	0.08431188700332537
  (0, 132)	0.22102847739600542
  (0, 4042)	0.20056750901749404
  (0, 4005)	0.18010654063898265
  (0, 1356)	0.05069233598293972
  (0, 3862)	0.13162972257811226
  (0, 2190)	0.1665416344776096
  (0, 2611)	0.10954769937723487
  :	:
  (1969, 252)	0.05532042083329067
  (1969, 3654)	0.14597971448555414
  (1969, 2606)	0.09149842880016443
  (1969, 4175)	0.03749617441824365
  (1969, 3625)	0.05022284670924727
  (1969, 1581)	0.098890570

In [109]:
# dtm.todense()
# dtm = vect.transform(strains['Combined'])

In [111]:
dtm = pd.DataFrame(dtm.todense(), columns=tfidf.get_feature_names())

In [112]:
dtm.head()

Unnamed: 0,09,10,100,11,12,13,14,15,16,17,...,zen,zest,zestful,zesty,zeta,zion,zkittlez,zombie,zone,ʻalenuihāhā
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [113]:
# doc_len = [len(doc) for doc in strains['Combined']]
# sns.distplot(doc_len);

In [115]:
# Calculate Distance of TF-IDF Vectors
from sklearn.metrics.pairwise import cosine_similarity

dist_matrix  = cosine_similarity(dtm)

In [116]:
df = pd.DataFrame(dist_matrix)

In [117]:
df.shape

(1970, 1970)

In [118]:
strains['Combined'][0]

"['Earthy', 'Chemical', 'Pine'],['Relaxed', 'Hungry', 'Happy', 'Sleepy'],['Dizzy'],['Depression', 'Insomnia', 'Pain', 'Stress', 'Lack of Appetite'],['Afpak, named for its direct Afghani and Pakistani landrace heritage, is a beautiful indica-dominant hybrid with light green and deep bluish purple leaves. The taste and aroma are floral with a touch of lemon, making the inhale light and smooth. Its effects start in the stomach by activating the appetite. There is also a potent relaxation that starts in the head and face, and gradually sinks down into the body. Enjoy this strain if you’re suffering from stress, mild physical discomfort, or having difficulty eating. \\xa0']"

In [119]:
df[0]

0       1.000000
1       0.044468
2       0.060770
3       0.106337
4       0.053862
          ...   
1965    0.036566
1966    0.065819
1967    0.126475
1968    0.099830
1969    0.033919
Name: 0, Length: 1970, dtype: float64

In [120]:
# Grab the row
df[df[0] < 1][0].sort_values(ascending=False)[:5]

0       1.000000
593     0.200106
1383    0.162912
1122    0.156385
946     0.147333
Name: 0, dtype: float64

In [121]:
print(strains['Combined'][0])

['Earthy', 'Chemical', 'Pine'],['Relaxed', 'Hungry', 'Happy', 'Sleepy'],['Dizzy'],['Depression', 'Insomnia', 'Pain', 'Stress', 'Lack of Appetite'],['Afpak, named for its direct Afghani and Pakistani landrace heritage, is a beautiful indica-dominant hybrid with light green and deep bluish purple leaves. The taste and aroma are floral with a touch of lemon, making the inhale light and smooth. Its effects start in the stomach by activating the appetite. There is also a potent relaxation that starts in the head and face, and gradually sinks down into the body. Enjoy this strain if you’re suffering from stress, mild physical discomfort, or having difficulty eating. \xa0']


In [122]:
print(strains['Combined'][593])

['Earthy', 'Sweet', 'Pungent'],['Relaxed', 'Hungry', 'Euphoric', 'Happy', 'Sleepy'],['Dizzy', 'Dry Mouth', 'Paranoid', 'Dry Eyes', 'Anxious'],['Depression', 'Insomnia', 'Pain', 'Stress', 'Lack of Appetite'],['Do-Si-Dos is an indica-dominant hybrid with qualities similar to its parent, Girl Scout Cookies. With glittering trichomes, bright pistils, and lime green and lavender leaves, this strain is a feast for eyes. Its aroma is pungent, sweet, and earthy with slight floral funkiness. These classic OG aromas blend nicely with medical-grade body effects that lean toward the sedative side with the addition of Face Off OG genetics. Enjoy a stoney, in-your-face buzz off the start that melts down over the body, prettifying the consumer with relaxation that emanates outward.\xa0']


In [150]:
dtm.columns

Index(['09', '10', '100', '11', '12', '13', '14', '15', '16', '17',
       ...
       'zen', 'zest', 'zestful', 'zesty', 'zeta', 'zion', 'zkittlez', 'zombie',
       'zone', 'ʻalenuihāhā'],
      dtype='object', length=5000)

In [141]:
from sklearn.neighbors import NearestNeighbors

# Fit on DTM
nn = NearestNeighbors(n_neighbors=4, algorithm='kd_tree')
nn.fit(dtm)

NearestNeighbors(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=4, p=2,
                 radius=1.0)

In [142]:
nn.kneighbors([dtm.iloc[0].values])

(array([[0.        , 1.26482729, 1.29389936, 1.29893431]]),
 array([[   0,  593, 1383, 1122]]))

In [144]:
# Query Using kneighbors 
nn.kneighbors([dtm.iloc[256]])

(array([[0.        , 1.21532335, 1.2822921 , 1.28422796]]),
 array([[ 256,  240,   76, 1578]]))

In [145]:
strains['Combined'][256]

"['Earthy', 'Pungent', 'Berry'],['Relaxed', 'Happy', 'Energetic', 'Uplifted', 'Focused'],['Dry Mouth'],['Depression', 'Pain', 'Stress', 'Nausea', 'Headache', 'Fatigue'],['Blue Dot, also known as MGB or Medical Grade Bud,\\xa0is a\\xa0sativa-dominant strain\\xa0originally bred\\xa0in Sonoma, California\\xa0by a medical patient. Blue Dot is a genetic blend of Leda Uno, Northern Lights, and Haze\\xa0. Its resinous buds offer a sweet\\xa0hashy\\xa0flavor profile, and can be expected to finish flowering in 9 to 10 weeks.']"

In [146]:
strains['Combined'][240]

"['Pine', 'Earthy', 'Sage'],['Relaxed', 'Euphoric', 'Happy', 'Uplifted', 'Sleepy'],['Dry Mouth'],['Insomnia', 'Pain', 'Stress', 'Nausea', 'Headache', 'Muscle Spasms'],['Blowfish is an indica-dominant hybrid bred by Dutch Flowers, who crossed a G13 and Oregon Funk hybrid with a G13 and Blue Dot hybrid to create this cannabis strain.']"

In [147]:
from joblib import dump
dump(nn, 'strains_recomender.joblib', compress=1)

['strains_recomender.joblib']

In [185]:
strains.head()

Unnamed: 0,id,name,race,flavors,positive,negative,medical,description,Combined
0,1,Afpak,hybrid,"[Earthy, Chemical, Pine]","[Relaxed, Hungry, Happy, Sleepy]",[Dizzy],"[Depression, Insomnia, Pain, Stress, Lack of A...","[Afpak, named for its direct Afghani and Pakis...","['Earthy', 'Chemical', 'Pine'],['Relaxed', 'Hu..."
1,2,African,sativa,"[Spicy/Herbal, Pungent, Earthy]","[Euphoric, Happy, Creative, Energetic, Talkative]",[Dry Mouth],"[Depression, Pain, Stress, Lack of Appetite, N...",[African refers to the indigenous varieties of...,"['Spicy/Herbal', 'Pungent', 'Earthy'],['Euphor..."
2,3,Afternoon Delight,hybrid,"[Pepper, Flowery, Pine]","[Relaxed, Hungry, Euphoric, Uplifted, Tingly]","[Dizzy, Dry Mouth, Paranoid]","[Depression, Insomnia, Pain, Stress, Cramps, H...","[Afternoon Delight, created by Colorado Seed I...","['Pepper', 'Flowery', 'Pine'],['Relaxed', 'Hun..."
3,4,Afwreck,hybrid,"[Pine, Earthy, Flowery]","[Relaxed, Happy, Creative, Uplifted, Sleepy]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Pain, Stress, Headache, Fatigue, Headaches, M...",[Afwreck is a hybrid cross of Afghani and Trai...,"['Pine', 'Earthy', 'Flowery'],['Relaxed', 'Hap..."
4,5,Agent Orange,hybrid,"[Citrus, Orange, Sweet]","[Relaxed, Euphoric, Happy, Energetic, Uplifted]","[Dizzy, Dry Mouth, Paranoid, Dry Eyes]","[Depression, Pain, Stress, Nausea, Headache, H...",[Don’t let the name scare you! The only herbic...,"['Citrus', 'Orange', 'Sweet'],['Relaxed', 'Eup..."


In [186]:
X_train = strains.drop(['name', 'Combined', 'id'], axis=1)
y_train = strains['name']
X_train.shape, y_train.shape

((1970, 6), (1970,))

In [187]:
print('Vectorization...')
X_train = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y_train = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] =1

Vectorization...


NameError: name 'sentences' is not defined

In [179]:
max_features = 1000
maxlen = 80
batch_size= 30

In [180]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from sklearn.model_selection import train_test_split

In [181]:
X_train, y_train, X_test, y_test = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

In [182]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1576, 6), (1576,), (394, 6), (394,))

In [183]:
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

1576 train sequences
1576 test sequences


In [184]:
print('Pad Sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape: ', X_train.shape)
print('X_test shape: ', X_test.shape)

Pad Sequences (samples x time)


ValueError: invalid literal for int() with base 10: 'race'