In [1]:

"""
This library interfaces with the pickled model.
"""

import os
import pickle
import pandas as pd
import numpy as np
import spacy
from spacy.tokens import Doc


###################
##BUILD PREDICTOR##
###################

class Predictor():
    """Nearest-neighbour lookup against the pickled model.

    Text input is turned into a vector by ``vectorizer`` and passed to
    ``model.query``.  The most recently vectorized input is kept on
    ``self.vectorized_input`` so ``predict()`` can be called again without
    supplying the input a second time.
    """

    def __init__(self, model=None, vectorizer=None):
        """Set up the model and vectorizer.

        Arguments:
            model -- object exposing ``query(X, k=...)`` that returns
                ``(distances, indices)``.  Defaults to ``load_file('model')``.
            vectorizer -- object exposing ``transform(str)`` that returns an
                array.  Defaults to ``Vectorizer()``.
        """
        # Honour injected collaborators; only touch the disk when none is given.
        self.model = model if model is not None else load_file('model')
        self.vectorizer = vectorizer if vectorizer is not None else Vectorizer()
        # Initialised up front so "no data yet" can be detected reliably.
        self.raw_input = None
        self.vectorized_input = None

    def transform(self, raw_input, verbose=False):
        """Vectorize ``raw_input`` into a single-row 2-D array and cache it.

        Arguments:
            raw_input {str} -- text to vectorize
            verbose {bool} -- print the resulting vector when True

        Returns:
            array -- the vector, reshaped to ``(1, -1)``
        """
        self.raw_input = raw_input
        vinput = self.vectorizer.transform(raw_input).reshape(1, -1)
        self.vectorized_input = vinput
        if verbose:
            print(vinput)

        return vinput

    def predict(self, user_input=None, size=5):
        """Query the model for the ``size`` nearest neighbours.

        Arguments:
            user_input -- a string to vectorize, an already-vectorized
                array-like, or None to reuse the last vectorized input
            size {int} -- number of neighbours requested (passed as ``k``)

        Returns:
            tuple -- ``(indices, distances)`` for the single query row

        Raises:
            NoDataProvided -- no input was given and nothing is cached
        """
        # data_available() validates the input and fills self.vectorized_input,
        # so the text is vectorized exactly once per call.
        self.data_available(user_input)
        distances, indices = self.model.query(self.vectorized_input, k=size)
        return indices[0], distances[0]

    def data_available(self, user_input):
        """Make sure a vectorized input is ready for ``predict``.

        Strings are vectorized through ``transform``; any other non-None
        value is treated as an already-vectorized array and stored as a 2-D
        array.  Returns True on success.

        Raises:
            NoDataProvided -- ``user_input`` is None with nothing cached, or
                is an empty array-like
        """
        if user_input is None:
            if self.vectorized_input is None:
                raise NoDataProvided
            return True

        if isinstance(user_input, str):
            self.transform(user_input, verbose=False)
            return True

        # Avoid truth-testing the value: that is ambiguous for numpy arrays.
        vector = np.atleast_2d(np.asarray(user_input))
        if vector.size == 0:
            raise NoDataProvided
        self.vectorized_input = vector
        return True

class Vectorizer():
    """Turns a string into a single-row vector via the module's spaCy helpers."""

    def __init__(self):
        # No state to set up.
        pass

    def transform(self, input_string):
        """Tokenize ``input_string`` and return its vector reshaped to ``(1, -1)``."""
        doc = tokenize_text(input_string)
        doc_vector = get_vector_from_doc(doc)
        return doc_vector.reshape(1, -1)

####################
###Error Handling###
####################

class Error(Exception):
    """Root of this module's custom exception hierarchy."""

class NoDataProvided(Error):
    """Raised when there is no input data to work with."""

######################
###Helper Functions###
######################

def get_abs_path(filename, **kwargs):
    """Resolve ``filename`` to the path it should be loaded from.

    Returns the absolute form of ``filename`` when that names an existing
    file; otherwise falls back to ``djapi/recommender/<filename>`` under the
    current working directory.  Extra keyword arguments are accepted and
    ignored.
    """
    resolved = os.path.abspath(filename)
    if not os.path.isfile(resolved):
        # Not found relative to the working directory: use the app folder.
        return os.path.join(
            os.getcwd(), 'djapi/recommender/'+filename,
        )
    return resolved

def load_file(file_key):
    """Unpickle and return the file registered under ``file_key`` in ``params``."""
    pickle_path = get_abs_path(params[file_key])
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)

##################
##SET PARAMETERS##
##################

# Maps a logical file key to the pickle filename that load_file() opens.
params = {
    'model': 'kdtree_model_1.2.pkl'
}

# Load spacy model

# Use if deploying to heroku.  manually add folder to base repo
# path_to_model = os.path.join(os.getcwd(), "en_core_web_md-2.2.0/")

# Use if local/pushing to github.  Requires the model named below to be
# installed, e.g. via
#    python -m spacy download en_core_web_sm
# NOTE(review): the heroku path above points at en_core_web_md while this
# line loads en_core_web_sm -- confirm which model the pickled model expects.
path_to_model = "en_core_web_sm"

# Module-level pipeline; tokenize_text() calls it for every input string.
nlp = spacy.load(path_to_model)

############################
###Spacy filter/tokenizer###
############################


def tokenize_text(text):
    """Run ``text`` through the module-level ``nlp`` pipeline and return the result."""
    doc = nlp(text)
    return doc

def get_vector_from_doc(x):
    """Return the ``vector`` attribute of ``x``."""
    doc_vector = x.vector
    return doc_vector


In [2]:
def get_preds(user_info, predictor=None, size=5):
    """Retrieve predictions for a piece of user text.

    Arguments:
        user_info {str} -- free-text input describing what the user wants
        predictor -- object with a ``predict(user_input=..., size=...)``
            method to reuse; a new ``Predictor()`` is built when omitted
        size {int} -- number of predictions requested (default 5)

    Returns:
        list -- ``[indices, distances]`` as returned by ``predict``
    """
    # Building a Predictor loads the pickled model, so let callers pass in
    # an existing one instead of paying that cost on every call.
    if predictor is None:
        predictor = Predictor()
    pred_indices, pred_distances = predictor.predict(
        user_input=user_info, size=size)

    return [pred_indices, pred_distances]

# Example query: free text passed to get_preds(); the bare name on the last
# line displays the returned [indices, distances] pair as the cell output.
text = 'great ape kush needed for back pain relief'
results = get_preds(text)
results

[array([1586,  612,  742, 1774, 1418]),
 array([6.98708887, 7.00553338, 7.11482066, 7.14737441, 7.14871198])]

In [3]:
# Split the pair returned by get_preds into its two arrays and display them.
indices, distances = results[0], results[1]
indices, distances

(array([1586,  612,  742, 1774, 1418]),
 array([6.98708887, 7.00553338, 7.11482066, 7.14737441, 7.14871198]))

In [165]:
"""code to retrieve data from the database"""

import sqlite3
from sklearn.preprocessing import MinMaxScaler

# Characters introduced by str() around fetched rows.  They are deleted to
# keep the plain-text value format this function has always returned.
# NOTE(review): this also strips the same characters from the stored text
# itself (e.g. commas inside a description) -- confirm that is acceptable.
_REPR_NOISE = str.maketrans('', '', ")[](,'")


def _scale_scores(distances, low=1.0, high=3.0):
    """Min-max rescale ``distances`` onto [low, high], rounded to whole numbers."""
    values = np.asarray(distances, dtype=float)
    if values.size == 0:
        return values
    lowest = values.min()
    spread = values.max() - lowest
    if spread == 0:
        # All distances equal: avoid dividing by zero; every score is ``low``.
        spread = 1.0
    factor = (high - low) / spread
    return (values * factor + (low - lowest * factor)).round()


def strain_info(id_list, distance_list, db_path='med_cabinet3.sqlite3', limit=5):
    """Look up strain details for the ids returned by the model.

    Arguments:
        id_list -- sequence of strain ids (row ``id`` in ``strain_info``)
        distance_list -- sequence of distances, parallel to ``id_list``
        db_path {str} -- sqlite database file to read
        limit {int} -- maximum number of strains to return; None for all

    Returns:
        list -- one dict per strain, in input order, with the keys
        ``Recommendation`` (1-based rank), each requested column (as text
        with the characters ``) [ ] ( , '`` removed) and ``Score`` (the
        distance rescaled onto 1..3 and rounded).
    """
    # The columns that will be retrieved from the database.
    needed_columns = ['strain', 'effect', 'medical_effect_plain',
                      'flavor', 'Type', 'THC_Percent',
                      'CBD', 'Description1']

    # Never index past the shorter input; fewer than ``limit`` results is fine.
    count = min(len(id_list), len(distance_list))
    if limit is not None:
        count = min(count, max(limit, 0))

    scores = _scale_scores(distance_list[:count])

    # One query per strain fetches every needed column; the id is bound as a
    # parameter instead of being formatted into the SQL text.
    column_sql = ', '.join(needed_columns)
    request = f'SELECT {column_sql} FROM strain_info WHERE id = ?;'

    return_list = []
    sl_conn = sqlite3.connect(db_path)
    try:
        sl_curs = sl_conn.cursor()
        for rank in range(count):
            rows = sl_curs.execute(request, (int(id_list[rank]),)).fetchall()
            strain = {'Recommendation': rank + 1}
            for position, column in enumerate(needed_columns):
                # Same text the per-column query used to produce:
                # str() of the fetched rows with the wrapper characters removed.
                fetched = [(row[position],) for row in rows]
                strain[column] = str(fetched).translate(_REPR_NOISE)
            strain['Score'] = scores[rank]
            return_list.append(strain)
    finally:
        # Closing the connection releases the database file even on error.
        sl_conn.close()
    return return_list


In [166]:
# Fetch the database details for the ids and distances split out of `results`.
strain_list = strain_info(indices, distances)

In [167]:
# Display the records returned by strain_info().
strain_list

[{'Recommendation': 1,
  'strain': 'magic-beans-og',
  'effect': 'Body High Euphoric Happy Hungry Relaxed Sleepy Tingly Uplifted',
  'medical_effect_plain': 'Pain relief Reduces depression Inhibits fungal growth Reduces inflammation Aids sleep Inhibits cancer growth Tranquilizing Suppresses muscle spasms Immune response stimulant',
  'flavor': 'Chemical Dank Earthy Lemon Menthol Pine Spicy Woody',
  'Type': 'hybrid',
  'THC_Percent': '0.0',
  'CBD': '0.0',
  'Description1': 'Magic Beans OG is a rare indica dominant hybrid strain of unknown heritage due to a general lack of information about it. The insight we do have on Magic Beans comes primarily from user experience and potency reports. What is most commonly said about Magic Beans OG is that it has a relaxing physical high that’s perfect for kicking back and snacking on a few things at the end of a long day. It does have some heady effects although mild and relaxing including a lifted effect that boosts your mood at the onset of the 

In [164]:
# Scratch check of the distance scaling that strain_info() also performs.
# NOTE(review): this cell's execution count (164) is lower than the cells
# above it, and it relies on `distances` from an earlier cell -- it will not
# reproduce this ordering under Restart & Run All.
from sklearn.preprocessing import MinMaxScaler

# Rescale the distances onto the range 1..3, one value per row.
scaler = MinMaxScaler(feature_range=(1, 3))
scaled = scaler.fit_transform(distances.reshape(-1, 1))
# Round to whole numbers, then lay the values out as a single row for display.
scaled = scaled.round(decimals=0)
scaled = scaled.reshape(1,-1)
scaled

array([[1., 1., 3., 3., 3.]])