# JD Drug Recommendation System 

This webpage is built with Binder solely to showcase the JD Drug Recommendation System.

In [29]:
# pandas is a data analysis library; it is used here to load the CSV datasets into DataFrames
# NumPy provides fast numerical operations on large arrays of data
# pyplot is the matplotlib module used to create figures and manipulate their elements
# seaborn is used for making statistical graphics
# plot_decision_regions is a function for plotting the decision regions of classifiers in 1 or 2 dimensions
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.plotting import plot_decision_regions

# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook
# Ask the inline backend to render figures in SVG format
# Render matplotlib figures inline in this Jupyter notebook
plt.style.use('ggplot')
%config InlineBackend.figure_format = 'svg'
%matplotlib inline
np.set_printoptions(suppress=True) # Suppress scientific notation where possible

from sklearn import naive_bayes
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, recall_score, precision_recall_curve,f1_score, fbeta_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score, make_scorer
from sklearn.datasets import fetch_20newsgroups
import gensim
# !python -m pip install -U gensim to install automatically
# pip install python-Levenshtein  to install levenshtein

# Read the training and test reviews from CSV files in the working directory,
# discarding every row that contains at least one missing value.
df_train = pd.read_csv(r'df_train.csv')
df_train.dropna(inplace=True)
df_test = pd.read_csv(r'df_test.csv')
df_test.dropna(inplace=True)


def _with_sentiment(reviews):
    """Return a labelled copy of ``reviews`` without the middle rating band.

    Rows whose rating lies strictly between 4 and 6 are dropped. The remaining
    rows get a string ``sentiment`` column: '1' when rating >= 7, otherwise '0'.
    NOTE(review): with these thresholds a rating of 6 is kept and labelled '0' —
    confirm that this is the intended split.
    """
    rating = reviews['rating']
    middle_band = (rating > 4.0) & (rating < 6.0)
    kept = reviews.drop(reviews.loc[middle_band].index)
    kept['sentiment'] = np.where(kept['rating'] >= 7, '1', '0')
    return kept


# Labelled versions of both splits (the frames loaded above are left as they are)
df_train2 = _with_sentiment(df_train)
df_test2 = _with_sentiment(df_test)

# Various Approaches to Recommend Drugs

In [30]:
# Initialise the prototype: silence warnings and load the widget toolkit
import warnings
# Ignore every warning raised from here on, so none show up in the widget outputs
warnings.filterwarnings('ignore')

# ipywidgets supplies the UI controls; display/clear_output render and reset cell output
import ipywidgets as widgets
from IPython.display import display, clear_output


# !jupyter nbextension enable --py widgetsnbextension --sys-prefix
# !jupyter serverextension enable voila --sys-prefix

In [31]:
# Image Widget

# Read the logo bytes inside a context manager so the file handle is closed
# as soon as the read finishes instead of staying open in the kernel.
with open("logo.jpg", "rb") as file:
    image = file.read()

# Logo shown in the left-hand column of every page
image_headline = widgets.Image(
                    value=image,
                    format='jpg',
                    width='200'
                )

# Caption displayed underneath the logo
label_headline = widgets.Label(
                    value='JD Drug Recommender Prototype',
                    style={'description_width': 'initial'}
                )

# Logo stacked above its caption; reused by each page layout below
vbox_headline = widgets.VBox([image_headline, label_headline])

## A. Collaborative Recommendation Approach - Method I (based on similar condition)

In [32]:
# Pivot the labelled training reviews into a drug x condition matrix.
# Rows are drug names, columns are conditions, and each cell is the aggregated
# rating (pivot_table's default aggregation is the mean) for that pair.
drugmat = pd.pivot_table(
    df_train2,
    values='rating',
    index='drugName',
    columns='condition',
)

In [33]:
# Text box in which the user types the condition to look up in `drugmat`
user_input = widgets.Text(
    placeholder="Enter a condition(suggested 'Anxiety' for obvious results): ",
)

In [34]:
# "View Result" button and the output area for method A

button_send = widgets.Button(
                description='View Result',
                tooltip='View',
                style={'description_width': 'initial'}
            )

output = widgets.Output()

def on_button_clicked(event):
    """Show similar conditions and the best-rated drugs for the typed condition.

    Reads the condition from ``user_input`` and looks it up in ``drugmat``
    (drugs as rows, conditions as columns). Everything is printed into
    ``output``; nothing is returned.

    Parameters
    ----------
    event : the object passed by ``Button.on_click`` (unused).
    """
    with output:
        clear_output()
        condition = user_input.value.strip()

        # Blank or unknown input: report it directly instead of relying on an
        # exception being raised further down.
        if condition not in drugmat.columns:
            print("Not recommendation available...")
            return

        try:
            # Drugs that actually have a rating for this condition, best first.
            drug_user_ratings = drugmat[condition].dropna().sort_values(ascending=False)

            # Correlate every condition's rating column with the requested one.
            # Correlating the matrix with (a slice of) itself would only compare
            # each column with itself and always yield 1.0 or NaN.
            similar_to_condition = drugmat.corrwith(drugmat[condition])
            corr_Condition = (
                similar_to_condition.to_frame('Correlation')
                .dropna()
                # the condition trivially correlates with itself, so leave it out
                .drop(index=condition, errors='ignore')
                .sort_values('Correlation', ascending=False)
            )

            print(f"The condition input is {condition}.")
            print("")
            print("Similar Condition:")
            print(corr_Condition.head(20))
            print("")
            print("Recommended Drug As Below:")
            print(drug_user_ratings[:10])
            print("")
        except Exception as e:
            # Keep the UI alive, but say what went wrong instead of hiding it.
            print("Not recommendation available...")
            print(f"({type(e).__name__}: {e})")

button_send.on_click(on_button_clicked)

# Button stacked above the area its handler prints into
vbox_result = widgets.VBox([button_send, output])

In [35]:
# Right-hand column for method A: welcome heading, prompt, input box,
# then the button together with its output area.
text_0 = widgets.HTML(value="<h1>Welcome to JD System!</h1>")
text_1 = widgets.HTML(value="<h3>Recommend drug based on similar condition of </h3>")

vbox_text = widgets.VBox(children=[text_0, text_1, user_input, vbox_result])

In [36]:
# Place the logo column and the method A column side by side, then render them
page = widgets.HBox(children=[vbox_headline, vbox_text])
display(page)

HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff…

## B. Collaborative Recommendation Approach - Method II (based on similar drug)

In [37]:
# Pivot the labelled training reviews into a condition x drug matrix.
# Rows are conditions, columns are drug names, and each cell is the aggregated
# rating (pivot_table's default aggregation is the mean) for that pair.
drugmat2 = pd.pivot_table(
    df_train2,
    values='rating',
    index='condition',
    columns='drugName',
)

In [38]:
# Text box in which the user types the drug to look up in `drugmat2`
user_input_2 = widgets.Text(
    placeholder="Enter a drug(suggested 'Abilify' for obvious results): ",
)

In [39]:
# "View Result" button and the output area for method B
button_send2 = widgets.Button(
                description='View Result',
                tooltip='View',
                style={'description_width': 'initial'}
            )

output2 = widgets.Output()

def on_button_clicked2(event):
    """Show the drugs whose ratings correlate with the typed drug.

    Reads the drug name from ``user_input_2`` and looks it up in ``drugmat2``
    (conditions as rows, drugs as columns). Everything is printed into
    ``output2``; nothing is returned.

    Parameters
    ----------
    event : the object passed by ``Button.on_click`` (unused).
    """
    with output2:
        clear_output()
        drug = user_input_2.value.strip()

        # Blank or unknown input: report it directly instead of relying on an
        # exception being raised further down.
        if drug not in drugmat2.columns:
            print("Not recommendation available...")
            return

        try:
            # Correlate every drug's rating column with the requested drug's
            # column. Correlating the matrix with (a slice of) itself would only
            # compare each column with itself and always yield 1.0 or NaN.
            similar_drug = drugmat2.corrwith(drugmat2[drug])
            corr_Drug = (
                similar_drug.to_frame('Correlation')
                .dropna()
                # the drug trivially correlates with itself, so leave it out
                .drop(index=drug, errors='ignore')
                .sort_values('Correlation', ascending=False)
            )

            print(f"The drug input is {drug}.")
            print("")
            print("Recommended Drug As Below:")
            print(corr_Drug.head(20))
            print("")
        except Exception as e:
            # Keep the UI alive, but say what went wrong instead of hiding it.
            print("Not recommendation available...")
            print(f"({type(e).__name__}: {e})")

button_send2.on_click(on_button_clicked2)

# Button stacked above the area its handler prints into
vbox_result2 = widgets.VBox([button_send2, output2])

In [40]:
# Right-hand column for method B: welcome heading, prompt, input box,
# then the button together with its output area.
text_0 = widgets.HTML(value="<h1>Welcome to JD System!</h1>")
text_2 = widgets.HTML(value="<h3>Recommend drug based on similar drug of </h3>")

vbox_text2 = widgets.VBox(children=[text_0, text_2, user_input_2, vbox_result2])

In [41]:
# Place the logo column and the method B column side by side, then render them
page2 = widgets.HBox(children=[vbox_headline, vbox_text2])
display(page2)

HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff…

## C. Simplest Recommendation Approach - Method I (based on rating)

In [42]:
# Text box in which the user types the condition used to filter `df_train`
user_input_3 = widgets.Text(
    placeholder="Enter a condition: ",
)

In [43]:
# "View Result" button and the output area for method C
button_send3 = widgets.Button(
                description='View Result',
                tooltip='View',
                style={'description_width': 'initial'}
            )

output3 = widgets.Output()

def on_button_clicked3(event):
    """List drugs for the typed condition, ranked by their raw ratings.

    Filters ``df_train`` to the rows whose ``condition`` equals the text in
    ``user_input_3`` and prints two views into ``output3``: how often each
    (drug, condition, rating) combination occurs, and the five
    highest-rated reviews.

    Parameters
    ----------
    event : the object passed by ``Button.on_click`` (unused).
    """
    with output3:
        clear_output()
        condition = user_input_3.value.strip()

        try:
            df_condition_1 = df_train.loc[df_train["condition"] == condition]

            # A blank or unknown condition matches no rows; say so instead of
            # printing empty tables under a "Recommended Drug" heading.
            if df_condition_1.empty:
                print("Not recommendation available...")
                return

            # Number of reviews per (drug, condition, rating), most frequent first
            drug_based_on_condition = (
                df_condition_1.groupby(['drugName', 'condition'])['rating']
                .value_counts()
                .sort_values(ascending=False)
            )
            # Individual reviews ordered from highest to lowest rating
            df_condition_2 = df_condition_1.sort_values('rating', ascending=False)

            print(f"The input is {condition}.")
            print("")
            print("Recommended Drug As Below:")
            print(drug_based_on_condition[:10])
            print("")
            print(f"Top 5 Drug Recommendation For {condition}.")
            print(df_condition_2[['drugName', 'condition', 'rating']].head(5))
            print("")
        except Exception as e:
            # Keep the UI alive, but say what went wrong instead of hiding it.
            print("Not recommendation available...")
            print(f"({type(e).__name__}: {e})")

button_send3.on_click(on_button_clicked3)

# Button stacked above the area its handler prints into
vbox_result3 = widgets.VBox([button_send3, output3])

In [44]:
# Right-hand column for method C: welcome heading, prompt, input box,
# then the button together with its output area.
text_0 = widgets.HTML(value="<h1>Welcome to JD System!</h1>")
text_3 = widgets.HTML(value="<h3>Recommend drug based on rating of drug on medicating </h3>")

vbox_text3 = widgets.VBox(children=[text_0, text_3, user_input_3, vbox_result3])

In [45]:
# Place the logo column and the method C column side by side, then render them
page3 = widgets.HBox(children=[vbox_headline, vbox_text3])
display(page3)

HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff…

## D. Simplest Recommendation Approach - Method II (based on IMDB score)

In [46]:
# Text box in which the user types the condition used for the weighted-score ranking
user_input_4 = widgets.Text(
    placeholder="Enter a condition: ",
)

In [47]:
# Global parameters of the weighted score, both taken from the training ratings
m = df_train['rating'].quantile(0.90)  # 90th percentile of the ratings
C = df_train['rating'].mean()          # mean of the ratings


def weighted_rating(df_train, m=m, C=C):
    """Blend a rating with the global mean ``C`` using IMDB-style weights.

    Parameters
    ----------
    df_train : object indexable by ``'rating'``. It is called with one row at a
        time through ``DataFrame.apply(..., axis=1)``; the parameter name
        shadows the module-level frame inside this function.
    m : weight given to the global mean (defaults to the module-level ``m``).
    C : global mean rating (defaults to the module-level ``C``).

    Returns
    -------
    ``v / (v + m) * R + m / (m + v) * C``, where both ``v`` and ``R`` are the
    ``'rating'`` entry.

    NOTE(review): the usual IMDB formula takes ``v`` as a vote count and ``R``
    as the item's average rating; here both are the same rating value —
    confirm this is intended.
    """
    rating = df_train['rating']
    votes = rating  # plays the role of "v" in the formula
    own_share = votes / (votes + m)
    prior_share = m / (m + votes)
    return (own_share * rating) + (prior_share * C)

# "View Result" button and the output area for method D
button_send4 = widgets.Button(
                description='View Result',
                tooltip='View',
                style={'description_width': 'initial'}
            )

output4 = widgets.Output()

def on_button_clicked4(event):
    """List drugs for the typed condition, ranked by ``weighted_rating`` score.

    Filters ``df_train`` to the rows whose ``condition`` equals the text in
    ``user_input_4``, scores those rows with ``weighted_rating`` and prints two
    views into ``output4``: the number of reviews per
    (drug, condition, rating, score) combination, and the five
    highest-scoring reviews.

    Parameters
    ----------
    event : the object passed by ``Button.on_click`` (unused).
    """
    with output4:
        clear_output()
        condition = user_input_4.value.strip()

        try:
            # Filter first and work on a copy: the shared ``df_train`` frame is
            # left untouched and only the matching rows are scored.
            df_condition_1 = df_train.loc[df_train["condition"] == condition].copy()

            # A blank or unknown condition matches no rows; say so instead of
            # printing empty tables under a "Recommended Drug" heading.
            if df_condition_1.empty:
                print("Not recommendation available...")
                return

            # weighted_rating only does arithmetic on the 'rating' entry, so it
            # can score the whole filtered frame in one vectorised call.
            df_condition_1['score'] = weighted_rating(df_condition_1)

            # Number of reviews for each (drug, condition, rating, score)
            # combination, most frequent first.
            drug_based_on_condition = (
                df_condition_1.groupby(['drugName', 'condition', 'rating', 'score'])
                .size()
                .sort_values(ascending=False)
            )
            # Individual reviews ordered from highest to lowest score
            df_condition_2 = df_condition_1.sort_values('score', ascending=False)

            print(f"The input is {condition}.")
            print("")
            print("Recommended Drug As Below:")
            print(drug_based_on_condition[:10])
            print("")
            print(f"Top 5 Drug Recommendation For {condition}.")
            print(df_condition_2[['drugName', 'condition', 'rating', 'score']].head(5))
            print("")
        except Exception as e:
            # Keep the UI alive, but say what went wrong instead of hiding it.
            print("Not recommendation available...")
            print(f"({type(e).__name__}: {e})")

button_send4.on_click(on_button_clicked4)

# Button stacked above the area its handler prints into
vbox_result4 = widgets.VBox([button_send4, output4])

In [48]:
# Right-hand column for method D: welcome heading, prompt, input box,
# then the button together with its output area.
text_0 = widgets.HTML(value="<h1>Welcome to JD System!</h1>")
text_4 = widgets.HTML(value="<h3>Recommend drug based on IMDB score of drug on medicating </h3>")

vbox_text4 = widgets.VBox(children=[text_0, text_4, user_input_4, vbox_result4])

In [49]:
# Place the logo column and the method D column side by side, then render them
page4 = widgets.HBox(children=[vbox_headline, vbox_text4])
display(page4)

HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff…

## E. Content-based Recommendation Approach - Method I (based on dominant topic)

In [50]:
# Build `df_top_10`, the table read by the method E button handler.
# The raw training CSV is read under its own name so that the cleaned
# `df_train` used by the other methods is not overwritten.
try:
    reviews_raw = pd.read_csv(r'df_train.csv')
    df_topic = pd.read_csv(r'df_dominant_topic.csv')
except FileNotFoundError as err:
    # Without the input files there is nothing to recommend from; keep the
    # notebook running and leave an explicit marker for the handler.
    df_top_10 = None
    top_10 = None
    print(f"Dominant-topic data is unavailable ({err}).")
else:
    # Put the topic columns next to the reviews, matching rows by index label
    df = pd.concat([reviews_raw, df_topic], axis=1, join='inner')
    del df['review']
    # The ten conditions with the largest number of distinct drugs
    drug_per_condition = df.groupby(['condition'])['drugName'].nunique().sort_values(ascending=False)
    condition_1 = drug_per_condition[:10].keys()
    df_top_10 = df[df['condition'].isin(condition_1)]
    # Share of each dominant topic within every one of those conditions
    top_10 = df_top_10.groupby(['condition']).Dominant_Topic.value_counts(normalize=True)
top_10

In [51]:
# Text box in which the user types the condition for the dominant-topic lookup
user_input_5 = widgets.Text(
    placeholder="Enter a condition (e.g. Acne): ",
)

In [52]:
# "View Result" button and the output area for method E
button_send5 = widgets.Button(
                description='View Result',
                tooltip='View',
                style={'description_width': 'initial'}
            )

output5 = widgets.Output()

def on_button_clicked5(event):
    """Show up to three drugs for the typed condition from the topic table.

    Looks the condition from ``user_input_5`` up in the module-level
    ``df_top_10`` frame and ranks its drugs by the number of distinct
    ``Perc_Contribution`` values they have. Everything is printed into
    ``output5``; nothing is returned.

    Parameters
    ----------
    event : the object passed by ``Button.on_click`` (unused).
    """
    with output5:
        clear_output()

        # `df_top_10` is built in another cell. Look it up defensively so that
        # a missing table is reported as such rather than surfacing as a
        # swallowed NameError.
        topic_table = globals().get('df_top_10')
        if topic_table is None:
            print("Not recommendation available...")
            print("(df_top_10 has not been built in this session.)")
            return

        condition = user_input_5.value.strip()
        try:
            result = topic_table.loc[topic_table['condition'] == condition]

            # A blank or unknown condition matches no rows in the table.
            if result.empty:
                print("Not recommendation available...")
                return

            # Drugs ordered by how many distinct contribution values they have
            result = result.groupby(['drugName'])['Perc_Contribution'].nunique().sort_values(ascending=False)
            print(f"The input is {condition}.")
            print("")
            print("Recommended Drug As Below:")
            print(result[:3])
            print("")
        except Exception as e:
            # Keep the UI alive, but say what went wrong instead of hiding it.
            print("Not recommendation available...")
            print(f"({type(e).__name__}: {e})")

button_send5.on_click(on_button_clicked5)

# Button stacked above the area its handler prints into
vbox_result5 = widgets.VBox([button_send5, output5])

In [53]:
# Right-hand column for method E: welcome heading, prompt, input box,
# then the button together with its output area.
text_0 = widgets.HTML(value="<h1>Welcome to JD System!</h1>")
text_5 = widgets.HTML(value="<h3>Recommend drug based on dominant topic of </h3>")

vbox_text5 = widgets.VBox(children=[text_0, text_5, user_input_5, vbox_result5])

In [54]:
# Place the logo column and the method E column side by side, then render them
page5 = widgets.HBox(children=[vbox_headline, vbox_text5])
display(page5)

HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff…

In [55]:
# to remove not needed requirements
#!pip list --format=freeze > requirements.txt