In [22]:
import numpy as np
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import spearmanr, pearsonr
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS

In [23]:
# Read the drugLib train/test files: tab-separated text, decoded as UTF-8.
drug_train = pd.read_csv("Data/drugLibTrain_raw.tsv", sep="\t", encoding="utf-8")
drug_test = pd.read_csv("Data/drugLibTest_raw.tsv", sep="\t", encoding="utf-8")

In [24]:
# Stack the train and test frames row-wise. pandas' defaults already give an
# outer join on columns and keep each frame's original row labels, so the
# combined index contains repeated labels.
drug = pd.concat([drug_train, drug_test])

In [25]:
# Sanity check: the combined frame should have train rows + test rows.
print(drug_train.shape, drug_test.shape, drug.shape, sep="\n")

(3107, 9)
(1036, 9)
(4143, 9)


In [26]:
# Number of distinct (non-null) values in the 'rating' column.
drug['rating'].nunique()

10

In [27]:
# Preview the first rows of the combined frame.
drug.head()

Unnamed: 0.1,Unnamed: 0,urlDrugName,rating,effectiveness,sideEffects,condition,benefitsReview,sideEffectsReview,commentsReview
0,2202,enalapril,4,Highly Effective,Mild Side Effects,management of congestive heart failure,slowed the progression of left ventricular dys...,"cough, hypotension , proteinuria, impotence , ...","monitor blood pressure , weight and asses for ..."
1,3117,ortho-tri-cyclen,1,Highly Effective,Severe Side Effects,birth prevention,Although this type of birth control has more c...,"Heavy Cycle, Cramps, Hot Flashes, Fatigue, Lon...","I Hate This Birth Control, I Would Not Suggest..."
2,1146,ponstel,10,Highly Effective,No Side Effects,menstrual cramps,I was used to having cramps so badly that they...,Heavier bleeding and clotting than normal.,I took 2 pills at the onset of my menstrual cr...
3,3947,prilosec,3,Marginally Effective,Mild Side Effects,acid reflux,The acid reflux went away for a few months aft...,"Constipation, dry mouth and some mild dizzines...",I was given Prilosec prescription at a dose of...
4,1951,lyrica,2,Marginally Effective,Severe Side Effects,fibromyalgia,I think that the Lyrica was starting to help w...,I felt extremely drugged and dopey. Could not...,See above


In [28]:
# List the column labels of the combined frame.
drug.columns

Index(['Unnamed: 0', 'urlDrugName', 'rating', 'effectiveness', 'sideEffects',
       'condition', 'benefitsReview', 'sideEffectsReview', 'commentsReview'],
      dtype='object')

In [29]:
# Show the distinct 'sideEffects' categories together with how many there are.
drug['sideEffects'].unique(),drug['sideEffects'].nunique()

(array(['Mild Side Effects', 'Severe Side Effects', 'No Side Effects',
        'Extremely Severe Side Effects', 'Moderate Side Effects'],
       dtype=object), 5)

# Gaussian Discriminant Analysis (GDA)

In [5]:
class GaussianDiscriminantAnalysis:
    """Binary Gaussian Discriminant Analysis with a shared covariance matrix.

    Labels are expected to be 0/1. ``train`` estimates the class prior
    (``fi``), the per-class means (``mu0``, ``mu1``) and a single covariance
    matrix pooled over both classes (``cvar``). ``predict`` returns, for each
    row, the label with the larger posterior.

    Parameters
    ----------
    epsilon : float
        Ridge term added to the diagonal of the covariance before it is
        inverted and before its determinant is taken, so that a singular
        estimate does not break the density evaluation.
    """

    def __init__(self,epsilon=10e-10):
        self.epsilon = epsilon

    def phi(self, y):
        """Return the fraction of labels equal to 1, i.e. the estimate of P(y=1)."""
        y = np.asarray(y)
        return np.sum(y == 1) / len(y)

    def mu(self, X, y, k):
        """Return the feature-wise mean of the rows of X whose label equals k."""
        X = np.asarray(X, dtype=float)
        mask = np.asarray(y) == k
        return np.sum(X[mask], axis=0) / np.sum(mask)

    def mu_0(self, X, y):
        """Return the feature-wise mean of the class-0 rows."""
        return self.mu(X, y, 0)

    def mu_1(self, X, y):
        """Return the feature-wise mean of the class-1 rows."""
        return self.mu(X, y, 1)

    def covariance(self, X, y):
        """Return the pooled covariance of X around the per-class means.

        Requires ``self.mu0`` and ``self.mu1`` to be set (``train`` does this).
        Rows whose label is neither 0 nor 1 are centred on zero.
        """
        # Work in float: building the mean matrix with the dtype of an integer
        # X would silently truncate the class means.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        means = np.zeros_like(X)
        means[y == 1] = self.mu1
        means[y == 0] = self.mu0
        centered = X - means
        return (centered.T @ centered) / len(y)

    def prob_y0(self):
        """Return the class-0 prior, 1 - P(y=1)."""
        return 1 - self.fi

    def prob_y1(self, y=None):
        """Return the class-1 prior. ``y`` is unused and kept for compatibility."""
        return self.fi

    def _log_density(self, X, mean):
        """Return log N(x; mean, cvar + epsilon*I) for every row x of X."""
        X = np.asarray(X, dtype=float)
        n_features = X.shape[1]
        # The same regularised matrix feeds both the determinant and the inverse.
        cov = self.cvar + self.epsilon * np.eye(n_features)
        _, log_det = np.linalg.slogdet(cov)
        diff = X - mean
        # Row-wise quadratic form (x - mean)^T cov^{-1} (x - mean).
        mahalanobis = np.sum((diff @ np.linalg.inv(cov)) * diff, axis=1)
        return -0.5 * (mahalanobis + n_features * np.log(2 * np.pi) + log_det)

    def prob_class0(self, X):
        """Return the class-0 Gaussian density p(x | y=0) for every row of X."""
        return np.exp(self._log_density(X, self.mu0))

    def prob_class1(self, X):
        """Return the class-1 Gaussian density p(x | y=1) for every row of X."""
        return np.exp(self._log_density(X, self.mu1))

    def train(self, X, y):
        """Estimate prior, class means and pooled covariance from X (n, d) and y (n,)."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.fi = self.phi(y)
        self.mu0 = self.mu_0(X, y)
        self.mu1 = self.mu_1(X, y)
        self.cvar = self.covariance(X, y)

    def predict(self, X):
        """Return the predicted 0/1 label for every row of X.

        Scores are compared in log space so that points far from both class
        means (or high-dimensional inputs) do not underflow to 0 and collapse
        onto class 0.
        """
        # log(0) for an empty class is a legitimate -inf score, not an error.
        with np.errstate(divide="ignore"):
            log_prior0 = np.log(1 - self.fi)
            log_prior1 = np.log(self.fi)
        scores = np.column_stack((
            self._log_density(X, self.mu0) + log_prior0,
            self._log_density(X, self.mu1) + log_prior1,
        ))
        # A class with no training rows has a NaN mean; never let it win argmax.
        scores = np.where(np.isnan(scores), -np.inf, scores)
        return np.argmax(scores, axis=1)

In [4]:
# Instantiate the GDA classifier with its default epsilon.
# NOTE(review): this cell's execution count (In [4]) is lower than the class
# definition cell's (In [5]); on a fresh kernel the class cell must run first.
GDA = GaussianDiscriminantAnalysis()

# Naive Bayes (NB)

In [10]:
class BernoulliNaiveBayes:
    """Bernoulli Naive Bayes for binary features and 0/1 labels.

    ``fit_train`` estimates the class prior (``fiy``) and, for every feature,
    the Laplace-smoothed conditionals P(x_j=1 | y) and P(x_j=0 | y)
    (``fix1y0``, ``fix0y0``, ``fix1y1``, ``fix0y1``). ``predict`` returns the
    label with the larger posterior. When scoring, any feature value other
    than 1 is treated as 0.
    """

    def __init__(self):
        pass

    def _smoothed_rate(self, X, y, label, value):
        """Return, per feature, the smoothed P(x_j == value | y == label) as a list.

        Add-one smoothing for a two-valued feature is (count + 1) / (N + 2),
        so the estimates for value 1 and value 0 sum to one for binary X.
        """
        X = np.asarray(X)
        rows = X[np.asarray(y) == label]
        counts = np.sum(rows == value, axis=0)
        return ((counts + 1) / (rows.shape[0] + 2)).tolist()

    def _log_likelihood(self, X, p_one, p_zero):
        """Return the per-row sum over features of log P(x_j | y)."""
        X = np.asarray(X)
        log_one = np.log(np.asarray(p_one, dtype=float))
        log_zero = np.log(np.asarray(p_zero, dtype=float))
        # Pick log P(x_j=1|y) where the feature is on, log P(x_j=0|y) otherwise.
        return np.where(X == 1, log_one, log_zero).sum(axis=1)

    def phiy(self, y):
        """Return the fraction of labels equal to 1, i.e. the estimate of P(y=1)."""
        y = np.asarray(y)
        return np.sum(y == 1) / len(y)

    def phix1y0(self, X, y):
        """Return the per-feature smoothed P(x_j=1 | y=0)."""
        return self._smoothed_rate(X, y, 0, 1)

    def phix0y0(self, X, y):
        """Return the per-feature smoothed P(x_j=0 | y=0)."""
        return self._smoothed_rate(X, y, 0, 0)

    def phix1y1(self, X, y):
        """Return the per-feature smoothed P(x_j=1 | y=1)."""
        return self._smoothed_rate(X, y, 1, 1)

    def phix0y1(self, X, y):
        """Return the per-feature smoothed P(x_j=0 | y=1)."""
        return self._smoothed_rate(X, y, 1, 0)

    def probabxy1(self, X):
        """Return the list of likelihoods P(x | y=1), one per row of X.

        For very wide inputs these values can underflow to 0.0; ``predict``
        avoids this by comparing log-likelihoods instead.
        """
        return np.exp(self._log_likelihood(X, self.fix1y1, self.fix0y1)).tolist()

    def probabxy0(self, X):
        """Return the list of likelihoods P(x | y=0), one per row of X.

        For very wide inputs these values can underflow to 0.0; ``predict``
        avoids this by comparing log-likelihoods instead.
        """
        return np.exp(self._log_likelihood(X, self.fix1y0, self.fix0y0)).tolist()

    def fit_train(self, X, y):
        """Estimate the prior and the smoothed conditionals from X (n, d) and y (n,)."""
        self.fiy = self.phiy(y)
        self.fix1y0 = self.phix1y0(X, y)
        self.fix0y0 = self.phix0y0(X, y)
        self.fix1y1 = self.phix1y1(X, y)
        self.fix0y1 = self.phix0y1(X, y)

    def predict(self, X):
        """Return the predicted 0/1 label for every row of X.

        Scores are log P(x | y) + log P(y); working in log space keeps the
        product over many features from underflowing to zero.
        """
        # log(0) for an empty class is a legitimate -inf score, not an error.
        with np.errstate(divide="ignore"):
            log_prior0 = np.log(1 - self.fiy)
            log_prior1 = np.log(self.fiy)
        scores = np.column_stack((
            self._log_likelihood(X, self.fix1y0, self.fix0y0) + log_prior0,
            self._log_likelihood(X, self.fix1y1, self.fix0y1) + log_prior1,
        ))
        return np.argmax(scores, axis=1)

In [11]:
# Instantiate the Bernoulli Naive Bayes classifier; the constructor takes no arguments.
NB = BernoulliNaiveBayes()