### import libraries

In [1]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder

### Read CSV

In [2]:
df = pd.read_csv("dataset.csv")

In [14]:
df.head()

Unnamed: 0,drugName,condition,review,sentiment,ENRS
0,Levonorgestrel,Emergency Contraception,pulled cummed bit took plan b hours later took...,1,Normal
1,Ethinyl estradiol / levonorgestrel,Birth Control,pill many years doctor changed rx chateal effe...,1,acne
2,Nexplanon,Birth Control,started nexplanon months ago minimal amount co...,1,acne
3,Etonogestrel,Birth Control,nexplanon job worry free sex thing periods som...,1,depressed
4,Sertraline,Depression,week zoloft anxiety mood swings take mg mornin...,1,zoloft anxiety mood


### Label Encoding

In [15]:
label_encoder = LabelEncoder() 
df['drugName']  = label_encoder.fit_transform(df['drugName'].astype(str))
label_encoder = LabelEncoder() 
df['condition']  = label_encoder.fit_transform(df['condition'].astype(str))
label_encoder = LabelEncoder() 
df['ENRS']  = label_encoder.fit_transform(df['ENRS'].astype(str))

In [16]:
df.head()


Unnamed: 0,drugName,condition,review,sentiment,ENRS
0,16,55,pulled cummed bit took plan b hours later took...,1,0
1,10,36,pill many years doctor changed rx chateal effe...,1,51
2,22,36,started nexplanon months ago minimal amount co...,1,51
3,13,36,nexplanon job worry free sex thing periods som...,1,1546
4,25,51,week zoloft anxiety mood swings take mg mornin...,1,4857


In [18]:
df = df[['drugName', 'condition', 'sentiment', 'ENRS']]
X = df.drop(['drugName'], axis=1)
y = df['drugName']
df.head()

Unnamed: 0,drugName,condition,sentiment,ENRS
0,16,55,1,0
1,10,36,1,51
2,22,36,1,51
3,13,36,1,1546
4,25,51,1,4857


### train test split

In [19]:
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  

In [20]:
X_train.head()

Unnamed: 0,condition,sentiment,ENRS
37662,36,-1,935
26392,36,1,2965
22019,36,-1,3465
9473,81,-1,3434
1442,21,-1,1527


### Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier
#Create a Gaussian Classifier
from sklearn.model_selection import GridSearchCV
# Create the parameter grid based on the results of random search 
param_grid = {
    'bootstrap': [True],
    'max_depth': [80],
    'max_features': [2],
    'min_samples_leaf': [3],
    'min_samples_split': [10],
    'n_estimators': [1000]
}
s=RandomForestClassifier(bootstrap=True,max_depth=80,max_features=2,min_samples_leaf=3,min_samples_split=10,n_estimators=1000)
s.fit(X_train, y_train)
y_pred = s.predict(X_test)
# print('Accuracy: %s' % accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred)) 

              precision    recall  f1-score   support

           0       0.66      0.61      0.64       200
           1       0.34      0.37      0.35       189
           2       0.45      0.68      0.54       203
           3       0.45      0.50      0.47       164
           4       0.30      0.24      0.27       177
           5       0.18      0.08      0.11       169
           6       0.37      0.27      0.32       182
           7       0.57      0.21      0.31       179
           8       0.25      0.27      0.26       162
           9       0.25      0.46      0.33       233
          10       0.46      0.18      0.26       388
          11       0.26      0.25      0.26       548
          12       0.38      0.31      0.34       428
          13       0.24      0.62      0.34       639
          14       0.74      0.61      0.67       216
          15       0.07      0.01      0.02       231
          16       0.53      0.64      0.58       753
          17       0.21    

### K-Neighbour

In [22]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

s=KNeighborsClassifier(n_neighbors=49, weights='distance')
s.fit(X_train, y_train)
y_pred = s.predict(X_test)
print('Accuracy: %s' % accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred)) 



Accuracy: 0.383069047321763
              precision    recall  f1-score   support

           0       0.54      0.55      0.54       200
           1       0.31      0.43      0.36       189
           2       0.38      0.57      0.46       203
           3       0.45      0.44      0.45       164
           4       0.13      0.08      0.10       177
           5       0.18      0.17      0.17       169
           6       0.28      0.27      0.28       182
           7       0.37      0.13      0.19       179
           8       0.21      0.19      0.20       162
           9       0.25      0.42      0.31       233
          10       0.42      0.19      0.26       388
          11       0.24      0.32      0.28       548
          12       0.34      0.29      0.31       428
          13       0.24      0.48      0.32       639
          14       0.65      0.56      0.61       216
          15       0.00      0.00      0.00       231
          16       0.50      0.68      0.58       753

### Support Vector Machine

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC 
from sklearn.metrics import classification_report
param_grid = {'gamma': [1, 0.1, 0.01], 
              'gamma':['scale', 'auto'],
              'kernel': ['linear','rbf']} 
s = GridSearchCV(SVC(), param_grid, refit = True,cv=3, verbose = 3,n_jobs=-1,scoring='accuracy')
s.fit(X_train, y_train)
y_pred = s.predict(X_test)
print(classification_report(y_test, y_pred))