#### Author:----> Abhishek Kumar
#### Project:---> Medicine Recommendation System Prediction ML Project

![Project Banner.jpg](attachment:c2e5d209-7cbb-4d31-912e-64c2cbf0b448.jpg)

![image 4.jpg](attachment:3feb5133-b538-446f-a74b-dbe37151d76f.jpg)

![image 5.jpg](attachment:a6c4d9f6-85ba-44f2-9e87-65873206ac17.jpg)

#### Step1:---> Import All the important Library

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline



#### Step2:----> Load the Dataset from the medicine CSV File

In [33]:
data = pd.read_csv("medicine.csv")



### Step3:----> Read the Top 5 Rows of dataset

In [34]:
data.head()

Unnamed: 0,index,Drug_Name,Reason,Description
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Mild to moderate acne (spots)
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,A RET 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,Acne,treat the most severe form of acne (nodular ac...


### Step4:----> Read the last 5 Rows of the dataset

In [35]:
data.shape

(9720, 4)

In [36]:
data.isnull().sum()

index          0
Drug_Name      0
Reason         0
Description    0
dtype: int64

In [37]:
data.dropna(inplace=True)

In [38]:
data.duplicated().sum()

0

In [39]:
data.describe()

Unnamed: 0,index
count,9720.0
mean,4860.5
std,2806.066642
min,1.0
25%,2430.75
50%,4860.5
75%,7290.25
max,9720.0


In [40]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9720 entries, 0 to 9719
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   index        9720 non-null   int64 
 1   Drug_Name    9720 non-null   object
 2   Reason       9720 non-null   object
 3   Description  9720 non-null   object
dtypes: int64(1), object(3)
memory usage: 303.9+ KB


In [41]:
data['Description']

0                           Mild to moderate acne (spots)
1       A RET 0.025% is a prescription medicine that i...
2       It is used to treat acne vulgaris in people 12...
3       It is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                              used for treating warts
9716                        used to soften the skin cells
9717                                       used for scars
9718                                      used for wounds
9719    used to treat and remove raised warts (usually...
Name: Description, Length: 9720, dtype: object

In [42]:
data['Description'].apply(lambda x : x.split())

0                     [Mild, to, moderate, acne, (spots)]
1       [A, RET, 0.025%, is, a, prescription, medicine...
2       [It, is, used, to, treat, acne, vulgaris, in, ...
3       [It, is, used, to, treat, acne, vulgaris, in, ...
4       [treat, the, most, severe, form, of, acne, (no...
                              ...                        
9715                         [used, for, treating, warts]
9716                 [used, to, soften, the, skin, cells]
9717                                   [used, for, scars]
9718                                  [used, for, wounds]
9719    [used, to, treat, and, remove, raised, warts, ...
Name: Description, Length: 9720, dtype: object

In [43]:
data['Description'] = data['Description'].apply(lambda x : x.split())
data['Reason'] = data['Reason'].apply(lambda x : x.split())

In [44]:
data.tail()

Unnamed: 0,index,Drug_Name,Reason,Description
9715,9716,T Muce Ointment 5gm,[Wound],"[used, for, treating, warts]"
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,[Wound],"[used, to, soften, the, skin, cells]"
9717,9718,Wokadine M Onit 10gm,[Wound],"[used, for, scars]"
9718,9719,Wound Fix Solution 100ml,[Wound],"[used, for, wounds]"
9719,9720,Wounsol Ointment 15gm,[Wound],"[used, to, treat, and, remove, raised, warts, ..."


In [45]:
data['Description'] = data['Description'].apply(lambda x : [i.replace(" ", "") for i in x])

In [52]:
data['tags'] = data['Description'] + data['Reason']

In [53]:
data.head()

Unnamed: 0,index,Drug_Name,Reason,Description,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,[Acne],"[Mild, to, moderate, acne, (spots)]","[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,[Acne],"[A, RET, 0.025%, is, a, prescription, medicine...","[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,[Acne],"[It, is, used, to, treat, acne, vulgaris, in, ...","[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,[Acne],"[It, is, used, to, treat, acne, vulgaris, in, ...","[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,[Acne],"[treat, the, most, severe, form, of, acne, (no...","[treat, the, most, severe, form, of, acne, (no..."


In [54]:
new_data = data[['index', 'Drug_Name', 'tags']]

In [55]:
new_data.head()

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,"[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,"[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,"[treat, the, most, severe, form, of, acne, (no..."


In [57]:
new_data['tags'] = new_data['tags'].apply(lambda x : " ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['tags'] = new_data['tags'].apply(lambda x : " ".join(x))


In [59]:
new_data['tags'] = new_data['tags'].apply(lambda x : x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['tags'] = new_data['tags'].apply(lambda x : x.lower())


In [60]:
new_data.head()

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,mild to moderate acne (spots) acne
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,a ret 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,it is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,it is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,treat the most severe form of acne (nodular ac...


In [61]:
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [62]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(stop_words = "english", max_features = 5000)

In [63]:
def stem(text):
    y = []
    for i in text.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [64]:
stem("You are so so beautiful")

'you are so so beauti'

In [65]:
new_data['tags'] = new_data['tags'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['tags'] = new_data['tags'].apply(stem)


In [66]:
new_data.head()

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,mild to moder acn (spots) acn
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,a ret 0.025% is a prescript medicin that is us...
2,3,ACGEL CL NANO Gel 15gm,it is use to treat acn vulgari in peopl 12 yea...
3,4,ACGEL NANO Gel 15gm,it is use to treat acn vulgari in peopl 12 yea...
4,5,Acleen 1% Lotion 25ml,treat the most sever form of acn (nodular acne...


In [68]:
vectors = cv.fit_transform(new_data['tags']).toarray()

In [69]:
vectors.shape

(9720, 806)

In [71]:
cv.get_feature_names_out()

array(['025', '12', '16', '18', 'abdomin', 'abl', 'ach', 'acid', 'acn',
       'acne', 'acquir', 'action', 'activ', 'acut', 'acute', 'adequ',
       'adhd', 'adjunct', 'adolesc', 'adult', 'adults', 'affect', 'ag',
       'age', 'aids', 'allerg', 'allergen', 'allergi', 'allow', 'alon',
       'alzheim', 'alzheimer', 'alzheimerâ', 'amoebiasi', 'anaemia',
       'anal', 'angina', 'angl', 'ani', 'ankylos', 'anorexia', 'anoth',
       'anti', 'antioxid', 'antipsychot', 'antiretrovir', 'anxieti',
       'anxiou', 'anxious', 'apnoea', 'appear', 'appetit', 'appetite',
       'appli', 'appropri', 'area', 'arrhythmia', 'arrhythmiasi',
       'arteri', 'arthralgia', 'arthriti', 'associ', 'atherothrombot',
       'athleteâ', 'atop', 'atrial', 'attack', 'awak', 'b1', 'b2', 'b3',
       'b5', 'b6', 'babi', 'backache', 'bacteri', 'bacteria', 'balanc',
       'balanitis', 'bandag', 'becom', 'behaviour', 'beliefs', 'benefit',
       'beta', 'biliari', 'biotin', 'bite', 'blackhead', 'blackheads',
      

In [72]:
from sklearn.metrics.pairwise import cosine_similarity

In [74]:
similarity = cosine_similarity(vectors)

In [75]:
similarity[1]

array([0.25197632, 1.        , 0.25660012, ..., 0.19245009, 0.1490712 ,
       0.0860663 ])

In [78]:
def recommendation(medicine):
    medicine_index = new_data[new_data['Drug_Name'] == medicine].index[0]
    distance = similarity[medicine_index]
    medicine_list = sorted(list(enumerate(distance)), reverse=True, key = lambda x : x[1])[1:6]
    for i in medicine_list:
        print(new_data.iloc[i[0]].Drug_Name)

In [80]:
recommendation("ACGEL CL NANO Gel 15gm")

ACGEL NANO Gel 15gm
Acnehit Gel 15gm
Acnelak Soap 75gm
Acnetor AD 1% Ointment 15gm
Acnetor AD Cream 15Acnetor AD Gel 15gm


In [81]:
import pickle
pickle.dump(new_data.to_dict(), open("medicine_dict.pkl", "wb"))
pickle.dump(similarity, open("similarity.pkl", "wb"))