# Importing packages

In [None]:
import numpy as np
import pandas as pd 

# Loading the dataset

In [None]:
# Load the medicine dataset; the path is relative to the notebook's working directory.
med = pd.read_csv('medicine.csv')

In [None]:
med.head()

Unnamed: 0,index,Drug_Name,Reason,Description
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Mild to moderate acne (spots)
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,A RET 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,Acne,treat the most severe form of acne (nodular ac...


In [None]:
med.shape

(9720, 4)

# Data Cleaning: Checking for NULL values

In [None]:
med.isnull().sum()

index          0
Drug_Name      0
Reason         0
Description    0
dtype: int64

In [None]:
# Reassign rather than mutate in place, and renumber the rows so that index
# labels stay aligned with row positions even if rows are dropped.
med = med.dropna().reset_index(drop=True)

# Checking for redundancy

In [None]:
med.duplicated().sum()

0

# Data Analysis

In [None]:
med['Description']

0                           Mild to moderate acne (spots)
1       A RET 0.025% is a prescription medicine that i...
2       It is used to treat acne vulgaris in people 12...
3       It is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                              used for treating warts
9716                        used to soften the skin cells
9717                                       used for scars
9718                                      used for wounds
9719    used to treat and remove raised warts (usually...
Name: Description, Length: 9720, dtype: object

# Splitting the words in the 'Description' and 'Reason' columns using the split() method

In [None]:
med['Description'].apply(lambda x:x.split())

0                     [Mild, to, moderate, acne, (spots)]
1       [A, RET, 0.025%, is, a, prescription, medicine...
2       [It, is, used, to, treat, acne, vulgaris, in, ...
3       [It, is, used, to, treat, acne, vulgaris, in, ...
4       [treat, the, most, severe, form, of, acne, (no...
                              ...                        
9715                         [used, for, treating, warts]
9716                 [used, to, soften, the, skin, cells]
9717                                   [used, for, scars]
9718                                  [used, for, wounds]
9719    [used, to, treat, and, remove, raised, warts, ...
Name: Description, Length: 9720, dtype: object

In [None]:
# Tokenise both text columns on whitespace: each cell becomes a list of words.
for column in ('Reason', 'Description'):
    med[column] = med[column].map(lambda text: text.split())

# Eliminating unwanted spaces 

In [None]:
# Remove any space characters left inside the individual Description tokens.
med['Description'] = med['Description'].map(
    lambda tokens: [token.replace(" ", "") for token in tokens]
)

In [None]:
med['Description']

0                     [Mild, to, moderate, acne, (spots)]
1       [A, RET, 0.025%, is, a, prescription, medicine...
2       [It, is, used, to, treat, acne, vulgaris, in, ...
3       [It, is, used, to, treat, acne, vulgaris, in, ...
4       [treat, the, most, severe, form, of, acne, (no...
                              ...                        
9715                         [used, for, treating, warts]
9716                 [used, to, soften, the, skin, cells]
9717                                   [used, for, scars]
9718                                  [used, for, wounds]
9719    [used, to, treat, and, remove, raised, warts, ...
Name: Description, Length: 9720, dtype: object

In [None]:
# Remove any space characters left inside the individual Reason tokens.
med['Reason'] = med['Reason'].map(
    lambda tokens: [token.replace(" ", "") for token in tokens]
)

In [None]:
med['Reason']

0        [Acne]
1        [Acne]
2        [Acne]
3        [Acne]
4        [Acne]
         ...   
9715    [Wound]
9716    [Wound]
9717    [Wound]
9718    [Wound]
9719    [Wound]
Name: Reason, Length: 9720, dtype: object

# Concatenating Description and Reason columns

In [None]:
# Both columns hold lists of tokens, so `+` concatenates them row by row:
# each row's Description tokens followed by its Reason tokens.
med['tags'] = med['Description'] + med['Reason'] 

In [None]:
# Take an explicit copy so that later assignments to new_df['tags'] modify an
# independent frame instead of a slice of `med` (avoids SettingWithCopyWarning).
new_df = med[['index','Drug_Name','tags']].copy()

In [None]:
new_df

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,"[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,"[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,"[treat, the, most, severe, form, of, acne, (no..."
...,...,...,...
9715,9716,T Muce Ointment 5gm,"[used, for, treating, warts, Wound]"
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,"[used, to, soften, the, skin, cells, Wound]"
9717,9718,Wokadine M Onit 10gm,"[used, for, scars, Wound]"
9718,9719,Wound Fix Solution 100ml,"[used, for, wounds, Wound]"


In [None]:
new_df['tags'].apply(lambda x:" ".join(x))

0                      Mild to moderate acne (spots) Acne
1       A RET 0.025% is a prescription medicine that i...
2       It is used to treat acne vulgaris in people 12...
3       It is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                        used for treating warts Wound
9716                  used to soften the skin cells Wound
9717                                 used for scars Wound
9718                                used for wounds Wound
9719    used to treat and remove raised warts (usually...
Name: tags, Length: 9720, dtype: object

# Reading the new dataframe

In [None]:
new_df

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,"[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,"[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,"[treat, the, most, severe, form, of, acne, (no..."
...,...,...,...
9715,9716,T Muce Ointment 5gm,"[used, for, treating, warts, Wound]"
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,"[used, to, soften, the, skin, cells, Wound]"
9717,9718,Wokadine M Onit 10gm,"[used, for, scars, Wound]"
9718,9719,Wound Fix Solution 100ml,"[used, for, wounds, Wound]"


In [None]:
# Collapse each token list back into a single space-separated string.
new_df['tags'] = new_df['tags'].map(lambda tokens: " ".join(tokens))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))


In [None]:
new_df

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,Mild to moderate acne (spots) Acne
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,A RET 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,It is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,It is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,treat the most severe form of acne (nodular ac...
...,...,...,...
9715,9716,T Muce Ointment 5gm,used for treating warts Wound
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,used to soften the skin cells Wound
9717,9718,Wokadine M Onit 10gm,used for scars Wound
9718,9719,Wound Fix Solution 100ml,used for wounds Wound


# Converting the words into lower case

In [None]:
# Normalise case so that e.g. "Acne" and "acne" count as the same token.
new_df['tags'] = new_df['tags'].map(str.lower)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())


In [None]:
new_df

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,m i l d t o m o d e r a c n ( s p o t ...
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,a r e t 0 . 0 2 5 % i s a p r e s c ...
2,3,ACGEL CL NANO Gel 15gm,i t i s u s e t o t r e a t a c n ...
3,4,ACGEL NANO Gel 15gm,i t i s u s e t o t r e a t a c n ...
4,5,Acleen 1% Lotion 25ml,t r e a t t h e m o s t s e v e r f o ...
...,...,...,...
9715,9716,T Muce Ointment 5gm,u s e f o r t r e a t w a r t w o u n d
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,u s e t o s o f t e n t h e s k i n ...
9717,9718,Wokadine M Onit 10gm,u s e f o r s c a r w o u n d
9718,9719,Wound Fix Solution 100ml,u s e f o r w o u n d w o u n d


# Importing the nltk library

In [None]:
import nltk

In [None]:
from nltk.stem.porter import PorterStemmer
# Single shared stemmer instance; used by the stem() helper defined below.
ps = PorterStemmer()

In [None]:
!pip install -U scikit-learn scipy matplotlib

Requirement already up-to-date: scikit-learn in c:\users\admin\anaconda3\lib\site-packages (1.2.1)
Requirement already up-to-date: scipy in c:\users\admin\anaconda3\lib\site-packages (1.10.0)
Requirement already up-to-date: matplotlib in c:\users\admin\anaconda3\lib\site-packages (3.7.0)


# Converting text documents to a matrix of token counts

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectoriser: drops English stop words and caps the vocabulary at 5000 terms.
cv = CountVectorizer(stop_words='english',max_features=5000)

In [None]:
def stem(text):
  """Return `text` with every whitespace-separated word replaced by its Porter stem.

  Relies on the module-level stemmer `ps`; the stemmed words are re-joined
  with single spaces.
  """
  return " ".join([ps.stem(word) for word in text.split()])

# Stemming 

In [None]:
# Reduce every word in the tags to its stem so inflected forms share one feature.
new_df['tags'] = new_df['tags'].map(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(stem)


In [None]:
# The sparse matrix already exposes .shape, so there is no need to build a
# dense array just to inspect the dimensions.
cv.fit_transform(new_df['tags']).shape

(9720, 806)

In [None]:
# Dense document-term count matrix: one row per medicine, one column per vocabulary term.
vectors = cv.fit_transform(new_df['tags']).toarray()

In [None]:
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the supported replacement.
cv.get_feature_names_out()

['025',
 '12',
 '16',
 '18',
 'abdomin',
 'abl',
 'ach',
 'acid',
 'acn',
 'acne',
 'acquir',
 'action',
 'activ',
 'acut',
 'acute',
 'adequ',
 'adhd',
 'adjunct',
 'adolesc',
 'adult',
 'adults',
 'affect',
 'ag',
 'age',
 'aids',
 'allerg',
 'allergen',
 'allergi',
 'allow',
 'alon',
 'alzheim',
 'alzheimer',
 'alzheimerâ',
 'amoebiasi',
 'anaemia',
 'anal',
 'angina',
 'angl',
 'ani',
 'ankylos',
 'anorexia',
 'anoth',
 'anti',
 'antioxid',
 'antipsychot',
 'antiretrovir',
 'anxieti',
 'anxiou',
 'anxious',
 'apnoea',
 'appear',
 'appetit',
 'appetite',
 'appli',
 'appropri',
 'area',
 'arrhythmia',
 'arrhythmiasi',
 'arteri',
 'arthralgia',
 'arthriti',
 'associ',
 'atherothrombot',
 'athleteâ',
 'atop',
 'atrial',
 'attack',
 'awak',
 'b1',
 'b2',
 'b3',
 'b5',
 'b6',
 'babi',
 'backache',
 'bacteri',
 'bacteria',
 'balanc',
 'balanitis',
 'bandag',
 'becom',
 'behaviour',
 'beliefs',
 'benefit',
 'beta',
 'biliari',
 'biotin',
 'bite',
 'blackhead',
 'blackheads',
 'bleaches',
 

# Measuring Cosine Similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
cosine_similarity(vectors)

array([[1.        , 0.25197632, 0.43643578, ..., 0.        , 0.        ,
        0.        ],
       [0.25197632, 1.        , 0.25660012, ..., 0.19245009, 0.1490712 ,
        0.0860663 ],
       [0.43643578, 0.25660012, 1.        , ..., 0.11111111, 0.0860663 ,
        0.0993808 ],
       ...,
       [0.        , 0.19245009, 0.11111111, ..., 1.        , 0.77459667,
        0.2981424 ],
       [0.        , 0.1490712 , 0.0860663 , ..., 0.77459667, 1.        ,
        0.34641016],
       [0.        , 0.0860663 , 0.0993808 , ..., 0.2981424 , 0.34641016,
        1.        ]])

In [None]:
# Pairwise cosine similarity between all rows of `vectors`;
# similarity[i][j] compares medicine i with medicine j.
similarity = cosine_similarity(vectors)

In [None]:
similarity[3]

array([0.43643578, 0.25660012, 1.        , ..., 0.11111111, 0.0860663 ,
       0.0993808 ])

# Recommendation of similar medicines for common illness

In [None]:
def recommend(medicine):
    """Print the five medicines whose tags are most similar to `medicine`.

    Parameters
    ----------
    medicine : str
        Exact `Drug_Name` value to look up in `new_df`.

    Raises
    ------
    IndexError
        If no row of `new_df` has that `Drug_Name`.
    """
    # Work with the row *position* rather than the index label: `similarity`
    # is a plain array ordered like the rows of `new_df`, and results are
    # read back with .iloc below.
    matches = np.flatnonzero((new_df['Drug_Name'] == medicine).to_numpy())
    if matches.size == 0:
        raise IndexError(f"medicine {medicine!r} not found in new_df['Drug_Name']")
    medicine_index = int(matches[0])
    distances = similarity[medicine_index]

    # Rank all rows by descending similarity, then drop the queried row by
    # position. Slicing off the first entry is not enough: other medicines can
    # tie at similarity 1.0 and sort ahead of the query, which would then be
    # recommended to itself.
    ranked = sorted(enumerate(distances), reverse=True, key=lambda x: x[1])
    medicines_list = [pair for pair in ranked if pair[0] != medicine_index][:5]

    for i in medicines_list:
        print(new_df.iloc[i[0]].Drug_Name)
    

In [None]:
recommend("Abendol Tablet 1'S")

3A 500mg Tablet 3's3A 250mg Tablet 6's
Abact 500mg Tablet 10'S
Abendol Tablet 1'S
Adflox Oz Tablet 10's
Aequimox 250Mg Capsule 10'sAequimox 500Mg Capsule 10's


In [None]:
import pickle

In [None]:
# Use a context manager so the file is flushed and closed once the dump finishes.
with open('medicine_dict.pkl', 'wb') as f:
    pickle.dump(new_df.to_dict(), f)

In [None]:
# Use a context manager so the file is flushed and closed once the dump finishes.
with open('similarity.pkl', 'wb') as f:
    pickle.dump(similarity, f)