In [135]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [136]:
df=pd.read_csv('df.csv')

In [137]:
df.head()

Unnamed: 0,index,Drug_Name,Reason,Description
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Mild to moderate acne (spots)
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,A RET 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,Acne,treat the most severe form of acne (nodular ac...


In [139]:
df.shape

(9720, 4)

In [140]:
df.isnull().sum()

index          0
Drug_Name      0
Reason         0
Description    0
dtype: int64

In [141]:
df.dropna(inplace=True)

In [142]:
df.duplicated().sum()

0

In [143]:
df['Description']

0                           Mild to moderate acne (spots)
1       A RET 0.025% is a prescription medicine that i...
2       It is used to treat acne vulgaris in people 12...
3       It is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                              used for treating warts
9716                        used to soften the skin cells
9717                                       used for scars
9718                                      used for wounds
9719    used to treat and remove raised warts (usually...
Name: Description, Length: 9720, dtype: object

In [144]:
df['Drug_Name']

0                    A CN Gel(Topical) 20gmA CN Soap 75gm
1       A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...
2                                  ACGEL CL NANO Gel 15gm
3                                     ACGEL NANO Gel 15gm
4                                   Acleen 1% Lotion 25ml
                              ...                        
9715                                  T Muce Ointment 5gm
9716    Wokadine 10% Solution 100mlWokadine Solution 5...
9717                                 Wokadine M Onit 10gm
9718                             Wound Fix Solution 100ml
9719                                Wounsol Ointment 15gm
Name: Drug_Name, Length: 9720, dtype: object

In [145]:
df['Description'].apply(lambda x:x.split())

0                     [Mild, to, moderate, acne, (spots)]
1       [A, RET, 0.025%, is, a, prescription, medicine...
2       [It, is, used, to, treat, acne, vulgaris, in, ...
3       [It, is, used, to, treat, acne, vulgaris, in, ...
4       [treat, the, most, severe, form, of, acne, (no...
                              ...                        
9715                         [used, for, treating, warts]
9716                 [used, to, soften, the, skin, cells]
9717                                   [used, for, scars]
9718                                  [used, for, wounds]
9719    [used, to, treat, and, remove, raised, warts, ...
Name: Description, Length: 9720, dtype: object

In [146]:
df['Reason'] = df['Reason'].apply(lambda x:x.split())
df['Description'] = df['Description'].apply(lambda x:x.split())

In [147]:
df['Description'] = df['Description'].apply(lambda x:[i.replace(" ","") for i in x])

In [148]:
df['tags'] = df['Description'] + df['Reason'] 

In [149]:
df1 = df[['index','Drug_Name','tags']]

In [150]:
df1

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,"[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,"[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,"[treat, the, most, severe, form, of, acne, (no..."
...,...,...,...
9715,9716,T Muce Ointment 5gm,"[used, for, treating, warts, Wound]"
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,"[used, to, soften, the, skin, cells, Wound]"
9717,9718,Wokadine M Onit 10gm,"[used, for, scars, Wound]"
9718,9719,Wound Fix Solution 100ml,"[used, for, wounds, Wound]"


In [151]:
df1['tags'].apply(lambda x:" ".join(x))

0                      Mild to moderate acne (spots) Acne
1       A RET 0.025% is a prescription medicine that i...
2       It is used to treat acne vulgaris in people 12...
3       It is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                        used for treating warts Wound
9716                  used to soften the skin cells Wound
9717                                 used for scars Wound
9718                                used for wounds Wound
9719    used to treat and remove raised warts (usually...
Name: tags, Length: 9720, dtype: object

In [152]:
df1

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,"[Mild, to, moderate, acne, (spots), Acne]"
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,"[A, RET, 0.025%, is, a, prescription, medicine..."
2,3,ACGEL CL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
3,4,ACGEL NANO Gel 15gm,"[It, is, used, to, treat, acne, vulgaris, in, ..."
4,5,Acleen 1% Lotion 25ml,"[treat, the, most, severe, form, of, acne, (no..."
...,...,...,...
9715,9716,T Muce Ointment 5gm,"[used, for, treating, warts, Wound]"
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,"[used, to, soften, the, skin, cells, Wound]"
9717,9718,Wokadine M Onit 10gm,"[used, for, scars, Wound]"
9718,9719,Wound Fix Solution 100ml,"[used, for, wounds, Wound]"


In [153]:
df1['tags'] = df1['tags'].apply(lambda x:" ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['tags'] = df1['tags'].apply(lambda x:" ".join(x))


In [154]:
df1

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,Mild to moderate acne (spots) Acne
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,A RET 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,It is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,It is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,treat the most severe form of acne (nodular ac...
...,...,...,...
9715,9716,T Muce Ointment 5gm,used for treating warts Wound
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,used to soften the skin cells Wound
9717,9718,Wokadine M Onit 10gm,used for scars Wound
9718,9719,Wound Fix Solution 100ml,used for wounds Wound


In [155]:
df1.loc[:,'tags'] = df1['tags'].apply(lambda x: x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [156]:
df1

Unnamed: 0,index,Drug_Name,tags
0,1,A CN Gel(Topical) 20gmA CN Soap 75gm,mild to moderate acne (spots) acne
1,2,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,a ret 0.025% is a prescription medicine that i...
2,3,ACGEL CL NANO Gel 15gm,it is used to treat acne vulgaris in people 12...
3,4,ACGEL NANO Gel 15gm,it is used to treat acne vulgaris in people 12...
4,5,Acleen 1% Lotion 25ml,treat the most severe form of acne (nodular ac...
...,...,...,...
9715,9716,T Muce Ointment 5gm,used for treating warts wound
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,used to soften the skin cells wound
9717,9718,Wokadine M Onit 10gm,used for scars wound
9718,9719,Wound Fix Solution 100ml,used for wounds wound


In [157]:
vectorizer = TfidfVectorizer(stop_words="english")
features = vectorizer.fit_transform(df1["tags"])

In [170]:
df1['tags']

0                      mild to moderate acne (spots) acne
1       a ret 0.025% is a prescription medicine that i...
2       it is used to treat acne vulgaris in people 12...
3       it is used to treat acne vulgaris in people 12...
4       treat the most severe form of acne (nodular ac...
                              ...                        
9715                        used for treating warts wound
9716                  used to soften the skin cells wound
9717                                 used for scars wound
9718                                used for wounds wound
9719    used to treat and remove raised warts (usually...
Name: tags, Length: 9720, dtype: object

In [158]:
similarities = cosine_similarity(features)

In [165]:
def get_recommendations(patient_text):
    # Preprocess the input text
    patient_text = patient_text.lower()
    
    # Extract features
    patient_features = vectorizer.transform([patient_text])
    
    # Compute similarities
    similarities = cosine_similarity(patient_features, features)[0]
    
    # Get indices of top similar documents
    indices = similarities.argsort()[::-1][:10]
    
    # Get recommendations
    recommendations = df1.iloc[indices]["Drug_Name"].values.tolist()
    
    return recommendations

In [171]:
get_recommendations("fever")

['Calipar 250mg Syrup 60ml',
 'Babygesic 250mg Syrup 60mlBabygesic 125mg Syrup 60ml',
 'Coldmine Syrup 60ml',
 "Calpol 100mg Drops 15mlCalpol 650mg Tablet 10'SCalpol 500mg Tablet 500'SCalpol 500mg Tablet 10'S",
 "Cachpar 500mg Tablet 10'S",
 "Brucet 400/333mg Tablet 3'S",
 'Brikamol 250mg Syrup 60ml',
 "Biopyrin 650mg Tablet 15'S",
 "Bestogesic Fast 650mg Tablet 10'S",
 'Biocetamol 60mg Syrup 60mlBiocetamol 100mg Drops 10mlBiocetamol 150mg Injection 2ml']