In [16]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
import os
import glob
import re
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import scipy.sparse as sp
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np



In [17]:
# Read the medicine catalogue from disk. Passing low_memory=False is what
# keeps pandas from emitting a DtypeWarning while parsing this file.
file_path = 'med_dataset_cleaned.csv'
medicine_data = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)

# Preview the first five rows to sanity-check the columns.
medicine_data.head(5)


Unnamed: 0,id,name,substitute0,substitute1,sideEffect0,sideEffect1,sideEffect2,sideEffect3,sideEffect4,use0,use1,use2
0,1,augmentin 625 duo tablet,Penciclav 500 mg/125 mg Tablet,Moxikind-CV 625 Tablet,Vomiting,Nausea,Diarrhea,N,N,Treatment of Bacterial infections,N,N
1,2,azithral 500 tablet,Zithrocare 500mg Tablet,Azax 500 Tablet,Vomiting,Nausea,Abdominal pain,Diarrhea,N,Treatment of Bacterial infections,N,N
2,3,ascoril ls syrup,Solvin LS Syrup,Ambrodil-LX Syrup,Nausea,Vomiting,Diarrhea,Upset stomach,Stomach pain,Treatment of Cough with mucus,N,N
3,5,avil 25 tablet,Eralet 25mg Tablet,N,Sleepiness,Dryness in mouth,N,N,N,Treatment of Allergic conditions,N,N
4,6,allegra-m tablet,Emlukast-FX Tablet,LCFEX-Mont Tablet,Nausea,Diarrhea,Vomiting,Skin rash,Flu-like symptoms,Treatment of Sneezing and runny nose due to al...,N,N


Step 1: Data Preparation

In [24]:
# Clean text data
def clean_text(text):
    """Normalise a single cell value for text vectorisation.

    Lower-cases the value and replaces underscores with spaces.

    Parameters
    ----------
    text : object
        Usually a string. Missing values (``None`` or a float NaN, which is
        what pandas produces for empty CSV cells) and non-string values are
        accepted as well.

    Returns
    -------
    str
        The normalised text; an empty string for missing values.
    """
    # NaN is the only float that is not equal to itself; treating it (and None)
    # as empty text avoids an AttributeError on `.lower()` for missing cells.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    # str() lets numeric or other non-string cells pass through safely.
    return str(text).lower().replace('_', ' ')

# Columns that are normalised and then merged into one text field per medicine
text_columns = ['name', 'use0', 'use1']

# Normalise every cell of the selected columns with clean_text
for col in text_columns:
    medicine_data[col] = medicine_data[col].map(clean_text)

# Join the selected columns of each row into a single space-separated string
medicine_data['combined_text'] = (
    medicine_data[text_columns]
    .astype(str)
    .apply(' '.join, axis=1)
)

# TF-IDF representation of the combined text, capped at 5000 vocabulary terms
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(medicine_data['combined_text'])
y = medicine_data['name']

# Fit the encoder on the medicine names and encode them in one step
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Report the smallest and largest encoded label as a quick sanity check
print(f"Labels range: {y_encoded.min()} to {y_encoded.max()}")




Labels range: 0 to 199127


Step 2: Define the Patient Questionnaire Function

In [19]:
def collect_patient_data():
    """Interactively ask the patient the intake questions.

    Each question is shown through ``input`` in a fixed order, and the typed
    reply is stored under the matching key.

    Returns
    -------
    dict
        Maps each questionnaire key (``'primary_reason'``, ``'allergies'``,
        ``'current_medications'``, ``'adverse_reactions'``,
        ``'chronic_conditions'``, ``'symptoms'``, ``'symptom_severity'``)
        to the raw string the user entered.
    """
    # (key, prompt) pairs, listed in the order the questions are asked
    questionnaire = (
        ('primary_reason', "What is your primary reason for seeking medication? "),
        ('allergies', "Do you have any known allergies or sensitivities to medications? "),
        ('current_medications', "Are you currently taking any other medications (prescription, over-the-counter, supplements)? "),
        ('adverse_reactions', "Have you had any adverse reactions to medications in the past? If so, please describe. "),
        ('chronic_conditions', "Do you have any chronic medical conditions (e.g., diabetes, hypertension, asthma)? "),
        ('symptoms', "Can you describe your symptoms in detail? When did they start? "),
        ('symptom_severity', "How severe are your symptoms? Have they been getting better, worse, or staying the same? "),
    )
    return {field: input(question) for field, question in questionnaire}

Step 3: Collect Patient Data and Feature Engineering

In [22]:
# Run the interactive questionnaire
patient_data = collect_patient_data()

# Wrap the answers in a one-row DataFrame so they can be handled like the dataset
patient_df = pd.DataFrame([patient_data])

# Questionnaire fields that feed the text representation
text_fields = ['primary_reason']

# Join the selected fields of the row into one space-separated string
patient_df['combined_text'] = (
    patient_df[text_fields]
    .astype(str)
    .apply(' '.join, axis=1)
)

# Project the patient's text into the TF-IDF space of the already-fitted vectorizer
patient_vector = vectorizer.transform(patient_df['combined_text'])




In [23]:

# Calculate the cosine similarity between the patient vector and medication vectors
similarities = cosine_similarity(patient_vector, X)

# Find the index of the medication with the highest similarity, and its score
best_match_index = similarities.argmax()
best_match_score = similarities.max()

# If the patient's text shares no vocabulary with any medicine, every score is 0
# and argmax() falls back to index 0. Without this check the first row of the
# dataset would be "recommended" although nothing actually matched.
if best_match_score > 0:
    # Get the recommended medicine name
    recommended_medicine = medicine_data.iloc[best_match_index]['name']
    print(f"Recommended Medicine: {recommended_medicine}")
else:
    recommended_medicine = None
    print("No matching medicine found for the information provided.")


Recommended Medicine: domaspas tablet


Step 4: Model Training

Step 5: Recommendation System