4. Collect User Input and Make Predictions

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
from keras.models import Sequential
from keras.layers import Dense, Input
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import multiprocessing as mp

# Load the spaCy English pipeline once at module level so that every call to
# preprocess_text() reuses the same object. The 'parser' and 'ner' components
# are disabled; preprocess_text() only reads lemma_, is_stop and is_punct.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

def preprocess_text(text):
    """Lemmatize *text* and drop stop words and punctuation.

    The text is run through the module-level spaCy pipeline ``nlp``. The
    lemma of every token that is neither a stop word nor punctuation is
    kept, and the kept lemmas are returned joined by single spaces.
    """
    kept_lemmas = []
    for tok in nlp(text):
        # Skip tokens that carry no content for the TF-IDF features.
        if tok.is_stop or tok.is_punct:
            continue
        kept_lemmas.append(tok.lemma_)
    return ' '.join(kept_lemmas)

# Turn the questionnaire answers into a single vectorized row
def preprocess_user_input(user_input, vectorizer):
    """Clean every answer in *user_input* and vectorize them as one document.

    Args:
        user_input: Mapping whose values are the free-text answers; the keys
            are not used.
        vectorizer: Fitted object exposing ``transform``; it is called with a
            one-element list holding the combined text.

    Returns:
        Whatever ``vectorizer.transform`` returns for that single document.
    """
    cleaned_answers = map(preprocess_text, user_input.values())
    document = ' '.join(cleaned_answers)
    return vectorizer.transform([document])

# Load cleaned data from a CSV file given by a relative path, i.e. resolved
# against the current working directory. low_memory=False is forwarded to
# pandas.read_csv.
# NOTE(review): presumably set to avoid mixed-dtype inference warnings on this
# file — confirm against the dataset.
cleaned_file_path = 'cleaned_medicine_dataset.csv'
data = pd.read_csv(cleaned_file_path, low_memory=False)

# Build one free-text document per row from its descriptive columns
def combine_text(row):
    """Return the row's descriptive fields as one space-separated string.

    Each field is looked up in *row* by key, converted with ``str()`` and
    joined in the fixed order listed below, so non-string values appear in
    their string form (for example ``'None'`` or ``'3.5'``).
    """
    field_names = (
        'substitute0',
        'substitute1',
        'substitute2',
        'substitute3',
        'substitute4',
        'sideEffect0',
        'sideEffect1',
        'sideEffect2',
        'use0',
        'use1',
        'Chemical Class',
        'Therapeutic Class',
        'Action Class',
    )
    return ' '.join(str(row[name]) for name in field_names)

# Call combine_text on every row (axis=1) and store the resulting strings in
# the 'combined_text' column.
data['combined_text'] = data.apply(combine_text, axis=1)

# Preprocess the combined text in parallel
def parallelize_dataframe(df, func, num_cores=None):
    """Apply *func* to contiguous row chunks of *df* and concatenate the results.

    Args:
        df: DataFrame to process.
        func: Callable that takes a DataFrame chunk and returns a DataFrame.
            It must be picklable whenever more than one chunk is processed,
            because it is then sent to worker processes.
        num_cores: Upper bound on the number of chunks / worker processes.
            Defaults to ``mp.cpu_count()``.

    Returns:
        ``pd.concat`` of the per-chunk results, in original row order.

    Raises:
        ValueError: If *num_cores* is smaller than 1.
    """
    if num_cores is None:
        num_cores = mp.cpu_count()
    if num_cores < 1:
        raise ValueError(f"num_cores must be at least 1, got {num_cores}")

    n_rows = len(df)
    # Never create more chunks than rows: empty chunks only cost process
    # start-up time and force func to cope with empty frames.
    num_chunks = max(1, min(num_cores, n_rows))

    # Slice with iloc instead of np.array_split, which routes DataFrames
    # through the deprecated DataFrame.swapaxes. The first `extra` chunks get
    # one additional row, the same sizing rule array_split uses.
    base, extra = divmod(n_rows, num_chunks)
    chunks = []
    start = 0
    for i in range(num_chunks):
        stop = start + base + (1 if i < extra else 0)
        chunks.append(df.iloc[start:stop])
        start = stop

    if num_chunks == 1:
        # A pool for a single chunk is pure overhead. Work on a copy so that
        # in-place edits made by func do not leak into the caller's frame,
        # matching the pickled-copy semantics of the multi-process path.
        return pd.concat([func(chunks[0].copy())])

    # The context manager tears the pool down even if func raises in a worker.
    with mp.Pool(num_chunks) as pool:
        results = pool.map(func, chunks)
    return pd.concat(results)

def preprocess_texts(df):
    """Run preprocess_text over the 'combined_text' column of *df*.

    The column is replaced in place on *df*, and the same frame is returned
    so the function can be used as a chunk worker.
    """
    raw_texts = df['combined_text']
    cleaned_texts = raw_texts.apply(preprocess_text)
    df['combined_text'] = cleaned_texts
    return df

# Lemmatize and filter the 'combined_text' column across worker processes.
# NOTE(review): with multiprocessing start methods that launch fresh
# interpreters (the default on Windows and macOS), workers re-import the main
# module, so this call should sit under an `if __name__ == "__main__":` guard
# there — confirm the target platform.
data = parallelize_dataframe(data, preprocess_texts)

# Vectorize text using TF-IDF, capping the vocabulary at 5000 features
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['combined_text'])
# The 'name' column is the prediction target
y = data['name']

# Encode labels as integers; label_encoder is reused after prediction to map
# class indices back to the original names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data: 20% is held out as the test set, with a fixed random_state so
# the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Build and compile model: two hidden ReLU layers (256 and 128 units) between
# an input as wide as the TF-IDF feature matrix and a softmax output with one
# unit per class known to label_encoder
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# The targets are the integer codes produced by label_encoder, hence the
# sparse categorical cross-entropy loss
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model with reduced sample size for quick testing
sample_size = 5000  # Reduce this for quicker runs
if sample_size < X_train.shape[0]:
    X_train_sample, _, y_train_sample, _ = train_test_split(
        X_train, y_train, train_size=sample_size, random_state=42
    )
else:
    # train_test_split raises ValueError when an integer train_size is not
    # smaller than the number of samples, so a training set that is already
    # at or below the requested size is used as-is.
    X_train_sample, y_train_sample = X_train, y_train

# Train model; the sparse TF-IDF matrices are converted to dense arrays before
# being passed to Keras
model.fit(X_train_sample.toarray(), y_train_sample, epochs=5, batch_size=32, validation_data=(X_test.toarray(), y_test))

# Get user input: every prompt is asked in the order listed and the answer is
# stored under the matching key
_questions = {
    'primary_reason': "What is your primary reason for seeking medication? ",
    'allergies': "Do you have any known allergies or sensitivities to medications? ",
    'current_medications': "Are you currently taking any other medications (prescription, over-the-counter, supplements)? ",
    'adverse_reactions': "Have you had any adverse reactions to medications in the past? If so, please describe. ",
    'chronic_conditions': "Do you have any chronic medical conditions (e.g., diabetes, hypertension, asthma)? ",
    'symptoms': "Can you describe your symptoms in detail? When did they start? ",
    'symptom_severity': "How severe are your symptoms? Have they been getting better, worse, or staying the same? ",
}
user_input = {field: input(prompt) for field, prompt in _questions.items()}

# Clean the answers and turn them into one feature row using the fitted
# vectorizer
user_vector = preprocess_user_input(user_input, vectorizer)

# Predict medication: take the highest-scoring class for the single input row
# and map its index back to the original label
user_prediction = model.predict(user_vector.toarray())
predicted_medicine_index = user_prediction.argmax(axis=1)
recommended_medicine = label_encoder.inverse_transform(predicted_medicine_index)

print(f"Recommended Medicine: {recommended_medicine[0]}")