# In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
from keras.models import Sequential
from keras.layers import Dense, Input
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Read the medication dataset from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer='medicine_dataset.csv')

# Load the small English spaCy pipeline used by preprocess_text below
nlp = spacy.load(name='en_core_web_sm')

def preprocess_text(text):
    """Lemmatize ``text`` and drop stop words and punctuation.

    Args:
        text: The raw field value. Strings are processed as-is; ``None`` and
            float NaN (what ``pd.read_csv`` produces for an empty cell) are
            treated as empty; any other value is converted with ``str()``
            before processing.

    Returns:
        A single string holding the lemmas of the remaining tokens, joined
        by single spaces, or ``''`` when there is nothing to process.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        # e.g. a numeric severity score read from the CSV
        text = str(text)
    if not text:
        # Nothing to tokenize; skip the spaCy pipeline call entirely.
        return ''

    doc = nlp(text)
    return ' '.join(
        token.lemma_
        for token in doc
        if not token.is_stop and not token.is_punct
    )

# Free-text questionnaire columns that are merged into one document per row.
TEXT_COLUMNS = [
    'primary_reason',
    'allergies',
    'current_medications',
    'adverse_reactions',
    'chronic_conditions',
    'symptoms',
    'symptom_severity',
]

# Empty CSV cells are read as float NaN and numeric columns as numbers, but
# the spaCy pipeline expects str input: blank out missing values and coerce
# everything to str before lemmatizing.
text_fields = data[TEXT_COLUMNS].fillna('').astype(str)
data['processed_text'] = text_fields.apply(
    lambda row: ' '.join(preprocess_text(value) for value in row),
    axis=1,
)

# Encode labels
y = data['recommended_medication']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data BEFORE vectorizing so the TF-IDF vocabulary and IDF weights are
# learned from the training documents only; fitting on the full dataset
# leaks test-set statistics into the features and inflates the scores.
text_train, text_test, y_train, y_test = train_test_split(
    data['processed_text'], y_encoded, test_size=0.2, random_state=42
)

# Vectorize text using TF-IDF (fit on train, transform-only on test)
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full feature matrix in the train-fitted feature space; kept so that any
# later code that still refers to X continues to work.
X = vectorizer.transform(data['processed_text'])

# The network is fed dense arrays; densify each split once instead of on
# every call.
X_train_dense = X_train.toarray()
X_test_dense = X_test.toarray()

# Build and compile model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
# NOTE: the test split is passed as validation data for per-epoch monitoring
# only; nothing here selects a model based on it.
model.fit(
    X_train_dense,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_dense, y_test),
)

# Predict and evaluate
y_pred = model.predict(X_test_dense)
y_pred_classes = y_pred.argmax(axis=1)

# Report per-medication scores by name rather than by integer code. `labels`
# is given explicitly because the test split may not contain every class,
# and zero_division=0 silences the warning for classes with no samples.
class_ids = list(range(len(label_encoder.classes_)))
print(classification_report(
    y_test,
    y_pred_classes,
    labels=class_ids,
    target_names=[str(name) for name in label_encoder.classes_],
    zero_division=0,
))

# Build the feature row for one set of questionnaire answers
def preprocess_user_input(user_input):
    """Clean every answer in ``user_input`` and TF-IDF encode the result.

    Each value of the mapping is passed through ``preprocess_text``; the
    cleaned strings are joined with single spaces, in the mapping's value
    order, and the resulting single document is transformed by the
    module-level ``vectorizer``.

    Returns:
        Whatever ``vectorizer.transform`` produces for that one document.
    """
    cleaned_answers = map(preprocess_text, user_input.values())
    document = ' '.join(cleaned_answers)
    return vectorizer.transform([document])

# Get user input
# The keys mirror the dataset's text columns; only the values are used by
# preprocess_user_input.
user_input = {
    'primary_reason': input("What is your primary reason for seeking medication? "),
    'allergies': input("Do you have any known allergies or sensitivities to medications? "),
    'current_medications': input("Are you currently taking any other medications (prescription, over-the-counter, supplements)? "),
    'adverse_reactions': input("Have you had any adverse reactions to medications in the past? If so, please describe. "),
    'chronic_conditions': input("Do you have any chronic medical conditions (e.g., diabetes, hypertension, asthma)? "),
    'symptoms': input("Can you describe your symptoms in detail? When did they start? "),
    'symptom_severity': input("How severe are your symptoms? Have they been getting better, worse, or staying the same? ")
}

# Preprocess and vectorize user input
user_vector = preprocess_user_input(user_input)

# An all-zero row means no token of the answers is in the TF-IDF vocabulary,
# so the network's output would not depend on what the user typed.
if user_vector.nnz == 0:
    print("Warning: none of the entered terms were seen during training; the recommendation below may not be reliable.")

# Predict medication
# The model was trained and evaluated on dense arrays (.toarray()), so feed
# it the same representation here instead of the sparse TF-IDF matrix.
user_prediction = model.predict(user_vector.toarray())
predicted_medicine_index = user_prediction.argmax(axis=1)
recommended_medicine = label_encoder.inverse_transform(predicted_medicine_index)

print(f"Recommended Medicine: {recommended_medicine[0]}")

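# Output of the cell above:
#   NameError: name '_C' is not defined
# NOTE(review): this message usually comes from a broken PyTorch install whose
# compiled `torch._C` extension failed to load (pulled in indirectly by an
# import), not from the code in this cell — verify the environment.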