In [10]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Load the data and discard every row that has a missing value, so the string
# concatenation below never meets a NaN.
data = pd.read_csv('medical_data.csv').dropna()

# Combine Symptoms, Causes, and Gender columns to create the input text feature.
# assign() builds a new frame instead of mutating the dropna() result in place,
# which keeps this step idempotent when the cell is re-run.
data = data.assign(
    Combined=data['Symptoms'] + ' ' + data['Causes'] + ' ' + data['Gender']
)

# Prepare the target variables (Disease and Medicine)
target_disease = data['Disease']
target_medicine = data['Medicine']

# Text vectorization using CountVectorizer (bag-of-words counts).
# NOTE(review): the vocabulary is fitted on all rows, including those that end
# up in the test split, so the test set is not fully unseen. Confirm this is acceptable.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data['Combined'])

# Split the data into training and testing sets for Disease prediction
X_train_disease, X_test_disease, y_train_disease, y_test_disease = train_test_split(
    X, target_disease, test_size=0.2, random_state=42
)

# Train the model for Disease prediction
disease_classifier = MultinomialNB()
disease_classifier.fit(X_train_disease, y_train_disease)

# Make predictions for Disease and report hold-out accuracy.
# The recorded cell output shows this line, but the code that printed it was
# missing, so the notebook could not reproduce its own output.
y_pred_disease = disease_classifier.predict(X_test_disease)
print("Accuracy for Disease prediction:", accuracy_score(y_test_disease, y_pred_disease))

# Split the data into training and testing sets for Medicine recommendation.
# Same test_size and random_state as above, so the rows land in the same splits.
X_train_medicine, X_test_medicine, y_train_medicine, y_test_medicine = train_test_split(
    X, target_medicine, test_size=0.2, random_state=42
)

# Train the model for Medicine recommendation
medicine_classifier = MultinomialNB()
medicine_classifier.fit(X_train_medicine, y_train_medicine)

# Make predictions for Medicine
y_pred_medicine = medicine_classifier.predict(X_test_medicine)

# Function to predict Disease and Medicine based on symptoms
def predict_disease_and_medicine(symptoms):
    """Predict a disease and a medicine for the given symptoms.

    Parameters
    ----------
    symptoms : str or iterable of str
        Either one free-text description (e.g. ``"fever headache"``) or a
        collection of terms (e.g. ``["fever", "headache"]``). The vectorizer
        in this notebook is fitted on combined Symptoms/Causes/Gender text,
        so terms from any of those columns can be supplied.

    Returns
    -------
    tuple
        ``(predicted_disease, predicted_medicine)``: the first (and only)
        prediction from ``disease_classifier`` and ``medicine_classifier``.
    """
    if isinstance(symptoms, str):
        # A bare string iterates character by character. Joining it would give
        # "f e v e r", and the default CountVectorizer tokenizer ignores
        # single-character tokens, so the model would see an empty input.
        symptoms_text = symptoms
    else:
        symptoms_text = ' '.join(symptoms)

    # transform() expects a collection of documents, hence the one-element list.
    symptoms_input = vectorizer.transform([symptoms_text])

    predicted_disease = disease_classifier.predict(symptoms_input)
    predicted_medicine = medicine_classifier.predict(symptoms_input)

    return predicted_disease[0], predicted_medicine[0]

Accuracy for Disease prediction: 0.6122448979591837


In [11]:
# Save the disease and medicine classifiers together with the fitted vectorizer
import pickle

# The classifiers only understand feature vectors produced by this exact fitted
# CountVectorizer (same vocabulary, same column order). Without it the pickled
# models cannot be used from a fresh process, so it is persisted alongside them.
with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

with open('disease_classifier.pkl', 'wb') as f:
    pickle.dump(disease_classifier, f)

with open('medicine_classifier.pkl', 'wb') as f:
    pickle.dump(medicine_classifier, f)

# NOTE: only load these files from a trusted location. Unpickling executes
# arbitrary code embedded in the file.