In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
import pickle  # Import pickle for model saving

# Train a TF-IDF + random-forest text classifier on a small slice of the
# dataset and save the fitted pipeline to disk as a pickle file.

# Load your large dataset
df = pd.read_csv('train_updated.csv')

# Use the first 500 rows, then discard any of them that lack a question or a
# label: TfidfVectorizer raises on NaN documents, so a single incomplete row
# would otherwise abort training.
df_subset = df.head(500).dropna(subset=['question', 'cop'])

if df_subset.empty:
    raise ValueError(
        "No usable training rows: the first 500 rows of 'train_updated.csv' "
        "have no complete 'question'/'cop' pairs."
    )

# Create a pipeline with a TF-IDF vectorizer and a RandomForest classifier.
# A fixed random_state makes the trained (and pickled) model reproducible
# from run to run.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('rf', RandomForestClassifier(random_state=42)),
])

# Train the model. The questions are cast to str so that any non-text values
# in the column are vectorized instead of raising inside TfidfVectorizer.
pipeline.fit(df_subset['question'].astype(str), df_subset['cop'])

# Save the trained model as a pickle file (written to the current working
# directory).
with open('medical_chatbot_model.pkl', 'wb') as file:
    pickle.dump(pipeline, file)



In [None]:
from pathlib import Path

# Interactive console chatbot: load the pickled model, then repeatedly read a
# symptom description from the user and print the model's predicted label.

# Prefer the original Colab location; if it does not exist, fall back to the
# current working directory, which is where the training cell writes the file.
model_path = Path('/content/medical_chatbot_model.pkl')
if not model_path.exists():
    model_path = Path('medical_chatbot_model.pkl')

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a model file that you created yourself or otherwise trust.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Welcome message
print("Hi, I am a medical chatbot. Please tell me your symptoms, and I will try to predict your disorder.")

farewell = "Thank you for using the medical chatbot. Take care!"

# Example usage in a continuous loop. Ctrl-C or end-of-input at either prompt
# ends the session with the farewell message instead of a traceback.
try:
    while True:
        user_input = input("User: ")

        # Nothing to classify on blank input; ask again rather than
        # presenting a prediction for an empty description.
        if not user_input.strip():
            print("I did not catch any symptoms. Please describe how you feel.\n")
            continue

        # predict() takes a sequence of documents, so wrap the single input
        # in a list and take the first (only) prediction.
        # NOTE(review): the value printed below is whatever label the model
        # was trained on; confirm that those labels are disorder names.
        predicted_disorder = model.predict([user_input])[0]

        # Output the result
        print(f"I think you may have a disorder related to {predicted_disorder}. Sorry to hear that. I hope you feel well.\n")

        # Ask for another input or exit. Surrounding whitespace and case are
        # ignored, and "y" is accepted as shorthand for "yes".
        another_input = input("Do you have more symptoms to share? (yes/no): ")
        if another_input.strip().lower() not in ('yes', 'y'):
            print(farewell)
            break
except (EOFError, KeyboardInterrupt):
    print()
    print(farewell)