In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import nltk
import spacy
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter as ctr
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [3]:
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')

In [4]:
# import requests

# API_URL = "https://api-inference.huggingface.co/models/kormilitzin/en_core_med7_lg"
# headers = {"Authorization": "Bearer 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"}

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()

# output = query({
# 	"inputs": "I have been experiencing symptoms such as high fever, red spots on my body whcih is causing itching and they are getting bigger and swollen",
# })

In [5]:
# output

In [6]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [7]:
import spacy
nlp = spacy.load("en_core_web_lg")

In [8]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/HealthMate/Symptom2Disease.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,label,text
0,0,Psoriasis,I have been experiencing a skin rash on my arm...
1,1,Psoriasis,"My skin has been peeling, especially on my kne..."
2,2,Psoriasis,I have been experiencing joint pain in my fing...
3,3,Psoriasis,"There is a silver like dusting on my skin, esp..."
4,4,Psoriasis,"My nails have small dents or pits in them, and..."


In [9]:
df.drop(['Unnamed: 0'],axis=1,inplace=True)

In [10]:
ctr(df['label'])

Counter({'Psoriasis': 50,
         'Varicose Veins': 50,
         'Typhoid': 50,
         'Chicken pox': 50,
         'Impetigo': 50,
         'Dengue': 50,
         'Fungal infection': 50,
         'Common Cold': 50,
         'Pneumonia': 50,
         'Dimorphic Hemorrhoids': 50,
         'Arthritis': 50,
         'Acne': 50,
         'Bronchial Asthma': 50,
         'Hypertension': 50,
         'Migraine': 50,
         'Cervical spondylosis': 50,
         'Jaundice': 50,
         'Malaria': 50,
         'urinary tract infection': 50,
         'allergy': 50,
         'gastroesophageal reflux disease': 50,
         'drug reaction': 50,
         'peptic ulcer disease': 50,
         'diabetes': 50})

In [11]:
df.sample(10)

Unnamed: 0,label,text
355,Common Cold,"I'm constantly sneezing, and the cold is makin..."
523,Arthritis,My neck has been extremely stiff and my muscle...
151,Chicken pox,I'm feeling fatigued and have no energy. I can...
1129,peptic ulcer disease,I unknowingly lose weight and find it difficul...
420,Pneumonia,I can't seem to get enough air and I'm sweatin...
535,Arthritis,"My muscles have been feeling feeble recently, ..."
115,Typhoid,I have been experiencing a lot of bloating and...
839,Jaundice,"I've been feeling extremely scratchy, sick, an..."
117,Typhoid,"I've been feeling exhausted and weak, and I ca..."
976,allergy,"Along with losing my appetite, I've been havin..."


In [12]:
def preprocess_text(text):
    doc = nlp(text)
    stemmed_tokens = []
    for token in doc:
        if token.is_alpha and not token.is_stop:
            lemma = token.lemma_.lower()
            stemmed_tokens.append(lemma)
    processed_text = ' '.join(stemmed_tokens)

    return processed_text

df['text'] = df['text'].apply(preprocess_text)

In [13]:
df.text[639]

'fever roof weak tired cough lot mucous manage symptom hard make upset'

In [14]:
df.sample(5)

Unnamed: 0,label,text
344,Fungal infection,lot itch skin occasionally turn rash odd patch...
221,Impetigo,sore face near nose lip sore cause discomfort ...
768,Cervical spondylosis,pain chronic cough muscle weakness bother conc...
147,Typhoid,lose lot weight week eat nausea vomiting follo...
720,Migraine,experience digestive issue include acidity ind...


In [15]:
tfidf_vectorizer = TfidfVectorizer()
X = tfidf_vectorizer.fit_transform(df['text'])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
base_models = [

    ('rf', RandomForestClassifier()),
    ('lr', LogisticRegression()),
    ('svm', SVC(kernel='linear', probability=True))
]

In [18]:
voting_classifier = VotingClassifier(estimators=base_models, voting='hard')

In [19]:
voting_classifier.fit(X_train, y_train)

In [20]:
accuracy = voting_classifier.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9791666666666666


In [29]:
import joblib

joblib.dump(voting_classifier, '/content/drive/MyDrive/Colab Notebooks/HealthMate/model.pkl')

['/content/drive/MyDrive/Colab Notebooks/HealthMate/model.pkl']

In [30]:
joblib.dump(tfidf_vectorizer, '/content/drive/MyDrive/Colab Notebooks/HealthMate/tfidf.pkl')

['/content/drive/MyDrive/Colab Notebooks/HealthMate/tfidf.pkl']

In [31]:
joblib.dump(label_encoder, '/content/drive/MyDrive/Colab Notebooks/HealthMate/label_encoder.pkl')

['/content/drive/MyDrive/Colab Notebooks/HealthMate/label_encoder.pkl']

In [32]:
loaded_model = joblib.load('/content/drive/MyDrive/Colab Notebooks/HealthMate/model.pkl')

In [33]:
# Example
sample_text = "I have been experiencing symptoms such as high fever, red spots on my body whcih is causing itching and they are getting bigger and swollen."
sample_text_processed = preprocess_text(sample_text)
sample_text_transformed = tfidf_vectorizer.transform([sample_text_processed])
predicted_label = label_encoder.inverse_transform(loaded_model.predict(sample_text_transformed))

print("Predicted Label:", predicted_label)

Predicted Label: ['Chicken pox']


In [34]:
# Example
sample_text = "My fever is really high, and I'm having trouble catching my breath. I'm sweating a lot, feeling cold and tired, and my heart is beating really fast. I also have some brownish phlegm coming up."
sample_text_processed = preprocess_text(sample_text)
sample_text_transformed = tfidf_vectorizer.transform([sample_text_processed])
predicted_label = label_encoder.inverse_transform(voting_classifier.predict(sample_text_transformed))

print("Predicted Label:", predicted_label)

Predicted Label: ['Pneumonia']


In [35]:
text = 'i been realli weari and ill i been suffer from..'

sample_text = text
sample_text_processed = preprocess_text(sample_text)
sample_text_transformed = tfidf_vectorizer.transform([sample_text_processed])
predicted_label = label_encoder.inverse_transform(voting_classifier.predict(sample_text_transformed))

print("Predicted Label:", predicted_label)

Predicted Label: ['drug reaction']
