In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the Symptom2Disease CSV on the notebook host.
DATASET_CSV = "/content/Symptom2Disease.csv"

# The first CSV column is used as the DataFrame index.
df = pd.read_csv(DATASET_CSV, index_col=0)
df.head()

Unnamed: 0,label,text
0,Psoriasis,I have been experiencing a skin rash on my arm...
1,Psoriasis,"My skin has been peeling, especially on my kne..."
2,Psoriasis,I have been experiencing joint pain in my fing...
3,Psoriasis,"There is a silver like dusting on my skin, esp..."
4,Psoriasis,"My nails have small dents or pits in them, and..."


In [3]:
# Give every distinct disease label an integer id, numbered in order of first
# appearance in the data. The ids are derived from the labels themselves rather
# than from a fixed count, so no label is left unmapped (NaN) if the dataset
# ever contains a different number of classes.
label_ids = {label: idx for idx, label in enumerate(df['label'].unique())}
df['label_num'] = df['label'].map(label_ids)

df.head()

Unnamed: 0,label,text,label_num
0,Psoriasis,I have been experiencing a skin rash on my arm...,0
1,Psoriasis,"My skin has been peeling, especially on my kne...",0
2,Psoriasis,I have been experiencing joint pain in my fing...,0
3,Psoriasis,"There is a silver like dusting on my skin, esp...",0
4,Psoriasis,"My nails have small dents or pits in them, and...",0


In [4]:
# Disease name -> integer id, numbered in order of first appearance. Built from
# however many distinct labels the data has instead of a hard-coded count of 24.
lookup = {label: idx for idx, label in enumerate(df['label'].unique())}

In [5]:
# Show the disease-name -> integer-id mapping.
print(lookup)

{'Psoriasis': 0, 'Varicose Veins': 1, 'Typhoid': 2, 'Chicken pox': 3, 'Impetigo': 4, 'Dengue': 5, 'Fungal infection': 6, 'Common Cold': 7, 'Pneumonia': 8, 'Dimorphic Hemorrhoids': 9, 'Arthritis': 10, 'Acne': 11, 'Bronchial Asthma': 12, 'Hypertension': 13, 'Migraine': 14, 'Cervical spondylosis': 15, 'Jaundice': 16, 'Malaria': 17, 'urinary tract infection': 18, 'allergy': 19, 'gastroesophageal reflux disease': 20, 'drug reaction': 21, 'peptic ulcer disease': 22, 'diabetes': 23}


In [7]:
! pip install spacy



In [9]:
# Fetch the large English spaCy pipeline. Per the installer's own message, the
# runtime may need a restart before spacy.load('en_core_web_lg') can see it.
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [10]:
import spacy

# Load the en_core_web_lg pipeline once; the cells below reuse it for
# lemmatisation and for document vectors.
nlp = spacy.load("en_core_web_lg")

def preprocess(text):
    """Return ``text`` as a space-separated string of token lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``.
    Whitespace and punctuation tokens are dropped; every other token
    contributes its ``lemma_``.

    Args:
        text: Raw input string.

    Returns:
        The kept lemmas joined by single spaces (an empty string when no
        token is kept).
    """
    # Collect into a descriptively named local instead of shadowing the
    # built-in ``list``.
    lemmas = [
        token.lemma_
        for token in nlp(text)
        if not (token.is_space or token.is_punct)
    ]
    return ' '.join(lemmas)

In [11]:
# Add the lemmatised form of every symptom description as a new column.
df['preprocess'] = df['text'].map(preprocess)

In [12]:
def doc_vector(doc_text):
    """Return the spaCy document vector of ``doc_text``."""
    return nlp(doc_text).vector


# One vector per preprocessed symptom description.
df['vector'] = df['preprocess'].apply(doc_vector)
df.head()

Unnamed: 0,label,text,label_num,preprocess,vector
0,Psoriasis,I have been experiencing a skin rash on my arm...,0,I have be experience a skin rash on my arm leg...,"[-1.0520097, 2.0068107, -3.1425354, 1.3052415,..."
1,Psoriasis,"My skin has been peeling, especially on my kne...",0,my skin have be peel especially on my knee elb...,"[-1.3448839, 1.2253065, -4.0693727, 0.5828706,..."
2,Psoriasis,I have been experiencing joint pain in my fing...,0,I have be experience joint pain in my finger w...,"[-0.039949566, 1.4880179, -3.136055, 0.0100825..."
3,Psoriasis,"There is a silver like dusting on my skin, esp...",0,there be a silver like dust on my skin especia...,"[-1.5876127, 1.5015007, -3.8816297, 1.3765275,..."
4,Psoriasis,"My nails have small dents or pits in them, and...",0,my nail have small dent or pit in they and the...,"[-0.80246216, 2.5354614, -4.1618123, -0.066844..."


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, stratified on the label id.
features = df['vector'].values
targets = df['label_num']

X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.20,
    random_state=23,
    stratify=targets,
)

In [14]:
# Each split is an array of per-row vectors; stack each one into a single array.
X_train_2d, X_test_2d = (np.stack(split) for split in (X_train, X_test))

In [15]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply the same scaling to
# both splits.
scaler = MinMaxScaler().fit(X_train_2d)

X_train_scaled = scaler.transform(X_train_2d)
X_test_scaled = scaler.transform(X_test_2d)

In [16]:
from sklearn.svm import SVC

# Degree-3 polynomial-kernel SVM trained on the scaled training vectors.
svc_params = {"C": 1, "kernel": "poly", "degree": 3, "gamma": "scale"}
model = SVC(**svc_params).fit(X_train_scaled, y_train)

# Predictions for the held-out split.
y_pred = model.predict(X_test_scaled)

In [17]:
from sklearn.metrics import classification_report

# Per-class metrics for the held-out split.
report = classification_report(y_test, y_pred)
print('\n\n\n Classification Report   :\n\n\n ', report)




 Classification Report   :


                precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.91      1.00      0.95        10
           2       0.89      0.80      0.84        10
           3       0.83      1.00      0.91        10
           4       1.00      1.00      1.00        10
           5       1.00      0.80      0.89        10
           6       0.91      1.00      0.95        10
           7       1.00      0.90      0.95        10
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        10
          10       0.91      1.00      0.95        10
          11       1.00      1.00      1.00        10
          12       1.00      1.00      1.00        10
          13       1.00      0.90      0.95        10
          14       0.82      0.90      0.86        10
          15       1.00      0.90      0.95        10
          16       0.91      1.00      0.95    

In [18]:
def embed_symptoms(text):
    """Turn a raw symptom description into a scaled feature row for ``model``.

    Applies the same steps used for the training data: ``preprocess``, the
    spaCy document vector, then the already-fitted ``scaler``.

    Args:
        text: Raw symptom description.

    Returns:
        The scaled features as a single-row 2-D array.
    """
    cleaned = preprocess(text)
    # reshape(1, -1): the scaler and the model expect one row per sample.
    embedding = nlp(cleaned).vector.reshape(1, -1)
    return scaler.transform(embedding)


# The whole inference path lives in one cell with one name per stage, so
# re-running it never re-scales an already scaled vector or feeds an array
# back into the text pipeline.
t1 = "Bad cramps with bruise marks"
tp1 = embed_symptoms(t1)
pred = model.predict(tp1)
print(pred)

[0]


In [25]:
# Translate the predicted integer id back into its disease name.
value = [name for name, label_id in lookup.items() if label_id == pred[0]]
print(value[0])

Psoriasis


In [26]:
import joblib
# Persist the trained classifier and the fitted scaler to the working directory.
joblib.dump(model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [29]:
# Writes the whole DataFrame (index plus every column currently on df) to lookup.csv.
# NOTE(review): the file name suggests only the label -> label_num mapping was
# intended — confirm against whatever reads this file.
df.to_csv('lookup.csv')

In [30]:
# Inspect the raw symptom text of the row at position 34.
df.text.iloc[34]

"My nails are starting to have small pits on them. I am worried and don't know what is causing it. Also, my joints pain and there are rashes on my arms and back."

In [31]:
# Same inspection as the preceding cell: raw symptom text of the row at position 34.
df.text.iloc[34]

"My nails are starting to have small pits on them. I am worried and don't know what is causing it. Also, my joints pain and there are rashes on my arms and back."