In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense
from sklearn.metrics import classification_report

# Load your dataset
data = pd.read_csv("C:/Users/VIDYA RAJU/Downloads/depdata.csv")  



# Label encode the target variable
le = LabelEncoder()
data['Department_encoded'] = le.fit_transform(data['Department'])
num_departments = len(data['Department'].unique())

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['Symptoms'])
X = tokenizer.texts_to_sequences(data['Symptoms'])
max_len = max(len(seq) for seq in X)
X = pad_sequences(X, maxlen=max_len, padding='post')

# Convert labels to one-hot encoded format
y = pd.get_dummies(data['Department_encoded']).values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the CNN model
input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32, input_length=max_len)(input_layer)
conv1d_layer = Conv1D(filters=128, kernel_size=5, activation='relu')(embedding_layer)
pooling_layer = GlobalMaxPooling1D()(conv1d_layer)
output_layer = Dense(num_departments, activation='softmax')(pooling_layer)

cnn_model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
cnn_model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test))

# Evaluate the model
y_pred = cnn_model.predict(X_test)
y_pred = np.argmax(y_pred, axis=-1)

# Print classification report
print("\nClassification Report:")
print(classification_report(np.argmax(y_test, axis=-1), y_pred))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         8
           4       1.00      1.00      1.00         3
           5       1.00      1.00      1.00         2
      

In [2]:
# Read the symptom from a text file
with open("C:/Users/VIDYA RAJU/Downloads/symptom.txt", "r") as file:
    new_symptoms = file.read()

# Tokenize and pad the new symptom paragraph
new_X = tokenizer.texts_to_sequences([new_symptoms])
new_X = pad_sequences(new_X, maxlen=max_len, padding='post')

# Predict the department
predicted_department = cnn_model.predict(new_X)
predicted_department = le.inverse_transform([np.argmax(predicted_department)])

print("Predicted Department:", predicted_department)

Predicted Department: ['Hematology']
