In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer


In [3]:
# Sample list of disease symptoms
symptoms = [
    "Fever, Cough, Shortness of breath",
    "Headache, Fatigue",
    "Sore throat, Runny nose, Sneezing",
    "Fever, Chills, Muscle pain",
    "Increased thirst, Frequent urination",
    "High blood pressure",
    "Wheezing, Shortness of breath",
    "Severe diarrhea, Vomiting",
    "Chest pain, Difficulty breathing",
    "Rash, Itching",
]

# Sample list of priority levels (1 for high, 2 for medium, 3 for low)
priority_levels = [1, 2, 2, 1, 3, 1, 2, 1, 1, 2]

# Create a dictionary with symptoms and priority levels
data = {
    'Symptoms': symptoms,
    'Priority_Level': priority_levels
}

# Create a DataFrame using pandas
df = pd.DataFrame(data)

In [4]:
print(df)


                               Symptoms  Priority_Level
0     Fever, Cough, Shortness of breath               1
1                     Headache, Fatigue               2
2     Sore throat, Runny nose, Sneezing               2
3            Fever, Chills, Muscle pain               1
4  Increased thirst, Frequent urination               3
5                   High blood pressure               1
6         Wheezing, Shortness of breath               2
7             Severe diarrhea, Vomiting               1
8      Chest pain, Difficulty breathing               1
9                         Rash, Itching               2


In [5]:
# Initialize the CountVectorizer
vectorizer = CountVectorizer()

# Fit and transform the symptoms to a bag-of-words representation
X = vectorizer.fit_transform(df['Symptoms'])

# Split the data into features (X) and target (y)
y = df['Priority_Level']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Initialize the logistic regression model
model = LogisticRegression()

# Train the model using the training data
model.fit(X_train, y_train)


In [7]:
# Make predictions on the test set
y_pred = model.predict(X_test)


In [8]:
# Convert the X_test sparse matrix to a DataFrame
X_test_df = pd.DataFrame(X_test.toarray(), columns=vectorizer.get_feature_names_out())

In [9]:
# Get the appointment booking time as the index of the test set DataFrame
appointment_times = X_test_df.index

In [10]:
# Create a DataFrame with the symptoms, predicted priority levels, and appointment booking time
X_test_with_time = pd.DataFrame({
    'Symptoms': df.loc[X_test_df.index, 'Symptoms'],
    'Predicted_Priority': y_pred,
    'Appointment_Time': appointment_times
})


In [11]:
# Sort the test set by predicted priority levels and appointment booking time
X_test_with_time = X_test_with_time.sort_values(by=['Predicted_Priority', 'Appointment_Time'])


In [12]:
# Assign tokens based on the order of booking
X_test_with_time['Token'] = range(1, len(X_test_with_time) + 1)


In [13]:
print(X_test_with_time[['Symptoms', 'Predicted_Priority', 'Token']])

                            Symptoms  Predicted_Priority  Token
0  Fever, Cough, Shortness of breath                   1      1
1                  Headache, Fatigue                   1      2
