In [1]:
import pymongo
from datetime import datetime
# MongoClient connects lazily: constructing the client and indexing into a
# database/collection perform no network I/O, so they cannot reveal whether a
# server is actually reachable. A short server-selection timeout makes the
# explicit check below fail fast instead of blocking on the 30 s default.
client = pymongo.MongoClient('mongodb://localhost:27017/', serverSelectionTimeoutMS=5000)
db = client['physician_checkup']
# Defined outside the try block so the name exists whatever the check reports.
collection = db['physicians']
try:
    # 'ping' is a cheap round trip that forces a real connection attempt;
    # it raises ConnectionFailure (or a subclass) when no server answers.
    client.admin.command('ping')
    print('Connected to MongoDB successfully.')
except pymongo.errors.ConnectionFailure:
    print('Could not connect to MongoDB.')

Connected to MongoDB successfully.


In [2]:
# Build one plain dict per physician document, renaming the stored fields
# (SOD/EOD/Holiday/Available) to the names used by the rest of the notebook.
physician_data = [
    {
        "UID": record["UID"],
        "name": record["name"],
        "specialization": record["specialization"],
        "working_hours_start": record["SOD"],
        "working_hours_end": record["EOD"],
        # "Holiday" is a comma-separated string; split it into trimmed day names.
        "week_offs": [part.strip() for part in record["Holiday"].split(",")],
        "availability_label": record["Available"],
    }
    for record in collection.find()
]
    

In [3]:
# Dump every transformed physician record to eyeball the result of the load step.
print(physician_data)

[{'UID': 123, 'name': 'Hrishit', 'specialization': 'Surgeon', 'working_hours_start': '9:00', 'working_hours_end': '14:00', 'week_offs': ['Tuesday'], 'availability_label': True}, {'UID': 1234, 'name': 'Vishal', 'specialization': 'Dentist', 'working_hours_start': '10:00', 'working_hours_end': '16:00', 'week_offs': ['Sunday'], 'availability_label': True}, {'UID': 1001, 'name': 'John Doe', 'specialization': 'Cardiologist', 'working_hours_start': '08:00', 'working_hours_end': '16:00', 'week_offs': ['Saturday', 'Sunday'], 'availability_label': True}, {'UID': 1002, 'name': 'Jane Smith', 'specialization': 'Pediatrician', 'working_hours_start': '09:00', 'working_hours_end': '17:00', 'week_offs': ['Sunday'], 'availability_label': True}, {'UID': 1003, 'name': 'Robert Johnson', 'specialization': 'Dermatologist', 'working_hours_start': '10:00', 'working_hours_end': '18:00', 'week_offs': ['Thursday'], 'availability_label': False}, {'UID': 1004, 'name': 'Emily Wilson', 'specialization': 'Orthopedic S

In [4]:
import json
import pulp
from datetime import datetime
import pandas as pd

In [5]:
# Turn the list of per-physician dicts into a table: one row per physician,
# one column per dict key.
df = pd.DataFrame(physician_data)

In [6]:
def time_to_seconds(time_str):
    """Convert an 'H:MM' / 'HH:MM' clock string to seconds since midnight.

    Args:
        time_str: Time of day in 24-hour '%H:%M' format, e.g. '9:00' or '14:30'.

    Returns:
        float: Number of seconds elapsed since 00:00 on the same day.

    Raises:
        ValueError: If ``time_str`` does not match the '%H:%M' format.
    """
    parsed = datetime.strptime(time_str, '%H:%M')
    # '%H:%M' carries no seconds component, so hours and minutes are the whole offset.
    return float(parsed.hour * 3600 + parsed.minute * 60)

# Replace both clock-string columns with their seconds-since-midnight values.
for hours_column in ('working_hours_start', 'working_hours_end'):
    df[hours_column] = df[hours_column].apply(time_to_seconds)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [8]:
# 'name' and 'UID' identify a physician rather than describe one, so neither
# is kept as a model feature.
df = df.drop(columns=['name', 'UID'])

In [9]:
# Label (y): the "availability_label" column on its own.
y = df["availability_label"]

# Features (X): every remaining column.
X = df.drop(columns=["availability_label"])


In [10]:
# One-hot encode 'specialization': the column is replaced by one
# 'specialization_<value>' indicator column per distinct value.
X = pd.get_dummies(X, columns=['specialization'], prefix=['specialization'])


In [11]:
days_of_week = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

# Add one column per weekday, initially True for every physician.
X = X.assign(**{weekday: True for weekday in days_of_week})




In [12]:
# Flip a physician's weekday flag to False for every day listed in their week-offs.
for row_label, off_days in X['week_offs'].items():
    for weekday in days_of_week:
        if weekday in off_days:
            X.at[row_label, weekday] = False

# The list-valued 'week_offs' column is now fully encoded in the weekday flags.
del X['week_offs']

In [13]:
# Show the finished feature table to check the encoded columns.
print(X)

    working_hours_start  working_hours_end  specialization_Cardiologist  \
0               32400.0            50400.0                        False   
1               36000.0            57600.0                        False   
2               28800.0            57600.0                         True   
3               32400.0            61200.0                        False   
4               36000.0            64800.0                        False   
5               27000.0            55800.0                        False   
6               30600.0            59400.0                        False   
7               34200.0            63000.0                        False   
8               28800.0            57600.0                        False   
9               34200.0            63000.0                        False   
10              30600.0            59400.0                        False   
11              36000.0            64800.0                        False   
12              32400.0  

In [14]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with default hyperparameters on the
# whole feature table X against the availability labels y; no rows are held
# out for evaluation in this cell.
# NOTE(review): the working-hours columns are raw seconds (tens of thousands)
# sitting next to boolean flags, and no scaling is applied here — confirm this
# is intended.
model = LogisticRegression()
model.fit(X, y)

In [15]:
# A single appointment request; later cells expand it into a one-row feature frame.
patient_appointment_info = {
    'day_of_week': 'Monday',        # Requested weekday; later used directly as a column name
    'working_hours_start': 32400.0,         # Start time in seconds since midnight (32400 s = 09:00)
    'working_hours_end': 50200.0,           # End time in seconds since midnight (50200 s = 13:56:40) — NOTE(review): possibly meant 50400 (14:00); confirm
    'specialization': 'specialization_Surgeon',  # Full one-hot column name ('specialization_<value>'), not the bare specialization
    # Add other specialization flags as needed
}

In [16]:
# Wrap the request dict in a list so the resulting DataFrame has exactly one row.
pdf = pd.DataFrame([patient_appointment_info])

In [17]:
# Take the specialization indicator columns from the training features instead
# of repeating them by hand, so the request frame cannot drift out of sync with
# whatever specializations the database contained when X was built.
spec = [column for column in X.columns if column.startswith('specialization_')]

# Create one indicator column per known specialization, False by default;
# the requested specialization is switched on in the next step.
for column in spec:
    pdf[column] = False


In [18]:
# The request stores the full one-hot column name, so it can be used directly
# to switch that indicator on.
requested_specialization = patient_appointment_info['specialization']
pdf[requested_specialization] = True

# The raw 'specialization' value is now encoded in the indicator columns.
del pdf['specialization']

print(pdf)

  day_of_week  working_hours_start  working_hours_end  \
0      Monday              32400.0            50200.0   

   specialization_Cardiologist  specialization_Dentist  \
0                        False                   False   

   specialization_Dermatologist  specialization_ENT Specialist  \
0                         False                          False   

   specialization_Gastroenterologist  specialization_Neurologist  \
0                              False                       False   

   specialization_Ophthalmologist  specialization_Orthopedic Surgeon  \
0                           False                              False   

   specialization_Pediatrician  specialization_Psychiatrist  \
0                        False                        False   

   specialization_Rheumatologist  specialization_Surgeon  \
0                          False                    True   

   specialization_Urologist  
0                     False  


In [19]:
days_of_week_columns = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

# Start with every weekday flag off ...
for weekday in days_of_week_columns:
    pdf[weekday] = False

# ... then switch on the flag for the requested day.
requested_day = patient_appointment_info['day_of_week']
pdf[requested_day] = True

# The raw 'day_of_week' value is now encoded in the weekday flags.
del pdf['day_of_week']

In [20]:
# Show the finished one-row request frame before it is passed to the model.
print(pdf)

   working_hours_start  working_hours_end  specialization_Cardiologist  \
0              32400.0            50200.0                        False   

   specialization_Dentist  specialization_Dermatologist  \
0                   False                         False   

   specialization_ENT Specialist  specialization_Gastroenterologist  \
0                          False                              False   

   specialization_Neurologist  specialization_Ophthalmologist  \
0                       False                           False   

   specialization_Orthopedic Surgeon  ...  specialization_Rheumatologist  \
0                              False  ...                          False   

   specialization_Surgeon  specialization_Urologist  Monday  Tuesday  \
0                    True                     False    True    False   

   Wednesday  Thursday  Friday  Saturday  Sunday  
0      False     False   False     False   False  

[1 rows x 22 columns]


In [21]:
# The request frame was assembled by hand, so check it against the feature
# table the model was fitted on before predicting. A missing or misspelled
# column (e.g. a specialization the model never saw) is reported by name, and
# the columns are put in training order rather than relying on the order in
# which the cells above happened to create them.
training_columns = list(X.columns)
missing_columns = [column for column in training_columns if column not in pdf.columns]
unexpected_columns = [column for column in pdf.columns if column not in training_columns]
if missing_columns or unexpected_columns:
    raise ValueError(
        "Request columns do not match the training features; "
        f"missing={missing_columns}, unexpected={unexpected_columns}"
    )

predictions = model.predict(pdf[training_columns])
print(predictions)

[ True]


In [22]:
# `predictions` holds ONE label: the model's answer for the single request row.
# Zipping it with `physician_data` stops after the first pair, so it could only
# ever return the first physician in the database, whoever was requested.
# Instead, use the prediction as a gate and select the physicians whose own
# record satisfies the request.
request_predicted_available = len(predictions) > 0 and bool(predictions[0])

requested_specialization = patient_appointment_info['specialization']
requested_day = patient_appointment_info['day_of_week']
requested_start = patient_appointment_info['working_hours_start']
requested_end = patient_appointment_info['working_hours_end']

# NOTE(review): the matching rules below assume the request's start/end describe
# a window that must fall inside the physician's working hours, and that
# physicians flagged as unavailable should be excluded — confirm with the owner.
true_predictions = [
    doctor
    for doctor in physician_data
    if request_predicted_available
    and doctor['availability_label']
    # The request stores the one-hot column name, i.e. 'specialization_<value>'.
    and 'specialization_' + doctor['specialization'] == requested_specialization
    and requested_day not in doctor['week_offs']
    # physician_data still holds 'HH:MM' strings; convert them for comparison.
    and time_to_seconds(doctor['working_hours_start']) <= requested_start
    and requested_end <= time_to_seconds(doctor['working_hours_end'])
]
print(true_predictions)

[{'UID': 123, 'name': 'Hrishit', 'specialization': 'Surgeon', 'working_hours_start': '9:00', 'working_hours_end': '14:00', 'week_offs': ['Tuesday'], 'availability_label': True}]
