# <span style="color: blue"> Chatbot Project - Doctor and Patient Corpus</span>

### Imports

In [1]:
import sys
import tensorflow as tf
import numpy as np






In [2]:
import json

import os
from dotenv import load_dotenv

import openai
from openai import OpenAI

Note: to run this notebook you need a `.env` file (loaded in the next cell with `load_dotenv()`) that provides your OpenAI API key, e.g. `OPENAI_API_KEY`.

In [3]:
# Load variables from a .env file into the process environment (python-dotenv).
# Presumably this is where the OpenAI API key comes from — confirm your .env defines it.
load_dotenv()

True

In [4]:
# Module-level OpenAI client used by the chat_complete_* helpers defined below.
# No key is passed explicitly; presumably it is read from the environment
# (OPENAI_API_KEY) — confirm against the OpenAI SDK documentation.
client = OpenAI()

### Call to OpenAI's Models

In [5]:
def chat_complete_prompt(prompt, temperature=0.3, model="gpt-4o"):
    """Send one user prompt to an OpenAI chat model and return the reply text.

    The prompt is wrapped in a single ``{"role": "user", ...}`` message and
    submitted through the module-level ``client``.

    Args:
        prompt: Text used as the content of the single user message.
        temperature: Degree of randomness of the model's output. Defaults to
            0.3, the value that used to be hard-coded here.
        model: Name of the chat model to query. Defaults to "gpt-4o", the
            value that used to be hard-coded here.

    Returns:
        The ``message.content`` of the first choice in the API response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message.content

In [6]:
def chat_complete_messages(messages, temperature=0.2):
    """Run a chat completion over a full message list and return the reply text.

    The request goes to "gpt-4o" through the module-level ``client`` with
    ``response_format={"type": "json_object"}``.

    Args:
        messages: Passed unchanged as the ``messages`` argument of the request.
        temperature: Degree of randomness of the model's output; defaults to 0.2.

    Returns:
        The ``message.content`` of the first choice in the API response.
    """
    # NOTE(review): OpenAI's JSON mode is documented as requiring the messages
    # themselves to ask for JSON — confirm callers do so.
    request = dict(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=messages,
        temperature=temperature,
    )
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

### Doctor Data Generation (Example)

In [18]:
# Prompt asking the model for 20 synthetic doctors as plain JSON with the keys
# doctor_name, doctor_specialty, availability, location, accepting_new_patients.
# The text is spelled out line by line so the trailing spaces that are part of
# the prompt stay visible.
# NOTE(review): "practioners" is misspelled in the prompt; kept verbatim so the
# request sent to the model is unchanged.
prompt = (
    "\n"
    "Please generate 20 random doctor names, along with a specialty, availability, their location at the \n"
    "Family Medicine Building or Main Hospital and if they are accepting new patients. \n"
    "Please include at least 5 general practioners.\n"
    "The response must be in json format without any markdown formatting or extra text, \n"
    "and it should contain the following keys: doctor_name, doctor_specialty, availability, location, and accepting_new_patients.\n"
)

# Raw reply text from the model (a string, not parsed JSON).
response = chat_complete_prompt(prompt)

In [19]:
# Show the repr of the raw reply: it is a single string with embedded "\n" escapes.
response

'[\n    {\n        "doctor_name": "Dr. Emily Carter",\n        "doctor_specialty": "General Practitioner",\n        "availability": "Mon-Fri 9am-5pm",\n        "location": "Family Medicine Building",\n        "accepting_new_patients": true\n    },\n    {\n        "doctor_name": "Dr. Michael Thompson",\n        "doctor_specialty": "Pediatrician",\n        "availability": "Mon-Fri 8am-4pm",\n        "location": "Main Hospital",\n        "accepting_new_patients": true\n    },\n    {\n        "doctor_name": "Dr. Sarah Johnson",\n        "doctor_specialty": "General Practitioner",\n        "availability": "Tue-Thu 10am-6pm",\n        "location": "Family Medicine Building",\n        "accepting_new_patients": false\n    },\n    {\n        "doctor_name": "Dr. David Lee",\n        "doctor_specialty": "Cardiologist",\n        "availability": "Mon-Wed 9am-3pm",\n        "location": "Main Hospital",\n        "accepting_new_patients": true\n    },\n    {\n        "doctor_name": "Dr. Lisa Brown",\n 

In [20]:
# Print the reply so its newlines render and the JSON layout is readable.
print(response)

[
    {
        "doctor_name": "Dr. Emily Carter",
        "doctor_specialty": "General Practitioner",
        "availability": "Mon-Fri 9am-5pm",
        "location": "Family Medicine Building",
        "accepting_new_patients": true
    },
    {
        "doctor_name": "Dr. Michael Thompson",
        "doctor_specialty": "Pediatrician",
        "availability": "Mon-Fri 8am-4pm",
        "location": "Main Hospital",
        "accepting_new_patients": true
    },
    {
        "doctor_name": "Dr. Sarah Johnson",
        "doctor_specialty": "General Practitioner",
        "availability": "Tue-Thu 10am-6pm",
        "location": "Family Medicine Building",
        "accepting_new_patients": false
    },
    {
        "doctor_name": "Dr. David Lee",
        "doctor_specialty": "Cardiologist",
        "availability": "Mon-Wed 9am-3pm",
        "location": "Main Hospital",
        "accepting_new_patients": true
    },
    {
        "doctor_name": "Dr. Lisa Brown",
        "doctor_specialty": "Gene

In [21]:
# Keep the doctor reply under a descriptive name. This is still the raw JSON
# text returned by chat_complete_prompt (a str), not parsed Python objects.
doctor_data = response

### Patient Data Generation

In [11]:
# Prompt asking the model for 40 synthetic patients as plain JSON with the keys
# patient_name, date_of_birth, medical_record_number, symptoms, doctor_type_requested.
# NOTE(review): "experiecing" is misspelled in the prompt; kept verbatim so the
# request sent to the model is unchanged.
prompt_patient = (
    "\n"
    "Please generate 40 random patient names, along with a date of birth, randomized medical record number, and what symptoms they are experiecing and what type of doctor they would like to see.\n"
    "The response must be in json format without any markdown formatting or extra text, and it should contain the following keys: patient_name, date_of_birth, medical_record_number, symptoms, doctor_type_requested.\n"
)

# Raw reply text from the model (a string, not parsed JSON).
response_patient = chat_complete_prompt(prompt_patient)

In [12]:
# Print the reply so its newlines render and the JSON layout is readable.
print(response_patient)

[
    {
        "patient_name": "John Smith",
        "date_of_birth": "1985-06-15",
        "medical_record_number": "MRN123456",
        "symptoms": "Headache, nausea",
        "doctor_type_requested": "Neurologist"
    },
    {
        "patient_name": "Emily Johnson",
        "date_of_birth": "1990-03-22",
        "medical_record_number": "MRN123457",
        "symptoms": "Cough, fever",
        "doctor_type_requested": "Pulmonologist"
    },
    {
        "patient_name": "Michael Brown",
        "date_of_birth": "1978-12-30",
        "medical_record_number": "MRN123458",
        "symptoms": "Chest pain",
        "doctor_type_requested": "Cardiologist"
    },
    {
        "patient_name": "Jessica Davis",
        "date_of_birth": "1982-01-05",
        "medical_record_number": "MRN123459",
        "symptoms": "Joint pain, swelling",
        "doctor_type_requested": "Rheumatologist"
    },
    {
        "patient_name": "David Wilson",
        "date_of_birth": "1995-07-19",
        "med

In [13]:
# Keep the patient reply under a descriptive name. This is still the raw JSON
# text returned by chat_complete_prompt (a str), not parsed Python objects.
patient_data = response_patient

### Save Doctor and Patient Data as JSON

In [14]:
import json

In [15]:
# patient_data holds the model's reply as JSON *text*. Dumping that str directly
# would write one big escaped JSON string (and indent=4 would do nothing), so
# decode it first and write the actual records. json.loads raises
# json.JSONDecodeError if the reply is not valid JSON, which is preferable to
# silently saving an unusable file.
patient_records = json.loads(patient_data) if isinstance(patient_data, str) else patient_data
with open('patient_data.json', 'w', encoding='utf-8') as f:
    json.dump(patient_records, f, ensure_ascii=False, indent=4)

In [22]:
# doctor_data holds the model's reply as JSON *text*. Dumping that str directly
# would write one big escaped JSON string (and indent=4 would do nothing), so
# decode it first and write the actual records. json.loads raises
# json.JSONDecodeError if the reply is not valid JSON, which is preferable to
# silently saving an unusable file.
doctor_records = json.loads(doctor_data) if isinstance(doctor_data, str) else doctor_data
with open('doctor_data.json', 'w', encoding='utf-8') as f:
    json.dump(doctor_records, f, ensure_ascii=False, indent=4)