In [None]:
!pip install transformers datasets torch flask apscheduler

In [None]:
!pip install tf-keras

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
from datasets import Dataset

# Path to the MedQuAD dataset.
# This is a relative path, so it is resolved against the notebook's current
# working directory; parse_medquad() lists the sub-folders found under it.
medquad_path = "MedQuAD" 
def parse_medquad(medquad_path):
    """Collect question/answer pairs from a MedQuAD directory tree.

    Every sub-folder of ``medquad_path`` is scanned for ``*.xml`` files, and
    every ``QAPairs/QAPair`` element found in them contributes one row when
    both its ``Question`` and ``Answer`` contain non-blank text.

    Args:
        medquad_path: Directory whose immediate sub-folders hold the XML files.
            Plain files directly inside it (license, readme, ...) are ignored.

    Returns:
        pandas.DataFrame with the string columns ``question`` and ``answer``
        (whitespace-stripped). Rows are ordered by folder name, then file
        name, then document order.

    Raises:
        FileNotFoundError: If ``medquad_path`` does not exist.
    """

    def _clean_text(element):
        # Missing elements and elements without text both count as empty.
        if element is None or element.text is None:
            return ""
        return element.text.strip()

    questions = []
    answers = []

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order (and therefore any later seeded split) reproducible across runs.
    for folder in sorted(os.listdir(medquad_path)):
        folder_path = os.path.join(medquad_path, folder)
        if not os.path.isdir(folder_path):  # Ignore files like license.txt and readme.txt
            continue
        print(f"Processing folder: {folder}")  # Debugging

        for file in sorted(os.listdir(folder_path)):
            if not file.endswith(".xml"):
                continue
            file_path = os.path.join(folder_path, file)

            try:
                root = ET.parse(file_path).getroot()
            except ET.ParseError:
                # A single bad file must not abort the whole import.
                print(f"Skipping malformed XML file: {file_path}")  # Handle malformed XML
                continue

            for qa in root.findall(".//QAPairs/QAPair"):
                question = _clean_text(qa.find("Question"))
                answer = _clean_text(qa.find("Answer"))

                # Keep the pair only when both sides carry real text
                if question and answer:
                    questions.append(question)
                    answers.append(answer)

    # Create a DataFrame
    return pd.DataFrame({"question": questions, "answer": answers})

# Build the question/answer table from the MedQuAD files on disk
medquad_df = parse_medquad(medquad_path)

# Sanity check: show the first five rows, then how many pairs were kept
print(medquad_df.head(5))
print(f"Total QA pairs: {len(medquad_df)}")

In [None]:
# Convert DataFrame to Hugging Face Dataset
def convert_to_hf_dataset(df):
    """Turn a question/answer DataFrame into a prompt/completion ``Dataset``.

    Each row yields the prompt ``"Question: <question>\\nAnswer:"`` and the
    completion ``" <answer>"`` (leading space included).

    Args:
        df: DataFrame with ``question`` and ``answer`` columns.

    Returns:
        A ``Dataset`` with the columns ``prompt`` and ``completion``, in the
        same row order as ``df``.
    """
    prompt_texts = []
    completion_texts = []
    # A single pass over the rows fills both columns
    for _, record in df.iterrows():
        prompt_texts.append(f"Question: {record['question']}\nAnswer:")
        completion_texts.append(f" {record['answer']}")

    columns = {"prompt": prompt_texts, "completion": completion_texts}
    return Dataset.from_dict(columns)

hf_dataset = convert_to_hf_dataset(medquad_df)

# Split into training and validation sets (80% / 20%).
# The seed is fixed so the same rows land in each split on every
# Restart & Run All; without it the split is reshuffled on every run.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
val_dataset = train_test_split["test"]

# Preview a sample from the dataset
print("Dataset prepared:")
print(train_dataset[0])


In [None]:
!pip install transformers torch

In [None]:
!pip install --upgrade accelerate

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

# Load GPT-2 and tokenizer
# Both come from the "gpt2" checkpoint name passed to from_pretrained().
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# preprocess_data() pads with padding="max_length", which needs a pad token;
# the EOS token is reused for that purpose.
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to EOS

# Language-model-head variant of GPT-2; this is the object handed to Trainer below.
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Tokenize the data
def preprocess_data(batch, max_length=256):
    """Tokenize a batch of prompt/completion pairs for causal-LM fine-tuning.

    A causal language model is trained to predict token ``i + 1`` of the very
    sequence it is fed, so ``labels`` must be aligned with ``input_ids``.
    Each prompt and completion are therefore joined into ONE sequence
    (terminated by EOS) and the labels are a copy of that sequence.

    Args:
        batch: Mapping with equally long lists under ``"prompt"`` and
            ``"completion"`` (what ``Dataset.map(..., batched=True)`` passes).
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) with an added
        ``labels`` entry in which padding positions are set to -100 so the
        loss ignores them.
    """
    # The explicit EOS teaches the model where an answer ends. It is a real
    # token (attention mask 1), unlike the EOS tokens used for padding.
    texts = [
        prompt + completion + tokenizer.eos_token
        for prompt, completion in zip(batch["prompt"], batch["completion"])
    ]
    inputs = tokenizer(texts, truncation=True, max_length=max_length, padding="max_length")

    # pad_token == eos_token, so padding cannot be recognised by token id;
    # the attention mask is what tells padding apart from real tokens.
    inputs["labels"] = [
        [token_id if keep == 1 else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(inputs["input_ids"], inputs["attention_mask"])
    ]
    return inputs

# Run the same batched tokenization over both splits (train first, then validation)
tokenized_train, tokenized_val = (
    split.map(preprocess_data, batched=True)
    for split in (train_dataset, val_dataset)
)

# Define training arguments
# NOTE(review): the install cells do not pin a transformers version. As far as
# I recall, newer releases rename `evaluation_strategy` (to `eval_strategy`)
# and `no_cuda` (to `use_cpu`) — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",  # checkpoints are written here
    evaluation_strategy="epoch",  # evaluate once per epoch
    save_strategy="epoch",  # checkpoint once per epoch, matching the evaluation cadence
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    save_total_limit=2,  # presumably keeps only the two most recent checkpoints — TODO confirm
    push_to_hub=False,  # nothing is uploaded to the Hugging Face Hub
    no_cuda=True,  # opt out of CUDA, i.e. train on CPU
)


# Define the Trainer
# Ties together the GPT-2 model, the training arguments and the tokenized
# train/validation splits produced earlier in this cell.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
)

# Fine-tune the model
trainer.train()

# Save the model
# Model and tokenizer are written to the same directory, "./fine_tuned_gpt2",
# which is the path the Flask cell later loads both of them from.
model.save_pretrained("./fine_tuned_gpt2")
tokenizer.save_pretrained("./fine_tuned_gpt2")


In [None]:
!pip install twilio

In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from apscheduler.schedulers.background import BackgroundScheduler
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import sqlite3
import datetime
from pyngrok import ngrok
import logging
from twilio.rest import Client

# Initialize Flask app
app = Flask(__name__)
# CORS is enabled for every route ("/*") and every origin ("*").
# NOTE(review): that is wide open — confirm this is intended outside of a demo.
CORS(app, resources={r"/*": {"origins": "*"}})

# Initialize the scheduler
# The background scheduler is started as soon as this cell runs; set_reminder()
# registers its one-off jobs on it.
scheduler = BackgroundScheduler()
scheduler.start()
# Root logger at DEBUG level, so library debug output is shown as well.
logging.basicConfig(level=logging.DEBUG)

# Load the fine-tuned GPT-2 model
# Both are read from "./fine_tuned_gpt2", the directory the training cell saves to.
# These assignments rebind the notebook-level names `model` and `tokenizer`.
model = GPT2LMHeadModel.from_pretrained("./fine_tuned_gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("./fine_tuned_gpt2")

# SQLite database setup
# check_same_thread=False allows the connection to be used from threads other
# than the one that created it (the request handlers below share it).
conn = sqlite3.connect("healthcare.db", check_same_thread=False)
cursor = conn.cursor()
cursor.execute(
    "CREATE TABLE IF NOT EXISTS appointments (id INTEGER PRIMARY KEY, name TEXT, date TEXT, time TEXT)"
)
# phone_number is part of the schema because set_reminder() inserts into it;
# without the column every insert fails with sqlite3.OperationalError.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS reminders (id INTEGER PRIMARY KEY, name TEXT, medication TEXT, reminder_time TEXT, phone_number TEXT)"
)

# Databases created before phone_number existed keep their old layout
# (CREATE TABLE IF NOT EXISTS does not alter an existing table), so add the
# column when it is missing. The check makes this safe to re-run.
cursor.execute("PRAGMA table_info(reminders)")
reminder_columns = cursor.fetchall()
if "phone_number" not in {column[1] for column in reminder_columns}:
    cursor.execute("ALTER TABLE reminders ADD COLUMN phone_number TEXT")
    cursor.execute("PRAGMA table_info(reminders)")
    reminder_columns = cursor.fetchall()
print(reminder_columns)

conn.commit()

@app.route("/")
def home():
    """Landing route: reply with a plain-text greeting."""
    greeting = "Welcome to the Healthcare Chatbot App!"
    return greeting

@app.route("/general_info", methods=["POST"])
def general_info():
    """Answer a free-text healthcare question with the fine-tuned GPT-2 model.

    Expects a JSON object body with a non-blank string under ``"query"``.

    Returns:
        ``{"response": <text>}`` on success. The text is the decoded output
        sequence of ``model.generate``.
        ``{"error": ...}`` with HTTP 400 when the body is not a JSON object or
        the query is missing or blank.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so every malformed request takes the same 400 path below.
    payload = request.get_json(silent=True)
    query = payload.get("query", "") if isinstance(payload, dict) else ""
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Query parameter is required"}), 400

    # Prepare input for GPT-2
    input_text = f"Please provide a detailed response to the following healthcare question:\n\nQuestion: {query}\nAnswer:"
    encoded_input = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    # Generate response
    outputs = model.generate(
        input_ids=encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"],  # Pass attention mask
        # Budget for NEW tokens only. A total-length cap (max_length) also
        # counts the prompt, so a long question would leave no room to answer.
        max_new_tokens=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        do_sample=True,  # Enable sampling
        pad_token_id=tokenizer.eos_token_id,  # Explicitly set pad token
    )
    # NOTE(review): for a decoder-only model the output sequence presumably
    # starts with the prompt, so the prompt is echoed in the response — kept
    # as-is because clients may rely on it; confirm before stripping it.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Return response
    return jsonify({"response": response})


# Appointment booking
@app.route("/book_appointment", methods=["POST"])
def book_appointment():
    """Store a new appointment.

    Expects a JSON object with non-empty ``name``, ``date`` and ``time``.

    Returns:
        ``{"message": ...}`` confirming the booking, or ``{"error": ...}``
        with HTTP 400 when the body is not a JSON object or a field is missing.
    """
    # silent=True: a missing or invalid JSON body becomes None rather than an
    # exception, and is reported through the same JSON error shape used by
    # the other endpoints.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Report every missing field at once instead of failing with a KeyError
    # (which Flask would turn into an opaque HTTP 500).
    missing = [field for field in ("name", "date", "time") if not data.get(field)]
    if missing:
        return jsonify({"error": f"Missing required field(s): {', '.join(missing)}"}), 400

    name = data["name"]
    date = data["date"]
    time = data["time"]
    cursor.execute("INSERT INTO appointments (name, date, time) VALUES (?, ?, ?)", (name, date, time))
    conn.commit()
    return jsonify({"message": f"Appointment booked for {name} on {date} at {time}."})

# Appointments Viewing
@app.route("/search_appointments", methods=["GET"])
def search_appointments():
    """List stored appointments as JSON, optionally filtered.

    The query-string parameters ``name``, ``date`` and ``time`` each add an
    exact-match condition when present and non-empty. Without any of them,
    every appointment is returned.
    """
    # One (SQL fragment, supplied value) pair per supported filter, listed in
    # the order in which the placeholders have to be bound.
    candidate_filters = (
        (" AND name = ?", request.args.get("name")),
        (" AND date = ?", request.args.get("date")),
        (" AND time = ?", request.args.get("time")),
    )
    active_filters = [(clause, value) for clause, value in candidate_filters if value]

    # "WHERE 1=1" lets every active clause be appended with a leading AND
    sql = "SELECT * FROM appointments WHERE 1=1" + "".join(
        clause for clause, _ in active_filters
    )
    bound_values = tuple(value for _, value in active_filters)

    cursor.execute(sql, bound_values)

    # Shape each row as a JSON-friendly dict
    matches = []
    for row in cursor.fetchall():
        matches.append({"id": row[0], "name": row[1], "date": row[2], "time": row[3]})
    return jsonify(matches)


@app.route("/set_reminder", methods=["POST"])
def set_reminder():
    """Store a medication reminder and schedule its SMS.

    Expects a JSON object with non-empty ``name``, ``medication``,
    ``reminder_time`` (``YYYY-MM-DD HH:MM:SS``, not in the past) and
    ``phone_number``.

    Returns:
        ``{"message": ...}`` on success; ``{"error": ...}`` with HTTP 400 for
        invalid input, or HTTP 500 when the job could not be scheduled.
    """
    # silent=True: a missing or invalid JSON body becomes None rather than an
    # exception, and is reported with the same JSON error shape as below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Report missing fields as a 400 instead of failing with a KeyError
    # (which Flask would turn into an opaque HTTP 500).
    required_fields = ("name", "medication", "reminder_time", "phone_number")
    missing = [field for field in required_fields if not data.get(field)]
    if missing:
        return jsonify({"error": f"Missing required field(s): {', '.join(missing)}"}), 400

    name = data["name"]
    medication = data["medication"]
    reminder_time = data["reminder_time"]
    phone_number = data["phone_number"]

    # Validate reminder_time
    try:
        reminder_time_obj = datetime.datetime.strptime(reminder_time, "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError covers a non-string value such as a JSON number.
        return jsonify({"error": "Invalid reminder time format. Use YYYY-MM-DD HH:MM:SS"}), 400
    if reminder_time_obj < datetime.datetime.now():
        return jsonify({"error": "Reminder time cannot be in the past"}), 400

    # Save to database
    cursor.execute(
        "INSERT INTO reminders (name, medication, reminder_time, phone_number) VALUES (?, ?, ?, ?)",
        (name, medication, reminder_time, phone_number),
    )
    conn.commit()

    # Schedule the reminder as a one-off job at the requested time
    try:
        scheduler.add_job(
            func=send_sms_reminder,
            trigger="date",
            run_date=reminder_time_obj,
            args=[name, medication, phone_number],  # Pass the phone number
        )
    except Exception as e:
        return jsonify({"error": f"Failed to schedule the reminder: {str(e)}"}), 500

    return jsonify({"message": f"Reminder set for {name} to take {medication} at {reminder_time}."})

# Reminders Viewing
@app.route("/view_reminders", methods=["GET"])
def view_reminders():
    """Return every stored reminder as a JSON list.

    Each entry carries ``id``, ``name``, ``medication`` and ``reminder_time``,
    taken from the first four columns of the ``reminders`` table.
    """
    cursor.execute("SELECT * FROM reminders")

    # Build JSON-serialisable dicts row by row
    payload = []
    for row in cursor.fetchall():
        payload.append(
            {"id": row[0], "name": row[1], "medication": row[2], "reminder_time": row[3]}
        )
    return jsonify(payload)


# Function to send SMS reminder
def send_sms_reminder(name, medication, phone_number):
    """Send a medication reminder by SMS through Twilio.

    Credentials are read from the ``TWILIO_ACCOUNT_SID`` and
    ``TWILIO_AUTH_TOKEN`` environment variables. Failures are printed rather
    than raised, so a failing send does not propagate to the caller.

    Args:
        name: Recipient's name, used in the message text.
        medication: Medication name, used in the message text.
        phone_number: Destination number the SMS is sent to.
    """
    # Imported here so the function does not depend on `os` having been
    # imported by an earlier notebook cell.
    import os

    # Twilio credentials
    account_sid = os.getenv("TWILIO_ACCOUNT_SID")
    auth_token = os.getenv("TWILIO_AUTH_TOKEN")
    if not account_sid or not auth_token:
        print("Failed to send SMS: TWILIO_ACCOUNT_SID / TWILIO_AUTH_TOKEN are not set")
        return

    # Message content
    message_body = f"Reminder: {name}, it's time to take your medication: {medication}."

    # Send the SMS
    try:
        # Built inside the try block so a client-construction error is
        # reported the same way as a failed send.
        client = Client(account_sid, auth_token)
        message = client.messages.create(
            body=message_body,
            from_="+18449174827",
            to=phone_number,  # deliver to the number stored with the reminder
        )
        print(f"Medication remainder sent to {phone_number}, SID: {message.sid}")
    except Exception as e:
        print(f"Failed to send SMS: {e}")

    
    
# Entry point: open an ngrok tunnel to the local port, then start Flask on it.
if __name__ == "__main__":
    # Tunnel to local port 5001; the printed value is the public address
    public_url = ngrok.connect(5001)  
    print(f" * Ngrok Tunnel: {public_url}") 
    # Listen on all interfaces, on the same port the tunnel points at
    app.run(host="0.0.0.0", port=5001)
