In [1]:
pip install pymupdf


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import required libraries
import fitz  # PyMuPDF
import json

# Function to extract text from PDF and save as JSON
def pdf_to_json(pdf_path, json_path):
    """Extract the text of every page of a PDF and save it as a JSON file.

    The JSON document has the shape
    ``{"pages": [{"page_number": 1, "content": "..."}, ...]}`` with page
    numbers starting at 1.

    Args:
        pdf_path: Path of the PDF to read (passed to ``fitz.open``).
        json_path: Path of the JSON file to write; it is opened in ``'w'``
            mode, so an existing file is overwritten.
    """
    # The context manager closes the document even if text extraction fails
    # part-way through.
    with fitz.open(pdf_path) as doc:
        pdf_data = {
            "pages": [
                {"page_number": page_number, "content": page.get_text()}
                for page_number, page in enumerate(doc, start=1)
            ]
        }

    # ensure_ascii=False keeps non-ASCII characters as-is in the output, which
    # is why the file is written explicitly as UTF-8.
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(pdf_data, json_file, ensure_ascii=False, indent=4)

    print(f"PDF content has been saved to {json_path} as JSON.")

# Example usage: convert the BABOK guide PDF to JSON.
# The input is a machine-specific absolute path; the output path is relative.
pdf_to_json(
    pdf_path="/Users/vaishakhishah/Desktop/BABOK_Guide_v3_Member.pdf",
    json_path="BABOK_Guide_v3_Member.json",
)

PDF content has been saved to BABOK_Guide_v3_Member.json as JSON.


In [None]:
from dotenv import load_dotenv
import os
import openai
import fitz  # PyMuPDF for PDF text extraction
import json
from flask import Flask, request, jsonify, render_template_string

# Load environment variables (the error message below points at a .env file as their source)
load_dotenv()
# Hand the key to the openai module; os.getenv returns None when the variable is not set
openai.api_key = os.getenv("OPENAI_API_KEY")

# Stop right away when the key is missing or empty, before any route can call the API
if not openai.api_key:
    raise SystemExit("Error: OPENAI_API_KEY is not set. Please check your .env file.")

# Initialize Flask app; the routes defined further down are registered on this object
app = Flask(__name__)

# Function to load BABOK principles from a JSON file
def load_babok_principles(file_path='BABOK_Guide_v3_Member.json'):
    """Load the BABOK content from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON document, exactly as ``json.load`` returns it. For a
        file written by ``pdf_to_json`` this is a dict of the form
        ``{"pages": [{"page_number": ..., "content": ...}, ...]}``.
    """
    # pdf_to_json writes the file as UTF-8 with ensure_ascii=False, so it has
    # to be read back as UTF-8 rather than with the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Function to extract text from the case study PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of all pages of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF to read (passed to ``fitz.open``).

    Returns:
        One string holding every page's text; no separator is added between
        pages.
    """
    # The context manager closes the document even if extraction fails, and
    # join() avoids rebuilding the growing string once per page.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# Load case study and BABOK principles once when the server starts.
# The locations can be overridden with the CASE_STUDY_PDF and BABOK_JSON
# environment variables; unset or empty values fall back to the original paths.
CASE_STUDY_PDF = os.getenv("CASE_STUDY_PDF") or "/Users/vaishakhishah/Desktop/case_study.pdf"
BABOK_JSON = os.getenv("BABOK_JSON") or "BABOK_Guide_v3_Member.json"

case_study_text = extract_text_from_pdf(CASE_STUDY_PDF)
babok_principles = load_babok_principles(BABOK_JSON)

# Function to format the prompt
def format_babok_prompt(question, case_study, babok_principles, max_principles_chars=8000):
    """Build the prompt that is sent to the chat model.

    Args:
        question: The user's question.
        case_study: Case study text; it is inserted into the prompt verbatim.
        babok_principles: Either an iterable of principles (one bullet each),
            or the dict written by ``pdf_to_json``
            (``{"pages": [{"content": ...}, ...]}``), in which case every
            non-empty page text becomes one bullet.
        max_principles_chars: Upper bound on the total number of characters of
            principle text placed in the prompt; text beyond it is cut off.
            ``None`` disables the limit.

    Returns:
        The complete prompt string, ending in ``"Answer:"``.
    """
    if isinstance(babok_principles, dict) and "pages" in babok_principles:
        # Iterating this dict directly would only yield the key "pages", so
        # the bullets are taken from the page texts instead.
        entries = []
        for page in babok_principles["pages"]:
            text = page.get("content", "") if isinstance(page, dict) else page
            text = str(text).strip()
            if text:
                entries.append(text)
    else:
        entries = [str(principle) for principle in (babok_principles or [])]

    # Keep the principles section bounded: a complete guide can easily be
    # larger than what fits into a single model request.
    bullets = []
    remaining = max_principles_chars
    for entry in entries:
        if remaining is not None:
            if remaining <= 0:
                break
            entry = entry[:remaining]
            remaining -= len(entry)
        bullets.append(f"- {entry}\n")

    return (
        "You are a BABOK-certified business analyst. Use the following case study and BABOK principles "
        "to answer questions based on business analysis best practices.\n\n"
        "Case Study:\n" + case_study + "\n\n"
        "BABOK Principles:\n"
        + "".join(bullets)
        + "\nAnswer the following question as a BABOK-certified business analyst:\n\n"
        + f"Question: {question}\n\nAnswer:"
    )

# Function to generate an answer
def generate_answer(question):
    """Ask the chat model to answer ``question`` and return its reply text.

    The prompt is built by ``format_babok_prompt`` from the question plus the
    module-level ``case_study_text`` and ``babok_principles``.

    Args:
        question: The user's question.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    chat_messages = [
        {
            "role": "user",
            "content": format_babok_prompt(question, case_study_text, babok_principles),
        }
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        max_tokens=300,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()

# Enhanced HTML template for the chatbot page
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>BABOK Chatbot</title>
    <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f4f4f9;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
        }
        #chatbox {
            width: 100%;
            max-width: 500px;
            background-color: #ffffff;
            border-radius: 10px;
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
            overflow: hidden;
        }
        #header {
            background-color: #007bff;
            color: white;
            padding: 10px;
            text-align: center;
            font-size: 1.5em;
        }
        #messages {
            padding: 10px;
            height: 400px;
            overflow-y: auto;
            background-color: #f9f9f9;
            border-bottom: 1px solid #ddd;
        }
        .message {
            margin: 10px 0;
            display: flex;
            align-items: flex-start;
        }
        .message.bot {
            color: #007bff;
            justify-content: flex-start;
        }
        .message.user {
            color: #28a745;
            justify-content: flex-end;
        }
        .message span {
            padding: 10px;
            border-radius: 10px;
            max-width: 80%;
        }
        .bot span {
            background-color: #e3f2fd;
            text-align: left;
        }
        .user span {
            background-color: #d4edda;
            text-align: right;
        }
        #input-container {
            display: flex;
            padding: 10px;
            background-color: #ffffff;
        }
        #question {
            flex-grow: 1;
            padding: 10px;
            font-size: 1em;
            border: 1px solid #ddd;
            border-radius: 5px;
            margin-right: 10px;
        }
        #send {
            padding: 10px 20px;
            font-size: 1em;
            background-color: #007bff;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s;
        }
        #send:hover {
            background-color: #0056b3;
        }
        #footer {
            text-align: center;
            padding: 5px;
            font-size: 0.9em;
            color: #666;
        }
    </style>
</head>
<body>

<div id="chatbox">
    <div id="header">
        BABOK Chatbot
    </div>
    <div id="messages">
        <div class="message bot"><span>Welcome to the BABOK Chatbot! How may I assist you today?</span></div>
    </div>
    <div id="input-container">
        <input type="text" id="question" placeholder="Type your question here...">
        <button id="send">Send</button>
    </div>
    <div id="footer">
        Powered by AI | BABOK Framework
    </div>
</div>

<script>
    // Append one chat bubble to the message list and scroll it into view.
    // The text is set through textContent, so neither the user's input nor the
    // model's answer is ever parsed as HTML (prevents markup/script injection).
    function appendMessage(role, text) {
        const messagesDiv = document.getElementById('messages');
        const row = document.createElement('div');
        row.className = 'message ' + role;
        const bubble = document.createElement('span');
        bubble.textContent = text;
        row.appendChild(bubble);
        messagesDiv.appendChild(row);
        messagesDiv.scrollTop = messagesDiv.scrollHeight;
    }

    // Send the typed question to /ask and show both the question and the reply.
    document.getElementById('send').onclick = async function() {
        const input = document.getElementById('question');
        const question = input.value.trim();
        if (!question) return;

        // Display user message
        appendMessage('user', question);
        input.value = '';

        // Send question to the backend
        try {
            const response = await fetch('/ask', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({ question })
            });

            // A body that is not JSON makes json() throw and lands in the catch branch.
            const data = await response.json();
            const answer = data.answer || "Sorry, I couldn't understand your question.";

            // Display bot answer
            appendMessage('bot', answer);
        } catch (error) {
            appendMessage('bot', 'Oops! Something went wrong. Please try again.');
        }
    };
</script>

</body>
</html>
"""

# Route to serve the chatbot page
@app.route('/')
def index():
    """Serve the chatbot page by rendering ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page

# API endpoint for the chatbot
@app.route('/ask', methods=['POST'])
def ask_bot():
    """Answer the question posted as JSON (``{"question": "..."}``).

    Returns:
        ``{"answer": ...}`` on success, or ``{"error": ...}`` with status 400
        when the body is not a JSON object or holds no usable question.
    """
    # silent=True yields None for a missing, malformed or non-JSON body, so
    # such requests get the JSON 400 below instead of an HTML error page.
    data = request.get_json(silent=True)
    # A valid JSON body may still be a list, string, number or null.
    question = data.get("question") if isinstance(data, dict) else None
    if isinstance(question, str):
        question = question.strip()
    if not question:
        return jsonify({"error": "No question provided"}), 400
    answer = generate_answer(question)
    return jsonify({"answer": answer})

# Run the app on all interfaces to make it accessible publicly
if __name__ == '__main__':
    # host 0.0.0.0 makes the server listen on every network interface, not only localhost
    app.run(port=5001, host='0.0.0.0')
    
# To generate a link that is accessible from any device, run the following command from a terminal:
# ngrok http 5001


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5001
 * Running on http://192.168.1.107:5001
Press CTRL+C to quit
192.168.1.107 - - [19/Nov/2024 12:18:22] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [19/Nov/2024 12:28:28] "POST /ask HTTP/1.1" 200 -
192.168.1.107 - - [19/Nov/2024 12:29:55] "POST /ask HTTP/1.1" 200 -
192.168.1.107 - - [19/Nov/2024 12:34:15] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [19/Nov/2024 14:27:53] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [19/Nov/2024 14:27:59] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 12:03:38] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 12:53:26] "POST /ask HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 12:58:19] "POST /ask HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 13:02:56] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [21/Nov/2024 13:07:04] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 13:34:44] "GET / HTTP/1.1" 200 -
192.168.1.107 - - [21/Nov/2024 16:01:32] "POST /ask HTTP/1.1" 200 -
192.168.1.107 - -