In [None]:
# Install required packages with the correct OpenAI version
!pip install openai==0.28
!pip install PyMuPDF sentence-transformers nltk pandas

# ------------------------------------------------------
# Hybrid Script for MedBot Syllabus Processing & Study Plan Generation
# Features:
#   - Uses Sentence Transformers for semantic filtering ("DeepSeek-like" approach)
#   - Uses OpenAI's GPT-3.5-turbo (pinned version) for extraction
#   - Detects if the uploaded PDF is likely a syllabus
#   - Allows manual editing of the extracted events
#   - Extracts academic topics exactly as listed in the syllabus and schedules weekly revision tasks
#   - Schedules assignment preparation for a full week before due dates
# ------------------------------------------------------

import json
import os
import re
from datetime import datetime, timedelta
from getpass import getpass

import fitz  # for PDF processing
import nltk
import openai
import pandas as pd
import torch
from google.colab import files
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer, util

# Download the NLTK tokenizer resources used by sent_tokenize later in the notebook.
nltk.download('punkt')
nltk.download('punkt_tab')

# SECURITY: never store an API key in the notebook source. The key is read from
# the OPENAI_API_KEY environment variable, falling back to a hidden prompt.
# Any key that was previously committed here must be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")
if not openai.api_key:
    raise Exception("No OpenAI API key provided!")

# ✅ Upload PDF manually in Colab
uploaded = files.upload()
if not uploaded:
    raise Exception("No file uploaded!")
# files.upload() returns a mapping keyed by file name; only the first file is used.
pdf_path = list(uploaded.keys())[0]

# --- Step 1: Extract the full text from the PDF ---
def extract_pdf_text(pdf_path):
    """Return the plain text of every page of a PDF, joined with single spaces.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or "" when the document cannot be opened.
        Pages whose text extraction fails are reported and skipped.
    """
    try:
        doc = fitz.open(pdf_path)
    except Exception as e:
        print("❌ Failed to open PDF:", e)
        return ""
    raw_text = []
    try:
        for page in doc:
            try:
                raw_text.append(page.get_text("text"))
            except Exception as e:
                print("❌ Failed to extract text from a page:", e)
    finally:
        # Release the underlying file handle even if page iteration fails.
        doc.close()
    return " ".join(raw_text)

# Run the extraction on the uploaded file and stop the notebook when no text came
# back (extract_pdf_text also returns "" when the PDF could not be opened).
full_text = extract_pdf_text(pdf_path)
if not full_text.strip():
    raise Exception("No text extracted from PDF.")

# --- Syllabus Detection ---
def is_likely_syllabus(text):
    """Heuristically decide whether ``text`` reads like a course syllabus.

    The text is lower-cased and searched (plain substring search) for a fixed
    list of syllabus-related keywords.

    Returns:
        True when at least two distinct keywords occur, otherwise False.
    """
    haystack = text.lower()
    markers = (
        "syllabus", "course", "instructor", "schedule", "assignment",
        "exam", "reading week", "credits", "topic",
    )
    hits = [marker for marker in markers if marker in haystack]
    return len(hits) >= 2

# The keyword check is advisory only: on a miss the user is asked to confirm, and
# any answer other than "y" (case-insensitive, surrounding whitespace ignored) aborts.
if not is_likely_syllabus(full_text):
    print("⚠️ Warning: This document does not appear to be a typical syllabus. Proceed? (y/n)")
    if input().strip().lower() != "y":
        raise Exception("User aborted. Please upload a valid syllabus PDF.")

# --- Step 2: Semantic Filtering using Sentence Transformers ---
def filter_text_semantically(text, top_k=10):
    """Keep only the sentences most similar to a fixed "important dates" query.

    The text is split into sentences, the query and every sentence are embedded
    with the 'all-MiniLM-L6-v2' model, and the best-scoring sentences by cosine
    similarity are kept.

    Args:
        text: Raw document text.
        top_k: Maximum number of sentences to keep.

    Returns:
        The selected sentences joined with single spaces, in the order
        torch.topk returns them. ``text`` is returned unchanged when it
        contains no sentences.
    """
    candidates = sent_tokenize(text)
    if len(candidates) == 0:
        return text

    # The model is instantiated on every call.
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    query_vec = encoder.encode("important dates event lecture assignment exam", convert_to_tensor=True)
    candidate_vecs = encoder.encode(candidates, convert_to_tensor=True)

    # Row 0 holds the similarity of the single query against every sentence.
    similarities = util.cos_sim(query_vec, candidate_vecs)[0]
    keep = min(top_k, len(candidates))
    _, best_indices = torch.topk(similarities, k=keep)

    picked = []
    for position in best_indices.cpu().numpy():
        picked.append(candidates[position])
    return " ".join(picked)

# Reduce the syllabus to at most 10 of its most date/event-related sentences and
# preview the result; only this filtered text is embedded in the events prompt.
filtered_text = filter_text_semantically(full_text, top_k=10)
print("Filtered text (first 500 characters):")
print(filtered_text[:500])

# --- Utility: Clean raw GPT response (remove markdown code fences) ---
def clean_response(raw_response):
    """Strip a markdown code fence from a model reply, leaving only its payload.

    The first line that starts with ``` is treated as the opening fence (a
    language tag such as ```json on that line is dropped with it). The payload
    runs up to the next line that starts with ```, or to the end of the reply
    when the fence is never closed. Any prose before the opening fence or after
    the closing fence is discarded, so replies like "Here is the JSON:" followed
    by a fenced block still parse.

    Args:
        raw_response: Raw text returned by the model.

    Returns:
        The fenced payload with surrounding whitespace removed, or the
        whitespace-stripped reply when it contains no fence line.
    """
    cleaned = raw_response.strip()
    lines = cleaned.splitlines()

    def is_fence(line):
        return line.lstrip().startswith("```")

    opening = next((idx for idx, line in enumerate(lines) if is_fence(line)), None)
    if opening is None:
        return cleaned

    payload = lines[opening + 1:]
    closing = next((idx for idx, line in enumerate(payload) if is_fence(line)), None)
    if closing is not None:
        payload = payload[:closing]
    return "\n".join(payload).strip()

# --- Step 3: Prepare a prompt for GPT-3.5-turbo to extract structured events ---
# Prompt asking the model for the course name plus a list of dated, typed events
# as JSON. Doubled braces are literal braces inside the f-string; only
# {filtered_text} is interpolated.
# NOTE(review): the prompt does not say what to emit for undated items, while
# generate_study_plan special-cases the literal date "TBA" — confirm the intended contract.
events_prompt = f"""
Extract all important dates and their corresponding event types from the following text.
Return structured JSON in the following format exactly:
{{
    "course_name": "Course Name",
    "events": [
        {{"date": "YYYY-MM-DD", "name": "Event Name", "type": "event_type"}}
    ]
}}
The event types should be one of: lecture, assignment, exam, lab, general.
Text:
{filtered_text}
"""
print("\nEvents Prompt (first 500 characters):")
print(events_prompt[:500])

def extract_events_with_gpt(prompt):
    """Send ``prompt`` to gpt-3.5-turbo and parse the reply as an events dict.

    The raw reply is printed, passed through clean_response and decoded as JSON.

    Args:
        prompt: Full user prompt, including the syllabus text.

    Returns:
        The decoded dict when it has an "events" key holding a list; otherwise
        {}. Any exception (API error, invalid JSON, unexpected shape) is
        printed and also yields {}.
    """
    chat_messages = [
        {"role": "system", "content": "You are an assistant that extracts structured data from academic syllabi."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            temperature=0.3,
        )
        reply = completion["choices"][0]["message"]["content"].strip()
        print("\nRaw GPT response for events:\n", reply)
        parsed = json.loads(clean_response(reply))
        has_event_list = "events" in parsed and isinstance(parsed["events"], list)
        if has_event_list:
            return parsed
        print("❌ No valid 'events' detected in extracted data!")
        return {}
    except Exception as e:
        print("❌ Error during GPT extraction of events:", e)
        return {}

extracted_events = extract_events_with_gpt(events_prompt)

# --- Step 4: Offer a manual override of the extracted events JSON ---
print("\nExtracted Events JSON:")
print(json.dumps(extracted_events, indent=4))
if input("\nWould you like to manually edit the extracted events? (y/n): ").strip().lower() == "y":
    print("Please paste your revised JSON below (end with an empty line):")
    # Collect pasted lines until the first blank (or whitespace-only) line.
    lines = []
    line = input()
    while line.strip() != "":
        lines.append(line)
        line = input()
    try:
        extracted_events = json.loads("\n".join(lines))
    except Exception:
        # A failed parse leaves extracted_events bound to the model's output.
        print("❌ Error parsing manual JSON. Using original extracted data.")

# --- Step 4.5: Extract Topics from the Syllabus using GPT ---
def extract_topics(text):
    """Ask gpt-3.5-turbo for the list of academic topics named in a syllabus.

    The raw reply is printed, passed through clean_response and decoded as JSON.

    Args:
        text: Syllabus text embedded verbatim at the end of the prompt.

    Returns:
        The list stored under the reply's "topics" key; [] when that key is
        missing, is not a list, or any exception occurs (the error is printed).
    """
    request_text = f"""
Extract the list of academic topics covered in the following syllabus text.
These topics are usually listed under a header like "Topic Overview" or similar.
Return structured JSON in the following format:
{{
    "topics": [
        "Topic 1",
        "Topic 2",
        "Topic 3"
    ]
}}
Preserve the exact wording as found in the syllabus.
Text:
{text}
    """
    chat_messages = [
        {"role": "system", "content": "You are an assistant that extracts academic topics from course syllabi."},
        {"role": "user", "content": request_text},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            temperature=0.3,
        )
        reply = completion["choices"][0]["message"]["content"].strip()
        print("\nRaw GPT response for topics:\n", reply)
        parsed = json.loads(clean_response(reply))
        has_topic_list = "topics" in parsed and isinstance(parsed["topics"], list)
        if has_topic_list:
            return parsed["topics"]
        print("❌ No valid topics detected!")
        return []
    except Exception as e:
        print("❌ Error during topics extraction:", e)
        return []

# Topics are extracted from the complete PDF text (full_text), not from the
# semantically filtered text used for the events prompt.
topics_list = extract_topics(full_text)
print("\nExtracted Topics:")
print(topics_list)

# --- Step 5: Determine the Course Start Date ---
def get_course_start_date(extracted_data):
    """Find the course start date among the extracted events.

    The start date is the date of the first event whose name contains
    "first day of classes" (case-insensitive) and whose date parses as
    YYYY-MM-DD. Events with a missing or null name/date, or an unparsable
    date, are skipped rather than raising.

    Args:
        extracted_data: Dict with an optional "events" list of event dicts.

    Returns:
        A datetime for the start date, or None when no event qualifies.
    """
    events = extracted_data.get("events") if isinstance(extracted_data, dict) else None
    for event in events or []:
        if not isinstance(event, dict):
            continue
        # The model may emit null for any field, so never call str methods on raw values.
        name = str(event.get("name") or "")
        if "first day of classes" not in name.lower():
            continue
        date_str = str(event.get("date") or "").strip()
        try:
            return datetime.strptime(date_str, "%Y-%m-%d")
        except ValueError:
            continue
    return None

# Resolve the course start date: prefer the extracted events, then ask the user,
# and finally fall back to the current date/time when the typed value does not
# parse as YYYY-MM-DD.
course_start = get_course_start_date(extracted_events)
if not course_start:
    print("❌ Could not determine course start date from events.")
    user_date = input("Please enter the course start date (YYYY-MM-DD): ").strip()
    try:
        course_start = datetime.strptime(user_date, "%Y-%m-%d")
    except Exception:
        print("❌ Invalid date entered. Using today's date as fallback.")
        course_start = datetime.today()

# --- Step 6: Generate a study plan based on the extracted events ---
def generate_study_plan(extracted_data):
    """Expand extracted syllabus events into a flat list of dated study tasks.

    Each plan entry is a dict with "date", "task" and "category" keys. Per
    event type (matched case-insensitively; a missing type counts as general):

    * lecture    - "Prepare for" two days before and "Review" two days after.
    * assignment - "Work on" entries for the due date and the six days before
                   it, then the event itself as a "major event".
    * exam       - "Revise for" entries over the same seven-day window, then
                   the event itself as a "major event".
    * lab        - one "Prepare for" entry the day before, then the event.
    * other      - one "Review details for" entry on the day, then the event.

    Events dated "TBA" (any case) become a single "major event" placeholder
    whose date is the original text. Events whose date is missing or not in
    YYYY-MM-DD form are reported and skipped. Missing or null "name"/"type"
    fields fall back to defaults instead of raising.

    Args:
        extracted_data: Dict with optional "course_name" and "events" keys.

    Returns:
        List of plan entries in event order; [] when extracted_data is empty.
    """
    if not extracted_data:
        print("❌ No structured data extracted. Aborting study plan generation!")
        return []

    course_name = extracted_data.get('course_name') or 'Unknown Course'
    plan = []

    def add(date_text, task, category):
        plan.append({"date": date_text, "task": task, "category": category})

    def shifted(base, days):
        return (base + timedelta(days=days)).strftime("%Y-%m-%d")

    for event in extracted_data.get("events") or []:
        if not isinstance(event, dict):
            continue
        # The model may omit or null out any field, so read everything defensively.
        event_name = str(event.get("name") or "Unnamed Event")
        event_type = str(event.get("type") or "general").strip().lower()
        event_date_str = str(event.get("date") or "").strip()
        try:
            event_date = None if event_date_str.upper() == "TBA" else datetime.strptime(event_date_str, "%Y-%m-%d")
        except ValueError as e:
            print(f"❌ Invalid date format for event '{event_name}', skipping event. Error: {e}")
            continue

        task_name = f"{event_name} for {course_name}"

        if event_date is None:
            # Undated events are kept as placeholders carrying the original "TBA" text.
            if event_type == "lecture":
                add(event_date_str, f"{event_name} (TBA) for {course_name}", "major event")
            else:
                add(event_date_str, event_name, "major event")
            continue

        if event_type == "lecture":
            add(shifted(event_date, -2), f"Prepare for {task_name}", "study")
            add(shifted(event_date, 2), f"Review {task_name}", "review")
            continue

        if event_type == "assignment":
            # Seven entries: the due date itself plus the six days before it.
            for days_before in range(7):
                add(shifted(event_date, -days_before), f"Work on {task_name}", "assignment")
        elif event_type == "lab":
            add(shifted(event_date, -1), f"Prepare for {task_name}", "lab prep")
        elif event_type == "exam":
            for days_before in range(7):
                add(shifted(event_date, -days_before), f"Revise for {task_name}", "exam prep")
        else:
            add(shifted(event_date, 0), f"Review details for {task_name}", "general")

        # Every dated non-lecture event also appears on its own day.
        add(shifted(event_date, 0), event_name, "major event")
    return plan

# Event-derived tasks only; topic revision tasks are merged in by the next step.
study_plan_events = generate_study_plan(extracted_events)

# --- Step 7: Schedule Weekly Topic Revision Tasks ---
def schedule_topic_revisions(plan, topics, start_date):
    """Append one weekly "topic revision" task per topic to a study plan.

    Topic number k (1-based) is scheduled k-1 weeks after ``start_date``.

    Args:
        plan: Existing list of plan entries; it is not modified.
        topics: Topic names in syllabus order.
        start_date: datetime of the first revision.

    Returns:
        A new list holding the entries of ``plan`` followed by the revision tasks.
    """
    revisions = [
        {
            "date": (start_date + timedelta(weeks=week)).strftime("%Y-%m-%d"),
            "task": f"Revise Topic {week + 1}: {topic}",
            "category": "topic revision",
        }
        for week, topic in enumerate(topics)
    ]
    return plan + revisions

# Without topics the plan is just the event tasks; otherwise weekly topic
# revisions starting at course_start are appended to them.
if not topics_list:
    final_plan = study_plan_events
    print("No topics extracted; skipping topic revision scheduling.")
else:
    final_plan = schedule_topic_revisions(study_plan_events, topics_list, course_start)

# --- Step 8: Final Sorting ---
def sort_key(x):
    """Return the sort key for a plan entry.

    Entries dated "TBA" (any case) map to "9999-12-31" so they sort after every
    real YYYY-MM-DD date; any other entry sorts by its date string.
    """
    date_text = x["date"]
    return "9999-12-31" if date_text.upper() == "TBA" else date_text

# Order the plan chronologically (TBA entries last) and show it as an interactive table.
final_plan = sorted(final_plan, key=sort_key)

if not final_plan:
    print("❌ No study plan generated!")
else:
    df = pd.DataFrame(final_plan)
    print("\n✅ Generated Study Plan:")
    from google.colab.data_table import DataTable
    display(DataTable(df))







Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.59.9
    Uninstalling openai-1.59.9:
      Successfully uninstalled openai-1.59.9
Successfully installed openai-0.28.0
Collecting PyMuPDF
  Downloading pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Saving COSC3P32OutlineWinter2025 (1).pdf to COSC3P32OutlineWinter2025 (1).pdf


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Filtered text (first 500 characters):
Important dates for Winter 2025 (D3) 
The most recent listing of Important Dates for all durations is at https://brocku.ca/important-
dates/all/ 
First day of classes: 6th January  
Last day of lectures: 4th April (note: final lecture for this course is 31st March)  
Reading Week: 17th – 21st February 
Last Day of Exams: 24th April 
Deadline for withdrawal without academic penalty: 6th March. Course Calendar Description 
Fundamental database concepts: specification, design and applications; vari

Events Prompt (first 500 characters):

Extract all important dates and their corresponding event types from the following text.
Return structured JSON in the following format exactly:
{
    "course_name": "Course Name",
    "events": [
        {"date": "YYYY-MM-DD", "name": "Event Name", "type": "event_type"}
    ]
}
The event types should be one of: lecture, assignment, exam, lab, general.
Text:
Important dates for Winter 2025 (D3) 
The most recent listin

Unnamed: 0,date,task,category
0,2025-01-04,Prepare for First day of classes for Introduct...,study
1,2025-01-06,Revise Topic 1: Introduction to Database Systems,topic revision
2,2025-01-08,Review First day of classes for Introduction t...,review
3,2025-01-13,Revise Topic 2: The Entity-Relationship (ER) M...,topic revision
4,2025-01-20,Revise Topic 3: Introduction to the Relational...,topic revision
...,...,...,...
73,2025-04-21,Revise for Last Day of Exams for Introduction ...,exam prep
74,2025-04-22,Revise for Last Day of Exams for Introduction ...,exam prep
75,2025-04-23,Revise for Last Day of Exams for Introduction ...,exam prep
76,2025-04-24,Revise for Last Day of Exams for Introduction ...,exam prep


In [None]:
# Debug aid: dumps the entire plan list as plain text.
print(final_plan)  # Check if the study plan is generated properly


[{'date': '2025-01-04', 'task': 'Prepare for First day of classes for Introduction to Database Systems', 'category': 'study'}, {'date': '2025-01-06', 'task': 'Revise Topic 1: Introduction to Database Systems', 'category': 'topic revision'}, {'date': '2025-01-08', 'task': 'Review First day of classes for Introduction to Database Systems', 'category': 'review'}, {'date': '2025-01-13', 'task': 'Revise Topic 2: The Entity-Relationship (ER) Model', 'category': 'topic revision'}, {'date': '2025-01-20', 'task': 'Revise Topic 3: Introduction to the Relational Model', 'category': 'topic revision'}, {'date': '2025-01-27', 'task': 'Revise Topic 4: Query Languages: Relational Algebra and SQL', 'category': 'topic revision'}, {'date': '2025-01-29', 'task': 'Work on Assignment 1 Due Date for Introduction to Database Systems', 'category': 'assignment'}, {'date': '2025-01-30', 'task': 'Work on Assignment 1 Due Date for Introduction to Database Systems', 'category': 'assignment'}, {'date': '2025-01-31',

In [None]:
import json
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build

# Path to the uploaded JSON key file
# NOTE(review): this names one specific key file under /content; the file must be
# uploaded to the runtime before this cell runs, and the key itself should be kept
# out of version control.
SERVICE_ACCOUNT_FILE = "/content/medbot-449621-a4fad6989c13.json"  # Change this to your uploaded file name

# Define the required scopes
SCOPES = ["https://www.googleapis.com/auth/calendar"]

# Authenticate using the service account
creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# Build the Google Calendar API service
# `service` is the notebook-level client that the calendar helper functions read.
service = build("calendar", "v3", credentials=creds)

print("✅ Google Calendar API Authentication Successful with Service Account!")


✅ Google Calendar API Authentication Successful with Service Account!


In [None]:


# Function to insert events from your final study plan into Google Calendar
def insert_events_to_calendar(events, calendar_id="ebad.khan5487@gmail.com"):
    """Create one all-day Google Calendar event for every dated plan entry.

    Entries whose date is "TBA" (any case), missing, or not in YYYY-MM-DD form
    are reported and skipped. API failures are reported per entry and do not
    stop the remaining inserts. Uses the notebook-level ``service`` client.

    Args:
        events: Iterable of dicts with "date", "task" and "category" keys.
        calendar_id: Calendar to write to. Defaults to the calendar this
            notebook originally targeted; pass another ID to reuse the function.

    Note:
        Every call inserts new events, so re-running it creates duplicates.
    """
    for event in events:
        task = event.get("task", "Unnamed task")
        category = event.get("category", "general")
        start_date = str(event.get("date") or "").strip()

        # Skip events with non-standard dates (like "TBA")
        if start_date.upper() == "TBA":
            print(f"Skipping event '{task}' with date TBA.")
            continue

        try:
            dt = datetime.strptime(start_date, "%Y-%m-%d")
        except ValueError:
            print(f"Skipping event '{task}' due to invalid date format: {start_date}")
            continue
        # All-day events take an exclusive end date, hence start + 1 day.
        end_date = (dt + timedelta(days=1)).strftime("%Y-%m-%d")

        event_body = {
            "summary": f"{task} ({category})",
            "start": {"date": start_date, "timeZone": "UTC"},
            "end": {"date": end_date, "timeZone": "UTC"}
        }

        try:
            created_event = service.events().insert(calendarId=calendar_id, body=event_body).execute()
            print(f"Created event: {created_event.get('summary')} on {start_date}")
            print("Event link:", created_event.get("htmlLink"))
        except Exception as e:
            print(f"Error inserting event '{task}' on {start_date}: {e}")



In [None]:
# Side effect: creates real calendar events on every run of this cell.
insert_events_to_calendar(final_plan)


Created event: Prepare for First day of classes for Introduction to Database Systems (study) on 2025-01-04
Event link: https://www.google.com/calendar/event?eid=bHB1NjNlZWxka2ExYW45YmE1cjR1aXVvbjAgZWJhZC5raGFuNTQ4N0Bt
Created event: Revise Topic 1: Introduction to Database Systems (topic revision) on 2025-01-06
Event link: https://www.google.com/calendar/event?eid=ZGpzMTVvYThzamxxaGZuamwzMHR2dmZmbTQgZWJhZC5raGFuNTQ4N0Bt
Created event: Review First day of classes for Introduction to Database Systems (review) on 2025-01-08
Event link: https://www.google.com/calendar/event?eid=NmYyNWNudWxnYzg5anQ2NjluYmQ0ZWI0YTQgZWJhZC5raGFuNTQ4N0Bt
Created event: Revise Topic 2: The Entity-Relationship (ER) Model (topic revision) on 2025-01-13
Event link: https://www.google.com/calendar/event?eid=c2l1Ymk2MmwzMzV0YTRobjZtcWJndjNjanMgZWJhZC5raGFuNTQ4N0Bt
Created event: Revise Topic 3: Introduction to the Relational Model (topic revision) on 2025-01-20
Event link: https://www.google.com/calendar/event?eid=O

In [None]:
from datetime import datetime, timedelta

def list_events(calendar_id="primary", max_results=50):
    """Print upcoming events of a calendar, ordered by start time.

    Uses the notebook-level ``service`` client. Recurring events are expanded
    into single instances (singleEvents=True). Events without a title are
    printed as "(no title)" instead of raising.

    Args:
        calendar_id: Calendar to read; defaults to "primary".
        max_results: Maximum number of events requested from the API.

    Returns:
        None. Output is printed.
    """
    # Local import keeps the cell self-contained; utcnow() is deprecated since Python 3.12.
    from datetime import timezone

    # Timestamp in UTC with a trailing 'Z', used as the lower bound (timeMin).
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    print("📅 Fetching upcoming events...\n")

    events_result = service.events().list(
        calendarId=calendar_id,
        timeMin=now,
        maxResults=max_results,
        singleEvents=True,
        orderBy="startTime"
    ).execute()

    events = events_result.get("items", [])

    if not events:
        print("❌ No upcoming events found.")
        return

    for event in events:
        start_info = event.get("start", {})
        # All-day events carry "date"; timed events carry "dateTime".
        start = start_info.get("date", start_info.get("dateTime"))
        summary = event.get("summary", "(no title)")
        print(f"✅ {summary} on {start}")

# Lists events of the "primary" calendar of whichever credentials `service` was built with.
list_events()


📅 Fetching upcoming events...

✅ Work on Assignment 1 Due Date for Introduction to Database Systems (assignment) on 2025-02-01
✅ Work on Assignment 1 Due Date for Introduction to Database Systems (assignment) on 2025-02-02
✅ Revise Topic 5: Schema Refinement and Normal Forms (topic revision) on 2025-02-03
✅ Work on Assignment 1 Due Date for Introduction to Database Systems (assignment) on 2025-02-03
✅ Revise for Midterm for Introduction to Database Systems (exam prep) on 2025-02-04
✅ Work on Assignment 1 Due Date for Introduction to Database Systems (assignment) on 2025-02-04
✅ Assignment 1 Due Date (major event) on 2025-02-04
✅ Revise for Midterm for Introduction to Database Systems (exam prep) on 2025-02-05
✅ Revise for Midterm for Introduction to Database Systems (exam prep) on 2025-02-06
✅ Revise for Midterm for Introduction to Database Systems (exam prep) on 2025-02-07
✅ Revise for Midterm for Introduction to Database Systems (exam prep) on 2025-02-08
✅ Revise for Midterm for Intr

# New section: attempting user-account authentication for scalability


In [None]:
!rm -rf ~/.config/gcloud


In [None]:
from google.colab import auth
import google.auth
from googleapiclient.discovery import build

# Authenticate user with Google Account
auth.authenticate_user()

# Define the required authentication scopes
SCOPES = [
    "https://www.googleapis.com/auth/calendar",
    "https://www.googleapis.com/auth/calendar.events",
    "https://www.googleapis.com/auth/calendar.readonly"
]

# Request the default credentials with the scopes above.
# NOTE(review): the list_calendars() run recorded in this notebook failed with
# HTTP 403 "Request had insufficient authentication scopes", so these credentials
# did not actually carry the Calendar scopes — confirm how the user login is
# meant to grant them.
creds, _ = google.auth.default(scopes=SCOPES)

# Build the Google Calendar API service
# This rebinds the notebook-level `service` (as well as `creds` and `SCOPES`)
# set by the service-account cell, so the helper functions use whichever
# authentication cell ran last.
service = build("calendar", "v3", credentials=creds)

# This message only reports that the client object was built.
print("✅ Google Calendar API Authentication Successful!")


✅ Google Calendar API Authentication Successful!


In [None]:
def list_calendars():
    """Print the name and ID of every calendar visible to the current credentials.

    Uses the notebook-level ``service`` client. Any failure (API error or an
    unexpected response shape) is printed instead of raised.
    """
    print("📅 Fetching available calendars...\n")

    try:
        response = service.calendarList().list().execute()
        entries = response["items"]
        for entry in entries:
            name, identifier = entry['summary'], entry['id']
            print(f"✅ {name} - ID: {identifier}")
    except Exception as e:
        print(f"❌ Error fetching calendars: {e}")

# In the recorded run this call printed an HTTP 403 "insufficient authentication scopes" error.
list_calendars()




📅 Fetching available calendars...

❌ Error fetching calendars: <HttpError 403 when requesting https://www.googleapis.com/calendar/v3/users/me/calendarList?alt=json returned "Request had insufficient authentication scopes.". Details: "[{'message': 'Insufficient Permission', 'domain': 'global', 'reason': 'insufficientPermissions'}]">
