In [1]:
# 🧠 SMART PERSONAL TASK & REMINDER ASSISTANT USING GEN AI (GEMINI 2.0)

# ✅ SECTION 1: INTRODUCTION
"""
Welcome to the Capstone Project for the Kaggle 5-Day Gen AI Intensive.

🌟 Project Title: Smart Personal Task & Reminder Assistant

📌 Problem: Most people manage tasks across chats, documents, and mental notes.
This app helps users extract and organize tasks/reminders from raw text and uploaded files
using Gemini 2.0 with Generative AI techniques.

💡 Solution: A GenAI-powered assistant that:
- Extracts tasks from user text or documents (Few-shot prompting)
- Retrieves relevant past tasks (RAG)
- Understands user-uploaded content (Document Understanding)

We aim to demonstrate the use of:
1. Retrieval-Augmented Generation (RAG)
2. Few-shot Prompting
3. Document Understanding

Bonus goals: Context caching, MLOps-friendly logging, and structured JSON outputs.

We'll build this step by step below 👇
"""

"\nWelcome to the Capstone Project for the Kaggle 5-Day Gen AI Intensive.\n\n🌟 Project Title: Smart Personal Task & Reminder Assistant\n\n📌 Problem: Most people manage tasks across chats, documents, and mental notes.\nThis app helps users extract and organize tasks/reminders from raw text and uploaded files\nusing Gemini 2.0 with Generative AI techniques.\n\n💡 Solution: A GenAI-powered assistant that:\n- Extracts tasks from user text or documents (Few-shot prompting)\n- Retrieves relevant past tasks (RAG)\n- Understands user-uploaded content (Document Understanding)\n\nWe aim to demonstrate the use of:\n1. Retrieval-Augmented Generation (RAG)\n2. Few-shot Prompting\n3. Document Understanding\n\nBonus goals: Context caching, MLOps-friendly logging, and structured JSON outputs.\n\nWe'll build this step by step below 👇\n"

In [2]:
!pip install faiss-cpu -q pandas numpy

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

Collecting google-api-python-client
  Downloading google_api_python_client-2.166.0-py2.py3-none-any.whl.metadata (6.6 kB)
Downloading google_api_python_client-2.166.0-py2.py3-none-any.whl (13.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m92.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-api-python-client
  Attempting uninstall: google-api-python-client
    Found existing installation: google-api-python-client 2.160.0
    Uninstalling google-api-python-client-2.160.0:
      Successfully uninstalled google-api-python-client-2.160.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pydrive2 1.21.3 requires cryptography<44, but you have cryptography 44.0.2 which is incompatible.
pydrive2 1.21.3 requires pyOpenSSL<=24.2.1,>=19.1.0, but you have pyopenssl 25.0.0 which is incompatib

In [4]:
import os
import pandas as pd
import numpy as np
from google import generativeai as genai
from google.genai import types
from kaggle_secrets import UserSecretsClient
import faiss
import json

  warn(


In [5]:
# Fetch the Gemini API key from Kaggle Secrets and mirror it into the process
# environment under the same variable name.
GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
os.environ.update(GOOGLE_API_KEY=GOOGLE_API_KEY)

In [6]:
# Hand the API key to the google.generativeai client (imported as `genai`)
# so later model and embedding calls are authenticated.
genai.configure(api_key=GOOGLE_API_KEY)


In [7]:
# In-memory RAG store for semantic task search.
# `task_texts` and `task_metadata` are appended to in lock-step with `index`,
# so position i in either list describes the i-th vector in the index.
embedding_dim = 768  # Gemini embedding size
index = faiss.IndexFlatIP(embedding_dim)  # flat inner-product index
task_metadata = []
task_texts = []

In [8]:
# Sample few-shot examples to teach Gemini how to extract tasks.
# Each example pairs a raw user message ("input") with the structured
# reminder we expect back ("output").
FEW_SHOT_EXAMPLES = [
    {
        "input": "Remind me to submit the assignment before Sunday night",
        "output": {
            "task": "Submit the assignment",
            "time": "Sunday night",
            "priority": "high"
        }
    },
    {
        "input": "Check groceries and restock if fridge is empty",
        "output": {
            "task": "Restock groceries",
            "condition": "if fridge is empty",
            "priority": "medium"
        }
    },
    {
        "input": "If weather is good, go for a morning walk at 6 AM",
        "output": {
            "task": "Go for morning walk",
            "time": "6 AM",
            "condition": "If weather is good"
        }
    }
]

# Construct the few-shot prompt
prompt_header = """You are a smart assistant that extracts reminders from messages.
Return a JSON object with: task (required), time (optional), condition (optional), priority (optional)."""

few_shot_prompt = prompt_header
for ex in FEW_SHOT_EXAMPLES:
    # Serialize the expected output with json.dumps so the examples are real
    # JSON (double-quoted keys/values). Interpolating the dict directly would
    # show the model Python repr syntax, contradicting the "Return a JSON
    # object" instruction in the header.
    few_shot_prompt += f"\nInput: {ex['input']}\nOutput: {json.dumps(ex['output'])}"


In [9]:
def extract_task_from_input(user_input):
    """Ask Gemini to extract a single reminder from one user message.

    The module-level ``few_shot_prompt`` is followed by the new message as a
    final ``Input:`` / ``Output:`` pair, and the model is asked for exactly
    one candidate.

    Args:
        user_input: Raw message text to extract a task from.

    Returns:
        The model's response text with surrounding whitespace removed.
    """
    gemini = genai.GenerativeModel("gemini-2.0-flash")
    full_prompt = f"{few_shot_prompt}\n\nInput: {user_input}\nOutput:"
    # generation_config is passed as a plain dict rather than a
    # GenerationConfig object.
    reply = gemini.generate_content(
        contents=[full_prompt],
        generation_config={"candidate_count": 1},
    )
    return reply.text.strip()


In [10]:
# Try extracting a task from a sample user message.
# `output` holds the raw model text; the captured run shows it can arrive
# wrapped in a Markdown ```json fence.
sample_input = "Schedule a Zoom call with the recruiter next Tuesday at 3 PM"
output = extract_task_from_input(sample_input)
print(f"\n📌 Extracted Task: {output}\n")



📌 Extracted Task: ```json
{"task": "Schedule a Zoom call with the recruiter", "time": "next Tuesday at 3 PM"}
```



In [11]:
"""
Next Step: Extract Tasks from Uploaded Notes/Documents
This will allow users to:
- Upload meeting notes, planning docs, or long messages.
- Automatically extract multiple tasks/reminders from the document.
"""

'\nNext Step: Extract Tasks from Uploaded Notes/Documents\nThis will allow users to:\n- Upload meeting notes, planning docs, or long messages.\n- Automatically extract multiple tasks/reminders from the document.\n'

In [12]:
from IPython.display import Markdown

def extract_tasks_from_document(doc_text):
    """Ask Gemini to list every actionable task found in a block of notes.

    The prompt contains one worked example and requests a list of JSON
    objects with a required "task" key and optional "time", "condition" and
    "priority" keys.

    Args:
        doc_text: The note or document text to scan.

    Returns:
        The model's response text with surrounding whitespace removed. The
        text is returned as-is and is not parsed here.
    """
    gemini = genai.GenerativeModel("gemini-2.0-flash")

    # Literal braces in the example output are doubled because this is an
    # f-string; only {doc_text} is interpolated.
    extraction_prompt = f"""
You are a smart assistant that reads notes and documents and extracts actionable tasks.
Return the tasks as a list of JSON objects. Each task should contain:
- "task" (required)
- "time" (optional)
- "condition" (optional)
- "priority" (optional)

Example Input:
"Meeting discussed finalizing budget next Friday and reaching out to the vendor this week."

Output:
[
  {{"task": "Finalize budget", "time": "next Friday"}},
  {{"task": "Reach out to vendor", "time": "this week"}}
]

Input:
\"\"\"{doc_text}\"\"\"

Output:
"""

    reply = gemini.generate_content([extraction_prompt])
    return reply.text.strip()


In [13]:
# Sample meeting notes used to demonstrate document-level extraction.
sample_doc = """
- Follow up with marketing team next Thursday morning
- If client approves, prepare Q3 roadmap by Monday
- Update project dashboard weekly
- Schedule demo with engineering next Friday
"""

# `results` keeps the raw model text (later cells parse and store it).
results = extract_tasks_from_document(sample_doc)
print("📝 Extracted Tasks from Document:\n")
# The model may already wrap its answer in a ``` fence; adding a second fence
# around it renders a stray "```json" line inside the code block. Only add
# the fence when the text does not already start with one.
display(Markdown(
    results if results.lstrip().startswith("```") else f"```json\n{results}\n```"
))


📝 Extracted Tasks from Document:



```json
```json
[
  {"task": "Follow up with marketing team", "time": "next Thursday morning"},
  {"task": "Prepare Q3 roadmap", "condition": "If client approves", "time": "by Monday"},
  {"task": "Update project dashboard", "time": "weekly"},
  {"task": "Schedule demo with engineering", "time": "next Friday"}
]
```
```

In [14]:
 #Gemini just turned your document into clean, structured, actionable tasks. Well done!

In [15]:
def clean_json_string(json_string):
    """Strip a surrounding Markdown code fence from model output.

    Handles fences such as "```json\\n...\\n```", a bare "```...```", a
    single-line "```json [...]```", and output whose closing fence is missing.
    Text that does not start with a fence (after trimming whitespace) is
    returned unchanged apart from the trimming.

    Only the fence and an optional language tag on the opening fence are
    removed; the payload itself is never filtered line by line.

    Args:
        json_string: Raw text, typically a model response.

    Returns:
        The payload with the fence and surrounding whitespace removed.
    """
    import re  # local import: this cell runs before the notebook imports `re`

    text = json_string.strip()
    # ^```            opening fence
    # (?:tag(?=\s))?  optional language tag, only if followed by whitespace so
    #                 a payload such as ```true``` is not mistaken for a tag
    # (.*?)           the payload (lazy, spans newlines via DOTALL)
    # (?:```)?$       optional closing fence at the very end
    fenced = re.match(
        r"^```(?:[A-Za-z][\w+-]*(?=\s))?\s*(.*?)\s*(?:```)?$",
        text,
        re.DOTALL,
    )
    if fenced:
        text = fenced.group(1)
    return text.strip()


In [16]:
def save_tasks_to_memory(json_string, source="user input"):
    """Parse extracted tasks and store each one in the in-memory RAG store.

    Note: a later cell in this notebook defines ``save_tasks_to_memory``
    again; once that cell has run, it replaces this version.

    Args:
        json_string: Model output holding a JSON list of task objects; it is
            passed through ``clean_json_string`` before parsing.
        source: Label recorded with every stored task.

    Raises:
        json.JSONDecodeError: If the cleaned text is not valid JSON.
    """
    parsed_tasks = json.loads(clean_json_string(json_string))  # 🧹 clean, then parse

    for entry in parsed_tasks:
        description = entry.get("task", "")
        if description:
            # Embed the task text and add it as one row to the FAISS index.
            vector = get_gemini_embedding(description)
            index.add(np.array([vector]))

            # Keep the side lists aligned with the index row just added.
            task_texts.append(description)
            record = {"task": description}
            for field in ("time", "condition", "priority"):
                record[field] = entry.get(field)
            record["source"] = source
            task_metadata.append(record)

            print("✅ Saved:", description)


In [17]:
# import ast
# #
# def safe_json_parse(text):
#     try:
#         return json.loads(text)
#     except json.JSONDecodeError:
#         try:
#             return ast.literal_eval(text)
#         except:
#             return []


In [18]:
# Debug aid: show the raw model text held in `results` before it is parsed.
print("🧐 Debug - results:\n", results)

🧐 Debug - results:
 ```json
[
  {"task": "Follow up with marketing team", "time": "next Thursday morning"},
  {"task": "Prepare Q3 roadmap", "condition": "If client approves", "time": "by Monday"},
  {"task": "Update project dashboard", "time": "weekly"},
  {"task": "Schedule demo with engineering", "time": "next Friday"}
]
```


In [19]:
def get_gemini_embedding(text):
    """Embed ``text`` with the Gemini ``text-embedding-004`` model.

    Args:
        text: The string to embed.

    Returns:
        A 1-D ``numpy`` array of dtype float32 built from the "embedding"
        entry of the API response.
    """
    embed_kwargs = {
        "model": "models/text-embedding-004",
        "content": text,
        "task_type": "semantic_similarity",
    }
    vector = genai.embed_content(**embed_kwargs)["embedding"]
    # float32 is the dtype FAISS indexes operate on.
    return np.array(vector, dtype=np.float32)


In [20]:
# Parse the extracted document tasks and store them in the FAISS-backed memory.
save_tasks_to_memory(results, source="sample document")


✅ Saved: Follow up with marketing team
✅ Saved: Prepare Q3 roadmap
✅ Saved: Update project dashboard
✅ Saved: Schedule demo with engineering


In [21]:
#All your tasks were successfully saved into memory using Gemini embeddings + FAISS

In [22]:
# View everything saved so far
import pandas as pd

def show_all_saved_tasks():
    """Return every stored task as a DataFrame, one row per metadata record."""
    saved_tasks = pd.DataFrame(task_metadata)
    return saved_tasks

# Last expression of the cell, so the DataFrame is rendered as the cell output.
show_all_saved_tasks()


Unnamed: 0,task,time,condition,priority,source
0,Follow up with marketing team,next Thursday morning,,,sample document
1,Prepare Q3 roadmap,by Monday,If client approves,,sample document
2,Update project dashboard,weekly,,,sample document
3,Schedule demo with engineering,next Friday,,,sample document


In [23]:
#Let’s search saved tasks using similarity

In [24]:
def query_similar_tasks(user_query, top_k=3):
    """Print the stored tasks most similar to ``user_query``.

    The query is embedded with ``get_gemini_embedding`` and searched against
    the module-level FAISS ``index``; matching rows are looked up in
    ``task_metadata``.

    Args:
        user_query: Free-text search string.
        top_k: Maximum number of matches to request from the index.

    Returns:
        None. Results are printed.
    """
    query_embedding = get_gemini_embedding(user_query)
    _scores, indices = index.search(np.array([query_embedding]), top_k)

    print(f"\n🔍 Top {top_k} matches for: \"{user_query}\"")
    for i, idx in enumerate(indices[0]):
        # FAISS pads the result with -1 when the index holds fewer than
        # top_k vectors. A bare `idx < len(...)` check lets -1 through and
        # would silently print the *last* stored task, so require idx >= 0.
        if 0 <= idx < len(task_metadata):
            task = task_metadata[idx]
            print(f"{i+1}. ✅ Task: {task['task']} (Source: {task['source']})")

# 🔎 Try with this: search the stored tasks for a free-text query
# (top_k defaults to 3).
query_similar_tasks("marketing goals")



🔍 Top 3 matches for: "marketing goals"
1. ✅ Task: Follow up with marketing team (Source: sample document)
2. ✅ Task: Prepare Q3 roadmap (Source: sample document)
3. ✅ Task: Update project dashboard (Source: sample document)


In [25]:
def export_tasks_to_csv(filename="tasks.csv"):
    """Write every stored task record to a CSV file and print a confirmation.

    Args:
        filename: Destination path for the CSV. The DataFrame index is not
            written.
    """
    pd.DataFrame(task_metadata).to_csv(filename, index=False)
    print(f"✅ Exported to '{filename}'")

# Run this: writes the stored tasks to the default file name, 'tasks.csv'.
export_tasks_to_csv()


✅ Exported to 'tasks.csv'


In [26]:
import warnings
# Suppress every RuntimeWarning from this point on (process-wide filter).
warnings.filterwarnings("ignore", category=RuntimeWarning)


In [27]:
# Round-trip check: read the exported CSV back and show its first rows.
pd.read_csv("tasks.csv").head()


Unnamed: 0,task,time,condition,priority,source
0,Follow up with marketing team,next Thursday morning,,,sample document
1,Prepare Q3 roadmap,by Monday,If client approves,,sample document
2,Update project dashboard,weekly,,,sample document
3,Schedule demo with engineering,next Friday,,,sample document


In [28]:
import re
from datetime import datetime

def normalize_day(text):
    """Return the capitalized weekday name mentioned in ``text``, or None.

    Weekdays are tried in Monday-to-Sunday order with a case-insensitive
    substring test, so when several days appear the one earliest in that
    order wins, regardless of its position in the text.
    """
    lowered = text.lower()
    for weekday in ("monday", "tuesday", "wednesday", "thursday",
                    "friday", "saturday", "sunday"):
        if weekday in lowered:
            return weekday.capitalize()
    return None


In [29]:
def simulate_reminders(task_list, today=None):
    """Print the reminders that would fire on a given weekday.

    A reminder fires when the task's time contains "weekly" or mentions the
    weekday given by ``today``.

    Args:
        task_list: Non-empty list of task dicts; the "task" and "time" keys
            are read.
        today: Weekday name in any letter case (e.g. "friday" or "Friday").
            Defaults to the current local weekday.

    Raises:
        ValueError: If ``task_list`` is empty.
    """
    # Local import: other cells in this notebook bind the global name
    # `datetime` to the module (`import datetime`), which would break a
    # global `datetime.now()` lookup here.
    from datetime import datetime as _datetime

    if not today:
        today = _datetime.now().strftime("%A")
    # Normalize the caller's value: the comparison below is against a
    # lowercased weekday, so "Friday" previously never matched.
    today = today.strip().lower()
    if not task_list:
        raise ValueError("Task list should be not empty")

    print(f"\n📅 Simulating reminders for: {today.capitalize()}\n")
    for task in task_list:
        # A stored time of None is treated as "no time" rather than the
        # literal string "none".
        task_time = str(task.get("time") or "").lower()
        task_desc = task.get("task", "")
        normalized_time = normalize_day(task_time)

        # Reminder fires if time is set as "weekly" or includes today's weekday
        if "weekly" in task_time or (normalized_time and today in normalized_time.lower()):
            print(f"🔔 Reminder: {task_desc} → Time: {task_time}")


In [30]:
# Simulate a Friday: fires "weekly" tasks and tasks whose time mentions Friday.
simulate_reminders(task_metadata, today="friday")



📅 Simulating reminders for: Friday

🔔 Reminder: Update project dashboard → Time: weekly
🔔 Reminder: Schedule demo with engineering → Time: next friday


In [31]:
from datetime import datetime
from collections import defaultdict

# 🔁 Per-phrase memory of resolved times, keyed by the normalized phrase.
phrase_history = defaultdict(list)

# 📚 Learning helper: remembers which concrete times a vague phrase resolved to
def update_phrase_history(phrase, resolved_time):
    """Record ``resolved_time`` for ``phrase`` unless it is already stored.

    The phrase is trimmed and lowercased before being used as the key.
    """
    key = phrase.strip().lower()
    known_times = phrase_history[key]
    if resolved_time in known_times:
        return
    known_times.append(resolved_time)

# 🧠 Smarter default time inference
def infer_default_time(phrase, now=None):
    """Map a vague time phrase to a concrete clock time string.

    Resolution order:
      1. The most recently learned time for this exact (normalized) phrase in
         ``phrase_history``.
      2. The first keyword from the rule table contained in the phrase.
      3. "10:00 AM" for phrases containing "next" or a Monday-Friday name.
      4. "12:30 PM" as the fallback.

    Args:
        phrase: Free-text time expression, e.g. "next Friday" or "evening".
        now: Accepted for backward compatibility; not used by any rule. The
            previous default computed ``datetime.now()`` from the global name
            ``datetime``, which fails once a later cell rebinds that name to
            the module.

    Returns:
        A time string such as "9:00 AM".
    """
    phrase = phrase.lower().strip()

    # 🔍 Check learned history first. `.get` never inserts a key, so a
    # defaultdict is not polluted by lookups.
    learned_times = phrase_history.get(phrase)
    if learned_times:
        return learned_times[-1]  # Use most recent learned time

    # 🕒 Rule-based default mappings. Order matters: "early morning" must be
    # tested before the shorter "morning".
    keyword_map = {
        "afternoon": "12:10 PM",
        "night": "7:45 PM",
        "early morning": "7:00 AM",
        "morning": "9:00 AM",
        "evening": "6:00 PM",
    }

    for key, value in keyword_map.items():
        if key in phrase:
            return value

    # "next week" is covered by the "next" test, so no separate branch is
    # needed.
    # NOTE(review): saturday and sunday are not listed, so they fall through
    # to the fallback below — confirm this is intended.
    weekday_names = ["friday", "monday", "tuesday", "wednesday", "thursday"]
    if "next" in phrase or any(day in phrase for day in weekday_names):
        return "10:00 AM"

    return "12:30 PM"  # 🛑 Fallback time


In [32]:
# Spot-check the rule table: print the inferred time for a few sample phrases.
test_inputs = ["next Friday", "afternoon", "evening", "night", "early morning", "next week", "monday"]
for t in test_inputs:
    print(f"{t} ➤ {infer_default_time(t)}")


next Friday ➤ 10:00 AM
afternoon ➤ 12:10 PM
evening ➤ 6:00 PM
night ➤ 7:45 PM
early morning ➤ 7:00 AM
next week ➤ 10:00 AM
monday ➤ 10:00 AM


In [33]:
# Teach the assistant a preferred time, then confirm the learned value wins
# over the rule-based default.
update_phrase_history("next week", "4:45 PM")  # User starts preferring this
print("next week ➤", infer_default_time("next week"))  # Returns the learned 4:45 PM now


next week ➤ 4:45 PM


In [34]:
# ⏱️ Update task before saving: Fill inferred time if missing or vague
def normalize_task_time(task):
    """Add a "normalized_time" entry to ``task`` and return it.

    The task's "time" value is passed to ``infer_default_time``; when the
    value is missing, None or blank, the generic fallback "12:30 PM" is used.

    Args:
        task: Task dict. It is modified in place.

    Returns:
        The same ``task`` dict, now containing "normalized_time".
    """
    raw_time = task.get("time")
    # Stored tasks can carry an explicit `"time": None`; calling .strip() on
    # that raised AttributeError. Treat None as "no time given".
    vague_time = "" if raw_time is None else str(raw_time).strip().lower()
    if vague_time:
        task["normalized_time"] = infer_default_time(vague_time)
    else:
        task["normalized_time"] = "12:30 PM"  # generic fallback
    return task




In [35]:
# Debug aid: confirm `results` is still the raw (fenced) model string.
print("Type:", type(results))
print("Value:", results)


Type: <class 'str'>
Value: ```json
[
  {"task": "Follow up with marketing team", "time": "next Thursday morning"},
  {"task": "Prepare Q3 roadmap", "condition": "If client approves", "time": "by Monday"},
  {"task": "Update project dashboard", "time": "weekly"},
  {"task": "Schedule demo with engineering", "time": "next Friday"}
]
```


In [36]:
def save_tasks_to_memory(json_input, source="user input"):
    """Parse extracted tasks and store each one in the in-memory RAG store.

    Accepts raw model text (optionally wrapped in a Markdown code fence),
    an already-parsed list of task dicts, or a single task dict.

    Args:
        json_input: ``str`` holding JSON, a ``list`` of task dicts, or one
            task ``dict``.
        source: Label recorded with every stored task.

    Returns:
        None. Problems with the input are reported with a printed message
        rather than an exception.
    """
    import re  # local import: this cell does not depend on cell order for `re`

    # ✅ Handle if it's a string with markdown or backticks
    if isinstance(json_input, str):
        text = json_input.strip()
        # Remove an opening fence (with or without a language tag) and an
        # optional closing fence. A bare "```" opening fence was previously
        # left in place and made the JSON unparseable.
        fenced = re.match(
            r"^```(?:[A-Za-z][\w+-]*(?=\s))?\s*(.*?)\s*(?:```)?$",
            text,
            re.DOTALL,
        )
        if fenced:
            text = fenced.group(1)
        try:
            tasks = json.loads(text)
        except json.JSONDecodeError:
            print("❌ Still invalid JSON after cleaning.")
            return

    elif isinstance(json_input, (list, dict)):
        tasks = json_input
    else:
        print("❌ Unsupported input type.")
        return

    # The single-message prompt returns one JSON object, not a list.
    if isinstance(tasks, dict):
        tasks = [tasks]
    if not isinstance(tasks, list):
        print("❌ Unsupported input type.")
        return

    for task_obj in tasks:
        if not isinstance(task_obj, dict):
            continue  # skip stray scalars/strings in the list

        # `or ""` covers an explicit JSON null for "task".
        task_text = task_obj.get("task") or ""
        if not isinstance(task_text, str) or not task_text.strip():
            continue

        resolved_time = task_obj.get("time", "")

        # Embed the task text and add it as one row to the FAISS index,
        # keeping the side lists aligned with that row.
        embedding = get_gemini_embedding(task_text)
        index.add(np.array([embedding]))

        task_texts.append(task_text)
        task_metadata.append({
            **task_obj,
            "resolved_time": resolved_time,
            "source": source
        })

        print(f"✅ Saved: {task_text} ⏰ → {resolved_time}")


In [37]:
# Debug aid: re-inspect `results` (type and raw text) before saving it with
# the redefined save_tasks_to_memory.
print("Type:", type(results))
print("Value:", results)

Type: <class 'str'>
Value: ```json
[
  {"task": "Follow up with marketing team", "time": "next Thursday morning"},
  {"task": "Prepare Q3 roadmap", "condition": "If client approves", "time": "by Monday"},
  {"task": "Update project dashboard", "time": "weekly"},
  {"task": "Schedule demo with engineering", "time": "next Friday"}
]
```


In [38]:
# Store the document tasks again using the redefined save_tasks_to_memory.
# NOTE: the same `results` were already saved by an earlier cell, so this
# appends the four tasks to the index and lists a second time.
save_tasks_to_memory(results, source="sample document")


✅ Saved: Follow up with marketing team ⏰ → next Thursday morning
✅ Saved: Prepare Q3 roadmap ⏰ → by Monday
✅ Saved: Update project dashboard ⏰ → weekly
✅ Saved: Schedule demo with engineering ⏰ → next Friday


In [39]:
# Seed the phrase memory with example history.
# Kept as a defaultdict(list): update_phrase_history indexes
# `phrase_history[phrase]` directly, which raises KeyError for a new phrase
# if this is a plain dict.
phrase_history = defaultdict(list, {
    "next friday": ["7:00 PM", "6:30 PM"],
    "afternoon": ["12:10 PM", "2:00 PM"],
    "evening": ["6:00 PM", "6:00 PM"]
})


In [40]:
# View learned phrase behavior: one line per phrase with the list of times
# recorded for it.
print("\n🧠 Learned Phrases & Times:")
for phrase, times in phrase_history.items():
    print(f"{phrase} → {times}")



🧠 Learned Phrases & Times:
next friday → ['7:00 PM', '6:30 PM']
afternoon → ['12:10 PM', '2:00 PM']
evening → ['6:00 PM', '6:00 PM']


In [41]:
from collections import defaultdict, Counter

# Global phrase history (re-created empty here; earlier entries are discarded)
phrase_history = defaultdict(list)

# Save inferred time to phrase history
def update_phrase_history(vague_phrase, resolved_time):
    """Append ``resolved_time`` to the history of ``vague_phrase``.

    Unlike the earlier definition in this notebook, repeated times are kept,
    so the list records every occurrence.

    The key is trimmed and lowercased, matching how ``infer_default_time``
    normalizes a phrase before looking it up; without the trim, a phrase
    stored with stray whitespace could never be found again. Empty, blank
    or None phrases are ignored.
    """
    key = vague_phrase.strip().lower() if vague_phrase else ""
    if key:
        phrase_history[key].append(resolved_time)


In [42]:
import datetime
import os
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Authenticate and build calendar service
def setup_google_calendar():
    """Run the installed-app OAuth flow and return a Calendar v3 client.

    The OAuth client configuration is read from ``credentials.json`` in the
    working directory, and the flow is completed through a local server on
    an automatically chosen port (``port=0``).

    Returns:
        The service object built for the 'calendar' API, version 'v3'.
    """
    calendar_scopes = ['https://www.googleapis.com/auth/calendar']
    oauth_flow = InstalledAppFlow.from_client_secrets_file('credentials.json', calendar_scopes)
    return build('calendar', 'v3', credentials=oauth_flow.run_local_server(port=0))


In [43]:
def create_calendar_event(service, task, date_str, time_str="10:00 AM", timezone="America/Chicago"):
    """Insert a one-hour event into the primary calendar.

    Args:
        service: Calendar API service object (as returned by ``build``).
        task: Event title (used as the event "summary").
        date_str: Event date as "YYYY-MM-DD".
        time_str: Start time as "HH:MM AM/PM" (12-hour clock).
        timezone: IANA time zone name in which ``date_str``/``time_str`` are
            interpreted. Defaults to the previously hard-coded
            "America/Chicago".

    Returns:
        The created event resource returned by the API, or None when
        creation failed (the error is printed, not raised).
    """
    # Local import under a private alias: other cells bind the global name
    # `datetime` to either the module or the class, so relying on the global
    # makes this function break depending on which cell ran last.
    import datetime as _dt

    try:
        dt_str = f"{date_str} {time_str}"
        start_time = _dt.datetime.strptime(dt_str, "%Y-%m-%d %I:%M %p")
        end_time = start_time + _dt.timedelta(hours=1)

        event = {
            'summary': task,
            'start': {
                # Naive local time; the zone is supplied separately below.
                'dateTime': start_time.isoformat(),
                'timeZone': timezone,
            },
            'end': {
                'dateTime': end_time.isoformat(),
                'timeZone': timezone,
            },
            'reminders': {
                'useDefault': True,
            },
        }

        event_result = service.events().insert(calendarId='primary', body=event).execute()
        print(f"✅ Event created: {event_result.get('htmlLink')}")
        return event_result
    except Exception as e:
        # Best-effort by design: report the failure and keep the notebook running.
        print("❌ Failed to create event:", e)
        return None


In [44]:
from google.oauth2 import service_account
from googleapiclient.discovery import build
calendar_id = 'primary'
# Path to the uploaded service account key.
# Can be overridden with the CALENDAR_SERVICE_ACCOUNT_FILE environment
# variable so the notebook is not tied to one absolute dataset path; the
# original path remains the default.
SERVICE_ACCOUNT_FILE = os.environ.get(
    'CALENDAR_SERVICE_ACCOUNT_FILE',
    '/kaggle/input/credentials/sigma-gateway-367511-a5f3c9c6fcf2.json',
)
SCOPES = ['https://www.googleapis.com/auth/calendar']

# Create credentials
creds = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# Build the service
calendar_service = build('calendar', 'v3', credentials=creds)


In [45]:
# Create a sample event through the service-account client.
# NOTE: every execution inserts a new event, so re-running this cell creates
# duplicates (the event listing in the next cell's output shows five copies).
create_calendar_event(calendar_service, task="Demo with marketing", date_str="2025-04-14", time_str="12:10 PM")


✅ Event created: https://www.google.com/calendar/event?eid=dDlpcXN0Ym4zZThoZzAxNmc3NzhyODZhaWMgZ2VuYWlzZXJ2aWNlQHNpZ21hLWdhdGV3YXktMzY3NTExLmlhbS5nc2VydmljZWFjY291bnQuY29t


In [46]:
# Set time range: now to 7 days from now
from datetime import datetime, timedelta, timezone

# Take a single UTC timestamp so the window is exactly seven days wide, and
# use the timezone-aware API (datetime.utcnow() is deprecated). tzinfo is
# dropped before formatting to keep the original "...Z" string format.
_window_start = datetime.now(timezone.utc).replace(tzinfo=None)
now = _window_start.isoformat() + 'Z'  # 'Z' indicates UTC time
one_week_later = (_window_start + timedelta(days=7)).isoformat() + 'Z'

# Fetch events
events_result = calendar_service.events().list(
    calendarId=calendar_id,
    timeMin=now,
    timeMax=one_week_later,
    maxResults=10,
    singleEvents=True,
    orderBy='startTime'
).execute()

events = events_result.get('items', [])

# Print events
if not events:
    print('No upcoming events found.')
for event in events:
    # All-day events carry 'date' instead of 'dateTime'.
    start = event['start'].get('dateTime', event['start'].get('date'))
    # An event created without a title has no 'summary' key.
    print(f"{start} - {event.get('summary', '(no title)')}")

# List the calendars visible to these credentials.
calendar_list = calendar_service.calendarList().list().execute()
for calendar_entry in calendar_list.get('items', []):
    print(calendar_entry)


2025-04-14T17:10:00Z - Demo with marketing
2025-04-14T17:10:00Z - Demo with marketing
2025-04-14T17:10:00Z - Demo with marketing
2025-04-14T17:10:00Z - Demo with marketing
2025-04-14T17:10:00Z - Demo with marketing
