# Try to create a summary of what the user did on their phone yesterday

In [1]:
import os
import shutil
import sys
import cv2
import numpy as np
import pandas as pd

from datetime import datetime, timedelta

sys.path.insert(0, "../")
from db import HindsightDB
import utils

In [2]:
db = HindsightDB()

# Look at how many location pings in a day

In [3]:
location_df = db.get_locations()

In [4]:
location_df = utils.add_datetimes(location_df)

In [5]:
location_df['datetime_to_day'] = location_df['datetime_local'].dt.floor('D')

In [6]:
location_df.groupby(['datetime_to_day']).timestamp.count()

datetime_to_day
2024-07-10 00:00:00-04:00    276
2024-07-11 00:00:00-04:00    247
2024-07-12 00:00:00-04:00    106
2024-07-13 00:00:00-04:00    347
2024-07-14 00:00:00-04:00    211
2024-07-15 00:00:00-04:00    815
2024-07-16 00:00:00-04:00    234
2024-07-17 00:00:00-04:00    199
2024-07-18 00:00:00-04:00    170
2024-07-19 00:00:00-04:00    167
2024-07-20 00:00:00-04:00    270
2024-07-21 00:00:00-04:00    354
2024-07-22 00:00:00-04:00    232
2024-07-23 00:00:00-04:00    165
Name: timestamp, dtype: int64

# Try to get daily summary from LLM

In [3]:
frames = db.get_frames()

In [4]:
now_utc = datetime.utcnow()

# Start of yesterday in UTC
yesterday_start = (now_utc - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
yesterday_end = yesterday_start + timedelta(days=1, microseconds=-1)

# Convert to milliseconds since the epoch
yesterday_start_ms = int(yesterday_start.timestamp() * 1000)
yesterday_end_ms = int(yesterday_end.timestamp() * 1000)

In [5]:
frames_yesterday = frames.loc[(frames['timestamp'] >= yesterday_start_ms) & (frames['timestamp'] <= yesterday_end_ms)]

In [6]:
frames_yesterday = utils.add_datetimes(frames_yesterday)
frames_yesterday = utils.add_usage_ids(frames_yesterday)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['datetime_utc'] = pd.to_datetime(df['timestamp'] / 1000, unit='s', utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['datetime_local'] = df['datetime_utc'].apply(lambda x: x.replace(tzinfo=video_timezone).astimezone(local_timezone))


In [7]:
frames_yesterday_ocr = db.get_frames_with_ocr(frame_ids=set(frames_yesterday['id']))
frames_yesterday_ocr = frames_yesterday_ocr.sort_values(by='timestamp', ascending=True)

# Use LLM to create summary

In [8]:
from mlx_lm import load, generate
from config import MLX_LLM_MODEL

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
model, tokenizer = load(MLX_LLM_MODEL) 

Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 112923.57it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
pre_prompt = "Below is text that was on my screen yesterday.\n"
for frame_id in frames_yesterday_ocr.frame_id.unique():
    frames_df = frames_yesterday_ocr.loc[frames_yesterday_ocr['frame_id'] == frame_id]
    frame_text = utils.get_preprompted_text(frames_df, application=frames_df['application'].iloc[0], timestamp=frames_df['timestamp'].iloc[0])
    pre_prompt += frame_text + "\n"

In [14]:
prompt = pre_prompt[1000:3000] + "\nCreate a summary of what the user has been doing:"

In [15]:
response = generate(model, tokenizer, prompt=prompt, max_tokens=1000)

In [16]:
print(response)

 The user has been browsing the internet using the Chrome browser on their Android device. They have been viewing news articles and updates on various topics, including politics and current events. They have also been looking at recipes and food-related content. The user has been scrolling through multiple web pages and has likely been reading and viewing content for an extended period of time.

Note: The timestamps provided are in UTC format and represent the time at which the user took the screenshot. The timestamps are not necessarily indicative of the user's actual activity, but rather the point in time at which the user captured the screenshot.  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | 