In [122]:
import pandas as pd
from statistics import mean

import sys
sys.path.insert(0, "/Users/connorparish/code/hindsight")

from hindsight_server.db import HindsightDB
from hindsight_server.utils import add_sep_ids, convert_to_continuos_str

In [3]:
db = HindsightDB()

In [76]:
all_frames = db.get_frames(impute_applications=False)

In [151]:
frames = db.search(text="hindsight", impute_applications=False)

In [152]:
hindsight_frames = frames.drop_duplicates(subset="combined_text")

In [153]:
hindsight_frames = hindsight_frames.loc[~hindsight_frames['combined_text'].str.contains("Recording screen")]
hindsight_frames = hindsight_frames.loc[~hindsight_frames['combined_text'].str.contains("Hindsight Server Upload")]

In [154]:
hindsight_frames.groupby(['application']).id.count().sort_values()

application
com-aa-android                                   1
com-amazon-avod-thirdpartyclient                 1
com-polywise-lucid                               1
com-groupme-android                              1
com-google-android-apps-chromecast-app           1
com-chase-sig-android                            1
com-draftkings-sportsbook                        1
net-activitywatch-android                        2
com-google-android-cellbroadcastreceiver         2
com-espn-fantasy-lm-football                     2
com-mobile3                                      2
com-android-vending                              2
com-brave-browser                                3
com-google-android-GoogleCamera                  3
com-duolingo                                     3
com-google-android-apps-photos                   3
com-google-android-providers-media-module        3
com-sleeperbot                                   3
fm-dice                                          3
me-lyft-android    

In [162]:
hindsight_frames.loc[hindsight_frames['application'] == "com-google-android-inputmethod-latin"].iloc[45].path

'/Users/connorparish/.hindsight_server/data/raw_screenshots/2024/06/20/com-google-android-inputmethod-latin/com-google-android-inputmethod-latin_1718850247933.jpg'

In [147]:
hindsight_frames.loc[hindsight_frames['application'] == "com-android-systemui"]

Unnamed: 0,id,path,timestamp,application,combined_text,datetime_utc,datetime_local
2083,43788,/Users/connorparish/.hindsight_server/data/raw...,1716221526044,com-android-systemui,12:12 Screen Recording Location Tracking Start...,2024-05-20 16:12:06.043999910+00:00,2024-05-20 12:12:06.043999910-04:00
2064,43099,/Users/connorparish/.hindsight_server/data/raw...,1716242217550,com-android-systemui,5:56 Screen Recording Location Tracking Start ...,2024-05-20 21:56:57.549999952+00:00,2024-05-20 17:56:57.549999952-04:00
2039,43021,/Users/connorparish/.hindsight_server/data/raw...,1716243262236,com-android-systemui,6:14 |1• Screen Recording Location Tracking St...,2024-05-20 22:14:22.236000061+00:00,2024-05-20 18:14:22.236000061-04:00
2025,42964,/Users/connorparish/.hindsight_server/data/raw...,1716244102827,com-android-systemui,6:28 Screen Recording Location Tracking Start ...,2024-05-20 22:28:22.826999903+00:00,2024-05-20 18:28:22.826999903-04:00
2024,42957,/Users/connorparish/.hindsight_server/data/raw...,1716244132455,com-android-systemui,6:28 1| Screen Recording Location Tracking Sta...,2024-05-20 22:28:52.454999924+00:00,2024-05-20 18:28:52.454999924-04:00
...,...,...,...,...,...,...,...
53236,500525,/Users/connorparish/.hindsight_server/data/raw...,1728483900147,com-android-systemui,"10:25 Wed, Oct 9 • 93% Internet * Bluetooth Fl...",2024-10-09 14:25:00.147000074+00:00,2024-10-09 10:25:00.147000074-04:00
53251,500584,/Users/connorparish/.hindsight_server/data/raw...,1728483989291,com-android-systemui,"10:26 Wed, Oct 9 • 93% Internet * Bluetooth Fl...",2024-10-09 14:26:29.290999889+00:00,2024-10-09 10:26:29.290999889-04:00
53250,500583,/Users/connorparish/.hindsight_server/data/raw...,1728483991409,com-android-systemui,"10:26 Wed, Oct 9 • 93% Internet * Bluetooth Fl...",2024-10-09 14:26:31.408999920+00:00,2024-10-09 10:26:31.408999920-04:00
53246,500569,/Users/connorparish/.hindsight_server/data/raw...,1728483993496,com-android-systemui,"10:26 Wed, Oct 9 • 93% Internet * Bluetooth Fl...",2024-10-09 14:26:33.496000051+00:00,2024-10-09 10:26:33.496000051-04:00


In [163]:
typing_df = hindsight_frames.loc[hindsight_frames['application'] == "com-google-android-inputmethod-latin"]

In [164]:
len(typing_df)

961

In [165]:
typing_df = typing_df.sort_values(by="datetime_utc", ascending=True)
typing_df.to_csv("hindsight_typing_frames.csv", index=False)

# Get all unique text boxes with Hindsight

In [109]:
ocr_res = db.get_frames_with_ocr(frame_ids=list(hindsight_frames['id']))

In [120]:
hindsight_paragraphs = list()
for frame_id in set(ocr_res['frame_id']):
    frame_ocr_res = ocr_res.loc[ocr_res['frame_id'] == frame_id]
    avg_h = mean(frame_ocr_res['h'])
    new_para_thresh = avg_h * 2

    frame_ocr_res = frame_ocr_res.sort_values(by=['y', 'x'])
    
    frame_ocr_res = add_sep_ids(frame_ocr_res, new_para_thresh, "para_id")

    hindsight_paras = frame_ocr_res.loc[frame_ocr_res['text'].str.lower().str.contains("hindsight")].para_id.unique()

    for para_id in hindsight_paras:
        para_df = frame_ocr_res.loc[frame_ocr_res['para_id'] == para_id]
        para_str = convert_to_continuos_str(para_df, newline_threshold=avg_h/2)
        hindsight_paragraphs.append({"frame_id" : frame_id, "text" : para_str})

In [123]:
hindsight_paragraphs_df = pd.DataFrame(hindsight_paragraphs)

In [125]:
hindsight_paragraphs_df = hindsight_paragraphs_df.drop_duplicates(subset=['text'])

In [127]:
hindsight_paragraphs_df['text_len'] = hindsight_paragraphs_df['text'].apply(lambda x: len(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hindsight_paragraphs_df['text_len'] = hindsight_paragraphs_df['text'].apply(lambda x: len(x))


In [130]:
hindsight_paragraphs_df.sort_values(by="text_len", ascending=False)

Unnamed: 0,frame_id,text,text_len
36303,356875,r results for rusers/connor parish/ minos 1e s...,3585
14465,179025,"Connor Parish\n Boston, MA, 0213...",2693
85,393780,OBJECTIVE: Improve the human condition through...,2315
38532,361866,...,2141
26888,337233,There's a lot of talk right now around how the...,2029
24648,332933,There's a lot of talk right now around how th...,1918
42851,375301,We are in a unique moment in history where it ...,1578
25701,334759,Thu Aug 29 4:04 PM reen...,1557
3925,278939,"ey Burns, Ish,... 6:32 PM ...",1393
4767,281479,"Geoffrey Burns, Ish,... 6:32 PM ...",1376


In [133]:
cdf = hindsight_paragraphs_df.loc[hindsight_paragraphs_df['text_len'] >= 50]

In [137]:
for i, row in cdf.iterrows():
    print(row['frame_id'])
    print(row['text'])
    print()

131076
Query: a\Who is Connor Parish?
Result: Long Context
Response: A Biomedical Engineer exploring the
potential unlocked by emerging computer tools
to improve non-computer based fields. He is
also the creator of the open-source Android app
Hindsight.

131174
Query: a\Who is Connor Parish?
Resultir dong Context
Response: A Biomedical Engineer exploring the
potential unlocked by emerging computer tools
to improve non-computer based fields. He is
also the creator of the open-source Android app
Hindsight.Upload
111
  -1
•Unsynced Screenshots: 126

131316
Query: a \Who is Connor Parish?
Result: Long Context
Response: A Biomedical Engineer exploring the
potential unlocked by emerging computer tools
to improve non-computer based fields. He is
also the creator of the open-source Android app
Hindsight.

393488
   me, Alex 11          Sep 11
Hindsight Onboarding
Sounds good! If you could have miniconda a...

393687
Contact info
  Connor Parish
  Working on Hindsight

131562
Hindsight   canuck

# Try to remove close duplicates

In [93]:
from difflib import SequenceMatcher

In [94]:
s = SequenceMatcher(None, "", "")

In [110]:
o = ocr_res.loc[ocr_res['text'].str.lower().str.contains("hindsight")]

In [98]:
starter_row = o.iloc[0]
unique_frames = [(starter_row['frame_id'], starter_row['text'])]
for i, row in o.iterrows():
    s.set_seq1(row['text'])
    u = True
    for _, t in unique_frames:
        s.set_seq2(t)
        if s.ratio() > 0.9:
            u = False
            break
    
    if u:
        unique_frames.append((row["frame_id"], row['text']))
    

In [103]:
for i, u in unique_frames:
    print(i, u)

26008 Hindsight
26008 hindsight.life
26008 Hindsight website is live https://hindsight.life/
26810 Hindsight smfh
24412 hindsight
26017 Hindsight website is live
26017 https://hindsight.life/
26020 Hindsight website is
26029 saved by Hindsight?
26029 directory running the hindsight server.
26029 Is my data secure with Hindsight?
26029 saved within the Hindsight app's data
26030 If you have any questions about Hindsight,
26031 About Hindsight
26031 Hindsight is an open-source android app that
26031 • Hindsight takes a screenshot of your
26031 At Hindsight, we believe technology is falling
26838 Hindsight App
26838 Hindsight Logo
26838 Welcome to Hindsight
26838 Hindsight is designed to help you record your
26838 • cparish312 / hindsight Publ
26033 How much battery does Hindsight use?
26842 recently created hindsight 1. Hindsight is an android app
26039 © 2024 Hindsight. All rights reserved.
26040 How do I start recording with Hindsight?
26042 Closed GitHub - cparish312/hindsight
26874 h

In [106]:
hindsight_frames.loc[hindsight_frames['id'] == 38752].iloc[0]['path']

'/Users/connorparish/.hindsight_server/data/raw_screenshots/2024/05/23/com-reddit-frontpage/com-reddit-frontpage_1716425107315.jpg'

In [None]:
hindsight_frames

In [20]:
t1 = hindsight_frames.iloc[0]['combined_text']
t2 = hindsight_frames.iloc[1]['combined_text']

In [21]:

print(s.ratio())

0.9833333333333333


In [22]:
print(t1)

Verizon 5:10 Tue, Oct 8 64°F © Hindsight Recording • 2:28:12


In [23]:
print(t2)

Verizon 5:10 Tue, Oct 8 64°F © Hindsight Recording • 2:28:10


In [17]:
hindsight_frames

Unnamed: 0,id,path,timestamp,application,combined_text,datetime_utc,datetime_local
53022,498388,/Users/connorparish/.hindsight_server/data/raw...,1728421803162,com-android-systemui,"Verizon 5:10 Tue, Oct 8 64°F © Hindsight Recor...",2024-10-08 21:10:03.161999941+00:00,2024-10-08 17:10:03.161999941-04:00
53021,498387,/Users/connorparish/.hindsight_server/data/raw...,1728421801012,com-android-systemui,"Verizon 5:10 Tue, Oct 8 64°F © Hindsight Recor...",2024-10-08 21:10:01.012000084+00:00,2024-10-08 17:10:01.012000084-04:00
53013,498352,/Users/connorparish/.hindsight_server/data/raw...,1728421798876,com-android-systemui,"Verizon 5:09 Tue, Oct 8 64°F 0 Hindsight Recor...",2024-10-08 21:09:58.875999928+00:00,2024-10-08 17:09:58.875999928-04:00
53015,498357,/Users/connorparish/.hindsight_server/data/raw...,1728421796754,com-android-systemui,"Verizon 5:09 Tue, Oct 8 64°F 0 Hindsight Recor...",2024-10-08 21:09:56.753999949+00:00,2024-10-08 17:09:56.753999949-04:00
53011,498340,/Users/connorparish/.hindsight_server/data/raw...,1728421794631,com-android-systemui,"Verizon 5:09 Tue, Oct 8 64°F 0 Hindsight Recor...",2024-10-08 21:09:54.631000042+00:00,2024-10-08 17:09:54.631000042-04:00
...,...,...,...,...,...,...,...
2440,50017,/Users/connorparish/.hindsight_server/data/raw...,1715716497087,screenshot,"3:54 Tue, May 14 4 $ 100% Internet * Bluetooth...",2024-05-14 19:54:57.086999893+00:00,2024-05-14 15:54:57.086999893-04:00
2441,50019,/Users/connorparish/.hindsight_server/data/raw...,1715716492913,screenshot,"3:54 Tue, May 14 4 $ 100% Internet * Bluetooth...",2024-05-14 19:54:52.913000107+00:00,2024-05-14 15:54:52.913000107-04:00
2442,50021,/Users/connorparish/.hindsight_server/data/raw...,1715716488778,screenshot,3:54 G hindsight 1Password Authentica... + = C...,2024-05-14 19:54:48.778000116+00:00,2024-05-14 15:54:48.778000116-04:00
2443,50022,/Users/connorparish/.hindsight_server/data/raw...,1715716472339,screenshot,"3:54 Tue, May 14 4 $ 100% Internet * Bluetooth...",2024-05-14 19:54:32.338999987+00:00,2024-05-14 15:54:32.338999987-04:00
