In [None]:
# Analyze Historical Calendar Events

In [32]:
import os
import sys
import json
import csv
from datetime import datetime
from pytz import timezone
from importlib import import_module

import pandas as pd

sys.path.append(os.path.abspath('../src'))
from util import parse_dt_to_utc
from gcal.util import get_client
from event import from_gcal_events

In [33]:
USERNAME = 'zack@plenty.ag'
START_DT = datetime(2016, 6, 13)
END_DT = datetime(2021, 8, 27)
LOCAL_TZ = timezone('America/Los_Angeles')

START_DT_STR = START_DT.isoformat() + 'Z'
END_DT_STR = END_DT.isoformat() + 'Z'

In [27]:
CACHE_PATH = 'PLENTY_EVENT_CACHE.csv'

In [28]:
# load events from cache or from gcal

def load_events():
    if not os.path.exists(CACHE_PATH):
        calendar = get_client()

        all_results = []
        req = calendar.events().list(
            calendarId='zack@plenty.ag',
            timeMin=START_DT_STR,
            timeMax=END_DT_STR,
            singleEvents=True,
            orderBy='startTime'
        )
        while req:
            res = req.execute()
            all_results += res['items']
            req = calendar.events().list_next(req, res)
            print(all_results[-1].get('start', {}).get('dateTime'))

        headers = set()
        for e in all_results:
            headers |= e.keys()

        with open(CACHE_PATH, 'w+') as fout:
            writer = csv.DictWriter(fout, fieldnames=headers)
            writer.writeheader()
            for e in all_results:
                writer.writerow(e)

    return pd.read_csv(CACHE_PATH)

gcal_events = load_events()

print('Days:', (END_DT - START_DT).days)
print('Evts:', len(gcal_events))
print('Cols:', gcal_events.columns)

Days: 1263
Evts: 5395
Cols: Index(['originalStartTime', 'status', 'reminders', 'creator', 'id', 'kind',
       'description', 'privateCopy', 'recurringEventId', 'updated',
       'attachments', 'etag', 'guestsCanInviteOthers', 'transparency',
       'created', 'iCalUID', 'guestsCanSeeOtherGuests', 'attendees',
       'location', 'guestsCanModify', 'organizer', 'extendedProperties',
       'visibility', 'end', 'start', 'conferenceData', 'sequence', 'summary',
       'htmlLink', 'hangoutLink'],
      dtype='object')


In [36]:
# transform gcal events to nice event models, and those to dataframe

event_models = from_gcal_events(gcal_events, 'zack.swafford@plenty.ag')
events = pd.DataFrame([e.serialize() for e in event_models])

events.head()

Unnamed: 0,endDtStr,isOneOnOne,location,needsLocation,organizerEmail,startDtStr,title,userAcceptanceStr,userHasConflict,userIsOrganizer
0,2016-06-28T20:30:00+00:00,False,"Wilson Sonsini Goodrich & Rosati, 650 Page Mil...",False,nate@seejanefarm.com,2016-06-28T19:00:00+00:00,HOLD: CS Convo,ACCEPTED,False,False
1,2016-07-01T17:00:00+00:00,False,"1050 College Ave, Menlo Park, CA, 94025, Unite...",False,nate@seejanefarm.com,2016-07-01T16:00:00+00:00,Closet Farm Setup,ACCEPTED,False,False
2,2016-07-07T19:00:00+00:00,False,"1050 College Ave, Menlo Park, CA, 94025",False,zack.swafford@plenty.ag,2016-07-07T16:00:00+00:00,Progress Meeting,ACCEPTED,False,True
3,2016-07-07T20:00:00+00:00,False,Palo Alto TBD,False,nate@seejanefarm.com,2016-07-07T19:00:00+00:00,Lunch,ACCEPTED,False,False
4,2016-07-11T22:00:00+00:00,False,"Participant Dial-In: +1.860.970.0010,100650558",False,zack.swafford@plenty.ag,2016-07-11T21:00:00+00:00,SJF-Bright CS Meetup,ACCEPTED,False,True


In [37]:
def extract_dt_props(dt_str):
    dt = parse_dt_to_utc(dt_str)
    dt_local = dt.astimezone(LOCAL_TZ)
    # todo return all sorts of things like year, month, month in year, etc.
    return {

    }

# todo remove to stop testing
extract_dt_props(events.loc[0, 'startDtStr'])

# todo add this info to the DF for start at least
# events.apply(lambda x: extract_dt_props(x['startDtStr']), axis=1).head()

2016-06-28 19:00:00+00:00


{}