# Every Mickle Calendar Analysis

In [11]:
import os
import pickle
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import polars as pl
from datetime import datetime, timedelta

In [2]:
gcloud_cred = r"C:\Users\Joshh\OneDrive\Documents\Stocks\Auth\googleCloud_gcal\client_secret_433766140968-mkjmsthdo1vtq0d9sd8nk9mkbeqgg88s.apps.googleusercontent.com.json"

In [3]:
# Set up OAuth 2.0 credentials
SCOPES = ['https://www.googleapis.com/auth/calendar.readonly']

def authenticate_google_calendar(credentials_file = gcloud_cred, token_file='token.pickle'):
    """Authenticate with Google Calendar API and return credentials."""
    creds = None
    
    # Load cached credentials if they exist
    if os.path.exists(token_file):
        try:
            with open(token_file, 'rb') as token:
                creds = pickle.load(token)
        except Exception as e:
            print(f"Error loading cached credentials: {e}")
    
    # If credentials don't exist or are invalid, get new ones
    if not creds or not creds.valid:
        try:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                if not os.path.exists(credentials_file):
                    raise FileNotFoundError(f"Credentials file not found: {credentials_file}")
                
                flow = InstalledAppFlow.from_client_secrets_file(
                    credentials_file, SCOPES)
                creds = flow.run_local_server(port=0)
            
            # Save credentials for future use
            with open(token_file, 'wb') as token:
                pickle.dump(creds, token)
        except Exception as e:
            raise Exception(f"Authentication failed: {e}")
    
    return creds

In [4]:
def get_calendar_events(calendar_id, days=30):
    creds = authenticate_google_calendar()
    service = build('calendar', 'v3', credentials=creds)
    
    # Calculate time bounds
    now = datetime.utcnow()
    time_min = now.isoformat() + 'Z'
    time_max = (now + timedelta(days=days)).isoformat() + 'Z'
    
    # Get events
    events_result = service.events().list(
        calendarId=calendar_id,
        timeMin=time_min,
        timeMax=time_max,
        singleEvents=True,
        orderBy='startTime'
    ).execute()
    
    events = events_result.get('items', [])
    
    # Convert to DataFrame
    event_data = []
    for event in events:
        start = event['start'].get('dateTime', event['start'].get('date'))
        end = event['end'].get('dateTime', event['end'].get('date'))
        
        event_info = {
            'summary': event.get('summary', 'No Title'),
            'start': start,
            'end': end,
            'created': event.get('created', ''),
            'organizer': event.get('organizer', {}).get('email', ''),
            'status': event.get('status', ''),
            'link': event.get('htmlLink', '')
        }
        event_data.append(event_info)
    
    # Create Polars DataFrame
    if event_data:
        df = pl.from_dicts(event_data)
        return df
    else:
        return pl.DataFrame()


In [None]:
# Example usage
calendar_id = 'everymickle.com_5tt4gqff5p5lupdmmmt586c2mo@group.calendar.google.com'


In [12]:
events_df = get_calendar_events(calendar_id)

  now = datetime.utcnow()


In [17]:
# Basic analysis
if not events_df.is_empty():
    # Check current data types
    print("Current schema:")
    print(events_df.schema)
    
    # Convert string dates to datetime only if they're strings
    if events_df["start"].dtype == pl.String:
        events_df = events_df.with_columns([
            pl.col('start').str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", strict=False),
            pl.col('end').str.to_datetime(format="%Y-%m-%dT%H:%M:%S%z", strict=False)
        ])
    
    # Add duration column
    events_df = events_df.with_columns([
        (pl.col('end') - pl.col('start')).alias('duration')
    ])
    
    # Basic statistics
    event_count = events_df.height
    avg_duration = events_df.select(pl.mean('duration')).item()
    
    print(f"Total events: {event_count}")
    print(f"Average duration: {avg_duration}")
    
    # Events by day of week
    events_df = events_df.with_columns([
        pl.col('start').dt.weekday().alias('day_of_week')
    ])
    
    # Create a mapping for day names using when-then expressions
    events_df = events_df.with_columns([
        pl.when(pl.col('day_of_week') == 0).then(pl.lit('Monday'))
        .when(pl.col('day_of_week') == 1).then(pl.lit('Tuesday'))
        .when(pl.col('day_of_week') == 2).then(pl.lit('Wednesday'))
        .when(pl.col('day_of_week') == 3).then(pl.lit('Thursday'))
        .when(pl.col('day_of_week') == 4).then(pl.lit('Friday'))
        .when(pl.col('day_of_week') == 5).then(pl.lit('Saturday'))
        .when(pl.col('day_of_week') == 6).then(pl.lit('Sunday'))
        .otherwise(pl.lit('Unknown'))
        .alias('day_name')
    ])
    
    # Group by day name
    day_counts = events_df.group_by('day_name').count()
    
    # Sort by day of week
    day_order = {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 
                'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6}
    day_counts = day_counts.sort(pl.col('day_name').map_elements(lambda x: day_order.get(x, 7)))
    
    print("Events by day of week:")
    print(day_counts)
    
    # Calculate duration statistics in minutes
    try:
        duration_stats = events_df.select([
            (pl.mean('duration').dt.total_minutes()).alias('avg_minutes'),
            (pl.min('duration').dt.total_minutes()).alias('min_minutes'),
            (pl.max('duration').dt.total_minutes()).alias('max_minutes')
        ])
        
        print("\nDuration statistics (minutes):")
        print(duration_stats)
    except Exception as e:
        print(f"Error calculating duration statistics: {e}")
        # Fallback to simpler statistics if needed
        print(events_df.select([
            pl.mean('duration').alias('avg_duration'),
            pl.min('duration').alias('min_duration'),
            pl.max('duration').alias('max_duration')
        ]))
else:
    print("No events found.")

Current schema:
Schema({'summary': String, 'start': Datetime(time_unit='us', time_zone='UTC'), 'end': Datetime(time_unit='us', time_zone='UTC'), 'created': String, 'organizer': String, 'status': String, 'link': String, 'duration': Duration(time_unit='us'), 'day_of_week': Int8})
Total events: 86
Average duration: 3:55:00
Events by day of week:
shape: (5, 2)
┌───────────┬───────┐
│ day_name  ┆ count │
│ ---       ┆ ---   │
│ str       ┆ u32   │
╞═══════════╪═══════╡
│ Wednesday ┆ 1     │
│ Thursday  ┆ 1     │
│ Friday    ┆ 2     │
│ Sunday    ┆ 2     │
│ Unknown   ┆ 80    │
└───────────┴───────┘

Duration statistics (minutes):
shape: (1, 3)
┌─────────────┬─────────────┬─────────────┐
│ avg_minutes ┆ min_minutes ┆ max_minutes │
│ ---         ┆ ---         ┆ ---         │
│ i64         ┆ i64         ┆ i64         │
╞═════════════╪═════════════╪═════════════╡
│ 235         ┆ 120         ┆ 600         │
└─────────────┴─────────────┴─────────────┘


  day_counts = events_df.group_by('day_name').count()
  day_counts = day_counts.sort(pl.col('day_name').map_elements(lambda x: day_order.get(x, 7)))
