# GFQL Temporal Predicates Demo

This notebook demonstrates how to use temporal (datetime) predicates in PyGraphistry's GFQL for filtering graph data based on dates and times.

In [None]:
from datetime import datetime, date, time, timedelta
from time import perf_counter

import numpy as np
import pandas as pd
import pytz

import graphistry
from graphistry import n, e_forward
from graphistry.compute import (
    gt, lt, ge, le, eq, ne, between, is_in,
    DateTimeValue, DateValue, TimeValue
)

## Setup: Create Sample Data

Let's create a sample dataset representing a transaction network with temporal data.

In [None]:
# Generate sample transaction data.
# The global NumPy seed is fixed, and the random draws below happen in a
# fixed order, so every run produces the same accounts and transactions.
np.random.seed(42)

# Create nodes (accounts): one row per account, created weekly from 2020-01-01,
# with a last-active date spread over roughly the following year of 2023.
n_accounts = 100
accounts_df = pd.DataFrame({
    'account_id': [f'ACC_{i:04d}' for i in range(n_accounts)],
    'account_type': np.random.choice(['checking', 'savings', 'business'], n_accounts),
    'created_date': pd.date_range('2020-01-01', periods=n_accounts, freq='W'),
    'last_active': pd.date_range('2023-01-01', periods=n_accounts, freq='D') +
                   pd.to_timedelta(np.random.randint(0, 365, n_accounts), unit='D')
})

# Create edges (transactions): source/target are sampled from the account ids.
# Lowercase 'h' is used for the hourly frequency and timedelta unit because the
# uppercase 'H' alias is deprecated in recent pandas releases.
n_transactions = 500
transactions_df = pd.DataFrame({
    'transaction_id': [f'TXN_{i:06d}' for i in range(n_transactions)],
    'source': np.random.choice(accounts_df['account_id'], n_transactions),
    'target': np.random.choice(accounts_df['account_id'], n_transactions),
    'amount': np.random.exponential(100, n_transactions).round(2),
    'timestamp': pd.date_range('2023-01-01', periods=n_transactions, freq='h') +
                 pd.to_timedelta(np.random.randint(0, 8760, n_transactions), unit='h'),
    # Independent time-of-day column (datetime.time objects), used by the
    # time-of-day filtering examples.
    'transaction_time': [time(np.random.randint(0, 24), np.random.randint(0, 60))
                        for _ in range(n_transactions)],
    'transaction_type': np.random.choice(['transfer', 'payment', 'deposit'], n_transactions)
})

print(f"Created {len(accounts_df)} accounts and {len(transactions_df)} transactions")
print(f"\nTransaction date range: {transactions_df['timestamp'].min()} to {transactions_df['timestamp'].max()}")

In [None]:
# Bind the transactions as edges (source -> target) and the accounts as nodes
# keyed by account_id, then report the graph size.
g = (
    graphistry
    .edges(transactions_df, 'source', 'target')
    .nodes(accounts_df, 'account_id')
)
print(f"Graph: {len(g._nodes)} nodes, {len(g._edges)} edges")

## 1. Basic DateTime Filtering

Filter transactions based on datetime values.

In [None]:
# Filter transactions after a specific date.
# Edge attributes are matched with an edge operator (e_forward); the node
# matcher n() only filters nodes and does not take an edge_match argument.
cutoff_date = datetime(2023, 7, 1)
recent_transactions = g.chain([
    e_forward(edge_match={"timestamp": gt(pd.Timestamp(cutoff_date))})
])

print(f"Transactions after {cutoff_date}: {len(recent_transactions._edges)}")
recent_transactions._edges[['transaction_id', 'timestamp', 'amount']].head()

In [None]:
# Filter transactions in a specific month.
# The timestamp predicate is applied to edges, so it goes on e_forward rather
# than on the node matcher n().
march_2023 = g.chain([
    e_forward(edge_match={
        "timestamp": between(
            datetime(2023, 3, 1),
            datetime(2023, 3, 31, 23, 59, 59)
        )
    })
])

print(f"Transactions in March 2023: {len(march_2023._edges)}")
march_2023._edges[['transaction_id', 'timestamp', 'amount']].head()

## 2. Date-Only Filtering

Filter based on dates, ignoring time components.

In [None]:
# Keep only the accounts whose created_date is on or after 2021-01-01
created_cutoff = date(2021, 1, 1)
created_filter = {"created_date": ge(created_cutoff)}
new_accounts = g.chain([n(filter_dict=created_filter)])

print(f"Accounts created after 2021: {len(new_accounts._nodes)}")
new_accounts._nodes[['account_id', 'created_date', 'account_type']].head()

In [None]:
# Find accounts active in the final 90 days covered by the data.
# The sample data is synthetic and ends at a fixed date, so the window is
# anchored to the newest last_active value instead of datetime.now(); a
# wall-clock anchor moves past the end of the data and makes the result
# depend on the day the notebook is run.
latest_activity = g._nodes['last_active'].max()
ninety_days_ago = latest_activity - timedelta(days=90)
active_accounts = g.chain([
    n(filter_dict={
        "last_active": gt(pd.Timestamp(ninety_days_ago))
    })
])

print(f"Recently active accounts: {len(active_accounts._nodes)}")

## 3. Time-of-Day Filtering

Filter transactions based on time of day.

In [None]:
# Find transactions during business hours (9 AM - 5 PM).
# transaction_time is an edge column, so the predicate is passed to e_forward;
# the node matcher n() does not accept edge_match.
business_hours = g.chain([
    e_forward(edge_match={
        "transaction_time": between(
            time(9, 0, 0),
            time(17, 0, 0)
        )
    })
])

print(f"Business hour transactions: {len(business_hours._edges)}")
print(f"Percentage of total: {len(business_hours._edges) / len(g._edges) * 100:.1f}%")

In [None]:
# Find transactions at specific times (e.g., on the hour).
# Build the 24 exact HH:00:00 values and match the edge column against them
# with an edge operator (n() cannot filter edges).
on_the_hour_times = [time(h, 0, 0) for h in range(24)]
on_hour_transactions = g.chain([
    e_forward(edge_match={
        "transaction_time": is_in(on_the_hour_times)
    })
])

print(f"Transactions on the hour: {len(on_hour_transactions._edges)}")

## 4. Complex Temporal Queries

Combine temporal predicates with other filters for complex queries.

In [None]:
# Find large transactions (>$500) in Q4 2023.
# Both conditions are edge-level predicates, so they are combined in a single
# e_forward edge_match; n() only accepts node filters.
q4_large_transactions = g.chain([
    e_forward(edge_match={
        "timestamp": between(
            datetime(2023, 10, 1),
            datetime(2023, 12, 31, 23, 59, 59)
        ),
        "amount": gt(500)
    })
])

print(f"Large Q4 2023 transactions: {len(q4_large_transactions._edges)}")
# Guard the aggregates: sum/mean over an empty selection are not informative.
if len(q4_large_transactions._edges) > 0:
    print(f"Total value: ${q4_large_transactions._edges['amount'].sum():,.2f}")
    print(f"Average: ${q4_large_transactions._edges['amount'].mean():,.2f}")

In [None]:
# Multi-hop query: find accounts that received money in the final 30 days of
# the data and then sent money on to business accounts.
#
# The window is anchored to the newest transaction timestamp rather than
# datetime.now(): the sample data ends at a fixed date, so a wall-clock window
# would match nothing and would vary from run to run.
latest_transaction = g._edges['timestamp'].max()
thirty_days_ago = latest_transaction - timedelta(days=30)

# A chain alternates node and edge steps. Edge predicates belong on e_forward;
# n() filters nodes only (it takes neither edge_match nor direction).
money_flow = g.chain([
    # Start from any account
    n(),
    # Recent incoming transactions
    e_forward(edge_match={
        "timestamp": gt(pd.Timestamp(thirty_days_ago))
    }),
    # Receiving accounts
    n(),
    # Recent outgoing transactions from these accounts
    e_forward(edge_match={
        "timestamp": gt(pd.Timestamp(thirty_days_ago))
    }),
    # To business accounts
    n(filter_dict={"account_type": eq("business")})
])

# The result holds every node on a matching path (senders, intermediaries and
# final recipients), so the count is labelled accordingly.
print(f"Money flow pattern found: {len(money_flow._nodes)} accounts on matching paths")

## 5. Using Temporal Value Classes

Use explicit temporal value classes for more control.

In [None]:
# Create temporal values with specific properties.
# date_value and time_value are constructed only to show the available
# classes; just dt_value is used in the query below.
dt_value = DateTimeValue("2023-06-15T14:30:00", "UTC")
date_value = DateValue("2023-06-15")
time_value = TimeValue("14:30:00")

# Use in predicates. The timestamp column lives on edges, so the predicate is
# passed to e_forward (n() does not accept edge_match).
# NOTE(review): dt_value is declared as UTC while the 'timestamp' column is
# timezone-naive -- confirm GFQL reconciles the two as intended.
specific_datetime = g.chain([
    e_forward(edge_match={"timestamp": gt(dt_value)})
])

print(f"Transactions after {dt_value.value}: {len(specific_datetime._edges)}")

## 6. Timezone-Aware Filtering

Handle timezone-aware datetime comparisons.

In [None]:
# Add timezone info to our data for this example.
# Work on a copy so the original transactions_df stays unchanged for the
# other cells.
transactions_df_tz = transactions_df.copy()
transactions_df_tz['timestamp_utc'] = pd.to_datetime(transactions_df_tz['timestamp']).dt.tz_localize('UTC')
transactions_df_tz['timestamp_eastern'] = transactions_df_tz['timestamp_utc'].dt.tz_convert('US/Eastern')

g_tz = graphistry.edges(transactions_df_tz, 'source', 'target')

# Filter using Eastern time: localize() attaches the zone to the naive
# wall-clock value, giving a timezone-aware cutoff.
eastern = pytz.timezone('US/Eastern')
eastern_cutoff = eastern.localize(datetime(2023, 7, 1, 9, 0, 0))  # 9 AM Eastern

# The predicate targets an edge column, so it is applied with e_forward;
# the node matcher n() does not accept edge_match.
eastern_morning = g_tz.chain([
    e_forward(edge_match={"timestamp_eastern": gt(pd.Timestamp(eastern_cutoff))})
])

print(f"Transactions after 9 AM Eastern on July 1, 2023: {len(eastern_morning._edges)}")

## 7. Performance Tips

Compare the running time of the same temporal filter when the predicate is given a `pd.Timestamp` versus a plain `datetime` object.

In [None]:
# Measure performance of different temporal operations.
# Timing uses perf_counter (imported in the first cell) instead of
# `import time`: importing the module here would rebind the name `time`, which
# the notebook already uses for datetime.time, and break the time-of-day cells
# if they are run again afterwards.

# Method 1: Using pandas Timestamp (recommended)
start = perf_counter()
result1 = g.chain([
    e_forward(edge_match={"timestamp": gt(pd.Timestamp("2023-07-01"))})
])
time1 = perf_counter() - start

# Method 2: Using datetime object
start = perf_counter()
result2 = g.chain([
    e_forward(edge_match={"timestamp": gt(datetime(2023, 7, 1))})
])
time2 = perf_counter() - start

# Single-run wall-clock timings: indicative only, not a benchmark.
print("Performance comparison:")
print(f"  pd.Timestamp: {time1:.4f} seconds")
print(f"  datetime:     {time2:.4f} seconds")
print(f"  Results match: {len(result1._edges) == len(result2._edges)}")

## 8. Visualization with Temporal Filtering

Visualize subgraphs filtered by temporal predicates.

In [None]:
# Visualize recent high-value transactions.
# "Recent" means the final 7 days covered by the data: the sample data ends at
# a fixed date, so a window measured from datetime.now() would be empty and
# would change from run to run.
latest_transaction = g._edges['timestamp'].max()
recent_cutoff = latest_transaction - timedelta(days=7)

# Both predicates are on edge columns, so they go on e_forward; n() cannot
# filter edges.
recent_high_value = g.chain([
    e_forward(edge_match={
        "timestamp": gt(pd.Timestamp(recent_cutoff)),
        "amount": gt(200)
    })
])

print("Recent high-value transaction network:")
print(f"  Nodes: {len(recent_high_value._nodes)}")
print(f"  Edges: {len(recent_high_value._edges)}")

# Uncomment to visualize (requires Graphistry API key)
# recent_high_value.plot()

## Summary

This notebook demonstrated:

1. **DateTime filtering** with `gt`, `lt`, `between` predicates
2. **Date-only filtering** for day-level granularity
3. **Time-of-day filtering** for patterns like business hours
4. **Complex queries** combining temporal and non-temporal predicates
5. **Multi-hop queries** with temporal constraints
6. **Temporal value classes** for explicit control
7. **Timezone-aware** filtering
8. **Performance considerations**
9. **Visualization** of subgraphs selected by temporal predicates

Temporal predicates in GFQL provide a powerful way to analyze time-series aspects of graph data, enabling complex temporal queries while maintaining the expressiveness of graph traversals.