In [1]:
!pip install requests pandas pytz

import requests
import pandas as pd
from datetime import datetime, timedelta
import pytz

# Airtable setup
import os
from urllib.parse import quote

# Read the personal access token from the AIRTABLE_API_KEY environment
# variable so a real secret never has to be pasted into the notebook.
# The placeholder is kept as the fallback so the cell still runs unchanged.
API_KEY = os.environ.get("AIRTABLE_API_KEY", "your_airtable_api_here")
BASE_ID = "app59RhWQEcM6gMLt"
TABLE_NAME = "Imported table"
# The table name is a URL path segment and contains a space, so it is
# percent-encoded explicitly instead of relying on the HTTP client.
URL = f"https://api.airtable.com/v0/{BASE_ID}/{quote(TABLE_NAME, safe='')}"
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# --- Step 1: Fetch records ---
def get_records():
    """Fetch every record of the configured Airtable table.

    Requests pages of 100 records from ``URL`` and keeps following the
    ``offset`` cursor returned by the API until no further page is announced.

    Returns:
        list: The raw record dicts (``id``, ``fields``, ...) of all pages.

    Raises:
        requests.HTTPError: If Airtable answers with a 4xx/5xx status
            (bad token, wrong base/table, rate limit, ...). Previously such
            responses were silently treated as "no records".
        requests.RequestException: On connection problems or timeout.
    """
    records = []
    params = {"pageSize": 100}
    while True:
        response = requests.get(URL, headers=HEADERS, params=params, timeout=30)
        # Fail loudly: an error body has no "records" key and would otherwise
        # be indistinguishable from an empty table.
        response.raise_for_status()
        page = response.json()
        records.extend(page.get("records", []))
        offset = page.get("offset")
        if not offset:
            return records
        # Cursor for the next page.
        params["offset"] = offset

# --- Step 2: Flag and update ---
def _days_since_parsed(parsed_str, today):
    """Return whole days between the UTC day of ``parsed_str`` and ``today``.

    Returns ``None`` when ``parsed_str`` is missing or is not an ISO-8601
    timestamp. Timestamps without a UTC offset are assumed to be UTC.
    """
    if not parsed_str:
        return None
    try:
        # fromisoformat() on older Python versions does not accept a trailing "Z".
        parsed_dt = datetime.fromisoformat(parsed_str.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        return None
    if parsed_dt.utcoffset() is None:
        # Naive value: subtracting it from an aware datetime would raise.
        parsed_dt = parsed_dt.replace(tzinfo=pytz.utc)
    parsed_day = parsed_dt.astimezone(pytz.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    return (today - parsed_day).days


def flag_and_update_records(records, days_threshold=30):
    """Set the "Needs Re-Parse" field of every record and report stale ones.

    A record needs re-parsing when its "Last Parsed Time" is missing,
    unparseable, or more than ``days_threshold`` days (UTC calendar days)
    in the past.

    Args:
        records: Iterable of Airtable record dicts, each with an ``id`` and an
            optional ``fields`` mapping.
        days_threshold: Maximum accepted age in days before a record is flagged.

    Returns:
        pandas.DataFrame: One row per flagged record with the columns
        "Full Name", "LinkedIn Profile", "Last Parsed Time" and
        "Days Since Parsed"; empty if nothing is flagged.

    Raises:
        requests.HTTPError: If Airtable rejects an update request.
    """
    max_batch = 10  # Airtable accepts at most 10 records per PATCH request.
    today = datetime.now(pytz.utc).replace(hour=0, minute=0, second=0, microsecond=0)

    flagged = []
    updates = []
    for record in records:
        fields = record.get("fields", {})
        parsed_str = fields.get("Last Parsed Time")
        days_ago = _days_since_parsed(parsed_str, today)
        needs_reparse = days_ago is None or days_ago > days_threshold

        updates.append({
            "id": record["id"],
            "fields": {"Needs Re-Parse": "Yes" if needs_reparse else "No"},
        })

        if needs_reparse:
            flagged.append({
                "Full Name": fields.get("Full Name", "Unknown"),
                "LinkedIn Profile": fields.get("LinkedIn Profile", "") or "Missing",
                "Last Parsed Time": parsed_str or "Never",
                "Days Since Parsed": days_ago if days_ago is not None else "N/A",
            })

    # Send the updates in batches instead of one request per record; this
    # keeps the request count well below Airtable's per-base rate limit.
    for start in range(0, len(updates), max_batch):
        response = requests.patch(
            URL,
            headers=HEADERS,
            json={"records": updates[start:start + max_batch]},
            timeout=30,
        )
        # Do not silently continue when an update was rejected.
        response.raise_for_status()

    return pd.DataFrame(flagged)

# --- Step 3: Run full update ---
records = get_records()
df_flagged = flag_and_update_records(records)

if not records:
    # Zero fetched records means an empty table or a failed request, not
    # "everything is fresh", so the success message must not be printed.
    print("No records were fetched from Airtable; check API_KEY, BASE_ID and TABLE_NAME.")
elif df_flagged.empty:
    print("All records are recently parsed!")
else:
    print(f"⚠️ {len(df_flagged)} records need re-parsing.")
    from IPython.display import display
    display(df_flagged)
    # Persist the flagged rows so they can be handed to the re-parse step.
    df_flagged.to_csv("records_to_reparse.csv", index=False)
    print("CSV saved as: records_to_reparse.csv")


All records are recently parsed!


In [None]:
# Test demo for date change of Anshu Arora

!pip install requests pandas pytz python-dateutil

import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import pytz

# Airtable setup
import os
from urllib.parse import quote

# SECURITY: a personal access token used to be hard-coded on this line.
# Anything committed to a notebook must be treated as leaked: revoke that
# token in Airtable and supply a new one through the AIRTABLE_API_KEY
# environment variable instead of pasting it here.
API_KEY = os.environ.get("AIRTABLE_API_KEY", "your_airtable_api_here")
BASE_ID = "app59RhWQEcM6gMLt"
TABLE_NAME = "Imported table"
# The table name is a URL path segment and contains a space, so it is
# percent-encoded explicitly instead of relying on the HTTP client.
URL = f"https://api.airtable.com/v0/{BASE_ID}/{quote(TABLE_NAME, safe='')}"
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# --- Get Records (first 10 only for speed) ---
def get_records(limit=10):
    """Fetch only the first page of the table, with at most ``limit`` records.

    NOTE: this redefines (and therefore shadows) the paginating
    ``get_records()`` from the earlier cell once this cell has been run.

    Args:
        limit: Sent to Airtable as ``pageSize``. Airtable caps the page size
            at 100; a larger value is rejected by the API and now surfaces as
            an ``HTTPError`` instead of an empty result.

    Returns:
        list: The raw record dicts of the first page (possibly empty).

    Raises:
        requests.HTTPError: If Airtable answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    params = {"pageSize": limit}
    response = requests.get(URL, headers=HEADERS, params=params, timeout=30)
    # An error body has no "records" key and would otherwise look like an
    # empty table.
    response.raise_for_status()
    return response.json().get("records", [])

# --- Parse & Update ---
def test_and_update(records, days_threshold=30):
    """Update "Needs Re-Parse" for each record and return a result table.

    The "Last Parsed Time" field is parsed with ``dateutil``, so both ISO
    timestamps and human-readable strings are accepted. Values without
    timezone information are assumed to be UTC.

    Args:
        records: Iterable of Airtable record dicts, each with an ``id`` and an
            optional ``fields`` mapping.
        days_threshold: Maximum accepted age in days before a record is flagged.

    Returns:
        pandas.DataFrame: One row per input record with the columns
        "Full Name", "Parsed Time", "Days Since" and "Needs Re-Parse".

    Raises:
        requests.HTTPError: If Airtable rejects an update request.
    """
    flagged = []
    now = datetime.now(pytz.utc)

    for record in records:
        fields = record.get("fields", {})
        name = fields.get("Full Name", "Unknown")
        raw_time = fields.get("Last Parsed Time")
        record_id = record["id"]

        # Parse human-readable date like "5/4/2024 5:00pm"
        days_ago = None
        if raw_time:
            try:
                parsed_dt = parser.parse(raw_time)
            except (TypeError, ValueError, OverflowError):
                # Unparseable value: treat like a record that was never parsed.
                parsed_dt = None
            if parsed_dt is not None:
                if parsed_dt.utcoffset() is None:
                    # Naive result (no offset in the string). Subtracting it
                    # from the aware `now` would raise TypeError, which used
                    # to be swallowed and flagged every such record.
                    parsed_dt = parsed_dt.replace(tzinfo=pytz.utc)
                days_ago = (now - parsed_dt).days

        needs_reparse = days_ago is None or days_ago > days_threshold
        result_str = "Yes" if needs_reparse else "No"

        # Airtable update
        patch_payload = {
            "records": [
                {
                    "id": record_id,
                    "fields": {
                        "Needs Re-Parse": result_str
                    }
                }
            ]
        }
        response = requests.patch(URL, headers=HEADERS, json=patch_payload, timeout=30)
        # Do not report a result for an update that Airtable rejected.
        response.raise_for_status()

        flagged.append({
            "Full Name": name,
            "Parsed Time": raw_time,
            "Days Since": days_ago,
            "Needs Re-Parse": result_str
        })

    return pd.DataFrame(flagged)

# --- Run ---
records = get_records(limit=10)
df_results = test_and_update(records)

# Report the number of records actually fetched; the table may hold fewer
# than the requested 10.
print(f"Test results for first {len(records)} records:")
from IPython.display import display
display(df_results)


🧪 Test results for first 10 records:


Unnamed: 0,Full Name,Parsed Time,Days Since,Needs Re-Parse
0,Anshu Arora,2023-05-05T00:33:00.000Z,731,Yes
1,Max Kaiser,2025-05-05T00:33:50.150Z,0,No
2,Vania Revelina,2025-05-05T00:33:50.381Z,0,No
3,Kimberly Rodriguez,2025-05-05T00:33:50.611Z,0,No
4,Teva Groulx,2025-05-05T00:33:50.884Z,0,No
5,Kyle Knebel,2025-05-05T00:33:51.115Z,0,No
6,Jonathan Benet,2025-05-05T00:33:51.337Z,0,No
7,Ryan Michelson,2025-05-05T00:33:51.548Z,0,No
8,Jenna Vannett,2025-05-05T00:33:51.756Z,0,No
9,Kit Johnston,2025-05-05T00:33:51.990Z,0,No
