## Get Baseline Logic to Copy Gmail Messages to AWS S3

In [1]:
import boto3
import pickle


def get_creds(secret_id='gmail_token', region_name='us-east-1'):
    """Load the pickled Gmail credentials object from AWS Secrets Manager.

    Args:
        secret_id: Identifier of the secret whose binary payload holds the
            pickled credentials. Defaults to ``'gmail_token'``.
        region_name: AWS region the Secrets Manager client connects to.
            Defaults to ``'us-east-1'``.

    Returns:
        The object produced by unpickling the secret's ``SecretBinary``
        payload.

    Raises:
        KeyError: If the response carries no ``SecretBinary`` field.
    """
    sm_client = boto3.client('secretsmanager', region_name=region_name)
    secret_token = sm_client.get_secret_value(SecretId=secret_id)['SecretBinary']
    # SECURITY: pickle.loads runs arbitrary code embedded in the payload.
    # This is only safe while write access to the secret is limited to
    # trusted principals.
    return pickle.loads(secret_token)

In [2]:
from googleapiclient.discovery import build

# Pull the stored credentials, then build the Gmail v1 API client with them.
creds = get_creds()
service = build(serviceName='gmail', version='v1', credentials=creds)

In [3]:
# Handle to the Gmail API `users` resource; the message listing further down
# is issued through it.
users = service.users()

In [4]:
def get_job_details(job_name, table_name='gmail_jobs'):
    """Fetch the stored record for a job from DynamoDB.

    Args:
        job_name: Value of the ``job_id`` key to look up.
        table_name: DynamoDB table to read from. Defaults to
            ``'gmail_jobs'``.

    Returns:
        The stored item as a dict. Numeric attributes come back as
        ``decimal.Decimal``.

    Raises:
        KeyError: If the table holds no item with that ``job_id``.
    """
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table(table_name)
    response = table.get_item(Key={'job_id': job_name})
    # get_item leaves 'Item' out of the response when the key is absent;
    # report which job was missing rather than a bare KeyError('Item').
    if 'Item' not in response:
        raise KeyError(
            f'No job with job_id {job_name!r} found in table {table_name!r}'
        )
    return response['Item']

In [5]:
# Load the stored record (settings plus last-run bookmark) for the
# 'gmail_ingest' job and display it.
job_details = get_job_details('gmail_ingest')
job_details

{'job_description': 'Ingest data from gmail to s3',
 'is_active': 'Y',
 'job_id': 'gmail_ingest',
 'baseline_days': Decimal('45'),
 'job_run_bookmark_details': {'last_run_max_message_id': '17e144500cc9efec',
  'last_run_start_time_epoch': Decimal('1648492200'),
  'last_run_end_time_epoch': Decimal('1648578600')}}

In [6]:
# Look-back length in days for the baseline window; DynamoDB returns it as a
# Decimal, so it is cast to int where it is used.
baseline_days = job_details['baseline_days']
baseline_days

Decimal('45')

In [7]:
import datetime

In [8]:
# The baseline window opens `baseline_days` days before today's local date.
start_time = datetime.date.today() - datetime.timedelta(days=int(baseline_days))
start_time

datetime.date(2022, 3, 30)

In [9]:
# The window closes exactly one day after it opens.
end_time = start_time + datetime.timedelta(days=1)
end_time

datetime.date(2022, 3, 31)

In [10]:
import time

In [11]:
# Window start as Unix epoch seconds: a date's timetuple is midnight, and
# time.mktime interprets it in the machine's local time zone.
start_time_epoch = int(time.mktime(start_time.timetuple()))
start_time_epoch

1648578600

In [12]:
# Window end as Unix epoch seconds (local midnight of `end_time`).
end_time_epoch = int(time.mktime(end_time.timetuple()))
end_time_epoch

1648665000

In [13]:
# Seed record for the 'gmail_ingest' job, written to DynamoDB further down.
# Note that this rebinds `job_details`, replacing the item fetched earlier.
job_details = dict(
    job_id='gmail_ingest',
    job_description='Ingest data from gmail to s3',
    is_active='Y',
    baseline_days=45,
    # Bookmark of the previous run; the two *_epoch values are Unix seconds.
    job_run_bookmark_details=dict(
        last_run_max_message_id='17e144500cc9efec',
        last_run_start_time_epoch=1648492200,
        last_run_end_time_epoch=1648578600,
    ),
)

In [14]:
# High-level DynamoDB resource; no region or credentials are passed here, so
# boto3's default configuration applies.
dynamodb = boto3.resource('dynamodb')

In [15]:
# Handle to the table that stores one item per job, keyed by `job_id`.
table = dynamodb.Table('gmail_jobs')

In [16]:
# Write the job record; put_item replaces any existing item that has the
# same primary key.
table.put_item(Item=job_details)

{'ResponseMetadata': {'RequestId': 'RA3JK774KNTGNEU3252NPTUNM3VV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Sat, 14 May 2022 08:01:25 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'RA3JK774KNTGNEU3252NPTUNM3VV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [17]:
# Read the record back to confirm the write; the full response (item plus
# response metadata) is displayed.
table.get_item(Key={'job_id': 'gmail_ingest'})

{'Item': {'job_description': 'Ingest data from gmail to s3',
  'is_active': 'Y',
  'job_id': 'gmail_ingest',
  'baseline_days': Decimal('45'),
  'job_run_bookmark_details': {'last_run_max_message_id': '17e144500cc9efec',
   'last_run_start_time_epoch': Decimal('1648492200'),
   'last_run_end_time_epoch': Decimal('1648578600')}},
 'ResponseMetadata': {'RequestId': '379PDCHLUIVP3496LHJBIIEVQBVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Sat, 14 May 2022 08:01:25 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '322',
   'connection': 'keep-alive',
   'x-amzn-requestid': '379PDCHLUIVP3496LHJBIIEVQBVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2826147811'},
  'RetryAttempts': 0}}

In [18]:
# Pull only the last-run bookmark out of the stored job record.
job_run_bookmark_details = get_job_details('gmail_ingest')['job_run_bookmark_details']

In [19]:
job_run_bookmark_details

{'last_run_max_message_id': '17e144500cc9efec',
 'last_run_start_time_epoch': Decimal('1648492200'),
 'last_run_end_time_epoch': Decimal('1648578600')}

In [20]:
# DynamoDB returns numbers as Decimal; convert to a plain int epoch.
last_run_start_time_epoch = int(job_run_bookmark_details['last_run_start_time_epoch'])

In [21]:
# Same Decimal-to-int conversion for the end of the previous run's window.
last_run_end_time_epoch = int(job_run_bookmark_details['last_run_end_time_epoch'])

In [22]:
import datetime

In [23]:
# Scratch check: subtracting two dates yields a timedelta whose `.days` can
# be inspected.
datetime.datetime.now().date() - datetime.date(2022, 5, 1)

datetime.timedelta(days=13)

In [24]:
# Scratch check: view the bookmark's end epoch as a local-time datetime.
datetime.datetime.fromtimestamp(last_run_end_time_epoch)

datetime.datetime(2022, 3, 30, 0, 0)

In [25]:
# Whole-day gap between today and the local date on which the previous run's
# window ended.
last_run_diff = datetime.date.today() - datetime.date.fromtimestamp(last_run_end_time_epoch)

In [26]:
last_run_diff.days

45

In [27]:
# Pick the epoch window for this run, resuming from the previous run's
# bookmark. Both branches start where the last run ended; starting the
# caught-up branch from the baseline `end_time_epoch` computed in an earlier
# cell would re-read the whole baseline period (or fail if that cell had not
# been run).
start_time_epoch = last_run_end_time_epoch
if last_run_diff.days > 1:
    # Still catching up: advance by one calendar day per run.
    end_time = datetime.datetime.fromtimestamp(start_time_epoch).date() + datetime.timedelta(days=1)
    end_time_epoch = int(time.mktime(end_time.timetuple()))
else:
    # Caught up: read everything from the bookmark up to the present.
    # time.time() already is the current epoch, so no round trip through a
    # naive local timetuple is needed.
    end_time_epoch = int(time.time())

In [28]:
start_time_epoch

1648578600

In [29]:
end_time_epoch

1648665000

In [30]:
# Collect the id/threadId stubs of every message inside the epoch window.
# messages.list returns one page at a time and leaves the 'messages' key out
# of the response when nothing matches, so follow the pages with list_next
# and fall back to an empty list instead of indexing the key directly.
message_ids = []
request = users.messages().list(
    userId='me',
    q=f'after:{start_time_epoch} before:{end_time_epoch}',
)
while request is not None:
    response = request.execute()
    message_ids.extend(response.get('messages', []))
    # list_next returns None once there is no further page.
    request = users.messages().list_next(request, response)

In [31]:
message_ids

[{'id': '17fdc0b349836c2f', 'threadId': '17fdc0b349836c2f'},
 {'id': '17fdc0139bb6ef0e', 'threadId': '17fdc0139bb6ef0e'},
 {'id': '17fdbf69c6a845ea', 'threadId': '17fdbf69c6a845ea'},
 {'id': '17fdbd3a5bbc0550', 'threadId': '17fdbd3a5bbc0550'},
 {'id': '17fdbd0a3a385f4c', 'threadId': '17fdbd0a3a385f4c'},
 {'id': '17fdbc81ba38885c', 'threadId': '17fdbc81ba38885c'},
 {'id': '17fdbb8fdc3029e0', 'threadId': '17fdbb8fdc3029e0'},
 {'id': '17fdbb202cec0aec', 'threadId': '17fdbb202cec0aec'},
 {'id': '17fdba6487207633', 'threadId': '17fdba6487207633'},
 {'id': '17fdb9f619d8c36d', 'threadId': '17fdb9f619d8c36d'},
 {'id': '17fdb9ad57f34dbe', 'threadId': '17fdb9ad57f34dbe'},
 {'id': '17fdb8185604211e', 'threadId': '17fdb8185604211e'},
 {'id': '17fdb75066750fce', 'threadId': '17fdb75066750fce'},
 {'id': '17fdb64366fad0c4', 'threadId': '17fdb64366fad0c4'},
 {'id': '17fdb4f1cd3f25a7', 'threadId': '17fdb4f1cd3f25a7'},
 {'id': '17fdb4c53bf4c77b', 'threadId': '17fdb4c53bf4c77b'},
 {'id': '17fdb383418f4b9