# Notebook for updating dataset timestamps
Amazon Fraud Detector only retains 18 months of data for ingested events. This notebook provides functions to shift dataset timestamps to the most recent months. 

In [3]:
import pandas as pd
from datetime import datetime, timezone, timedelta
import glob
import zipfile
import os

def update_timestamp(file, max_span_days=547):
    """Shift the timestamps of a CSV dataset so the newest one is one day old.

    The CSV is read with every column as text, ``EVENT_TIMESTAMP`` (and
    ``LABEL_TIMESTAMP`` when the column exists) are moved forward by one
    common offset, and the file is overwritten in place. Relative distances
    between timestamps are preserved.

    Timestamps are normalised to UTC before shifting because they are written
    back with a literal ``Z`` (UTC) suffix; values without an offset are
    treated as UTC.

    Args:
        file: Path to the CSV file; it must contain an ``EVENT_TIMESTAMP``
            column. The file is rewritten in place.
        max_span_days: Exclusive upper bound, in days, on the distance
            between the oldest and newest timestamp. The default of 547 days
            is roughly the 18-month retention window.

    Returns:
        The string ``'SUCCESS'``.

    Raises:
        KeyError: If the file has no ``EVENT_TIMESTAMP`` column.
        ValueError: If the file contains no timestamps at all, or if the
            timestamps span ``max_span_days`` or more.
    """
    # dtype='object' keeps every non-timestamp column exactly as written
    # (e.g. leading zeros); only empty cells are treated as missing.
    df = pd.read_csv(file,
                     dtype='object',
                     keep_default_na=False,
                     na_values='')

    timestamp_columns = ['EVENT_TIMESTAMP']
    if 'LABEL_TIMESTAMP' in df.columns:
        timestamp_columns.append('LABEL_TIMESTAMP')

    bounds = []
    for column in timestamp_columns:
        # utc=True converts offset-aware values to UTC so that the 'Z'
        # suffix written below is actually true.
        df[column] = pd.to_datetime(df[column], utc=True)
        # Series.min()/max() skip missing values, unlike the builtins.
        bounds.extend([df[column].min(), df[column].max()])

    # A column that is entirely empty reports NaT for both bounds.
    bounds = [ts for ts in bounds if ts is not pd.NaT]
    if not bounds:
        raise ValueError(f'{file}: no timestamps found')
    min_dt, max_dt = min(bounds), max(bounds)

    print('Orignal dates')
    print(min_dt, max_dt)

    # Explicit raise instead of assert: asserts are stripped under python -O.
    if max_dt - min_dt >= timedelta(days=max_span_days):
        raise ValueError(
            f'{file}: timestamps span {max_dt - min_dt}, '
            f'which is not less than {max_span_days} days'
        )

    # Offset that places the newest timestamp exactly one day in the past.
    time_diff = datetime.now(timezone.utc) - max_dt - timedelta(days=1)

    for column in timestamp_columns:
        df[column] = df[column] + time_diff

    print('Updated dates')
    for column in timestamp_columns:
        print(df[column].min(), df[column].max())

    # Sub-second precision introduced by the shift is dropped on output.
    for column in timestamp_columns:
        df[column] = df[column].dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    df.to_csv(file, index=False)
    return 'SUCCESS'



In [None]:
# Plain CSV datasets are rewritten in place.
for csv_name in (
    'registration_data_20K_full.csv',
    'registration_data_20K_minimum.csv',
    'transaction_data_100K_full.csv',
):
    update_timestamp(csv_name)

# The ATO dataset is stored zipped: extract it, update the CSV, re-create
# the archive from the updated CSV, then delete the extracted copy.
with zipfile.ZipFile("ato_data_800K_full.csv.zip", "r") as zip_ref:
    zip_ref.extractall(".")
update_timestamp('ato_data_800K_full.csv')
# The archive must be closed for its central directory to be written; the
# with-block guarantees that instead of relying on garbage collection.
with zipfile.ZipFile('ato_data_800K_full.csv.zip', mode='w') as zip_out:
    zip_out.write("ato_data_800K_full.csv", compress_type=zipfile.ZIP_DEFLATED)
os.remove('ato_data_800K_full.csv')

Orignal dates
2021-09-25 13:34:54+00:00 2022-09-25 18:20:17+00:00
Updated dates
2021-09-25 13:35:45.043296+00:00 2022-09-25 18:21:08.043296+00:00
Orignal dates
2021-09-25 13:07:17+00:00 2022-09-25 18:20:17+00:00
Updated dates
2021-09-25 13:08:08.209699+00:00 2022-09-25 18:21:08.209699+00:00
Orignal dates
2022-05-29 18:20:18+00:00 2022-09-25 18:20:18+00:00
Updated dates
2022-05-29 18:21:09.000268+00:00 2022-09-25 18:21:09.000268+00:00
2022-05-29 18:21:09.000268+00:00 2022-09-25 18:21:09.000268+00:00
Orignal dates
2022-03-28 03:59:24+00:00 2022-09-25 18:20:23+00:00
Updated dates
2022-03-28 04:00:14.662085+00:00 2022-09-25 03:59:32.662085+00:00
2022-03-31 13:16:27.662085+00:00 2022-09-25 18:21:13.662085+00:00
