In [1]:
from wrds2pg import get_modified_str, wrds_to_csv
from datetime import datetime, timezone
from zoneinfo import ZoneInfo
import os
import time
from pathlib import Path

In [2]:
def modified_encode(modified):
    """Convert a "Last modified: MM/DD/YYYY HH:MM:SS" string to a POSIX timestamp.

    The date/time portion is interpreted as America/Chicago wall-clock time.

    Parameters
    ----------
    modified : str
        Text containing the prefix "Last modified: " followed by a date/time
        in ``%m/%d/%Y %H:%M:%S`` format.

    Returns
    -------
    float
        Seconds since the Unix epoch for that instant.

    Raises
    ------
    IndexError
        If the "Last modified: " prefix is absent.
    ValueError
        If the text after the prefix does not match the expected format.
    """
    stamp_text = modified.split("Last modified: ")[1]
    naive_dt = datetime.strptime(stamp_text, "%m/%d/%Y %H:%M:%S")
    # Attach the Chicago zone to the parsed wall-clock time before converting.
    chicago_dt = naive_dt.replace(tzinfo=ZoneInfo("America/Chicago"))
    return chicago_dt.astimezone(timezone.utc).timestamp()

In [3]:
def modified_decode(modified_time):
    """Format a POSIX timestamp as a "Last modified: ..." string.

    Parameters
    ----------
    modified_time : float
        Seconds since the Unix epoch.

    Returns
    -------
    str
        "Last modified: MM/DD/YYYY HH:MM:SS", expressed in America/Chicago
        wall-clock time.
    """
    chicago_dt = datetime.fromtimestamp(modified_time,
                                        tz=ZoneInfo("America/Chicago"))
    return chicago_dt.strftime("Last modified: %m/%d/%Y %H:%M:%S")

In [4]:
def get_modified_csv(file_name):
    """Return a file's modification time as a "Last modified: ..." string.

    Parameters
    ----------
    file_name : str or path-like
        Path of the file whose modification time is read.

    Returns
    -------
    str
        "Last modified: MM/DD/YYYY HH:MM:SS", expressed in America/Chicago
        wall-clock time.
    """
    mtime = os.path.getmtime(file_name)
    chicago_dt = datetime.fromtimestamp(mtime, tz=ZoneInfo("America/Chicago"))
    return chicago_dt.strftime("Last modified: %m/%d/%Y %H:%M:%S")

In [5]:
def set_modified_csv(file_name, last_modified):
    """Set a file's modification time from a "Last modified: ..." string.

    The access time is set to the current time; the modification time is the
    timestamp obtained by passing ``last_modified`` to ``modified_encode``.

    Parameters
    ----------
    file_name : str or path-like
        File whose timestamps are updated.
    last_modified : str
        String in the form accepted by ``modified_encode``.

    Returns
    -------
    bool
        Always True.
    """
    new_mtime = modified_encode(last_modified)
    # os.utime takes (access time, modification time).
    os.utime(file_name, times=(time.time(), new_mtime))
    return True

In [6]:
def wrds_update_csv(table_name, schema,
                    data_dir=os.getenv("CSV_DIR"),
                    host=os.getenv("PGHOST"), dbname=os.getenv("PGDATABASE"),
                    engine=None,
                    wrds_id=os.getenv("WRDS_ID"),
                    force=False, fix_missing=False, fix_cr=False,
                    drop="", keep="", obs="", rename="", alt_table_name=None,
                    encoding=None, col_types=None,
                    sas_schema=None, sas_encoding=None):
    """Download a WRDS table to a gzipped CSV when the local copy is stale.

    The local file lives at ``<data_dir>/<schema>/<alt_table_name>.csv.gz``.
    Its modification time is compared (as a "Last modified: ..." string) with
    the string returned by ``get_modified_str`` for the WRDS table. When they
    differ, the file is missing, or ``force`` is set, the table is fetched
    with ``wrds_to_csv`` and the file's modification time is then set from
    the WRDS string.

    Parameters
    ----------
    table_name : str
        Name of the table on WRDS.
    schema : str
        Schema name; also the sub-directory of ``data_dir`` used for output.
    data_dir : str or path-like
        Root directory for CSV files. The default is read from the
        ``CSV_DIR`` environment variable once, when the function is defined.
        Must not be None.
    host, dbname, engine, col_types :
        Accepted but not used by this function.
    wrds_id : str
        WRDS user ID passed to ``get_modified_str`` and ``wrds_to_csv``. The
        default is read from ``WRDS_ID`` when the function is defined.
    force : bool
        If True, download even when the local file appears up to date.
    fix_missing, fix_cr, drop, keep, obs, rename, sas_encoding :
        Passed through unchanged to ``wrds_to_csv``.
    alt_table_name : str, optional
        Base name for the output file; defaults to ``table_name``.
    encoding : str, optional
        Passed to ``wrds_to_csv``; defaults to "utf-8".
    sas_schema : str, optional
        Schema used when querying WRDS; defaults to ``schema``.

    Returns
    -------
    bool
        True if the table was downloaded, False otherwise.
    """
    if not alt_table_name:
        alt_table_name = table_name

    if not encoding:
        encoding = "utf-8"

    if not sas_schema:
        sas_schema = schema

    schema_dir = Path(data_dir, schema)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    schema_dir.mkdir(parents=True, exist_ok=True)

    # Append the extension rather than using with_suffix(), which would
    # replace everything after the last dot in a name that contains one.
    csv_file = schema_dir / (alt_table_name + ".csv.gz")
    modified = get_modified_str(table_name, sas_schema, wrds_id)

    if not modified:
        # NOTE(review): assumes an empty result means the modification time
        # could not be determined -- confirm against get_modified_str.
        # Without it there is nothing to compare with or to stamp the file
        # with afterwards, so bail out before downloading.
        print("Could not determine last-modified time for %s.%s."
              % (schema, table_name))
        return False

    # A missing file is never up to date, whatever `modified` contains.
    up_to_date = csv_file.exists() and get_modified_csv(csv_file) == modified
    if up_to_date and not force:
        print(schema + "." + table_name + " already up to date")
        return False

    if force:
        print("Forcing update based on user request.")
    else:
        print("Updated %s.%s is available." % (schema, table_name))
        print("Getting from WRDS.\n")

    wrds_to_csv(table_name=table_name,
                schema=schema,
                csv_file=csv_file,
                wrds_id=wrds_id,
                fix_missing=fix_missing,
                fix_cr=fix_cr,
                drop=drop, keep=keep,
                obs=obs, rename=rename,
                encoding=encoding,
                sas_schema=sas_schema,
                sas_encoding=sas_encoding)
    # Stamp the file with the WRDS modification time so the next run can
    # detect that it is current.
    set_modified_csv(csv_file, modified)
    return True

In [7]:
# Root of the local CSV repository and the file used in the examples below.
csv_path = "/Users/iangow/Library/CloudStorage/Dropbox/data"
csv_file = Path(csv_path) / "crsp" / "dsi.csv.gz"

In [8]:
# Stamp the crsp/dsi file with the modification string reported for a
# different table (crsp.dsf) -- presumably so that the update check in the
# next cell sees a mismatch.
wrong_modified = get_modified_str("dsf", "crsp", wrds_id = "iangow")
set_modified_csv(csv_file, wrong_modified)
# Last expression of the cell: echoes the modification time just written.
get_modified_csv(csv_file)

'Last modified: 01/17/2023 18:28:41'

In [9]:
# Run the update for crsp.dsi; wrds_update_csv returns True when it downloads
# a fresh copy and re-stamps the file.
wrds_update_csv("dsi", "crsp", data_dir=csv_path, wrds_id="iangow")
# Last expression of the cell: echoes the file's modification time afterwards.
get_modified_csv(csv_file)

Updated crsp.dsi is available.
Getting from WRDS.



'Last modified: 01/17/2023 18:08:48'

In [10]:
# NOTE(review): this call's return value is neither stored nor the cell's
# last expression, so it is not displayed.
get_modified_str("dsi", "crsp", wrds_id = "iangow")
# Second run: wrds_update_csv returns False when the file's modification
# string already equals the one from get_modified_str.
wrds_update_csv("dsi", "crsp", data_dir=csv_path, wrds_id="iangow")

crsp.dsi already up to date


False