# About
* **Author**: Adil Rashitov (adil@wastelabs.co)
* **Created at**: 07.09.2022

In [None]:
# Imports / Configs / Global vars

# Import of native python tools
import os
import json
from functools import reduce

# Import of base ML stack libs
import numpy as np
import sklearn as sc

# Logging configuraiton
import logging
logging.basicConfig(format='[ %(asctime)s ][ %(levelname)s ]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# Ipython configs
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell
display(HTML("<style>.container { width:100% !important; }</style>"))
InteractiveShell.ast_node_interactivity = 'all'

# Pandas configs
import pandas as pd
pd.options.display.max_rows = 350
pd.options.display.max_columns = 250

# Jupyter configs
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

# GLOBAL VARS
from pathlib import Path
import sys

# Parent of the current working directory, kept as a plain string because
# `sys.path` entries are strings. `Path.parent` replaces the manual
# `rsplit('/', 1)`, which assumed POSIX separators (returning the working
# directory itself on Windows) and produced an empty string when the working
# directory sat directly under the filesystem root.
PROJECT_DIR = str(Path.cwd().parent)
SRC_DIR = PROJECT_DIR

# Register both roots on the import path exactly once. They are currently the
# same directory, but each is checked so the cell keeps working if `SRC_DIR`
# is later pointed elsewhere.
for _import_root in (SRC_DIR, PROJECT_DIR):
    if _import_root not in sys.path:
        sys.path.append(_import_root)

# Main

In [None]:
# S3 URI of the raw stops Excel workbook used as the input of this notebook
SRC_S3_FPATH = "s3://dev-data-temp/dev_kpi_calculation_platform/dev/01_raw/test_stops.xlsx"

## Dev

1. Read source file
2. Append `upload_time` & `filename`
3. Query data
4. Export to S3

In [None]:
# Project-local import; relies on the project root having been added to sys.path
from src.aws_lambda.kpi import Session


# Create a session bound to the source file, then run its read and process steps.
# NOTE(review): presumably `read_stops` loads the workbook and `process_stops`
# prepares `session.stops` for export — confirm in `src.aws_lambda.kpi`.
session = Session(SRC_S3_FPATH)
session.read_stops()
session.process_stops()

In [None]:
from datetime import datetime
from typing import Literal

import awswrangler as wr
import boto3


In [None]:
def export_stops(stops: pd.DataFrame) -> pd.DataFrame:
    """Write ``stops`` as a partitioned Parquet dataset to the dev S3 location.

    The frame is written under the ``02_intermediate/stops/`` prefix with
    ``dataset=True`` and the ``database`` / ``table`` arguments set, so the
    write targets the ``kpi_calculation_platform.dev_stops`` catalog table.

    Args:
        stops: Frame to export. It is partitioned by ``processing_id``, so
            that column must be present.

    NOTE(review): the signature is annotated ``-> pd.DataFrame`` but no
    ``return`` statement is visible in this block — confirm the intended
    return value with the callers.
    """
    wr.s3.to_parquet(
        # Export the frame passed in by the caller. The previous version
        # ignored the argument and always wrote the global `session.stops`.
        # The copy keeps the caller's frame untouched in case the writer
        # renames columns in place (`sanitize_columns=True`).
        df=stops.copy(),
        boto3_session=boto3.Session(region_name="ap-southeast-1"),
        path="s3://dev-data-temp/dev_kpi_calculation_platform/dev/02_intermediate/stops/",
        index=False,
        dataset=True,
        sanitize_columns=True,
        database="kpi_calculation_platform",
        table="dev_stops",
        partition_cols=["processing_id"],
        use_threads=True,
        # NOTE(review): "overwrite" replaces the existing dataset contents,
        # not just the partitions being written — confirm this is intended.
        mode="overwrite",
    )