In [1]:
import boto3
import pandas as pd
from io import StringIO, BytesIO
from datetime import datetime, timedelta

In [None]:
# Adapter Layer
def read_csv_to_df(bucket, key, decoding='utf-8', sep=','):
    """Download one CSV object from a bucket and parse it into a DataFrame.

    Args:
        bucket: Bucket-like object exposing ``Object(key=...)``.
        key: Key of the CSV object to read.
        decoding: Text encoding used to decode the downloaded bytes.
        sep: Field delimiter passed to ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    response = bucket.Object(key=key).get()
    raw_bytes = response.get('Body').read()
    text = raw_bytes.decode(decoding)
    return pd.read_csv(StringIO(text), delimiter=sep)

def write_df_to_s3(bucket, df, key):
    """Serialize a DataFrame to Parquet in memory and upload it to ``bucket``.

    Args:
        bucket: Bucket-like object exposing ``put_object(Body=..., Key=...)``.
        df: DataFrame (or any object with a compatible ``to_parquet``) to write.
        key: Object key under which the Parquet bytes are stored.

    Returns:
        ``True`` once ``put_object`` has been called.
    """
    out_buffer = BytesIO()
    df.to_parquet(out_buffer, index=False)
    # The upload goes to the bucket handed in by the caller; no global S3
    # resource is needed here.
    bucket.put_object(Body=out_buffer.getvalue(), Key=key)
    return True

def return_objects(bucket, arg_date):
    """List the keys in ``bucket`` dated on or after the day before ``arg_date``.

    The date of an object is parsed from the part of its key before the first
    ``/`` using the ``%Y-%m-%d`` format.

    Args:
        bucket: Bucket-like object exposing ``objects.all()``.
        arg_date: Date string in ``%Y-%m-%d`` format.

    Returns:
        List of matching object keys, in listing order.
    """
    date_format = '%Y-%m-%d'
    earliest = datetime.strptime(arg_date, date_format).date() - timedelta(days=1)
    selected = []
    for summary in bucket.objects.all():
        prefix = summary.key.split('/')[0]
        if datetime.strptime(prefix, date_format).date() >= earliest:
            selected.append(summary.key)
    return selected

In [3]:
# Module-level S3 handles: `s3` is the boto3 resource and `bucket` is the
# source bucket; both names are read by other cells in this notebook.
s3 = boto3.resource('s3')
bucket = s3.Bucket('deutsche-boerse-xetra-pds')

In [None]:
# Application layer
def extract(bucket, objects):
    """Read every listed CSV object from ``bucket`` into one combined DataFrame.

    Args:
        bucket: Bucket-like object passed through to ``read_csv_to_df``.
        objects: Iterable of object keys to read.

    Returns:
        A single DataFrame with the rows of all objects and a fresh 0..n-1 index.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``objects`` is empty.
    """
    frames = [read_csv_to_df(bucket, obj) for obj in objects]
    # Return the concatenated frame itself (previously an unrelated name was returned).
    return pd.concat(frames, ignore_index=True)

def transform_report1(df, columns, arg_date):
    """Aggregate raw trade rows into one report row per ISIN and date.

    Args:
        df: Raw DataFrame; must contain the columns 'ISIN', 'Date', 'Time',
            'StartPrice', 'MinPrice', 'MaxPrice' and 'TradedVolume'.
        columns: Column labels to keep before aggregating.
        arg_date: Lower bound for 'Date'; rows with an earlier date are only
            used to compute the previous closing price and are then dropped.

    Returns:
        DataFrame with the columns ISIN, Date, opening_price_eur,
        closing_price_eur, minimum_price_eur, maximum_price_eur,
        daily_traded_volume and change_prev_closing_%, rounded to two
        decimals. The input frame is not modified.
    """
    # Select and clean into a new frame so the caller's data is never mutated.
    df = df.loc[:, columns].dropna()

    # First/last 'StartPrice' of each (ISIN, Date) once rows are ordered by time.
    # NOTE(review): the closing price is taken from 'StartPrice' of the last
    # row, as in the original code; confirm whether 'EndPrice' was intended.
    start_by_day = df.sort_values(by=['Time']).groupby(['ISIN', 'Date'])['StartPrice']
    df['opening_price'] = start_by_day.transform('first')
    df['closing_price'] = start_by_day.transform('last')

    df = df.groupby(['ISIN', 'Date'], as_index=False).agg(
        opening_price_eur=('opening_price', 'min'),
        closing_price_eur=('closing_price', 'min'),
        minimum_price_eur=('MinPrice', 'min'),
        maximum_price_eur=('MaxPrice', 'max'),
        daily_traded_volume=('TradedVolume', 'sum'),
    )

    # Closing price of the previous available date for the same ISIN; the
    # earliest date of each ISIN has no predecessor and yields NaN.
    df['prev_closing_price'] = (
        df.sort_values(by=['Date']).groupby(['ISIN'])['closing_price_eur'].shift(1)
    )
    df['change_prev_closing_%'] = (
        (df['closing_price_eur'] - df['prev_closing_price'])
        / df['prev_closing_price'] * 100
    )

    df = df.drop(columns=['prev_closing_price'])
    df = df.round(decimals=2)
    df = df[df['Date'] >= arg_date]
    return df

def load(bucket, df, trg_key, trg_format):
    """Write ``df`` to ``bucket`` under a timestamped key.

    The key is ``trg_key`` + current local time as ``%Y%m%d_%H%M%S`` +
    ``trg_format``.

    Args:
        bucket: Bucket-like object passed through to ``write_df_to_s3``.
        df: DataFrame to upload.
        trg_key: Key prefix (string).
        trg_format: Key suffix (string) appended after the timestamp.

    Returns:
        ``True`` after the write helper has been called.
    """
    key = trg_key + datetime.today().strftime("%Y%m%d_%H%M%S") + trg_format
    write_df_to_s3(bucket, df, key)
    return True

def etl_report1():
    """Run the report pipeline end to end: extract, transform, load.

    Takes no arguments; ``bucket``, ``objects``, ``columns``, ``arg_date``,
    ``trg_key`` and ``trg_format`` are read from module scope and must be
    defined before this function is called.
    """
    df = extract(bucket, objects)
    df = transform_report1(df, columns, arg_date)
    # load() requires the target format as its fourth positional argument.
    load(bucket, df, trg_key, trg_format)

In [7]:
# Download every listed CSV object and combine the parts into one DataFrame `df`.
# The parts are collected in a list and concatenated once: the accumulator is
# no longer overwritten by each freshly read file, and the removed
# DataFrame.append API is not needed.
frames = []
for obj in objects:
    csv_obj = bucket.Object(key=obj.key).get().get('Body').read().decode('utf-8')
    data = StringIO(csv_obj)
    frames.append(pd.read_csv(data, delimiter=','))

# With nothing to read, fall back to an empty frame carrying df_init's columns.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=df_init.columns)
    

In [9]:
# Display the column labels of df_init (df_init is not defined in the visible cells).
df_init.columns

Index(['ISIN', 'Mnemonic', 'SecurityDesc', 'SecurityType', 'Currency',
       'SecurityID', 'Date', 'Time', 'StartPrice', 'MaxPrice', 'MinPrice',
       'EndPrice', 'TradedVolume', 'NumberOfTrades'],
      dtype='object')

In [10]:
# Column labels kept for the report.
# NOTE(review): this name is capitalised, while etl_report1 reads a lowercase
# `columns` from module scope — confirm which name is intended.
Columns = ['ISIN', 'Date', 'Time', 'StartPrice', 'MaxPrice', 'MinPrice',
       'EndPrice', 'TradedVolume']

## Write to S3

In [22]:

# Serialize df_all to Parquet in memory, then upload the bytes to the
# 'xetra-edp' bucket. `key` and `df_all` are not defined in the visible cells.
out_buffer = BytesIO()
df_all.to_parquet(out_buffer, index=False)
# bucket_target is reused by the cells below to list and read the upload.
bucket_target = s3.Bucket('xetra-edp')
bucket_target.put_object(Body=out_buffer.getvalue(), Key=key)

s3.Object(bucket_name='xetra-edp', key='xetra_daily_report_20211230_083546.parquet')

## Reading the uploaded file

In [23]:
# Print the key of every object currently in the target bucket.
for obj in bucket_target.objects.all():
    print(obj.key)

xetra_daily_report_20211230_083546.parquet


In [25]:
# Download the uploaded Parquet object as bytes and parse it into df_report.
prq_obj = bucket_target.Object(key='xetra_daily_report_20211230_083546.parquet').get().get('Body').read()
# read_parquet needs a file-like object, so wrap the raw bytes in a buffer.
data = BytesIO(prq_obj)
df_report=pd.read_parquet(data)

In [26]:
# Display the report frame that was read back from the target bucket.
df_report

Unnamed: 0,ISIN,Date,opening_price_eur,closing_price_eur,minimum_price_eur,maximum_price_eur,daily_traded_volume,change_prev_closing_%
0,AT000000STR1,2021-12-29,36.20,36.60,36.20,36.70,592,0.00
1,AT000000STR1,2021-12-30,36.85,36.75,36.40,36.85,135,0.41
2,AT00000FACC2,2021-12-29,7.35,7.28,7.21,7.36,1479,-1.75
3,AT0000606306,2021-12-29,25.56,25.90,25.56,25.90,926,2.13
4,AT0000606306,2021-12-30,26.00,25.70,25.70,26.00,188,-0.77
...,...,...,...,...,...,...,...,...
6133,XS2314659447,2021-12-30,8.53,8.53,8.53,8.53,0,0.79
6134,XS2314660700,2021-12-29,17.81,17.77,17.73,17.81,69,-0.74
6135,XS2314660700,2021-12-30,18.04,17.99,17.99,18.04,0,1.27
6136,XS2376095068,2021-12-29,42.16,42.00,41.62,42.16,0,-1.74
