In [59]:
# v2: adds argument date to allow user to specify the date from which they want the report generated going forward

In [5]:
import boto3
import pandas as pd

# Neccessary for reading csv's off AWS Object Storage
from io import StringIO

# The timedelta module allows for easy selection of previous or later dates
from datetime import datetime as dt
from datetime import timedelta as td

In [36]:
# The date from which we want to pull all date going forward
arg_date = '2022-12-28'

In [37]:
# Input date needs to be arg_date-1 to populate prev_closing_price for arg_date, and needs to be DATETIME formate

# Convert string 'arg_date' into datetime
# datetime converts string into date and time, so the .date() func isolates date only
# timedelta lets you specify a timeframe to add/subtract from object

arg_date_dt = dt.strptime(arg_date, '%Y-%m-%d').date() - td(days=1)

In [38]:
arg_date_dt

datetime.date(2022, 12, 27)

In [39]:
s3 = boto3.resource('s3')
bucket = s3.Bucket('xetra-1234')

# Create list comp. to grab all object keys where date string is >= to arg_date_dt
# The code "if dt.strptime(obj.key.split('/')[0], '%Y-%m-%d').date() >= arg_date_dt]" does 2 things:
#    1. "dt.strptime(obj.key,.split('/')[0]" --> splits obj.key string at '/' and uses first item which is the YYYY-MM-DD,
#        and converts it datetime
#    2. ".date() >= arg_date_dt" --> isolates the date portion of the datetime object and compares it to arg_date_dt

objects = [obj for obj in bucket.objects.all() if dt.strptime(obj.key.split('/')[0], '%Y-%m-%d').date() >= arg_date_dt]


In [40]:
objects

[s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR00.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR01.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR02.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR03.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR04.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR05.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR06.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR07.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR08.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR09.csv'),
 s3.ObjectSummary(bucket_name='xetra-1234', key='2022-12-27/2022-12-25_BINS_XETR10.csv'),
 s3.Object

In [41]:
# Temp df to obtain column names
csv_obj_init = bucket.Object(key=objects[0].key).get().get('Body').read().decode('utf-8')
data = StringIO(csv_obj_init)
df_init = pd.read_csv(data, delimiter=',')

In [42]:
df_init.columns

Index(['ISIN', 'Mnemonic', 'SecurityDesc', 'SecurityType', 'Currency',
       'SecurityID', 'Date', 'Time', 'StartPrice', 'MaxPrice', 'MinPrice',
       'EndPrice', 'TradedVolume', 'NumberOfTrades'],
      dtype='object')

In [43]:
# Func to read all csv files and concatenate into single df
df_all = pd.DataFrame(columns=df_init.columns)

for obj in objects:
    csv_obj = bucket.Object(key=obj.key).get().get('Body').read().decode('utf-8')
    data = StringIO(csv_obj)
    df = pd.read_csv(data, delimiter=',')
    df_all = pd.concat(objs=[df_all, df], ignore_index=True)

In [44]:
# Remove unused columns from df_all
columns = ['ISIN', 'Date', 'Time', 'StartPrice', 'MaxPrice', 'MinPrice', 'EndPrice', 'TradedVolume']
df_all = df_all.loc[:, columns]
df_all

Unnamed: 0,ISIN,Date,Time,StartPrice,MaxPrice,MinPrice,EndPrice,TradedVolume
0,AT0000A0E9W5,2022-12-27,08:00,14.020,14.270,14.020,14.270,1466
1,DE000A0D6554,2022-12-27,08:00,13.840,13.850,13.790,13.820,26486
2,DE000A0D9PT0,2022-12-27,08:00,197.350,197.550,197.000,197.250,2150
3,DE000A0JL9W6,2022-12-27,08:00,63.000,63.250,62.900,62.900,2077
4,DE000A0LD6E6,2022-12-27,08:00,65.600,65.600,65.600,65.600,1801
...,...,...,...,...,...,...,...,...
664085,GB00BLD4ZP54,2022-12-31,16:46,19.324,19.324,19.324,19.324,0
664086,LU1923627332,2022-12-31,16:52,12.400,12.400,12.400,12.400,2645
664087,US98956P1021,2022-12-31,20:30,113.100,113.100,113.100,113.100,0
664088,US9224171002,2022-12-31,20:30,24.600,24.600,24.600,24.600,0


# Transformations

### Get opening price per ISIN and day

In [45]:
df_all['OpeningPrice'] = df_all.sort_values(by=['Time']).groupby(['ISIN', 'Date'])['StartPrice'].transform('first')

In [46]:
df_all[df_all['ISIN']=='DE000A0DJ6J9']

Unnamed: 0,ISIN,Date,Time,StartPrice,MaxPrice,MinPrice,EndPrice,TradedVolume,OpeningPrice
179,DE000A0DJ6J9,2022-12-27,08:02,29.84,29.86,29.52,29.62,4250,29.84
376,DE000A0DJ6J9,2022-12-27,08:03,29.62,29.72,29.56,29.72,1944,29.84
555,DE000A0DJ6J9,2022-12-27,08:04,29.58,29.58,28.90,28.90,26093,29.84
2642,DE000A0DJ6J9,2022-12-27,08:05,28.88,28.88,28.54,28.54,727,29.84
2969,DE000A0DJ6J9,2022-12-27,08:06,28.54,28.54,28.40,28.40,775,29.84
...,...,...,...,...,...,...,...,...,...
659266,DE000A0DJ6J9,2022-12-31,16:26,35.58,35.60,35.52,35.60,1435,29.00
659640,DE000A0DJ6J9,2022-12-31,16:27,35.60,35.60,35.58,35.58,1544,29.00
659939,DE000A0DJ6J9,2022-12-31,16:28,35.60,35.68,35.60,35.66,1553,29.00
660292,DE000A0DJ6J9,2022-12-31,16:29,35.66,35.68,35.58,35.60,1341,29.00


### Get closing price per ISIN and day

In [47]:
df_all['ClosingPrice'] = df_all.sort_values(by=['Time']).groupby(['ISIN', 'Date'])['EndPrice'].transform('last')

In [48]:
df_all[df_all['ISIN']=='DE000A0DJ6J9']

Unnamed: 0,ISIN,Date,Time,StartPrice,MaxPrice,MinPrice,EndPrice,TradedVolume,OpeningPrice,ClosingPrice
179,DE000A0DJ6J9,2022-12-27,08:02,29.84,29.86,29.52,29.62,4250,29.84,29.74
376,DE000A0DJ6J9,2022-12-27,08:03,29.62,29.72,29.56,29.72,1944,29.84,29.74
555,DE000A0DJ6J9,2022-12-27,08:04,29.58,29.58,28.90,28.90,26093,29.84,29.74
2642,DE000A0DJ6J9,2022-12-27,08:05,28.88,28.88,28.54,28.54,727,29.84,29.74
2969,DE000A0DJ6J9,2022-12-27,08:06,28.54,28.54,28.40,28.40,775,29.84,29.74
...,...,...,...,...,...,...,...,...,...,...
659266,DE000A0DJ6J9,2022-12-31,16:26,35.58,35.60,35.52,35.60,1435,29.00,35.54
659640,DE000A0DJ6J9,2022-12-31,16:27,35.60,35.60,35.58,35.58,1544,29.00,35.54
659939,DE000A0DJ6J9,2022-12-31,16:28,35.60,35.68,35.60,35.66,1553,29.00,35.54
660292,DE000A0DJ6J9,2022-12-31,16:29,35.66,35.68,35.58,35.60,1341,29.00,35.54


### Aggregations

In [49]:
df_all = df_all.groupby(['ISIN', 'Date'], as_index=False).agg(opening_price_eur=('OpeningPrice', 'min'), 
                                                              closing_price_eur=('ClosingPrice', 'min'), 
                                                              minimum_price_eur=('MinPrice', 'min'),
                                                              maximum_price_eru=('MaxPrice','max'),
                                                              daily_traded_volume=('TradedVolume','sum'))

In [50]:
df_all

Unnamed: 0,ISIN,Date,opening_price_eur,closing_price_eur,minimum_price_eur,maximum_price_eru,daily_traded_volume
0,AT000000STR1,2022-12-27,36.1000,37.7000,36.1000,37.7000,2864
1,AT000000STR1,2022-12-28,36.6000,36.7000,35.7500,36.7000,1773
2,AT000000STR1,2022-12-29,36.6000,36.7000,35.7500,36.7000,1773
3,AT000000STR1,2022-12-30,36.6000,36.7000,35.7500,36.7000,1773
4,AT000000STR1,2022-12-31,36.6000,36.7000,35.7500,36.7000,1773
...,...,...,...,...,...,...,...
16070,XS2434891219,2022-12-27,3.4404,3.5034,3.4404,3.5034,0
16071,XS2434891219,2022-12-28,3.4412,3.6620,3.4184,3.6620,0
16072,XS2434891219,2022-12-29,3.4412,3.6620,3.4184,3.6620,0
16073,XS2434891219,2022-12-30,3.4412,3.6620,3.4184,3.6620,0


### Percent change from previous closing price

In [51]:
df_all['previous_closing_price'] = df_all.sort_values(by=['Date']).groupby(['ISIN'])['closing_price_eur'].shift(1)

In [52]:
df_all

Unnamed: 0,ISIN,Date,opening_price_eur,closing_price_eur,minimum_price_eur,maximum_price_eru,daily_traded_volume,previous_closing_price
0,AT000000STR1,2022-12-27,36.1000,37.7000,36.1000,37.7000,2864,
1,AT000000STR1,2022-12-28,36.6000,36.7000,35.7500,36.7000,1773,37.7000
2,AT000000STR1,2022-12-29,36.6000,36.7000,35.7500,36.7000,1773,36.7000
3,AT000000STR1,2022-12-30,36.6000,36.7000,35.7500,36.7000,1773,36.7000
4,AT000000STR1,2022-12-31,36.6000,36.7000,35.7500,36.7000,1773,36.7000
...,...,...,...,...,...,...,...,...
16070,XS2434891219,2022-12-27,3.4404,3.5034,3.4404,3.5034,0,
16071,XS2434891219,2022-12-28,3.4412,3.6620,3.4184,3.6620,0,3.5034
16072,XS2434891219,2022-12-29,3.4412,3.6620,3.4184,3.6620,0,3.6620
16073,XS2434891219,2022-12-30,3.4412,3.6620,3.4184,3.6620,0,3.6620


In [53]:
df_all['%_change_closing_price'] = (df_all['closing_price_eur'] - df_all['previous_closing_price']) / df_all['previous_closing_price'] * 100

In [54]:
df_all.drop(columns=['previous_closing_price'], inplace=True)

In [55]:
df_all = df_all.round(decimals=2)

In [57]:
# Filter finilized dataset by date
df_all = df_all[df_all.Date >= arg_date]

In [58]:
df_all

Unnamed: 0,ISIN,Date,opening_price_eur,closing_price_eur,minimum_price_eur,maximum_price_eru,daily_traded_volume,%_change_closing_price
1,AT000000STR1,2022-12-28,36.60,36.70,35.75,36.70,1773,-2.65
2,AT000000STR1,2022-12-29,36.60,36.70,35.75,36.70,1773,0.00
3,AT000000STR1,2022-12-30,36.60,36.70,35.75,36.70,1773,0.00
4,AT000000STR1,2022-12-31,36.60,36.70,35.75,36.70,1773,0.00
6,AT00000FACC2,2022-12-28,8.05,8.57,7.87,8.57,10205,2.51
...,...,...,...,...,...,...,...,...
16069,XS2376095068,2022-12-31,34.29,36.50,34.06,36.50,1000,0.00
16071,XS2434891219,2022-12-28,3.44,3.66,3.42,3.66,0,4.53
16072,XS2434891219,2022-12-29,3.44,3.66,3.42,3.66,0,0.00
16073,XS2434891219,2022-12-30,3.44,3.66,3.42,3.66,0,0.00
