In [1]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd
import numpy as np

# total year

In [16]:
q = '''select 
            year, 
            sum(amount) as volume, 
            count(id) as count,
            count(org) as orgs
        from transactions 
        where 
            status='A' 
        group by year'''
df_annual = redshift_query_read(q, schema='production')

In [17]:
df_annual.sort_values('year', inplace=True)
df_annual['volume per org'] = df_annual['volume'] / df_annual['orgs']
df_annual['volume growth'] = df_annual['volume'].pct_change()
df_annual['volume per org growth'] = df_annual['volume per org'].pct_change()
df_annual['count growth'] = df_annual['count'].pct_change()
df_annual[['year', 'volume growth', 'count growth', 'volume per org growth']].tail(11)

Unnamed: 0,year,volume growth,count growth,volume per org growth
1,2014,0.437168,0.378794,0.042337
9,2015,0.34235,0.31649,0.019643
0,2016,0.078024,0.111397,-0.030028
16,2017,0.257093,0.335355,-0.058608
17,2018,0.226939,0.318095,-0.069157
2,2019,0.195327,0.272595,-0.060717
14,2020,0.381639,0.308942,0.055539
15,2021,0.182356,0.161511,0.017946
18,2022,0.13326,0.148376,-0.013163
7,2023,0.16014,0.132101,0.024767


In [18]:
sp = [
    (2015, -0.73),
    (2016, 9.54),
    (2017, 19.42),
    (2018, -6.24),
    (2019, 28.88),
    (2020, 16.26),
    (2021, 26.89),
    (2022, -19.44),
    (2023, 0.29)
]
sp = pd.DataFrame(sp, columns=['year', 's&p'])

In [21]:
df = df_annual[['year', 'volume growth', 'count growth', 'volume per org growth']].merge(sp, on='year')
df

Unnamed: 0,year,volume growth,count growth,volume per org growth,s&p
0,2015,0.34235,0.31649,0.019643,-0.73
1,2016,0.078024,0.111397,-0.030028,9.54
2,2017,0.257093,0.335355,-0.058608,19.42
3,2018,0.226939,0.318095,-0.069157,-6.24
4,2019,0.195327,0.272595,-0.060717,28.88
5,2020,0.381639,0.308942,0.055539,16.26
6,2021,0.182356,0.161511,0.017946,26.89
7,2022,0.13326,0.148376,-0.013163,-19.44
8,2023,0.16014,0.132101,0.024767,0.29


In [22]:
df.drop('year', axis=1).corr()

Unnamed: 0,volume growth,count growth,volume per org growth,s&p
volume growth,1.0,0.818098,0.379286,0.158836
count growth,0.818098,1.0,-0.221089,0.186617
volume per org growth,0.379286,-0.221089,1.0,-0.028749
s&p,0.158836,0.186617,-0.028749,1.0


# december

In [25]:
q = '''select 
            year, 
            sum(amount) as volume, 
            count(id) as count,
            count(org) as orgs
        from transactions 
        where 
            status='A' and
            month=12
        group by year'''
df_dec = redshift_query_read(q, schema='production')

In [26]:
df_dec.sort_values('year', inplace=True)
df_dec['volume per org'] = df_dec['volume'] / df_dec['orgs']
df_dec['volume growth'] = df_dec['volume'].pct_change()
df_dec['volume per org growth'] = df_dec['volume per org'].pct_change()
df_dec['count growth'] = df_dec['count'].pct_change()
df_dec[['year', 'volume growth', 'count growth', 'volume per org growth']].tail(11)

Unnamed: 0,year,volume growth,count growth,volume per org growth
3,2013,0.698723,0.575461,0.078239
2,2014,0.247727,0.272071,-0.019137
7,2015,0.373248,0.330207,0.032356
1,2016,0.120944,0.164007,-0.036996
11,2017,0.317214,0.319698,-0.001882
10,2018,0.076059,0.188928,-0.094934
14,2019,0.288787,0.252284,0.029149
5,2020,0.627778,0.680096,-0.03114
13,2021,0.02389,-0.054362,0.082751
12,2022,-0.037377,0.009493,-0.04643


In [27]:
df_dec_s = df_dec[['year', 'volume growth', 'count growth', 'volume per org growth']].merge(sp, on='year')
df_dec_s

Unnamed: 0,year,volume growth,count growth,volume per org growth,s&p
0,2015,0.373248,0.330207,0.032356,-0.73
1,2016,0.120944,0.164007,-0.036996,9.54
2,2017,0.317214,0.319698,-0.001882,19.42
3,2018,0.076059,0.188928,-0.094934,-6.24
4,2019,0.288787,0.252284,0.029149,28.88
5,2020,0.627778,0.680096,-0.03114,16.26
6,2021,0.02389,-0.054362,0.082751,26.89
7,2022,-0.037377,0.009493,-0.04643,-19.44
8,2023,0.113225,0.103934,0.008417,0.29


In [37]:
df_dec_s.drop('year', axis=1).corr()['s&p']

volume growth            0.395435
count growth             0.233127
volume per org growth    0.611200
s&p                      1.000000
Name: s&p, dtype: float64

In [38]:
q = '''select 
            year, 
            org,
            sum(amount) as volume, 
            count(id) as count,
            count(org) as orgs
        from transactions 
        where 
            status='A' and
            month=12
        group by year, org'''
df_orgs = redshift_query_read(q, schema='production')

In [46]:
df_orgs_avgs = df_orgs.groupby(['year'])['volume'].agg(['mean', 'median']).reset_index()
df_orgs_avgs.sort_values('year', ascending=True, inplace=True)
df_orgs_avgs['mean growth'] = df_orgs_avgs['mean'].pct_change()
df_orgs_avgs['median growth'] = df_orgs_avgs['median'].pct_change()
df_orgs_avgs = df_orgs_avgs.merge(sp, on='year')

In [47]:
df_orgs_avgs

Unnamed: 0,year,mean,median,mean growth,median growth,s&p
0,2015,14665.337194,3120.0,0.217698,0.124324,-0.73
1,2016,14491.325,2900.5,-0.011866,-0.070353,9.54
2,2017,15715.540638,2933.375,0.084479,0.011334,19.42
3,2018,14172.069789,2483.0,-0.098213,-0.153535,-6.24
4,2019,15636.003966,2557.11,0.103297,0.029847,28.88
5,2020,22829.372282,3272.5,0.460052,0.279765,16.26
6,2021,21514.631105,3127.25,-0.05759,-0.044385,26.89
7,2022,19300.784172,2625.0,-0.1029,-0.160604,-19.44
8,2023,19939.82385,2585.25,0.03311,-0.015143,0.29


In [48]:
df_orgs_avgs[['mean growth', 'median growth', 's&p']].corr()

Unnamed: 0,mean growth,median growth,s&p
mean growth,1.0,0.983526,0.331377
median growth,0.983526,1.0,0.435972
s&p,0.331377,0.435972,1.0
