In [1]:
import argparse
import functools
import json
import os
from collections import namedtuple
from decimal import Decimal
from io import StringIO, BytesIO

import boto3
import numpy as np
import pandas as pd
import psycopg2

  """)


# RDS (PostgreSQL): build and export the customer fact table

In [2]:
# Keyword arguments that GlobalMartDB forwards to psycopg2.connect().
# Every value can be overridden through an environment variable so real
# credentials do not have to be saved inside the notebook; the fallbacks are
# the placeholder values the notebook was originally written with.
rds_connection = {
    'host': os.environ.get('GLOBALMART_DB_HOST', ''),
    'database': os.environ.get('GLOBALMART_DB_NAME', 'globalmart'),
    'user': os.environ.get('GLOBALMART_DB_USER', 'postgres'),
    'password': os.environ.get('GLOBALMART_DB_PASSWORD', 'password'),
}

In [3]:
class GlobalMartDB:
    """Small helper around a psycopg2 connection to the GlobalMart database.

    Typical use is ``db = GlobalMartDB(params).connect()`` followed by
    ``db.execute(sql)`` or ``db.df_builder(sql, columns)``.
    """

    def __init__(self, connection_params):
        """Store the connection settings; no connection is opened here.

        Args:
            connection_params: Mapping of keyword arguments passed unchanged
                to ``psycopg2.connect``.
        """
        self.connection_params = connection_params
        # Set by connect(); None means no connection has been opened yet.
        self.connection = None

    def connect(self):
        """Connect to the PostgreSQL database server.

        Returns:
            This instance, so the call can be chained off the constructor.

        Raises:
            psycopg2.Error: If the connection cannot be established. The
                error is printed and then re-raised, so a failed attempt is
                never reported as successful.
        """
        try:
            connection = psycopg2.connect(**self.connection_params)
        except psycopg2.Error as error:
            print(error)
            raise
        self.connection = connection
        print("Connection successful")
        return self

    def execute(self, sql, params=None):
        """Run one SQL statement and return all rows it produces.

        Args:
            sql: The statement text.
            params: Optional query parameters handed to ``cursor.execute``
                for server-side-safe substitution. ``None`` (the default)
                sends ``sql`` exactly as written.

        Returns:
            The list of rows from ``cursor.fetchall()``, or an empty list
            when the statement produces no result set.

        Raises:
            RuntimeError: If ``connect()`` has not been called yet.
        """
        if self.connection is None:
            raise RuntimeError("Not connected: call connect() before execute()")
        # The connection context manager ends the transaction (commit on
        # success, rollback on error) but leaves the connection open; the
        # cursor context manager closes the cursor when the block exits.
        with self.connection as conn:
            with conn.cursor() as cursor:
                cursor.execute(sql, params)
                # description is None for statements without a result set,
                # where fetchall() would raise.
                if cursor.description is None:
                    return []
                return cursor.fetchall()

    def df_builder(self, sql, columns, params=None):
        """Run a query and wrap its rows in a DataFrame.

        Args:
            sql: The query text.
            columns: Column labels for the resulting frame, in the same
                order as the query's select list.
            params: Optional query parameters, forwarded to ``execute``.

        Returns:
            A ``pandas.DataFrame`` with one row per result row.
        """
        results = self.execute(sql, params)
        return pd.DataFrame(data=results, columns=columns)

In [4]:
# Orders per customer: counts the rows in `orders` for each customer_id and
# sorts the result with the highest count first.
num_orders_sql = """
    select customer_id,
    count(*) as orders
    from orders
    group by customer_id
    order by orders desc;
    """

# Total spend per customer. The `order_totals` CTE sums `sales` per order_id
# from `transactions`; the outer query joins those totals to `orders`, sums
# them per customer_id, rounds to 2 decimals and sorts highest spend first.
# NOTE(review): the `order by total desc` inside the CTE looks redundant,
# since the outer query re-aggregates and applies its own ordering -- confirm
# before removing.
amount_spent_sql = """
    with order_totals as(
    select order_id,
    sum(sales) as total
    from transactions
    group by order_id
    order by total desc
    )

    select customer_id,
    round(cast(sum(order_totals.total) as numeric), 2) as grand_total
    from orders
    join order_totals on order_totals.order_id = orders.order_id
    group by customer_id
    order by grand_total desc;
    """

# Average purchase spacing per customer: the span between the earliest and
# latest order_purchase_date divided by the number of purchase dates, keeping
# the day component as an integer.
# NOTE(review): presumably order_purchase_date is a timestamp, so the
# subtraction yields an interval -- confirm against the schema.
# NOTE(review): the divisor is the order count, not the number of gaps
# between orders (count - 1); confirm this is the intended definition.
purchase_freq_sql = """
    select customer_id,
    extract(day from (max(order_purchase_date) - min(order_purchase_date)) / count(order_purchase_date))::integer as avg_purchase_freq_days
    from orders
    group by customer_id
    order by customer_id;
    """

# Returns per customer. The `rtns` CTE counts rows in `returns` per order_id;
# the outer query joins those counts to `orders` and sums them per
# customer_id, highest first.
# The join is an inner join, so customers whose orders have no rows in
# `returns` do not appear in this result at all.
num_returns_sql = """
    with rtns as(
    select order_id,
    count(*) as no_returns
    from returns
    group by order_id
    )

    select customer_id,
    sum(rtns.no_returns) as total_returns
    from orders
    join rtns on rtns.order_id = orders.order_id
    group by orders.customer_id
    order by total_returns desc;
    """

In [5]:
# DataFrame column labels for each query: the shared customer_id key followed
# by the name of the single metric that query returns.
(
    num_orders_columns,
    amount_spent_columns,
    purchase_freq_columns,
    num_returns_columns,
) = (
    ['customer_id', metric]
    for metric in ('num_orders', 'total_spent', 'purchase_freq', 'num_returns')
)

In [6]:
# (sql, columns) pairs, one per metric, in the order the frames are merged.
statements = list(
    zip(
        (num_orders_sql, amount_spent_sql, purchase_freq_sql, num_returns_sql),
        (
            num_orders_columns,
            amount_spent_columns,
            purchase_freq_columns,
            num_returns_columns,
        ),
    )
)

In [7]:
# connect() returns the instance it was called on, so `db` is the connected
# GlobalMartDB helper.
db = (
    GlobalMartDB(connection_params=rds_connection)
    .connect()
)

Connection successful


In [8]:
# One DataFrame per (sql, columns) pair, in the same order as `statements`.
dfs = [db.df_builder(sql, columns) for sql, columns in statements]

In [9]:
# Fold the per-metric frames into one table, merging left to right on the
# columns they share.
# NOTE(review): these are inner merges, so a customer missing from any one
# frame is dropped from the fact table -- confirm that is intended.
fact_table = functools.reduce(
    lambda merged, metric_df: merged.merge(metric_df, how='inner'),
    dfs,
)

In [10]:
# Bare expression so the notebook renders the merged per-customer table.
fact_table

Unnamed: 0,customer_id,num_orders,total_spent,purchase_freq,num_returns
0,102013,16,6274.04,30,1
1,100410,16,7373.32,30,1
2,102204,16,4537.68,27,1
3,100104,16,2996.33,33,1
4,101825,16,6848.36,24,1
...,...,...,...,...,...
29495,112683,15,7210.51,30,1
29496,122644,15,11472.98,35,1
29497,108628,15,7700.66,38,1
29498,108992,15,3912.85,29,1


In [11]:
# S3 client used by the upload cells below.
s3 = boto3.client(service_name='s3')

In [12]:
# Write the fact table to a local CSV without the DataFrame index column.
fact_table.to_csv(
    path_or_buf='fact_table.csv',
    index=False,
)

In [13]:
# Upload the exported CSV under the bucket's transformed/ prefix.
s3.upload_file(
    'fact_table.csv',              # Filename: local file to send
    'on-premise-dump-files',       # Bucket: destination bucket
    'transformed/fact_table.csv',  # Key: object key inside the bucket
)

In [14]:
# Upload this notebook file next to the CSV, under the same transformed/ prefix.
s3.upload_file(
    'ETL.ipynb',              # Filename: local file to send
    'on-premise-dump-files',  # Bucket: destination bucket
    'transformed/ETL.ipynb',  # Key: object key inside the bucket
)

# DynamoDB: look up a product by ID

In [15]:
# Condition builders used for DynamoDB key and attribute expressions.
from boto3.dynamodb.conditions import Key, Attr

# Resource-level DynamoDB handle in us-east-1 and the `products` table on it.
dynamo = boto3.resource(
    'dynamodb',
    region_name='us-east-1',
)
table = dynamo.Table('products')

In [16]:
# Product whose record is fetched from the `products` table.
query_product = 'TEC-PH-10002070'

# Match items whose product_id key equals the chosen product; the query call
# is the cell's last expression so its response is displayed.
product_key_condition = Key('product_id').eq(query_product)
table.query(KeyConditionExpression=product_key_condition)

{'Items': [{'dimension': None,
   'upc': '8.84E+11',
   'product_photos_qty': Decimal('4'),
   'product_name': 'Dr. Martens Girl Delany Boots',
   'brand': 'Justin',
   'sizes': '2',
   'product_id': 'TEC-PH-10002070',
   'weight': None,
   'manufacturer': 'Dr. Martens',
   'colors': 'Blue',
   'categories': "Clothing,Shoes,Men's Shoes,Men's Boots",
   'dateAdded': '2016-09-12T11:50:29Z',
   'dateUpdated': '2016-09-23T13:07:28Z'}],
 'Count': 1,
 'ScannedCount': 1,
 'ResponseMetadata': {'RequestId': 'CLBNVG9QKGNBT585S4R2B5210JVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Thu, 26 Aug 2021 15:26:01 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '475',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'CLBNVG9QKGNBT585S4R2B5210JVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '1012503775'},
  'RetryAttempts': 0}}