# Testing script

Script for testing the class modules created for the Multinational Retail Data Centralisation project.

Examples of connecting to databases, extracting data, cleaning data and uploading the clean data to a centralised database.


## Set up

###  Load in modules of classes

In [None]:
import Classes.database_utils as db_utils
import Classes.data_extraction as data_ext
import Classes.data_cleaning as data_clean

### Creating instances of each class

In [None]:
# Instantiate one object per helper class; these three instances are reused by the cells below.
connector = db_utils.DatabaseConnector()
extractor = data_ext.DataExtractor()
cleaning = data_clean.DataCleaning()

### Creating database connections for AWS RDS and local centralised database

In [None]:
# Read the AWS RDS database credentials from the db_creds.yaml file.
AWS_RDS_credentials = connector.read_db_creds('db_creds')

# Create the engine used to connect to the AWS RDS database from those credentials.
AWS_RDS_engine = connector.init_db_engine(AWS_RDS_credentials)

In [None]:
# Read the centralised local database credentials from the local_creds.yaml file.
local_credentials = connector.read_db_creds('local_creds')

# Create the engine used to connect to the centralised local database from those credentials.
local_engine = connector.init_db_engine(local_credentials)

# List the tables available in the local database as a check that the connection works.
local_list = connector.list_db_tables(local_engine)

## User data

In [None]:
# List the names of the tables available in the AWS RDS database.
db_list = connector.list_db_tables(AWS_RDS_engine)


### Data download from AWS RDS


In [None]:
# Download the 'legacy_users' table through the AWS RDS connection engine.
rds_df = extractor.read_rds_table('legacy_users', AWS_RDS_engine)
# Last expression of the cell: shows the result of head() as a quick sanity check.
rds_df.head()

### Data Cleaning

This step raises a 'SettingWithCopyWarning'; however, the cleaning is still applied as expected.

In [None]:
# Clean the user data extracted from 'legacy_users'; the result is stored under a new name.
clean_rds_df = cleaning.clean_user_data(rds_df)

In [None]:
# Display the cleaned user data for visual inspection.
clean_rds_df

### Uploading dataframe to centralised database:

In [None]:
# Upload the cleaned user data as 'dim_users' using the local database engine.
connector.upload_to_db(clean_rds_df, 'dim_users', local_engine)

## Card Details data

Data extraction from .pdf files

In [None]:
# Extract the card details data.
# NOTE(review): no source is passed here, so the PDF location is presumably set inside retrieve_pdf_data — confirm.
card_df = extractor.retrieve_pdf_data()

In [None]:
# Clean the extracted card details data.
clean_card_df = cleaning.clean_card_data(card_df)

In [None]:
# Upload the cleaned card details as 'dim_card_details' using the local database engine.
connector.upload_to_db(clean_card_df, 'dim_card_details', local_engine)

## Store details

Data extraction from a web API, with the API access key stored in a hidden 'api_configuration.yaml' file located in the Credentials folder.

In [10]:
# Query the number_stores endpoint to find out how many stores are available.
num_stores = extractor.list_number_of_stores('https://aqj7u5id95.execute-api.eu-west-1.amazonaws.com/prod/number_stores')

In [11]:
# Display the value returned by the number_stores request.
num_stores

{'statusCode': 200, 'number_stores': 451}


Data extraction from multiple web APIs can take some time and is noted as a possible project bottleneck.

In [None]:
# Retrieve the store details from the store_details endpoint.
# NOTE(review): num_stores is the whole response dict shown in the output above
# (with 'statusCode' and 'number_stores' keys), not a bare integer — confirm that
# retrieve_stores_data expects the dict.
stores_df = extractor.retrieve_stores_data(num_stores,'https://aqj7u5id95.execute-api.eu-west-1.amazonaws.com/prod/store_details/')

In [None]:
# Clean the retrieved store details data.
clean_stores_df = cleaning.clean_store_data(stores_df)

In [None]:
# Upload the cleaned store details as 'dim_store_details' using the local database engine.
connector.upload_to_db(clean_stores_df, 'dim_store_details', local_engine)

## Product details

Data extraction from an AWS S3 bucket.

In [None]:
# Extract the products data from the products.csv object addressed by its s3:// URI.
product_df = extractor.extract_from_s3('s3://data-handling-public/products.csv')

In [None]:
# Run the product weight conversion first; its result is the input to the general product cleaning step.
product_df_weight = cleaning.convert_product_weights(product_df)

In [None]:
# Clean the products data after the weight conversion has been applied.
clean_product_df = cleaning.clean_products_data(product_df_weight)

In [None]:
# Upload the cleaned products data as 'dim_products' using the local database engine.
connector.upload_to_db(clean_product_df, 'dim_products', local_engine)

## Order data

In [None]:
# Download the 'orders_table' table through the AWS RDS connection engine.
orders_df = extractor.read_rds_table('orders_table', AWS_RDS_engine)

In [None]:
# Clean the downloaded orders data.
clean_orders_df = cleaning.clean_orders_data(orders_df)

In [None]:
# Upload the cleaned orders data as 'orders_table' using the local database engine.
connector.upload_to_db(clean_orders_df, 'orders_table', local_engine)

## Events data


In [None]:
# Extract the date/time events data from the date_details.json file.
# NOTE(review): an HTTPS URL is passed here, whereas the products extraction passes
# an s3:// URI to the same method — confirm extract_from_s3 handles both forms.
events_df = extractor.extract_from_s3('https://data-handling-public.s3.eu-west-1.amazonaws.com/date_details.json')

In [None]:
# Clean the extracted events data.
clean_event_df = cleaning.clean_events_data(events_df)

In [None]:
# Upload the cleaned events data as 'dim_date_times' using the local database engine.
connector.upload_to_db(clean_event_df, 'dim_date_times', local_engine)