# Trust Bank Data Pre-Processing Exploration

In [None]:
-- Build a fully qualified name for every entry that INFORMATION_SCHEMA.TABLES
-- reports for the RAW schema of TRUSTBANKDATA.
SELECT
    'TRUSTBANKDATA.raw.' || table_name AS full_table_name
FROM
    TRUSTBANKDATA.INFORMATION_SCHEMA.TABLES
WHERE
    table_schema = 'RAW';

## Transaction Tables

In [None]:

-- Table diagnostics: TRUSTBANKDATA.raw.FCT_TRANSACTIONS
-- Preview ten rows; there is no ORDER BY, so which rows come back is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS
LIMIT 10;


In [None]:
-- Show the column definitions of FCT_TRANSACTIONS.
DESC TABLE TRUSTBANKDATA.raw.FCT_TRANSACTIONS;

In [None]:
-- Duplicate check: return each TRANSACTIONID that appears on more than one
-- row together with its row count. An empty result means no ID is repeated.
SELECT transactionid, COUNT(*)
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS
GROUP BY transactionid
HAVING COUNT(*) > 1;


In [None]:
-- Completeness check for FCT_TRANSACTIONS: one output row holding, per
-- inspected column, the number of rows where that column is NULL.
SELECT
    COUNT_IF(transactionid IS NULL)            AS missing_transaction_ids,
    COUNT_IF(transactionstartdatetime IS NULL) AS missing_start_datetime,
    COUNT_IF(transactionenddatetime IS NULL)   AS missing_end_datetime,
    COUNT_IF(cardholderid IS NULL)             AS missing_cardholder_id,
    COUNT_IF(locationid IS NULL)               AS missing_location_id,
    COUNT_IF(transactiontypeid IS NULL)        AS missing_transaction_type_id
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS;



In [None]:
-- Row count per transaction type, most frequent type first.
SELECT transactiontypeid, COUNT(*) AS count
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS
GROUP BY transactiontypeid
ORDER BY count DESC;


In [None]:
-- Activity per cardholder: number of transactions, earliest start timestamp
-- and latest end timestamp, ordered from most to fewest transactions.
SELECT
    cardholderid,
    COUNT(*)                      AS transaction_count,
    MIN(transactionstartdatetime) AS first_transaction,
    MAX(transactionenddatetime)   AS last_transaction
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS
GROUP BY cardholderid
ORDER BY transaction_count DESC;


In [None]:
-- Overall time span of the data: earliest transaction start and latest
-- transaction end across the whole table.
SELECT
    MIN(transactionstartdatetime) AS min_start_date,
    MAX(transactionenddatetime)   AS max_end_date
FROM TRUSTBANKDATA.raw.FCT_TRANSACTIONS;


# ATM_LOCATION_LOOKUP Table

In [None]:

-- Table diagnostics: TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
-- Preview up to thirty rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
LIMIT 30;


In [None]:
-- Show the column definitions of ATM_LOCATION_LOOKUP.
DESC TABLE TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP;

In [None]:
-- Completeness check for ATM_LOCATION_LOOKUP: one output row holding, per
-- inspected column, the number of rows where that column is NULL.
SELECT
    COUNT_IF(locationid IS NULL)             AS missing_location_ids,
    COUNT_IF(location_name IS NULL)          AS missing_location_names,
    COUNT_IF(no_of_atms IS NULL)             AS missing_no_of_atms,
    COUNT_IF(city IS NULL)                   AS missing_cities,
    COUNT_IF(state IS NULL)                  AS missing_states,
    COUNT_IF(country IS NULL)                AS missing_countries,
    COUNT_IF(installation_date IS NULL)      AS missing_installation_dates,
    COUNT_IF(maintenance_date IS NULL)       AS missing_maintenance_dates,
    COUNT_IF(operational_status IS NULL)     AS missing_operational_statuses,
    COUNT_IF(cash_deposit_available IS NULL) AS missing_cash_deposit_available
FROM TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP;

In [None]:
-- Total number of ATMs per city, largest total first.
SELECT
    City,
    SUM(No_of_ATMS) AS Total_ATMs
FROM
    TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
GROUP BY
    City
ORDER BY
    Total_ATMs DESC;


In [None]:
-- Number of ATM locations for each operational status value.
SELECT
    Operational_Status,
    COUNT(*) AS Number_of_Locations
FROM
    TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
GROUP BY
    Operational_Status;

In [None]:
-- Full rows of every ATM location that has no maintenance date recorded.
SELECT
    *
FROM
    TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
WHERE
    Maintenance_Date IS NULL;

In [None]:
-- Number of ATM locations for each Cash_Deposit_Available value.
SELECT
    Cash_Deposit_Available,
    COUNT(*) AS Count
FROM
    TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
GROUP BY
    Cash_Deposit_Available;


In [None]:
-- Most recent maintenance date per location name, newest first.
-- Rows without a maintenance date are filtered out before grouping.
SELECT
    Location_Name,
    MAX(Maintenance_Date) AS Latest_Maintenance_Date
FROM
    TRUSTBANKDATA.raw.ATM_LOCATION_LOOKUP
WHERE
    Maintenance_Date IS NOT NULL
GROUP BY
    Location_Name
ORDER BY
    Latest_Maintenance_Date DESC;


# Branch_Lookup Table

In [None]:


-- Table diagnostics: TRUSTBANKDATA.raw.BRANCH_LOOKUP
-- Preview up to thirty rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.BRANCH_LOOKUP
LIMIT 30;

In [None]:
-- Top five branch locations by yearly revenue (SGD).
-- By default Snowflake treats NULL as larger than any value, so a descending
-- sort lists NULLs first; NULLS LAST keeps rows without a revenue figure from
-- displacing real values inside the LIMIT.
-- NOTE(review): the cast suggests Yearly_Revenue_SGD is not stored as a
-- numeric type -- confirm with DESCRIBE TABLE.
SELECT
    Location,
    Yearly_Revenue_SGD
FROM TRUSTBANKDATA.raw.BRANCH_LOOKUP
ORDER BY CAST(Yearly_Revenue_SGD AS FLOAT) DESC NULLS LAST
LIMIT 5;


In [None]:
-- Show the column definitions of BRANCH_LOOKUP.
DESC TABLE TRUSTBANKDATA.raw.BRANCH_LOOKUP;

# BRANCH_PERFORMANCE Table

In [None]:

-- Table diagnostics: TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
-- Preview up to thirty rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
LIMIT 30;

In [None]:
-- Show the column definitions of BRANCH_PERFORMANCE.
DESC TABLE TRUSTBANKDATA.raw.BRANCH_PERFORMANCE;

In [None]:
-- Completeness check for BRANCH_PERFORMANCE: one output row holding, per
-- inspected column, the number of rows where that column is NULL.
SELECT
    COUNT_IF(BRANCH_ID IS NULL)                               AS missing_branch_ids,
    COUNT_IF(DATE IS NULL)                                    AS missing_dates,
    COUNT_IF(ATM_USAGE_COUNT IS NULL)                         AS missing_atm_usage_counts,
    COUNT_IF(AVERAGE_TRANSACTION_TIME_MINUTES IS NULL)        AS missing_avg_transaction_times,
    COUNT_IF(CUSTOMER_SATISFACTION_SCORE IS NULL)             AS missing_customer_satisfaction_scores,
    COUNT_IF(COMPLIANCE_ISSUE_COUNT IS NULL)                  AS missing_compliance_issue_counts,
    COUNT_IF(FRAUDULENT_TRANSACTIONS_COUNT IS NULL)           AS missing_fraudulent_transaction_counts,
    COUNT_IF(NEW_ACCOUNTS_OPENED_COUNT IS NULL)               AS missing_new_accounts_opened_counts,
    COUNT_IF(ACCOUNTS_CLOSED_COUNT IS NULL)                   AS missing_accounts_closed_counts,
    COUNT_IF(CREDIT_CARD_APPLICATIONS_RECEIVED_COUNT IS NULL) AS missing_credit_card_applications_received_counts,
    COUNT_IF(BRANCH_OPERATIONAL_HOURS_HOURS IS NULL)          AS missing_branch_operational_hours,
    COUNT_IF(ATM_OPERATIONAL_HOURS_HOURS IS NULL)             AS missing_atm_operational_hours,
    COUNT_IF(MAINTENANCE_ISSUES_REPORTED_COUNT IS NULL)       AS missing_maintenance_issues_reported_counts,
    COUNT_IF(ATM_DOWNTIME_HOURS IS NULL)                      AS missing_atm_downtime_hours,
    COUNT_IF(TRANSACTION_ERROR_COUNT IS NULL)                 AS missing_transaction_error_counts
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE;


In [None]:
-- Per-branch summary of BRANCH_PERFORMANCE: averages for ATM usage,
-- transaction time, satisfaction score and ATM downtime, plus totals for
-- fraudulent transactions and new accounts.
-- ATM_Usage_Count and ATM_Downtime_Hours are cast before averaging.
SELECT
    branch_id,
    AVG(CAST(ATM_Usage_Count AS INT))      AS Avg_ATM_Usage,
    AVG(Average_Transaction_Time_Minutes)  AS Avg_Transaction_Time,
    AVG(Customer_Satisfaction_Score)       AS Avg_Satisfaction_Score,
    SUM(Fraudulent_Transactions_Count)     AS Total_Fraudulent_Transactions,
    SUM(New_Accounts_Opened_Count)         AS Total_New_Accounts,
    AVG(CAST(ATM_Downtime_Hours AS FLOAT)) AS Avg_ATM_Downtime
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id;


In [None]:
-- Average customer satisfaction and total compliance issues per branch,
-- highest average satisfaction first.
SELECT
    branch_id,
    AVG(Customer_Satisfaction_Score) AS Avg_Customer_Satisfaction,
    SUM(Compliance_Issue_Count)      AS Total_Compliance_Issue
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id
ORDER BY Avg_Customer_Satisfaction DESC;


In [None]:
-- Per-branch totals of credit card applications received, new accounts
-- opened and accounts closed.
SELECT
    branch_id,
    SUM(Credit_Card_Applications_Received_Count) AS Total_Credit_Applications,
    SUM(New_Accounts_Opened_Count)               AS Total_New_Accounts,
    SUM(Accounts_Closed_Count)                   AS Total_Accounts_Closed
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id;


In [None]:
-- Average transaction time and average ATM operational hours per branch,
-- ordered by ascending average transaction time.
SELECT
    branch_id,
    AVG(Average_Transaction_Time_Minutes) AS Avg_Transaction_Time,
    AVG(ATM_Operational_Hours_Hours)      AS Avg_ATM_Operational_Hours
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id
ORDER BY Avg_Transaction_Time;


In [None]:
-- Total compliance issues and fraudulent transactions per branch, sorted by
-- compliance issues first and fraudulent transactions second (both descending).
SELECT
    branch_id,
    SUM(Compliance_Issue_Count)        AS Total_Compliance_Issues,
    SUM(Fraudulent_Transactions_Count) AS Total_Fraudulent_Transactions
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id
ORDER BY Total_Compliance_Issues DESC, Total_Fraudulent_Transactions DESC;


In [None]:
-- Average ATM downtime hours and total reported maintenance issues per branch.
-- ATM_Downtime_Hours is cast to FLOAT before averaging.
SELECT
    branch_id,
    AVG(CAST(ATM_Downtime_Hours AS FLOAT)) AS Avg_ATM_Downtime_Hours,
    SUM(Maintenance_Issues_Reported_Count) AS Total_Maintenance_Issues
FROM TRUSTBANKDATA.raw.BRANCH_PERFORMANCE
GROUP BY branch_id;


# Branch Table

In [None]:

-- Table diagnostics: TRUSTBANKDATA.raw.BRANCH_TABLE
-- Preview up to one hundred rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.BRANCH_TABLE
LIMIT 100;

In [None]:
-- Completeness check for BRANCH_TABLE: one output row holding, per inspected
-- column, the number of rows where that column is NULL.
SELECT
    COUNT_IF(BRANCH_ID IS NULL)                              AS missing_branch_ids,
    COUNT_IF(DATE IS NULL)                                   AS missing_dates,
    COUNT_IF(BRANCH_REVENUE_SGD IS NULL)                     AS missing_branch_revenue,
    COUNT_IF(BRANCH_EXPENSES_SGD IS NULL)                    AS missing_branch_expenses,
    COUNT_IF(NET_INCOME_SGD IS NULL)                         AS missing_net_income,
    COUNT_IF(BRANCH_MANAGER_ID IS NULL)                      AS missing_branch_manager_id,
    COUNT_IF(AVG_DAILY_BRANCH_TRANSACTION_VALUE_SGD IS NULL) AS missing_avg_daily_transaction_value,
    COUNT_IF(PENDING_TRANSACTIONS_COUNT IS NULL)             AS missing_pending_transactions,
    COUNT_IF(BRANCH_AVG_TRANSACTION_QUEUE_LENGTH IS NULL)    AS missing_avg_transaction_queue_length,
    COUNT_IF(DIGITAL_TRANSACTIONS_VOLUME_ IS NULL)           AS missing_digital_transactions_volume,
    COUNT_IF(BRANCH_INTERNET_DOWNTIME_HOURS IS NULL)         AS missing_internet_downtime
FROM TRUSTBANKDATA.raw.BRANCH_TABLE;


In [None]:
-- Show the column definitions of BRANCH_TABLE.
DESC TABLE TRUSTBANKDATA.raw.BRANCH_TABLE;

In [None]:
-- Revenue, expenses and net income summed per branch, highest net income first.
-- Each SGD amount is cast to FLOAT before summing.
SELECT
    branch_id,
    SUM(CAST(Branch_Revenue_SGD AS FLOAT))  AS Total_Revenue,
    SUM(CAST(Branch_Expenses_SGD AS FLOAT)) AS Total_Expenses,
    SUM(CAST(Net_Income_SGD AS FLOAT))      AS Net_Income
FROM TRUSTBANKDATA.raw.BRANCH_TABLE
GROUP BY branch_id
ORDER BY Net_Income DESC;


# CALENDAR_LOOKUP

In [None]:

-- Table diagnostics: TRUSTBANKDATA.raw.CALENDAR_LOOKUP
-- Preview up to thirty rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.CALENDAR_LOOKUP
LIMIT 30;

In [None]:
-- Completeness check for CALENDAR_LOOKUP: one output row holding, per
-- inspected column, the number of rows where that column is NULL.
-- Each alias names the column it counts, so the result can be read without
-- referring back to the query.
SELECT
    COUNT_IF(Date IS NULL)         AS missing_date,
    COUNT_IF(Quarter IS NULL)      AS missing_quarter,
    COUNT_IF(Month IS NULL)        AS missing_month,
    COUNT_IF(Month_Name IS NULL)   AS missing_month_name,
    COUNT_IF(DAY_OF_WEEK IS NULL)  AS missing_day_of_week,
    COUNT_IF(IsHoliday IS NULL)    AS missing_is_holiday,
    COUNT_IF(Day_Name IS NULL)     AS missing_day_name,
    COUNT_IF(Week_of_Year IS NULL) AS missing_week_of_year,
    COUNT_IF(Year IS NULL)         AS missing_year,
    COUNT_IF(END_OF_WEEK IS NULL)  AS missing_end_of_week
FROM
    TRUSTBANKDATA.raw.calendar_lookup;


In [None]:
-- Show the column definitions of CALENDAR_LOOKUP.
DESC TABLE TRUSTBANKDATA.raw.CALENDAR_LOOKUP;

# CUSTOMERLOOKUP

In [None]:
-- Table diagnostics: TRUSTBANKDATA.raw.CUSTOMERLOOKUP
-- Preview up to thirty rows; without ORDER BY the selection is arbitrary.
SELECT *
FROM TRUSTBANKDATA.raw.CUSTOMERLOOKUP
LIMIT 30;

In [None]:
-- Completeness check for CUSTOMERLOOKUP: one output row holding, per
-- inspected column, the number of rows where that column is NULL.
-- "BRANCH-ID" stays double-quoted because the hyphen makes it an invalid
-- unquoted identifier.
SELECT
    COUNT_IF(CardholderID IS NULL)                    AS missing_cardholder_ids,
    COUNT_IF(First_Name IS NULL)                      AS missing_first_names,
    COUNT_IF(Last_Name IS NULL)                       AS missing_last_names,
    COUNT_IF(Gender IS NULL)                          AS missing_genders,
    COUNT_IF(ATMID IS NULL)                           AS missing_atm_ids,
    COUNT_IF(BIRTH_DATE IS NULL)                      AS missing_birth_dates,
    COUNT_IF(AccountType IS NULL)                     AS missing_account_types,
    COUNT_IF(IsPrivateBanking IS NULL)                AS missing_is_private_banking,
    COUNT_IF(Preferred_Contact_Method IS NULL)        AS missing_preferred_contact_methods,
    COUNT_IF(CustomerSince IS NULL)                   AS missing_customer_since,
    COUNT_IF(PhoneNo IS NULL)                         AS missing_phone_numbers,
    COUNT_IF(Balance_SGD IS NULL)                     AS missing_balances,
    COUNT_IF("BRANCH-ID" IS NULL)                     AS missing_branch_ids,
    COUNT_IF(INVESTMENT_PORTFOLIO_VALUE__SGD IS NULL) AS missing_investment_portfolio_values
FROM
    TRUSTBANKDATA.raw.customerlookup;
