In [0]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("banking").getOrCreate()

In [0]:
bank_customer = spark.read.csv("/FileStore/tables/bank_customer.csv",header=True,inferSchema=True)
bank_fd = spark.read.csv("/FileStore/tables/bank_fd_account.csv",header=True,inferSchema=True)
bank_rd = spark.read.csv("/FileStore/tables/bank_rd_account.csv",header=True,inferSchema=True)
bank_sb = spark.read.csv("/FileStore/tables/bank_sb_account.csv",header=True,inferSchema=True)

In [0]:
temp_table_name = "bank_customer"
bank_customer.createOrReplaceTempView(temp_table_name)

temp_table_name = "bank_fd"
bank_fd.createOrReplaceTempView(temp_table_name)

temp_table_name = "bank_rd"
bank_rd.createOrReplaceTempView(temp_table_name)

temp_table_name = "bank_sb"
bank_sb.createOrReplaceTempView(temp_table_name)

In [0]:
bank_customer.show()

+-------+-------------------+--------+----------+--------+---------+---------+
|CUST_ID|         CUST_FNAME|INITIALS|CUST_LNAME|CUST_SEX| CUST_DOB|CUST_TYPE|
+-------+-------------------+--------+----------+--------+---------+---------+
|   1001|               John|    null|     Smith|       M|21-OCT-78|      IND|
|   1002|           Franklin|       J|      Dang|       M|21-OCT-78|      IND|
|   1003|              Alice|       M|    Powell|       F|09-APR-76|      IND|
|   1004|             Ramesh|       R|   Narayan|       M|08-DEC-71|      IND|
|   1005|              Nancy|       S|    Taylor|       F|17-AUG-79|      IND|
|   1006|              Ahmad|     M A|    Jabbar|       M|25-JUN-72|      IND|
|   1007|       DealWell Co.|    null|      null|    null|     null|     CORP|
|   1008|               Aman|       K|    Mathur|       M|30-OCT-80|      IND|
|   1009|             Smitha|    null|    Ramesh|       F|03-MAR-76|      IND|
|   1010|Popular Enterprises|    null|      null|   

In [0]:
bank_fd.show()

+----------+-------+---------+------------+-----------+----------+
|ACCOUNT_NO|CUST_ID| START_DT|FD_TERM_MNTH|INITIAL_AMT|ACC_STATUS|
+----------+-------+---------+------------+-----------+----------+
|5212340001|   1002|22-JUN-00|          30|      15000|    Closed|
|5212340002|   1002|11-MAY-02|          66|      25000|    Active|
|5212340003|   1007|26-NOV-03|          36|      80555|    Closed|
|5212340004|   1003|29-NOV-04|          60|      30000|    Active|
|5212340005|   1009|23-JUN-04|         120|      45000|    Active|
|5212340006|   1001|26-SEP-05|          24|      45000|    Active|
|5212340007|   1010|18-OCT-05|          84|   15000000|    Active|
|5212340008|   1008|18-DEC-06|          36|      98765|    Active|
|5212340009|   1006|14-FEB-07|         120|      25000|    Active|
|5212340010|   1007|21-SEP-08|          60|    2500000|    Active|
+----------+-------+---------+------------+-----------+----------+



In [0]:
bank_rd.show()

+----------+----------+-----------+-----------+---------------+------------+----------+
|ACCOUNT_NO|RD_CUST_ID|RD_START_DT|RD_TERM_YRS|INSTALLMENT_AMT|CURR_BALANCE|ACC_STATUS|
+----------+----------+-----------+-----------+---------------+------------+----------+
|6000010001|      1006|  13-JUN-06|         10|           1000|       75000|    Active|
|6000010002|      1003|  26-NOV-06|          6|           3000|      208000|    Active|
|6000010003|      1008|  03-OCT-07|          5|           2500|      148000|    Active|
|6000010004|      1009|  29-APR-07|          4|           6000|           0|    Closed|
|6000010005|      1001|  17-FEB-08|          1|          10000|           0|    Closed|
|6000010006|      1007|  16-JUL-08|          9|          20000|     1000012|    Active|
|6000010007|      1001|  26-MAY-08|          4|           5000|           0|    Closed|
|6000010008|      1010|  18-DEC-09|          7|          50000|     1550000|    Active|
|6000010009|      1007|  20-APR-

In [0]:
bank_sb.show()

+----------+---------------+-----------------+------------+----------+----------+---------+
|ACCOUNT_NO|PRIMARY_CUST_ID|SECONDARY_CUST_ID|CURR_BAL_AMT|ACC_STATUS|START_DATE| END_DATE|
+----------+---------------+-----------------+------------+----------+----------+---------+
|1000012003|           1005|             1001|         0.0|    Closed| 20-NOV-06|23-AUG-11|
|1000012004|           1008|             null|     84924.0|    Active| 11-MAR-06|     null|
|1000012005|           1009|             1004|   209844.03|    Active| 20-NOV-06|     null|
|1000012006|           1004|             null|         0.0|    Closed| 12-SEP-07|12-SEP-10|
|1000012007|           1003|             null|     12300.0|  Inactive| 09-DEC-08|     null|
|1000012008|           1005|             null|     10235.0|    Active| 23-MAY-09|     null|
|1000012009|           1006|             null|     73535.0|    Active| 10-OCT-10|     null|
|1000012010|           1004|             null|     54674.0|    Active| 09-OCT-11

In [0]:
%sql
-- Get a list of customers who have active FD account with more than 25000 as the initial amount. The list should have the customer id, first name, FD account number and initial amount; the list should be sorted on the initial amount with highest amount first.
-- Answer 1

SELECT c.CUST_ID, c.CUST_FNAME, f.ACCOUNT_NO, f.INITIAL_AMT 
FROM bank_customer c JOIN bank_fd f ON c.CUST_ID = f.CUST_ID
WHERE f.ACC_STATUS = 'Active' AND f.INITIAL_AMT > 25000
ORDER BY f.INITIAL_AMT DESC;

CUST_ID,CUST_FNAME,ACCOUNT_NO,INITIAL_AMT
1010,Popular Enterprises,5212340007,15000000
1007,DealWell Co.,5212340010,2500000
1008,Aman,5212340008,98765
1009,Smitha,5212340005,45000
1001,John,5212340006,45000
1003,Alice,5212340004,30000


In [0]:
%sql 
-- Get a list of customers who have active accounts in all three types- FD, RD and DB, along with their net worth. Net worth is calculated as the total of FD initial Amount. RD Current Balance and Sb current balance. 
-- The list should have the customer id, first name, FD account number, FD initial amount, RD Current Balance, SB Current Balance and Net Worth with highest amount appearing list.
-- Answer 2

SELECT c.CUST_ID, c.CUST_FNAME, f.ACCOUNT_NO, f.INITIAL_AMT, r.CURR_BALANCE, s.CURR_BAL_AMT, (f.INITIAL_AMT + R.CURR_BALANCE + s.CURR_BAL_AMT) AS NET_WORTH
FROM bank_customer c JOIN bank_fd f ON c.CUST_ID = f.CUST_ID
JOIN bank_rd r ON c.CUST_ID = r.RD_CUST_ID
JOIN bank_sb s ON c.CUST_ID = s.PRIMARY_CUST_ID
WHERE f.ACC_STATUS = 'Active' AND r.ACC_STATUS = 'Active' AND s.ACC_STATUS = 'Active'
ORDER BY NET_WORTH DESC

CUST_ID,CUST_FNAME,ACCOUNT_NO,INITIAL_AMT,CURR_BALANCE,CURR_BAL_AMT,NET_WORTH
1008,Aman,5212340008,98765,148000,84924.0,331689.0
1006,Ahmad,5212340009,25000,75000,73535.0,173535.0


In [0]:
# Create a new DataFrame that has a column 'CUST_TYP_INDEX' which is unique number assigned to each customer type. For example: 1.0 for corporate type of customer (CORP) 2 for individual customer (IND)
# Answer 3

keys = [i.CUST_TYPE for i in bank_customer.select('CUST_TYPE').distinct().collect()]
values = [i for i in range(1, len(keys)+1)]

ans = dict(zip(keys, values))

new = []
temp = [row['CUST_TYPE'] for row in bank_customer.select('CUST_TYPE').collect()]
for i in temp:
    new.append(ans[i])
    
from pyspark.sql.types import IntegerType

df = spark.createDataFrame(new, IntegerType())

df = df.selectExpr("value as CUST_TYP_INDEX")

df.show()

+--------------+
|CUST_TYP_INDEX|
+--------------+
|             1|
|             1|
|             1|
|             1|
|             1|
|             1|
|             2|
|             1|
|             1|
|             2|
+--------------+



In [0]:
%sql
-- Get a list of computer ids along with the account number and amount of their active accounts of each type of account they have
-- For example, if a customer has all 3 types of accounts active, then the list should contain the customer id, active FD account number, initial amount, status, active RD account number, balance amount, status, active SB account number, balance amount, status.
-- If a customer has only 2 types of accounts active, then the list should have details of only these 2 accounts. The list should be sorted by the customer id and for each customer id it should be sorted on account number.
-- Answer 4

SELECT bank_fd.CUST_ID, bank_fd.ACCOUNT_NO, bank_fd.INITIAL_AMT AS AMT, bank_fd.ACC_STATUS FROM bank_fd WHERE bank_fd.ACC_STATUS = 'Active'
UNION
SELECT bank_rd.RD_CUST_ID, bank_rd.ACCOUNT_NO, bank_rd.CURR_BALANCE, bank_rd.ACC_STATUS FROM bank_rd WHERE bank_rd.ACC_STATUS = 'Active'
UNION
SELECT bank_sb.PRIMARY_CUST_ID, bank_sb.ACCOUNT_NO, bank_sb.CURR_BAL_AMT, bank_sb.ACC_STATUS FROM bank_sb WHERE bank_sb.ACC_STATUS = 'Active'
ORDER BY CUST_ID, ACCOUNT_NO

CUST_ID,ACCOUNT_NO,AMT,ACC_STATUS
1001,1000012001,25000.0,Active
1001,5212340006,45000.0,Active
1002,1000012002,31109.43,Active
1002,5212340002,25000.0,Active
1003,5212340004,30000.0,Active
1003,6000010002,208000.0,Active
1004,1000012010,54674.0,Active
1005,1000012008,10235.0,Active
1006,1000012009,73535.0,Active
1006,5212340009,25000.0,Active


In [0]:

%sql
-- Modify the above to add the customer first name and also to add a column named acc_type which should show 
-- 'FD' if it is an FD account
-- 'RD' if it is an RD account
-- 'SB' if it is an SB account
-- Answer 5

SELECT bank_customer.CUST_FNAME, bank_fd.CUST_ID, bank_fd.ACCOUNT_NO, bank_fd.INITIAL_AMT AS AMT, bank_fd.ACC_STATUS, 'FD' AS ACC_TYPE
FROM bank_customer JOIN bank_fd ON bank_customer.CUST_ID = bank_fd.CUST_ID 
WHERE bank_fd.ACC_STATUS = 'Active'

UNION

SELECT bank_customer.CUST_FNAME, bank_rd.RD_CUST_ID, bank_rd.ACCOUNT_NO, bank_rd.CURR_BALANCE, bank_rd.ACC_STATUS, 'RD' AS ACC_TYPE
FROM bank_customer JOIN bank_rd ON bank_customer.CUST_ID = bank_rd.RD_CUST_ID 
WHERE bank_rd.ACC_STATUS = 'Active'

UNION

SELECT bank_customer.CUST_FNAME, bank_sb.PRIMARY_CUST_ID, bank_sb.ACCOUNT_NO, bank_sb.CURR_BAL_AMT, bank_sb.ACC_STATUS, 'SB' AS ACC_TYPE
FROM bank_customer JOIN bank_sb ON bank_customer.CUST_ID = bank_sb.PRIMARY_CUST_ID 
WHERE bank_sb.ACC_STATUS = 'Active'

ORDER BY CUST_ID, ACCOUNT_NO

CUST_FNAME,CUST_ID,ACCOUNT_NO,AMT,ACC_STATUS,ACC_TYPE
John,1001,1000012001,25000.0,Active,SB
John,1001,5212340006,45000.0,Active,FD
Franklin,1002,1000012002,31109.43,Active,SB
Franklin,1002,5212340002,25000.0,Active,FD
Alice,1003,5212340004,30000.0,Active,FD
Alice,1003,6000010002,208000.0,Active,RD
Ramesh,1004,1000012010,54674.0,Active,SB
Nancy,1005,1000012008,10235.0,Active,SB
Ahmad,1006,1000012009,73535.0,Active,SB
Ahmad,1006,5212340009,25000.0,Active,FD
