In [0]:
# File location and type for 'txn' table
file_location = "/FileStore/tables/txn.csv"  
file_type = "csv"

# CSV options
infer_schema = "false"
first_row_is_header = "true"
delimiter = ","

# Read the CSV file
txn_df = spark.read.format(file_type) \
  .option("inferSchema", infer_schema) \
  .option("header", first_row_is_header) \
  .option("sep", delimiter) \
  .load(file_location)

# Display the DataFrame to verify
display(txn_df)

# Create a temporary SQL view for 'txn'
txn_df.createOrReplaceTempView("txn")

Account No,TRANSACTION DETAILS,VALUE DATE,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT
409000611074',TRF FROM Indiaforensic SERVICES,29-Jun-17,,1000000.0,1000000.0
409000611074',TRF FROM Indiaforensic SERVICES,5-Jul-17,,1000000.0,2000000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,18-Jul-17,,500000.0,2500000.0
409000611074',TRF FRM Indiaforensic SERVICES,1-Aug-17,,3000000.0,5500000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,6000000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,6500000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,7000000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,7500000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,8000000.0
409000611074',FDRL/INTERNAL FUND TRANSFE,16-Aug-17,,500000.0,8500000.0


In [0]:
# 1. Maximum Withdrawal Amount in Transactions
max_withdrawal_amount = txn_df.agg({" WITHDRAWAL AMT ": "max"}).collect()[0][0]
print("Maximum withdrawal amount in transactions:", max_withdrawal_amount)

Maximum withdrawal amount in transactions: 9999


In [0]:
# 2. Minimum Withdrawal Amount of an Account
min_withdrawal_amount = txn_df.agg({" WITHDRAWAL AMT ": "min"}).collect()[0][0]
print("Minimum withdrawal amount of an account:", min_withdrawal_amount)

Minimum withdrawal amount of an account: 0.01


In [0]:
# 3. Maximum Deposit Amount of an Account
max_deposit_amount = txn_df.agg({" DEPOSIT AMT ": "max"}).collect()[0][0]
print("Maximum deposit amount of an account:", max_deposit_amount)

Maximum deposit amount of an account: 9999999


In [0]:
# 4. Minimum Deposit Amount of an Account
min_deposit_amount = txn_df.agg({" DEPOSIT AMT ": "min"}).collect()[0][0]
print("Minimum deposit amount of an account:", min_deposit_amount)

Minimum deposit amount of an account: 0.01


In [0]:
# 6. Number of transactions on each date
transactions_by_date = txn_df.groupBy("VALUE DATE").count()
transactions_by_date.show()

+----------+-----+
|VALUE DATE|count|
+----------+-----+
| 23-Dec-16|  143|
|  7-Feb-19|   98|
| 21-Jul-15|   80|
|  9-Sep-15|   91|
| 17-Jan-15|   16|
| 18-Nov-17|   53|
| 21-Feb-18|   77|
| 20-Mar-18|   71|
| 19-Apr-18|   71|
| 21-Jun-16|   97|
| 17-Oct-17|  101|
|  3-Jan-18|   70|
|  8-Jun-18|  223|
| 15-Dec-18|   62|
|  8-Aug-16|   97|
| 17-Dec-16|   74|
|  3-Sep-15|   83|
| 21-Jan-16|   76|
|  4-May-18|   92|
|  7-Sep-17|   94|
+----------+-----+
only showing top 20 rows



In [0]:
# 7. List of customers with withdrawal amount more than 1 lakh
customers_with_high_withdrawal = txn_df.filter(txn_df[" WITHDRAWAL AMT "] > 100000).select("Account No").distinct()
customers_with_high_withdrawal.show()


+-------------+
|   Account No|
+-------------+
|409000438611'|
|     1196711'|
|     1196428'|
|409000493210'|
|409000611074'|
|409000425051'|
|409000405747'|
|409000493201'|
|409000438620'|
|409000362497'|
+-------------+

