In [6]:
import numpy as np
import pandas as pd
from warnings import filterwarnings
filterwarnings('ignore')

In [3]:
# Load the tab-separated bank/account listing into a DataFrame.
df = pd.read_csv(filepath_or_buffer='bankd.txt', sep='\t')

In [8]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   BankId     18 non-null     object
 1   AccountID  18 non-null     object
dtypes: object(2)
memory usage: 420.0+ bytes


In [14]:
# Display the loaded frame to eyeball the raw BankId / AccountID values.
df

Unnamed: 0,BankId,AccountID
0,A1020,S39403030
1,A1020,S30495345
2,A1020,S234234432
3,A1020,C34534543
4,A1020,C34595044
5,A1020,C2304985345
6,A1020,D934530945
7,A1020,D940404984
8,A1020,D02340494r
9,A1021,S3405935845


In [16]:
# Keep only rows whose full AccountID (prefix letter included) is 8-13
# characters long; both bounds are inclusive.
account_id_length = df["AccountID"].str.len()
length_in_range = account_id_length.between(8, 13)
len_data = df[length_in_range]

print(len_data)

   BankId    AccountID
0   A1020    S39403030
1   A1020    S30495345
2   A1020   S234234432
3   A1020    C34534543
4   A1020    C34595044
5   A1020  C2304985345
6   A1020   D934530945
7   A1020   D940404984
8   A1020   D02340494r
9   A1021  S3405935845
10  A1021   S950504840
11  A1021    S94040958
12  A1021    C34095345
13  A1021    C08548494
14  A1021    C88594048
15  A1021     D9440021
16  A1021     D9440021


In [31]:
# Split the raw rows into accepted and rejected accounts, then build target_df.
#
# An AccountID is read as <one-character type prefix><account number>.
# Each row is tested in this order; the first failing rule decides the reason:
#   1. the (BankId, AccountID) pair was already accepted -> 'Duplicate Record'
#   2. the account-number part is not 7-12 characters    -> 'Account Length Violation'
#   3. the account-number part is not all decimal digits -> 'Invalid Account ID Composition'
# Rows passing all three rules are accepted.
error_records = []      # (bank_id, account_id, reason) tuples, in input order
valid_records = []      # (bank_id, account_id) tuples, in input order
accepted_pairs = set()  # same pairs as valid_records, for O(1) duplicate lookups
target_rows = []        # (bank_id, account_type, account_no) for accepted rows

account_type_map = {'C': 'Credit', 'S': 'Savings', 'D': 'Deposit'}

for bank_id, account_id in zip(df['BankId'], df['AccountID']):
    pair = (bank_id, account_id)
    account_no = account_id[1:]

    if pair in accepted_pairs:
        error_records.append((bank_id, account_id, 'Duplicate Record'))
    elif not (7 <= len(account_no) <= 12):
        error_records.append((bank_id, account_id, 'Account Length Violation'))
    elif not account_no.isdecimal():
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() below would reject.
        error_records.append((bank_id, account_id, 'Invalid Account ID Composition'))
    else:
        valid_records.append(pair)
        accepted_pairs.add(pair)
        # Prefixes missing from account_type_map are kept and labelled 'Unknown'.
        account_type = account_type_map.get(account_id[0], 'Unknown')
        # NOTE: int() drops leading zeros (e.g. 'C08548494' -> 8548494); store
        # account_no as a string instead if leading zeros are significant.
        target_rows.append((bank_id, account_type, int(account_no)))

# Build every frame once from the collected records. DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
valid_df = pd.DataFrame(valid_records, columns=['BankID', 'AccountID'])
# error_df starts empty here; it is filled from error_records in a later cell.
error_df = pd.DataFrame(columns=['BankID', 'AccountID', 'Reason'])
target_df = pd.DataFrame(target_rows, columns=['BankID', 'Account_type', 'Account_no'])


In [32]:
# Show the accepted accounts under a heading line.
print("Target_Account:", target_df, sep="\n")

Target_Account:
   BankID Account_type  Account_no
0   A1020      Savings    39403030
1   A1020      Savings    30495345
2   A1020      Savings   234234432
3   A1020       Credit    34534543
4   A1020       Credit    34595044
5   A1020       Credit  2304985345
6   A1020      Deposit   934530945
7   A1020      Deposit   940404984
8   A1021      Savings  3405935845
9   A1021      Savings   950504840
10  A1021      Savings    94040958
11  A1021       Credit    34095345
12  A1021       Credit     8548494
13  A1021       Credit    88594048
14  A1021      Deposit     9440021


In [33]:
# Build the rejected-accounts frame in one step from the collected
# (bank_id, account_id, reason) tuples. Building it fresh, rather than
# appending to the existing error_df, keeps this cell idempotent on re-run and
# avoids DataFrame.append, which was removed in pandas 2.0.
error_df = pd.DataFrame(error_records, columns=['BankID', 'AccountID', 'Reason'])

print("\nError_Account:")
print(error_df)


Error_Account:
  BankID   AccountID                          Reason
0  A1020  D02340494r  Invalid Account ID Composition
1  A1021    D9440021                Duplicate Record
2  A1021     D495489        Account Length Violation
