In [402]:
import pandas as pd
import json
"This is a string that makes this line exactly seventy-nine characters long."

def load_data(file_path, sheet_name):
    """Read one worksheet of an Excel workbook into a DataFrame.

    Args:
        file_path: Location of the workbook, passed straight to
            ``pd.read_excel``.
        sheet_name: Worksheet to load, forwarded as ``sheet_name``.

    Returns:
        Whatever ``pd.read_excel`` returns for the requested sheet.
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)

def extract_kyc_info(data):
    """Parse the KYC text column into one record per customer.

    The first 50 rows of the 'KYC Information for 10 Customers:' column are
    read in consecutive groups of five lines. Within each group:

    * line 0 supplies the customer id: its second whitespace-separated token
      with any surrounding ':' stripped (presumably of the form
      "Customer <id>:" -- confirm against the sheet);
    * lines 1-4 supply name, country, purpose and nature, each taken as the
      text after the first ': ' on the line.

    Args:
        data: DataFrame that contains the KYC column described above.

    Returns:
        dict mapping customer id (str) to a record with the keys 'Customer',
        'Name', 'Country', 'Purpose', 'Nature', 'Transactions' (empty dict),
        'Number of Transactions' (0) and 'Risk Class' ('Not Classified').

    Raises:
        KeyError: if the KYC column is missing.
        IndexError: if a group has fewer than five lines or a line lacks the
            expected separator.
    """
    kyc_column = 'KYC Information for 10 Customers:'
    lines_per_customer = 5

    def value_after_label(line):
        # Split on the first ': ' only, so a value that itself contains ': '
        # (e.g. "Purpose: Import: luxury goods") is kept whole rather than
        # being cut off at the second separator.
        return line.split(': ', 1)[1]

    kyc_info = {}
    for start in range(0, 50, lines_per_customer):
        customer_info = data.iloc[start:start + lines_per_customer][kyc_column]
        customer_id = customer_info.iloc[0].split()[1].strip(':')
        name = value_after_label(customer_info.iloc[1])
        country = value_after_label(customer_info.iloc[2])
        purpose = value_after_label(customer_info.iloc[3])
        nature = value_after_label(customer_info.iloc[4])

        kyc_info[customer_id] = {
            'Customer': customer_id,
            'Name': name,
            'Country': country,
            'Purpose': purpose,
            'Nature': nature,
            'Transactions': {},
            'Number of Transactions': 0,
            'Risk Class': 'Not Classified'
        }
    return kyc_info

def append_transactions(data, kyc_info):
    """Attach each transaction row to its customer and update the counts.

    Every row of ``data`` whose 'Customer_ID' (compared as a string) is a key
    of ``kyc_info`` is stored under that customer's 'Transactions' dict,
    keyed by the stringified 'Transaction_ID'. Rows for unknown customers are
    ignored. Afterwards 'Number of Transactions' is set for every customer.

    Args:
        data: DataFrame with the columns 'Customer_ID', 'Transaction_ID',
            'Amount', 'Date', 'From/To Country' and 'Transaction_Type'.
            'Date' values must provide ``strftime``.
        kyc_info: dict of customer records, each holding a 'Transactions'
            dict. It is modified in place.

    Returns:
        The same ``kyc_info`` object, updated.
    """
    for _, row in data.iterrows():
        customer_id = str(row['Customer_ID'])
        if customer_id not in kyc_info:
            continue

        transaction_id = str(row['Transaction_ID'])
        kyc_info[customer_id]['Transactions'][transaction_id] = {
            'Transaction_ID': row['Transaction_ID'],
            'Amount': row['Amount'],
            'Date': row['Date'].strftime('%Y-%m-%d'),
            'From/To Country': row['From/To Country'],
            'Transaction Type': row['Transaction_Type']
        }

    # The count must be written inside the loop so that every customer is
    # updated; previously only the last customer iterated received a count
    # and an empty kyc_info raised an error.
    for record in kyc_info.values():
        record['Number of Transactions'] = len(record['Transactions'])

    return kyc_info


In [403]:
def classify_risk(customers):
    """Score every customer and assign a risk class.

    Scoring, applied per customer starting from 0:

    * +5   if 'Country' is Russia, UAE or Oman
    * -0.5 if 'Country' is Sweden, Italy, Germany, Japan, France, Spain or USA
    * +2   if the lower-cased 'Purpose' contains 'trader', 'real estate',
           'dealer' or 'luxury'
    * +3.5 if the lower-cased 'Nature' contains 'sporadic', 'large' or
           'irregular'
    * +0.5 for each transaction whose 'Transaction Type' is 'Transfer'

    A score of 5 or more is 'High Risk', 4.5 or more is 'Normal Risk', and
    anything lower is 'Low Risk'.

    Args:
        customers: dict of customer records keyed by customer id. Each
            record's 'Risk Class' entry is overwritten in place.

    Returns:
        Tuple ``(customers, risk_points)`` where ``customers`` is the same
        dict that was passed in and ``risk_points`` maps customer id to
        ``(score, risk_class)``.
    """
    risky_countries = ['Russia', 'UAE', 'Oman']
    safe_countries = ['Sweden', 'Italy', 'Germany',
                      'Japan', 'France', 'Spain', 'USA']
    purpose_keywords = ['trader', 'real estate', 'dealer', 'luxury']
    risky_transaction_types = ['Transfer']
    nature_keywords = ['sporadic', 'large', 'irregular']

    def mentions_any(text, keywords):
        # Case-insensitive substring match against any of the keywords.
        return any(keyword in text.lower() for keyword in keywords)

    def label_for(score):
        # Thresholds checked from the bottom up; equivalent to >=5 / >=4.5.
        if score < 4.5:
            return 'Low Risk'
        if score < 5:
            return 'Normal Risk'
        return 'High Risk'

    risk_points = {}

    for customer_id, record in customers.items():
        score = 0
        country = record['Country']

        if country in risky_countries:
            score += 5
        if country in safe_countries:
            score -= 0.5
        if mentions_any(record['Purpose'], purpose_keywords):
            score += 2
        if mentions_any(record['Nature'], nature_keywords):
            score += 3.5

        # One increment per matching transaction keeps the score an int when
        # nothing fractional was added, exactly as before.
        for entry in record['Transactions'].values():
            if entry['Transaction Type'] in risky_transaction_types:
                score += 0.5

        verdict = label_for(score)
        record['Risk Class'] = verdict
        risk_points[customer_id] = (score, verdict)

    return customers, risk_points


In [404]:
def main(file_path='/Users/davidabaas/Downloads/Case data risk analyst AML 240607.xlsx',
         sheet_name='Sheet1'):
    """Run the full pipeline: load, parse KYC, attach transactions, classify.

    Prints the classified customer records as indented JSON, followed by one
    line per customer with its risk points and risk class.

    Args:
        file_path: Workbook to read. Defaults to the original hard-coded
            location so existing ``main()`` calls behave as before; pass a
            different path to run on another machine or data set.
        sheet_name: Worksheet holding both the KYC text column and the
            transaction columns.

    Returns:
        dict of customer records keyed by customer id, including their
        transactions and assigned 'Risk Class'.
    """
    data = load_data(file_path, sheet_name)

    kyc_info = extract_kyc_info(data)
    # The same sheet supplies the transaction rows; only the index is reset.
    transaction_data = data.reset_index(drop=True)
    kyc_info = append_transactions(transaction_data, kyc_info)

    kyc_info, risk_points = classify_risk(kyc_info)

    kyc_info_json = json.dumps(kyc_info, indent=4)
    print(kyc_info_json)

    print("\nCustomer Risk Points and Class:")
    for customer_id, (points, risk_class) in risk_points.items():
        print(f"Customer ID {customer_id}: {points} points, {risk_class}")

    return kyc_info

# Run the pipeline and keep the result in a module-level name; the cells
# below read `kyc_info` directly.
if __name__ == "__main__":
    kyc_info = main()

In [405]:
# Inspect the complete record of one customer as indented JSON.
Customer_ID = '1007'
print(json.dumps(kyc_info[Customer_ID], indent = 4))

In [406]:
# Look up a single transaction of customer '1007' by its transaction id.
# NOTE(review): the customer id is hard-coded here rather than reusing
# `Customer_ID`, so changing that variable does not affect this lookup.
Transaction_ID = '97'
kyc_info['1007']['Transactions'][Transaction_ID]

In [407]:
def print_high_risk_customers(customers):
    """Print a heading followed by the id of every 'High Risk' customer.

    Args:
        customers: dict of customer records keyed by customer id; each record
            must contain a 'Risk Class' entry.
    """
    print("\nHigh Risk Customers:")
    # Lazy generator: ids are printed as they are found, in dict order.
    flagged_ids = (
        customer_id
        for customer_id, record in customers.items()
        if record['Risk Class'] == 'High Risk'
    )
    for customer_id in flagged_ids:
        print(f"Customer ID {customer_id}")
            
# Show the ids of all customers whose 'Risk Class' is 'High Risk'.
print_high_risk_customers(kyc_info)