## Identify card and loan defaulters

In [10]:
from datetime import datetime

import apache_beam as beam
import apache_beam.runners.interactive.interactive_beam as ib
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
# Build the pipeline on Beam's InteractiveRunner so results can be inspected from the notebook.
p = beam.Pipeline(InteractiveRunner())

In [11]:
def calculate_points(element):
    """Score one credit-card statement line for defaulting behaviour.

    ``element`` is a comma-separated record with exactly ten fields, e.g.
    ``CT28383,Miyako,Burns,R_7488,Issuers,500,490,38,101,30-01-2018``
    (customer id, first name, last name, relationship id, card type,
    max limit, spent, cash withdrawn, payment cleared, payment date).

    Returns a ``(key, points)`` pair. ``key`` is
    ``"customer_id, first_name, last_name"`` and ``points`` counts how many
    of the following rules the statement breaks (0 to 3):

    1. the cleared payment is below 70% of the amount spent;
    2. the full limit was spent and some payment is still pending;
    3. the full limit was spent and the cleared payment is below 70% of it.

    Raises ``ValueError`` when the line does not have ten fields or one of
    the numeric fields is not an integer.
    """
    (customer_id, first_name, last_name, _relationship_id, _card_type,
     max_limit, spent, _cash_withdrawn, payment_cleared,
     _payment_date) = element.split(',')

    amount_spent = int(spent)
    amount_cleared = int(payment_cleared)
    credit_limit = int(max_limit)

    key_name = ", ".join([customer_id, first_name, last_name])

    under_paid = amount_cleared < amount_spent * 0.7
    limit_reached = amount_spent == credit_limit
    broken_rules = (
        under_paid,                                       # rule 1
        limit_reached and amount_cleared < amount_spent,  # rule 2
        limit_reached and under_paid,                     # rule 3
    )
    # Each broken rule is worth one point (True counts as 1).
    return key_name, sum(broken_rules)

def calculate_late_payment(elements):
    """Flag whether a loan instalment was paid after its due date.

    Args:
        elements: One loan row already split into fields. Index 6 holds the
            due date and index 8 the payment date, both as ``dd-mm-YYYY``
            strings; surrounding whitespace is ignored.

    Returns:
        A new list containing the input fields followed by ``'1'`` when the
        payment date is later than the due date, otherwise ``'0'``.

    Raises:
        ValueError: if either date does not match ``dd-mm-YYYY``.
        IndexError: if the row has fewer than nine fields.
    """
    date_format = '%d-%m-%Y'
    due_date = datetime.strptime(elements[6].strip(), date_format)
    payment_date = datetime.strptime(elements[8].strip(), date_format)
    late_flag = '1' if payment_date > due_date else '0'
    # Build a new row instead of appending in place: Beam transforms must not
    # mutate their input elements, which may be shared with other consumers.
    return list(elements) + [late_flag]

def format_output(sum_pair):
    """Render a ``(key, count)`` pair as the text ``"<key>, <count> missed"``."""
    key_name, miss_months = sum_pair
    return f"{key_name!s}, {miss_months!s} missed"

def calculate_month(input_list):
    """Append the month of the payment date to a loan row.

    Args:
        input_list: One loan row already split into fields, e.g.
            ``[CT88330, Humberto, Banks, Serviceman, LN_1559, Medical Loan,
            26-01-2018, 2000, 30-01-2018]``. Index 8 holds the payment date
            as a ``dd-mm-YYYY`` string; surrounding whitespace is ignored.

    Returns:
        A new list containing the input fields followed by the payment month
        as a string without zero padding (``'1'`` for January).

    Raises:
        ValueError: if the payment date does not match ``dd-mm-YYYY``.
        IndexError: if the row has fewer than nine fields.
    """
    payment_date = datetime.strptime(input_list[8].strip(), '%d-%m-%Y')
    # Build a new row instead of appending in place: Beam transforms must not
    # mutate their input elements, which may be shared with other consumers.
    return list(input_list) + [str(payment_date.month)]

def calculate_personal_loan_defaulter(input):
    """Decide whether a customer defaulted on a personal loan within one year.

    Args:
        input: A ``(name, months)`` pair where ``months`` is any iterable of
            the month numbers (1-12) in which an instalment was paid. The
            parameter name shadows the builtin but is kept for callers.

    Returns:
        ``(name, missed_payments)`` when more than 4 of the 12 monthly
        payments are missing; otherwise ``(name, longest_gap)`` when more
        than 2 consecutive months were skipped; otherwise ``(name, 0)``.
    """
    max_allowed_missed_months = 4
    max_allowed_consecutive_missing = 2
    months_in_year = 12

    name, months_list = input
    # sorted() works on any iterable (grouped values are only guaranteed to
    # be iterable, not a list) and leaves the caller's collection untouched.
    paid_months = sorted(months_list)

    missed_payments = months_in_year - len(paid_months)
    if missed_payments > max_allowed_missed_months:
        return name, missed_payments

    # At this point at least 8 payments exist, so paid_months is non-empty.
    # Candidate gaps: before the first payment, after the last payment, and
    # between each pair of neighbouring payments.
    gaps = [paid_months[0] - 1, months_in_year - paid_months[-1]]
    gaps.extend(
        later - earlier - 1
        for earlier, later in zip(paid_months, paid_months[1:])
    )
    longest_gap = max(gaps)
    if longest_gap > max_allowed_consecutive_missing:
        return name, longest_gap
    return name, 0

def return_tuple(element):
    """Split a comma-separated line into ``(first_field, remaining_fields)``.

    The first field becomes the key; the rest are returned as a list, with
    any whitespace around them left untouched.
    """
    key, *remaining_fields = element.split(',')
    return key, remaining_fields

In [12]:
# Credit-card branch: score every statement line, total the points per
# customer and keep only customers with at least one defaulter point.
card_defaulter = (
    p
    | "Read credit card data" >> beam.io.ReadFromText("bank/cards.txt", skip_header_lines=1)
    | "Calculate defaulter points" >> beam.Map(calculate_points)
    | "Combine points for defaulters" >> beam.CombinePerKey(sum)
    | "Filter card defaulters" >> beam.Filter(lambda e: e[1] > 0)
    # calculate_points keys each row by "customer_id, first_name, last_name",
    # whereas the loan defaulters are keyed by the bare customer id. Re-key
    # the card side the same way so CoGroupByKey can match both sides.
    | "Format card output" >> beam.Map(lambda e: e[0] + ", " + str(e[1]) + " defaulter points")
    | "tuple for card" >> beam.Map(return_tuple)
)

# Loan rows consumed by both loan branches below: read the loan file
# (skipping its header line) and split every line into a list of fields.
input_collection = (
    p
    | "Read loan file" >> beam.io.ReadFromText("bank/loan.txt",skip_header_lines=1)
    | 'Split Row' >> beam.Map(lambda row : row.split(','))
)

# Medical-loan branch: count late payments per customer and report customers
# with three or more of them.
medical_loan_defaulter = (
    input_collection
    | 'Filter medical loan' >> beam.Filter(lambda row: row[5].strip() == 'Medical Loan')
    | 'Calculate late payment' >> beam.Map(calculate_late_payment)
    # Key: "customer_id, first last"; value: the 0/1 late flag read from index 9.
    | 'Make key value pairs' >> beam.Map(
        lambda row: ('{}, {} {}'.format(row[0], row[1], row[2]), int(row[9])))
    | 'Group medical loan based on month' >> beam.CombinePerKey(sum)
    | 'Check for medical loan defaulter' >> beam.Filter(lambda pair: pair[1] >= 3)
    | 'Format medical loan output' >> beam.Map(format_output)
)

# Personal-loan branch: collect the payment months per customer and report
# customers whose missed-month count from calculate_personal_loan_defaulter
# is greater than zero.
personal_loan_defaulter = (
    input_collection
    | 'Filter personal loan' >> beam.Filter(lambda row: row[5].strip() == 'Personal Loan')
    | 'Split and Append New Month Column' >> beam.Map(calculate_month)
    # Key: "customer_id, first last"; value: the payment month read from index 9.
    | 'Make key value pairs loan' >> beam.Map(
        lambda row: ('{}, {} {}'.format(row[0], row[1], row[2]), int(row[9])))
    | 'Group personal loan based on month' >> beam.GroupByKey()
    | 'Check for personal loan defaulter' >> beam.Map(calculate_personal_loan_defaulter)
    | 'Filter only personal loan defaulters' >> beam.Filter(lambda pair: pair[1] > 0)
    | 'Format personal loan output' >> beam.Map(format_output)
)

# Merge the personal- and medical-loan defaulter lines into one collection,
# then turn each formatted line into (first comma-separated field, remaining fields).
final_loan_defaulters = (
      ( personal_loan_defaulter, medical_loan_defaulter )
      | 'Combine all defaulters' >> beam.Flatten()
      | 'tuple for loan' >> beam.Map(return_tuple)
    )  
                        
# Join card and loan defaulters by key; each output value groups the matching
# entries under the 'card_defaulter' and 'loan_defaulter' tags.
# NOTE(review): entries only line up when both inputs use identical keys;
# the loan side is keyed by the first comma-separated field (return_tuple).
both_defaulters =  (
    {'card_defaulter': card_defaulter, 'loan_defaulter': final_loan_defaulters}
    | beam.CoGroupByKey()
   )

# Display the joined collection in the notebook.
ib.show(both_defaulters)