In [1]:
# Import the required libraries:
# pandas to process the CSV file
# decimal for exact decimal arithmetic (binary floating point introduces rounding errors)
import pandas as pd
import decimal

In [2]:
# Path of the fraud log CSV file to read
FRAUDE_CSV = "./fraud_log.csv"
# Load the file into the DataFrame `df`
df = pd.read_csv(FRAUDE_CSV)

In [3]:
# Convert both fraud indicator columns to boolean dtype
for flag_col in ('isFraud', 'isFlaggedFraud'):
    df[flag_col] = df[flag_col].astype('bool')

In [4]:
# Check the dtype of every column: one "name dtype" line per column
for col_name, col_dtype in df.dtypes.items():
    print(col_name, col_dtype)

step int64
type object
amount float64
nameOrig object
oldbalanceOrg float64
newbalanceOrig float64
nameDest object
oldbalanceDest float64
newbalanceDest float64
isFraud bool
isFlaggedFraud bool


In [5]:
# Display the first 10 rows to check that the data was loaded correctly
df.head(10)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,False,False
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,False,False
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,True,False
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,True,False
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,False,False
5,1,PAYMENT,7817.71,C90045638,53860.0,46042.29,M573487274,0.0,0.0,False,False
6,1,PAYMENT,7107.77,C154988899,183195.0,176087.23,M408069119,0.0,0.0,False,False
7,1,PAYMENT,7861.64,C1912850431,176087.23,168225.59,M633326333,0.0,0.0,False,False
8,1,PAYMENT,4024.36,C1265012928,2671.0,0.0,M1176932104,0.0,0.0,False,False
9,1,DEBIT,5337.77,C712410124,41720.0,36382.23,C195600860,41898.0,40348.79,False,False


In [6]:
# Confirm that there are more than 6 million records (count of non-null values per column)
df.count()

step              6362620
type              6362620
amount            6362620
nameOrig          6362620
oldbalanceOrg     6362620
newbalanceOrig    6362620
nameDest          6362620
oldbalanceDest    6362620
newbalanceDest    6362620
isFraud           6362620
isFlaggedFraud    6362620
dtype: int64

In [7]:
# Extract the distinct transaction types that appear in the data
df['type'].unique()

array(['PAYMENT', 'TRANSFER', 'CASH_OUT', 'DEBIT', 'CASH_IN'],
      dtype=object)

In [8]:
# For each transaction type, print the total number of rows and how many of
# them are fraudulent (marked by either isFraud or isFlaggedFraud).
is_fraudulent = (df['isFraud'] == True) | (df['isFlaggedFraud'] == True)
for t in df['type'].unique():
    of_this_type = df['type'] == t
    # Summing a boolean mask counts its True entries
    count = int(of_this_type.sum())
    fraud = int((of_this_type & is_fraudulent).sum())
    print("%d payments for type %s (%d fraudulent)" % (count, t, fraud,))

2151495 payments for type PAYMENT (0 fraudulent)
532909 payments for type TRANSFER (4097 fraudulent)
2237500 payments for type CASH_OUT (4116 fraudulent)
41432 payments for type DEBIT (0 fraudulent)
1399284 payments for type CASH_IN (0 fraudulent)


In [9]:
# The amount to replace is explicitly requested for cash withdrawals only,
# i.e. rows of type CASH_OUT.
is_cash_out = df['type'] == "CASH_OUT"
is_marked_fraud = (df['isFraud'] == True) | (df['isFlaggedFraud'] == True)
# dff holds every fraudulent cash-withdrawal record
dff = df.loc[is_cash_out & is_marked_fraud]
dff.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,True,False
252,1,CASH_OUT,2806.0,C2101527076,2806.0,0.0,C1007251739,26202.0,0.0,True,False
681,1,CASH_OUT,20128.0,C1118430673,20128.0,0.0,C339924917,6268.0,12145.85,True,False
724,1,CASH_OUT,416001.33,C749981943,0.0,0.0,C667346055,102.0,9291619.62,True,False
970,1,CASH_OUT,1277212.77,C467632528,1277212.77,0.0,C716083600,0.0,2444985.19,True,False


In [10]:
# Total amount of the fraudulent cash withdrawals in `dff`.
# The sum is carried out with Decimal to avoid binary floating-point rounding
# error; each value goes through str() so the Decimal is built from the
# float's printed representation rather than from its exact binary expansion.
# Only the 'amount' column is iterated: DataFrame.iterrows() would build a
# full Series per row just to read a single field.
# The explicit Decimal(0) start value keeps the result a Decimal even when
# `dff` is empty.
amount = sum(
    (decimal.Decimal(str(value)) for value in dff['amount']),
    decimal.Decimal(0),
)
print(amount)

5989202243.83


In [11]:
# Euro denominations, listed from the highest value to the lowest
euro = [
    500, 200, 100, 50, 20, 10, 5,            # banknotes
    2, 1, 0.5, 0.2, 0.1, 0.05, 0.02, 0.01,   # coins
]
print(len(euro))

15


In [12]:
# Greedy change-making: go through the denominations in the order they are
# listed in `euro` and take as many units of each as fit in what is left.
ac = amount
output = []
for denomination in euro:
    # Build the Decimal from str() so float denominations such as 0.1 are exact
    unit = decimal.Decimal(str(denomination))
    # divmod gives the whole number of units that fit and the amount left over
    units_taken, ac = divmod(ac, unit)
    output.append(int(units_taken))

# Print the solution: one count per denomination, in the same order as `euro`
print("[%s]" % (",".join(map(str, output)),))

[11978404,1,0,0,2,0,0,1,1,1,1,1,0,1,1]
