In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [None]:
# Dataset: https://www.kaggle.com/datasets/teamincribo/credit-card-fraud
# Load the CSV into a DataFrame; the path is relative to the notebook's working directory.
ccf_df = pd.read_csv("../data/credit_card_fraud.csv")

In [None]:
ccf_df.info()

In [None]:
# Stack 100 copies of the frame on top of each other and rebuild a fresh 0..n-1 index.
# NOTE(review): presumably done to enlarge the data so the timing comparisons further
# down are easier to see — confirm.
# This cell is not idempotent: every re-run multiplies the row count by 100 again,
# so reload the CSV before running it a second time.
ccf_df = pd.concat([ccf_df]*100).reset_index(drop=True)

In [None]:
ccf_df["Card Type"].unique()

In [None]:
ccf_df["Transaction Currency"].unique()

In [None]:
ccf_df.head()

# Caso de estudio
Nos piden sacar un informe con el importe de cada transacción una vez descontadas las comisiones correspondientes a la tarjeta de crédito y las correspondientes al cambio de moneda, y expresar el resultado final en euros. A esta cantidad la llamaremos transacción neta:

transacción neta = importe × (1 − comisión de tarjeta − comisión de moneda) × tasa de conversión a euros

Para ello nos proporcionan los siguientes datos:

- Comisiones según la tarjeta:
    - MasterCard = 2 %
    - Visa = 3 %
    - American Express = 5 %
- Comisión por pago en moneda extranjera:
    - INR = 1 %
    - USD = 0.5 %
- Tasa de conversión a euros:
    - INR/EUR = 0.011
    - USD/EUR = 0.95
- Las transacciones en EUR no tienen comisión por moneda y su tasa de conversión es 1.

## Resolución iterando el dataframe

In [None]:
# Row-by-row baseline: compute the net amount in EUR for every transaction.
# The results are collected in a plain list and written to the frame once the
# loop has finished, because pandas warns against modifying an object while it
# is being iterated with iterrows().
transacciones_netas = []
for index, row in ccf_df.iterrows():
    tarjeta = row["Card Type"]
    moneda = row["Transaction Currency"]
    # Card fee as a fraction of the amount. The else branch resets the value
    # for unrecognised cards so that a fee left over from the previous row is
    # never reused; NaN matches what the dictionary-based .map() approach gives.
    if tarjeta == "MasterCard":
        comision_tarjeta = 0.02
    elif tarjeta == "Visa":
        comision_tarjeta = 0.03
    elif tarjeta == "American Express":
        comision_tarjeta = 0.05
    else:
        comision_tarjeta = float("nan")
    # Currency fee and conversion rate to EUR; unrecognised currencies give NaN.
    if moneda == "INR":
        comision_moneda = 0.01
        conversion_moneda = 0.011
    elif moneda == "USD":
        comision_moneda = 0.005
        conversion_moneda = 0.95
    elif moneda == "EUR":
        comision_moneda = 0
        conversion_moneda = 1
    else:
        comision_moneda = float("nan")
        conversion_moneda = float("nan")
    transacciones_netas.append(row["Transaction Amount"] * (1-comision_tarjeta-comision_moneda) * conversion_moneda)
# Single assignment after the loop; values are in the same order as the rows.
ccf_df["transaccion_neta"] = transacciones_netas

In [None]:
ccf_df["transaccion_neta"].head()

In [None]:
# Remove the computed column so that the following approach recomputes it from scratch.
# The label is passed as a list, the conventional list-like form for `columns`.
ccf_df = ccf_df.drop(columns=["transaccion_neta"])

## Usando Apply

In [None]:
def calcular_transaccion_neta(transaction_amount, tarjeta, moneda):
    """Return the net amount of a transaction expressed in EUR.

    The card fee and the currency fee are both subtracted, as fractions of the
    amount, and the remainder is multiplied by the currency's conversion rate
    to EUR.

    Args:
        transaction_amount: Gross amount in the transaction currency.
        tarjeta: Card type: "MasterCard", "Visa" or "American Express".
        moneda: Currency code: "INR", "USD" or "EUR".

    Returns:
        The net amount in EUR, or NaN when the card type or the currency is
        not one of the recognised values (the same outcome as looking the
        value up with a dictionary through Series.map).
    """
    nan = float("nan")

    # Card fee as a fraction of the amount.
    if tarjeta == "MasterCard":
        comision_tarjeta = 0.02
    elif tarjeta == "Visa":
        comision_tarjeta = 0.03
    elif tarjeta == "American Express":
        comision_tarjeta = 0.05
    else:
        # Unknown card: propagate NaN instead of failing on an unbound local.
        comision_tarjeta = nan

    # Currency fee and conversion rate to EUR.
    if moneda == "INR":
        comision_moneda = 0.01
        conversion_moneda = 0.011
    elif moneda == "USD":
        comision_moneda = 0.005
        conversion_moneda = 0.95
    elif moneda == "EUR":
        comision_moneda = 0
        conversion_moneda = 1
    else:
        # Unknown currency: propagate NaN for the same reason as above.
        comision_moneda = nan
        conversion_moneda = nan

    return transaction_amount * (1-comision_tarjeta-comision_moneda) * conversion_moneda

In [None]:
ccf_df.head()

In [None]:
%%timeit
# Row-wise apply (axis=1): the Python function is called once per row with that row's amount, card type and currency.
ccf_df["transaccion_neta"] = ccf_df.apply(lambda row: calcular_transaccion_neta(row["Transaction Amount"], row["Card Type"], row["Transaction Currency"]), axis=1)

In [None]:
ccf_df["transaccion_neta"].head()

In [None]:
# Remove the computed column so that the following approach recomputes it from scratch.
# The label is passed as a list, the conventional list-like form for `columns`.
ccf_df = ccf_df.drop(columns=["transaccion_neta"])

## Operar vectorialmente

### Generar columnas con los valores de comisión y conversión (MAP IT!)

In [None]:
# Lookup tables keyed by the raw values found in the data columns.

# Fee per card type, as a fraction of the amount.
comisiones_tarjeta = {
    "MasterCard": 0.02,
    "Visa": 0.03,
    "American Express": 0.05,
}

# Fee per transaction currency, as a fraction of the amount.
comisiones_moneda = {
    "INR": 0.01,
    "USD": 0.005,
    "EUR": 0,
}

# Multiplier that converts an amount in each currency to EUR.
conversiones_moneda = {
    "INR": 0.011,
    "USD": 0.95,
    "EUR": 1,
}

In [None]:
# Translate every row's card type and currency into its numeric fee / rate
# by looking the value up in the corresponding dictionary.
tipo_tarjeta = ccf_df["Card Type"]
moneda_transaccion = ccf_df["Transaction Currency"]

ccf_df["comisiones_tarjeta"] = tipo_tarjeta.map(comisiones_tarjeta)
ccf_df["comisiones_moneda"] = moneda_transaccion.map(comisiones_moneda)
ccf_df["conversiones_moneda"] = moneda_transaccion.map(conversiones_moneda)

In [None]:
ccf_df.head()

### Operadores matemáticos directamente

In [None]:
%%timeit
# Arithmetic on whole columns at once: amount * (1 - card fee - currency fee) * conversion rate.
ccf_df["transaccion_neta"] = ccf_df["Transaction Amount"] * (1 - ccf_df["comisiones_tarjeta"] - ccf_df["comisiones_moneda"]) * ccf_df["conversiones_moneda"]

In [None]:
ccf_df["transaccion_neta"].head()

In [None]:
# Remove the computed column so that the following approach recomputes it from scratch.
# The label is passed as a list, the conventional list-like form for `columns`.
ccf_df = ccf_df.drop(columns=["transaccion_neta"])

### Todavía más rápido: directamente con NumPy

In [None]:
%%timeit
# Same formula, but each column is first converted with .to_numpy(); the resulting array is assigned back as a column.
ccf_df["transaccion_neta"] = ccf_df["Transaction Amount"].to_numpy() * (1 - ccf_df["comisiones_tarjeta"].to_numpy() - ccf_df["comisiones_moneda"].to_numpy()) * ccf_df["conversiones_moneda"].to_numpy()

In [None]:
# Remove the computed column now that the comparison of approaches is finished.
# The label is passed as a list, the conventional list-like form for `columns`.
ccf_df = ccf_df.drop(columns=["transaccion_neta"])

# Operaciones ya existentes en pandas o numpy

In [None]:
%%timeit
# Min-max normalisation of the amount column: (x - min) / (max - min).
# The expression is only evaluated (for timing); its result is not stored.
(ccf_df["Transaction Amount"] - ccf_df["Transaction Amount"].min()) / (ccf_df["Transaction Amount"].max() - ccf_df["Transaction Amount"].min())

In [None]:
%%timeit
# Mean of the amount column computed with the pandas Series method.
ccf_df["Transaction Amount"].mean()

In [None]:
%%timeit
# Mean of the same column computed on the array returned by .to_numpy().
ccf_df["Transaction Amount"].to_numpy().mean()

# Strings

## ¡Cuidado con las funciones de strings!

In [None]:
ccf_df["Card Expiration Date"].head()

In [None]:
ccf_df["Card Expiration Date"][0].split("/")

In [None]:
%%timeit
# Split every value on "/" by calling str.split once per element through apply.
ccf_df["Card Expiration Date"].apply(lambda x: x.split("/"))

In [None]:
%%timeit
# Same split expressed with the pandas .str accessor.
ccf_df["Card Expiration Date"].str.split("/")

In [None]:
%%timeit
# Same split with a list comprehension over the column converted to a plain Python list;
# the result is a list of lists rather than a Series.
[value.split('/') for value in ccf_df["Card Expiration Date"].tolist()]