# Challenge

Another approach to identifying fraudulent transactions is to look for outliers in the data. Standard deviation or quartiles are often used to detect outliers. Using this starter notebook, code two Python functions:

* One that uses standard deviation to identify anomalies for any cardholder.

* Another that uses interquartile range to identify anomalies for any cardholder.

## Identifying Outliers using Standard Deviation

In [31]:
# Initial imports
import pandas as pd
import numpy as np
import random
from sqlalchemy import create_engine

In [32]:
# Create a connection to the database
engine = create_engine("postgresql://yourname:testpass123@localhost:5432/fraud_detection")

In [33]:
# Write function that locates outliers using standard deviation
def outlier_finder(cardholder_id):
    query1 = f"""
        SELECT * 
        FROM transaction
        WHERE card IN
        (
        SELECT card
        FROM credit_card
        WHERE (cardholder_id = {cardholder_id})
        )  
        ;
    """

    # Creates dataframe
    cardholder_transaction_df = pd.read_sql(query1, engine)


    # Save average_transaction in variable
    average_transaction = cardholder_transaction_df['amount'].mean()
    # Save standard deviation in a variable
    cardholder_std = cardholder_transaction_df['amount'].std()
    # Find upper and lower limit
    upper_limit = average_transaction + 2 * cardholder_std
    lower_limit = average_transaction - 2 * cardholder_std

    # Define an outlier function
    def outlier(amount):
        if amount < lower_limit or amount > upper_limit:
            return "possible fraud"
        else:
            return "normal"
            
    # Apply the outlier function to the dataframe
    cardholder_transaction_df['outlier'] = cardholder_transaction_df['amount'].apply(outlier)


    # Returns all transactions that have been flagged as 'possible fraud'
    return cardholder_transaction_df[cardholder_transaction_df['outlier']=='possible fraud']


outlier_finder(8)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
19,2573,2018-03-08 20:11:49,20.71,30063281385429,19,possible fraud
107,3132,2018-12-05 13:07:50,20.29,4834483169177062,84,possible fraud
112,3000,2018-12-08 18:32:01,21.61,30063281385429,38,possible fraud


In [34]:
outlier_finder(18)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
18,3098,2018-02-19 22:48:25,1839.0,344119623920892,95,possible fraud
34,1359,2018-04-03 03:23:37,1077.0,344119623920892,100,possible fraud
49,3139,2018-06-03 20:02:28,1814.0,344119623920892,123,possible fraud
71,136,2018-07-18 09:19:08,974.0,344119623920892,19,possible fraud
90,1431,2018-09-10 22:49:41,1176.0,344119623920892,72,possible fraud
117,3252,2018-11-17 05:30:43,1769.0,344119623920892,18,possible fraud
123,1326,2018-12-13 12:09:58,1154.0,344119623920892,8,possible fraud


cardholder_transaction_df['amount'].dtypes

In [36]:
# Find anomalous transactions for 3 random card holders

# Selects random card holders from a list
# Initializes the list
all_card_hodlers = list(range(1,26))
# Selects first random cardholder then removes them from the list to prevent duplicate selection
rand_card_holder_1 = random.choice(all_card_hodlers)
all_card_hodlers.remove(rand_card_holder_1)
# Selects second
rand_card_holder_2 = random.choice(all_card_hodlers)
all_card_hodlers.remove(rand_card_holder_2)
# Selects third
rand_card_holder_3 = random.choice(all_card_hodlers)

# Finds and displays potentially fraudulent transactions for first random card holder
outlier_finder(rand_card_holder_1)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
92,3051,2018-06-21 22:11:26,20.65,4150721559116778,42,possible fraud


In [37]:
# Finds and displays potentially fraudulent transactions for second random card holder
outlier_finder(rand_card_holder_2)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
8,2749,2018-03-12 05:29:57,22.49,6011987562414062,147,possible fraud


In [38]:
# Finds and displays potentially fraudulent transactions for third random card holder
outlier_finder(rand_card_holder_3)

Unnamed: 0,id,date,amount,card,id_merchant,outlier


## Identifying Outliers Using Interquartile Range

In [39]:
# Write a function that locates outliers using interquartile range
"""
split into 4 groups of %25
"""

def iqr_outlier_finder(cardholder_id):
    query1 = """
        SELECT * 
        FROM transaction
        WHERE card IN
        (
        SELECT card
        FROM credit_card
        WHERE (cardholder_id = 18)
        )  
        ;
    """

    # Creates the dataframe
    cardholder_transaction_iqr_df = pd.read_sql(query1, engine)

    # Saves the relavent quartile variables
    Q1 = cardholder_transaction_iqr_df.quantile(0.25)
    Q3 = cardholder_transaction_iqr_df.quantile(0.75)
    IQR = Q3 - Q1

    # Sets the lower and upper limits
    lower_limit = Q1.amount - (1.5 * IQR.amount)
    upper_limit = Q3.amount + (1.5 * IQR.amount)

    # Defines the outlier() function
    def outlier(amount):
        if ((amount > upper_limit) or (amount < lower_limit)):
            return "possible fraud"
        else:
            return "normal"

    # Applys the outlier function ot each transaction of the dataframe
    cardholder_transaction_iqr_df['outlier'] = cardholder_transaction_iqr_df['amount'].apply(outlier)

    # Saves the flagged transactions and returns them
    possible_fraud = cardholder_transaction_iqr_df[cardholder_transaction_iqr_df['outlier']=='possible fraud']
    return possible_fraud

In [40]:
# Find anomalous transactions for 3 random card holders

# Selects random card holders from a list
# Initializes the list
all_card_hodlers = list(range(1,26))
# Selects first random cardholder then removes them from the list to prevent duplicate selection
rand_card_holder_1 = random.choice(all_card_hodlers)
all_card_hodlers.remove(rand_card_holder_1)
# Selects second
rand_card_holder_2 = random.choice(all_card_hodlers)
all_card_hodlers.remove(rand_card_holder_2)
# Selects third
rand_card_holder_3 = random.choice(all_card_hodlers)

# Finds and displays potentially fraudulent transactions for first random card holder
iqr_outlier_finder(rand_card_holder_1)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
2,3457,2018-01-07 01:10:54,175.0,344119623920892,12,possible fraud
3,812,2018-01-08 11:15:36,333.0,344119623920892,95,possible fraud
18,3098,2018-02-19 22:48:25,1839.0,344119623920892,95,possible fraud
34,1359,2018-04-03 03:23:37,1077.0,344119623920892,100,possible fraud
49,3139,2018-06-03 20:02:28,1814.0,344119623920892,123,possible fraud
62,654,2018-06-30 01:56:19,121.0,344119623920892,20,possible fraud
66,560,2018-07-06 16:12:08,117.0,344119623920892,62,possible fraud
71,136,2018-07-18 09:19:08,974.0,344119623920892,19,possible fraud
87,2103,2018-09-02 11:20:42,458.0,344119623920892,10,possible fraud
90,1431,2018-09-10 22:49:41,1176.0,344119623920892,72,possible fraud


In [41]:
# Finds and displays potentially fraudulent transactions for second random card holder
iqr_outlier_finder(rand_card_holder_2)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
2,3457,2018-01-07 01:10:54,175.0,344119623920892,12,possible fraud
3,812,2018-01-08 11:15:36,333.0,344119623920892,95,possible fraud
18,3098,2018-02-19 22:48:25,1839.0,344119623920892,95,possible fraud
34,1359,2018-04-03 03:23:37,1077.0,344119623920892,100,possible fraud
49,3139,2018-06-03 20:02:28,1814.0,344119623920892,123,possible fraud
62,654,2018-06-30 01:56:19,121.0,344119623920892,20,possible fraud
66,560,2018-07-06 16:12:08,117.0,344119623920892,62,possible fraud
71,136,2018-07-18 09:19:08,974.0,344119623920892,19,possible fraud
87,2103,2018-09-02 11:20:42,458.0,344119623920892,10,possible fraud
90,1431,2018-09-10 22:49:41,1176.0,344119623920892,72,possible fraud


In [42]:
# Finds and displays potentially fraudulent transactions for third random card holder
iqr_outlier_finder(rand_card_holder_3)

Unnamed: 0,id,date,amount,card,id_merchant,outlier
2,3457,2018-01-07 01:10:54,175.0,344119623920892,12,possible fraud
3,812,2018-01-08 11:15:36,333.0,344119623920892,95,possible fraud
18,3098,2018-02-19 22:48:25,1839.0,344119623920892,95,possible fraud
34,1359,2018-04-03 03:23:37,1077.0,344119623920892,100,possible fraud
49,3139,2018-06-03 20:02:28,1814.0,344119623920892,123,possible fraud
62,654,2018-06-30 01:56:19,121.0,344119623920892,20,possible fraud
66,560,2018-07-06 16:12:08,117.0,344119623920892,62,possible fraud
71,136,2018-07-18 09:19:08,974.0,344119623920892,19,possible fraud
87,2103,2018-09-02 11:20:42,458.0,344119623920892,10,possible fraud
90,1431,2018-09-10 22:49:41,1176.0,344119623920892,72,possible fraud
