**Dataset Module**

In [None]:
def retrieve_data(file_path):
    """
    Load transactions from a colon-separated text file.

    Every non-blank line must hold seven ``:``-separated fields in this
    order: user id, transaction id, description, amount, x coordinate,
    y coordinate and the fraud flag. Only the exact text ``true`` marks a
    transaction as fraudulent; any other value is stored as ``False``.

    :param file_path: path of the transactions file
    :return: a list of transaction dictionaries, one per line, in file order
    :raises ValueError: if a numeric field cannot be converted
    :raises IndexError: if a line has fewer than seven fields
    """
    transactions = []

    # The context manager closes the file even when a malformed line raises.
    with open(file_path, "r") as file:
        for line in file:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no record and would otherwise fail the int() conversion.
            if not stripped:
                continue

            fields = stripped.split(":")
            transactions.append({
                "user_id": int(fields[0]),
                "transaction_id": fields[1],
                "description": fields[2],
                "amount": float(fields[3]),
                "x_coordinate": float(fields[4]),
                "y_coordinate": float(fields[5]),
                "is_fraudulent": fields[6] == 'true',
            })

    return transactions


**Distance Module**

In [None]:
def user_transaction_distance(trans1_id: str, trans2_id: str, transactions: list) -> float:
    """
    Compute the straight-line distance between two transactions of one user.

    :param trans1_id: id of the first transaction
    :param trans2_id: id of the second transaction
    :param transactions: the list of all the users' transactions
    :return: the Euclidean distance between the two transaction locations,
             -1 when the transactions belong to different users,
             -2 when either id is not present in ``transactions``
    """
    first = second = None
    for record in transactions:
        record_id = record["transaction_id"]
        # Keep only the earliest record found for each id.
        if first is None and record_id == trans1_id:
            first = record
        if second is None and record_id == trans2_id:
            second = record

    # Unknown transaction id
    if first is None or second is None:
        return -2

    # The two transactions must belong to the same user
    if first["user_id"] != second["user_id"]:
        return -1

    dx = first["x_coordinate"] - second["x_coordinate"]
    dy = first["y_coordinate"] - second["y_coordinate"]
    return (dx ** 2 + dy ** 2) ** 0.5


In [None]:
def transaction_distance(trans1_id: str, trans2_id: str, transactions: list) -> float:
    """
    Compute the straight-line distance between transactions of two different users.

    :param trans1_id: transaction id belonging to the first user
    :param trans2_id: transaction id belonging to the second user
    :param transactions: the list of all the users' transactions
    :return: the Euclidean distance between the two transaction locations,
             -1 when both transactions belong to the same user,
             -2 when either id is not present in ``transactions``
    """
    first = second = None
    for record in transactions:
        record_id = record["transaction_id"]
        # Keep only the earliest record found for each id.
        if first is None and record_id == trans1_id:
            first = record
        if second is None and record_id == trans2_id:
            second = record

    # Unknown transaction id
    if first is None or second is None:
        return -2

    # The two transactions must come from different users
    if first["user_id"] == second["user_id"]:
        return -1

    dx = first["x_coordinate"] - second["x_coordinate"]
    dy = first["y_coordinate"] - second["y_coordinate"]
    return (dx ** 2 + dy ** 2) ** 0.5

**Statistics Module**

In [None]:
def avrg_transaction(user_id: int = -1, transactions=None, for_all: bool = False) -> float:
    """
    Compute the average transaction amount of one user or of all users.

    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: average over every transaction instead of a single user
    :return: the average amount, or -1 when there is nothing to average
             (unknown user id, or no transactions at all)
    """
    if transactions is None:
        transactions = []

    if for_all:
        amounts = [transaction["amount"] for transaction in transactions]
    else:
        amounts = [transaction["amount"] for transaction in transactions
                   if transaction["user_id"] == user_id]

    # An empty selection would otherwise divide by zero.
    if not amounts:
        return -1
    return sum(amounts) / len(amounts)


def transaction_mode(user_id: int = -1, transactions=None, for_all: bool = False):
    """
    Compute the mode of the transaction amounts of one user or of all users.

    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: use every transaction instead of a single user's
    :return: whatever ``custom_mode`` returns for the selected amounts,
             or -1 when no amount is selected (unknown user id, or no
             transactions at all)
    """
    if transactions is None:
        transactions = []

    if for_all:
        amounts = [transaction["amount"] for transaction in transactions]
    else:
        amounts = [transaction["amount"] for transaction in transactions
                   if transaction["user_id"] == user_id]

    # Nothing selected: report it with the sentinel instead of letting the
    # mode helper fail on an empty list.
    if not amounts:
        return -1
    return custom_mode(amounts)


def transaction_median(user_id: int = -1, transactions=None, for_all: bool = False):
    """
    Compute the median transaction amount of one user or of all users.

    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: use every transaction instead of a single user's
    :return: the median of the selected amounts, or -1 when no amount is
             selected (unknown user id, or no transactions at all)
    """
    if transactions is None:
        transactions = []

    if for_all:
        amounts = [transaction["amount"] for transaction in transactions]
    else:
        amounts = [transaction["amount"] for transaction in transactions
                   if transaction["user_id"] == user_id]

    # Callers compare the result against the scalar -1, so the sentinel must
    # be a plain number rather than a tuple.
    if not amounts:
        return -1
    return custom_median(amounts)


def interquartile_range(user_id: int = -1, transactions=None, for_all: bool = False):
    """
    Compute the interquartile range of the amounts of one user or of all users.

    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: use every transaction instead of a single user's
    :return: the interquartile range of the selected amounts, or -1 when no
             amount is selected (unknown user id, or no transactions at all)
    """
    if transactions is None:
        transactions = []

    if for_all:
        amounts = [transaction["amount"] for transaction in transactions]
    else:
        amounts = [transaction["amount"] for transaction in transactions
                   if transaction["user_id"] == user_id]

    # Callers compare the result against the scalar -1, so the sentinel must
    # be a plain number rather than a tuple.
    if not amounts:
        return -1
    return custom_interquartile_range(amounts)


def user_location_centroid(user_id: int, transactions: list):
    """
    Compute the centroid of the locations where a user made transactions.

    :param user_id: the user id
    :param transactions: the list of all users' transactions
    :return: a pair (mean x coordinate, mean y coordinate) over the user's
             transactions, or (-1, -1) when the user has no transactions
    """
    xs = []
    ys = []
    for entry in transactions:
        if entry["user_id"] == user_id:
            xs.append(entry["x_coordinate"])
            ys.append(entry["y_coordinate"])

    # No transaction for this user id
    if not xs:
        return -1, -1

    count = len(xs)
    centre_x = sum(xs) / count
    centre_y = sum(ys) / count
    return centre_x, centre_y


def transaction_standard_deviation(trans_amt: list):
    """
    Compute the population standard deviation of a list of numbers.

    :param trans_amt: the values to measure (must not be empty)
    :return: the square root of the mean squared distance from the mean
    """
    count = len(trans_amt)
    centre = sum(trans_amt) / count
    # Population variance: divide by the number of values, not count - 1.
    spread = sum((value - centre) ** 2 for value in trans_amt) / count
    return spread ** 0.5


def is_fraudulent(transaction_id: str, transactions: list):
    """
    Report whether a transaction is flagged as fraudulent, together with its details.

    :param transaction_id: the transaction id to look up
    :param transactions: the list of all the users' transactions
    :return: a multi-line description of the first transaction with that id,
             or "Invalid transaction id" when no transaction matches
    """
    matches = [entry for entry in transactions if entry["transaction_id"] == transaction_id]
    if not matches:
        return "Invalid transaction id"
    record = matches[0]

    # Only the verdict wording differs between the two outcomes.
    verdict = "fraudulent" if record["is_fraudulent"] else "not fraudulent"
    # Every line break in the message is preceded by this run of spaces.
    gap = " " * 12
    pieces = [
        f"This transaction is {verdict}. Below are the details ",
        f"User ID: {record['user_id']}, ",
        f"Description: {record['description']}, ",
        f"Amount: {record['amount']}, ",
        f"X_coordinate: {record['x_coordinate']}, ",
        f"Y_coordinate: {record['y_coordinate']},",
    ]
    return (gap + "\n").join(pieces)


def abnormal_transaction(user_id: int, transactions: list):
    """
    Collect the transactions of a user whose fraud flag contradicts their description.

    A transaction is reported when its description is listed in
    ``fraud-description.txt`` but it is not flagged as fraudulent, or when its
    description is listed in ``description.txt`` but it is flagged as
    fraudulent. Both files are read from the current working directory, one
    description per line.

    :param user_id: the user id
    :param transactions: the list of all users' transactions
    :return: the list of abnormal transactions (matches from the first file
             come before matches from the second), or -1 when the user has
             no transactions
    """
    owned = [entry for entry in transactions if entry["user_id"] == user_id]

    # No transaction for this user id
    if not owned:
        return -1

    labels = [entry["description"] for entry in owned]
    flagged = []

    # Descriptions listed as fraudulent, yet the transaction is not flagged
    with open("fraud-description.txt", "r") as fraud_file:
        fraud_labels = [row.strip() for row in fraud_file]
        for entry, label in zip(owned, labels):
            if label in fraud_labels and not entry["is_fraudulent"]:
                flagged.append(entry)

    # Descriptions listed as regular, yet the transaction is flagged
    with open("description.txt", "r") as regular_file:
        regular_labels = [row.strip() for row in regular_file]
        for entry, label in zip(owned, labels):
            if label in regular_labels and entry["is_fraudulent"]:
                flagged.append(entry)

    return flagged


def compute_z_score(transaction_id: str = "", user_id: int = -1, transactions: list = None, for_all: bool = False):
    """
    Compute the Z-score of one transaction amount.

    The score is measured against the amounts of all transactions when
    ``for_all`` is true, otherwise against the amounts of ``user_id`` only.

    :param transaction_id: the id of the transaction to score
    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: score against every transaction instead of one user's
    :return: the Z-score rounded to two decimals, ``0.0`` when every amount in
             the population is identical (no spread to measure against), or
             the string ``'-1'`` when the transaction id is not found in the
             selected population
    """
    if transactions is None:
        transactions = []

    if for_all:
        population = transactions
    else:
        population = [transaction for transaction in transactions if transaction["user_id"] == user_id]

    # Locate the transaction; when an id occurs more than once the last
    # occurrence is used.
    trans_index = -1
    for i, candidate in enumerate(population):
        if candidate["transaction_id"] == transaction_id:
            trans_index = i
    if trans_index == -1:
        return '-1'  # a string, so it cannot be confused with a real score of -1

    amounts = [transaction["amount"] for transaction in population]

    # With identical amounts (including a single transaction) the standard
    # deviation is zero, or float noise, and the division below would fail or
    # return garbage. Such a value sits exactly on the mean.
    if min(amounts) == max(amounts):
        return 0.0

    mean = sum(amounts) / len(amounts)
    std_dev = transaction_standard_deviation(amounts)
    return round((amounts[trans_index] - mean) / std_dev, 2)


def compute_frequency(x_cord, y_cord, transactions: dict):
    """
    Count the transactions recorded at an exact location.

    :param x_cord: the x coordinate of the location (anything ``float()`` accepts)
    :param y_cord: the y coordinate of the location (anything ``float()`` accepts)
    :param transactions: the transactions to scan
    :return: the number of transactions whose coordinates equal the given location
    """
    hits = 0
    for entry in transactions:
        # The y coordinate is only examined once the x coordinate matches.
        if entry['x_coordinate'] != float(x_cord):
            continue
        if entry['y_coordinate'] == float(y_cord):
            hits += 1
    return hits


def get_outlier(user_id: int = -1, transactions=None, threshold=3, for_all: bool = False):
    """
    Find transaction locations that lie unusually far from the mean location.

    Each location gets a score equal to the sum of its x and y Z-scores; a
    location is reported when that score is greater than ``threshold``.

    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param threshold: the score above which a location is an outlier (default 3)
    :param for_all: examine every transaction instead of one user's
    :return: a list of ``{'x_coordinate': ..., 'y_coordinate': ...}``
             dictionaries, empty when nothing qualifies or when there are no
             transactions to examine
    """
    if transactions is None:
        transactions = []
    if not for_all:
        transactions = [transaction for transaction in transactions if transaction["user_id"] == user_id]

    # Nothing to examine (e.g. unknown user id): the means below would
    # otherwise divide by zero.
    if not transactions:
        return []

    x_cords = [transaction['x_coordinate'] for transaction in transactions]
    y_cords = [transaction['y_coordinate'] for transaction in transactions]

    x_mean = sum(x_cords) / len(x_cords)
    y_mean = sum(y_cords) / len(y_cords)
    x_std_dev = transaction_standard_deviation(x_cords)
    # The y spread must be measured on the y coordinates.
    y_std_dev = transaction_standard_deviation(y_cords)

    def axis_score(value, mean, std_dev):
        # An axis without any spread contributes nothing to the score.
        return (value - mean) / std_dev if std_dev else 0.0

    outliers = []
    for x, y in zip(x_cords, y_cords):
        score = axis_score(x, x_mean, x_std_dev) + axis_score(y, y_mean, y_std_dev)
        if score > threshold:
            outliers.append({'x_coordinate': x, 'y_coordinate': y})
    return outliers


def get_percentiles(percentile: int, user_id: int = -1, transactions=None, for_all: bool = False):
    """
    Return the amount found at the nth percentile of the sorted amounts.

    :param percentile: the percentile to read, from 0 to 100 inclusive
    :param user_id: the user id (ignored when ``for_all`` is true)
    :param transactions: the list of all users' transactions
    :param for_all: use every transaction instead of a single user's
    :return: the amount at position ``int(count * percentile / 100)`` of the
             ascending amounts (capped at the largest amount), or -1 when the
             percentile is out of range or no amount is selected
    """
    if transactions is None:
        transactions = []
    # Check if percentile is within 0 to 100
    if not (0 <= percentile <= 100):
        return -1

    if for_all:
        amounts = sorted(transaction["amount"] for transaction in transactions)
    else:
        amounts = sorted(transaction["amount"] for transaction in transactions
                         if transaction["user_id"] == user_id)

    # Unknown user id or no data: there is no value to index into.
    if not amounts:
        return -1

    # At percentile 100 the computed position equals the list length, so cap
    # it at the last valid index.
    index = min(int(len(amounts) * percentile / 100), len(amounts) - 1)
    return amounts[index]


def custom_mode(items: list):
    """
    Find the most frequent value(s) of a non-empty list.

    :param items: the values to examine (must not be empty)
    :return: the single most frequent value when there is exactly one;
             an empty list when every value is tied for most frequent and
             there is more than one of them (no mode);
             otherwise the list of tied values, in order of first appearance
    """
    tally = {}
    for value in items:
        if value in tally:
            tally[value] += 1
        else:
            tally[value] = 1

    # Keep every value that reaches the highest count
    highest = max(tally.values())
    winners = [value for value, count in tally.items() if count == highest]

    if len(winners) == 1:
        return winners[0]
    # Every item being a "winner" means no value repeats: there is no mode.
    return [] if len(winners) == len(items) else winners


def custom_median(items: list) -> float:
    """
    Return the median of a non-empty list of numbers.

    The input list is left untouched; a sorted copy is used internally.

    :param items: the values to examine
    :return: the middle value for an odd count, or the mean of the two middle
             values for an even count
    :raises ValueError: if ``items`` is empty
    """
    if not items:
        raise ValueError("cannot compute the median of an empty list")

    ordered = sorted(items)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2


def custom_interquartile_range(items: list) -> float:
    """
    Return the interquartile range (Q3 - Q1) of a non-empty list of numbers.

    Q1 and Q3 are the medians of the lower and upper halves of the sorted
    values; for an odd count the middle value belongs to neither half. The
    input list is left untouched.

    :param items: the values to examine
    :return: the difference between the upper and lower quartile, or ``0.0``
             for a single value
    :raises ValueError: if ``items`` is empty
    """
    if not items:
        raise ValueError("cannot compute the interquartile range of an empty list")

    ordered = sorted(items)
    if len(ordered) == 1:
        # A single value has no spread, and its halves would be empty.
        return 0.0

    # Split by position rather than by looking the median value up in the
    # list: a value lookup picks the wrong split point when values repeat.
    half, is_odd = divmod(len(ordered), 2)
    lower_half = ordered[:half]
    upper_half = ordered[half + is_odd:]
    return custom_median(upper_half) - custom_median(lower_half)


**Main Module (Test Module)**

In [None]:
# Notebook cell: reminds the user where the three input files must be placed.
print(f"Ensure to copy and paste the Transaction.txt, fraud-description.txt and description.txt in this location: ")
# `!pwd` is a notebook shell escape, not plain Python; it shows the current working directory.
!pwd

Ensure to copy and paste the Transaction.txt, fraud-description.txt and description.txt in this location: 
/content


In [None]:
# import dataset_module
# import distance_module
# import statistic_module

# Interactive console menu: loads the transactions once, then repeatedly asks
# which distance or statistics computation to run until the user enters 'q'.
if __name__ == '__main__':
    # get all the transactions data
    transactions = retrieve_data("Transaction.txt")

    print("Hello there, Welcome")
    while True:
        try:
            user_input = input("Enter 1 for distance computations\nEnter 2 for statistics computation\nEnter q to quit: ")
            # ---- Menu 1: distance computations ----
            if user_input == '1':
                print("Enter a command to perform either of the task below")
                print("A: to compute distance between any two given transactions of a user \
                      \nB: to compute distance of transactions of any two users: ")
                user_input = input()
                if user_input == 'A':
                    first_transaction_id = input("Enter the first transaction id of the user: ")
                    second_transaction_id = input("Enter the second transaction id of the same user: ")
                    distance = user_transaction_distance(first_transaction_id, second_transaction_id,
                                                                         transactions)
                    # -1: the ids belong to different users; -2: an id was not found
                    if distance == -1:
                        print("Please enter transaction ids of the same user")
                    elif distance == -2:
                        print("Transaction id not found! try again")
                    else:
                        print("The distance is %.2f" % distance)

                elif user_input == 'B':
                    first_transaction_id = input("Enter the transaction id of the first user: ")
                    second_transaction_id = input("Enter the transaction id of the second user: ")
                    distance = transaction_distance(first_transaction_id, second_transaction_id,
                                                                    transactions)
                    # -1: both ids belong to the same user; -2: an id was not found
                    if distance == -1:
                        print("Please enter transaction ids of different users")
                    elif distance == -2:
                        print("Transaction id not found! try again")
                    else:
                        print("The distance is %.2f" % distance)
                else:
                    print("Invalid input. try again")
            # ---- Menu 2: statistics computations ----
            elif user_input == '2':
                print("Enter a command to perform either of the task below \
                      \nA: to compute the average transaction of any user and of all user \
                      \nB: to compute the mode of transaction of any user and of all user \
                      \nC: to compute the median of all transaction of any user and of all user \
                      \nD: to compute the interquartile range of any user's transaction and of all user \
                      \nE: to compute the location centroid of any user based on their transaction locations \
                      \nF: to compute the standard deviation of any specific user's transaction \
                      \nG: to determine whether a transaction is fraudulent or not, and the details of the transaction \
                      \nH: to retrieve the abnormal transaction for a given user \
                      \nI: to compute the Z-score of any user's transaction and of all users  \
                      \nJ: to retrieve the frequencies of transactions at a given location \
                      \nK: to retrieve the outlier of any location and of any user \
                      \nL: to compute the percentile of transactions of any user and of all user ")
                user_input = input(": ")
                # A: average amount
                if user_input == 'A':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        # int() raises ValueError on non-numeric input; handled at the bottom of the loop
                        user_id = int(input("Enter the user id: "))
                        avrg = avrg_transaction(user_id, transactions)
                        if avrg != -1:
                            print(f"Average transaction of user with id: {user_id} is {round(avrg, 2)}")
                        else:
                            print("User id not found, please try again")
                    elif user_input == '2':
                        avrg = avrg_transaction(transactions=transactions, for_all=True)
                        print(f"Average transaction of all users is {round(avrg, 2)}")
                    else:
                        print("Invalid input, try again.")

                # B: mode of the amounts
                elif user_input == 'B':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        user_id = int(input("Enter the user id: "))
                        mode = transaction_mode(user_id, transactions)
                        if mode != -1:
                            print(f"Mode of transaction of user with id: {user_id} is {mode}")
                        else:
                            print("User id not found, please try again")
                    elif user_input == '2':
                        mode = transaction_mode(transactions=transactions, for_all=True)
                        print(f"Mode of transaction of all users is {mode}")
                    else:
                        print("Invalid input, try again.")
                # C: median of the amounts
                elif user_input == 'C':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        user_id = int(input("Enter the user id: "))
                        median = transaction_median(user_id, transactions)
                        if median != -1:
                            print(f"Median of transaction of user with id: {user_id} is {round(median, 2)}")
                        else:
                            print("User id not found, please try again")
                    elif user_input == '2':
                        median = transaction_median(transactions=transactions, for_all=True)
                        print(f"Median of transaction of all users is {round(median, 2)}")
                    else:
                        print("Invalid input, try again.")
                # D: interquartile range of the amounts
                elif user_input == 'D':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        user_id = int(input("Enter the user id: "))
                        iqr = interquartile_range(user_id, transactions)
                        if iqr != -1:
                            print(f"Interquartile range of transaction of user with id: {user_id} is {round(iqr, 2)}")
                        else:
                            print("User id not found, please try again")
                    elif user_input == '2':
                        iqr = interquartile_range(transactions=transactions, for_all=True)
                        print(f"Interquartile range of transaction of all users is {round(iqr, 2)}")
                    else:
                        print("Invalid input, try again.")
                # E: centroid of a user's transaction locations
                elif user_input == 'E':
                    user_id = int(input("Enter the user id: "))
                    x_cord, y_cord = user_location_centroid(user_id, transactions)
                    # (-1, -1) is returned when the user has no transactions
                    if x_cord != -1:
                        print(
                            f"The location centroid of user with id: {user_id} is ({round(x_cord, 2)}, {round(y_cord, 2)})")
                    else:
                        print("User id not found, please try again")
                # F: standard deviation of a user's amounts
                elif user_input == 'F':
                    user_id = int(input("Enter the user id: "))
                    user_trans_amt = [transaction["amount"] for transaction in transactions if
                                      transaction["user_id"] == user_id]
                    # Check if the user entered a valid user_id
                    if len(user_trans_amt) != 0:
                        user_sd = transaction_standard_deviation(user_trans_amt)
                        print(f"The standard deviation of user with id: {user_id} is {round(user_sd, 2)}")
                    else:
                        print("User id not found, please try again")
                # G: fraud flag and details of one transaction
                elif user_input == 'G':
                    transaction_id = input("Enter the transaction id: ")
                    print(is_fraudulent(transaction_id, transactions))
                # H: abnormal transactions of a user
                elif user_input == 'H':
                    user_id = int(input("Enter the user id: "))
                    abnormal_trans = abnormal_transaction(user_id, transactions)
                    # -1 means the user has no transactions; otherwise a (possibly empty) list is returned
                    if abnormal_trans == -1:
                        print("User id not found, please try again")
                    elif len(abnormal_trans) == 0:
                        print("This user have no abnormal transaction")
                    else:
                        print("Here are the following abnormal transaction")
                        for i in abnormal_trans:
                            print(i)
                # I: Z-score of a transaction
                elif user_input == 'I':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        user_id = int(input("Enter the user id: "))
                        transaction_id = input("Enter the transaction id of the user: ")
                        user_zscore = compute_z_score(transaction_id, user_id, transactions)
                        # compute_z_score reports "not found" with the string '-1', not the number
                        if user_zscore == '-1':
                            print("The transaction id does not exist for this user.")
                        else:
                            print(f"The Z-score of transaction of user with id: {user_id} is {user_zscore}")
                    elif user_input == '2':
                        transaction_id = input("Enter the transaction id: ")
                        all_zscore = compute_z_score(transaction_id, transactions=transactions,
                                                                      for_all=True)
                        if all_zscore == '-1':
                            print("The transaction id does not exist")
                        else:
                            print(f"Z-score of this transaction is \n{all_zscore}")
                    else:
                        print("Invalid input, try again.")
                # J: number of transactions at an exact location
                elif user_input == 'J':
                    # The coordinates stay as text here; compute_frequency converts them with float()
                    x_cord = input("Enter the x coordinate: ")
                    y_cord = input("Enter the y coordinate: ")
                    freq = compute_frequency(x_cord, y_cord, transactions)
                    print(f"The frequency at ({x_cord}, {y_cord}) is {freq}")
                # K: location outliers
                elif user_input == 'K':
                    user_input = input("Enter 1 to compute for any location\nEnter 2 to compute for any user: ")
                    if user_input == '1':
                        outliers = get_outlier(transactions=transactions, for_all=True)
                        print(f"The outlier is {outliers}")
                    elif user_input == '2':
                        user_id = int(input("Enter the user id: "))
                        outliers = get_outlier(user_id, transactions=transactions)
                        print(f"The outlier is {outliers}")
                    else:
                        print("Invalid input, try again.")
                # L: nth percentile of the amounts
                elif user_input == 'L':
                    user_input = input("Enter 1 to compute for any user\nEnter 2 to compute for all users: ")
                    if user_input == '1':
                        user_id = int(input("Enter the user id: "))
                        n = int(input("Enter the nth percentile value: "))
                        percentile = get_percentiles(n, user_id, transactions)
                        if percentile != -1:
                            print(
                                f"The {n}th percentile of transactions of user with id: {user_id} is {percentile}")

                        else:
                            print("Percentile value should be within 0 to 100")
                    elif user_input == '2':
                        n = int(input("Enter the nth percentile value: "))
                        percentile = get_percentiles(n, transactions=transactions, for_all=True)
                        if percentile != -1:
                            print(f"The {n}th percentile of transaction of all users is {percentile}")
                        else:
                            print("Percentile value should be within 0 to 100")
                    else:
                        print("Invalid input, try again.")
                else:
                    print("Invalid input, try again")
            elif user_input == 'q':
                break
            else:
                print("Invalid input. try again")

            print("\n")
        # Only ValueError is handled here (e.g. int() on non-numeric input);
        # any other exception type propagates and ends the program.
        except ValueError:
            print("\nPlease enter a valid input")
