# Blockchain and Machine Learning for Fraud Detection: Employing Artificial Intelligence in the Banking Sector

### Blockchain Integrated with an XGBoosted K-means Model 

#### Import Libraries & Packages

In [1]:
import pandas as pd
import json
import numpy as np
from numpy import loadtxt
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import xgboost
from xgboost import XGBClassifier
import hashlib
import json
from time import time
from urllib.parse import urlparse
from uuid import uuid4
import requests
from flask import Flask, jsonify, request
from matplotlib import gridspec 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score  
from sklearn.metrics import precision_score, recall_score 
from sklearn.metrics import f1_score, matthews_corrcoef 
from sklearn.metrics import confusion_matrix 

### Blockchain 

In [2]:
#Create a class to store the block chain

class Blockchain:
    def __init__(self):
        self.current_trans = []
        self.chain = []
        self.nodes = set()

        #Create the genesis block
        self.new_block(prev_hash='1', proof=100)

    def new_node(self, address):
        """
        Add a new node. View the node here:'http://192.168.0.5:5000'
        """

        parsed_url = urlparse(address)
        if parsed_url.netloc:
            self.nodes.add(parsed_url.netloc)
        elif parsed_url.path:
            self.nodes.add(parsed_url.path)
        else:
            raise ValueError('Invalid URL. Please try again.')


    def valid_chain(self, chain):
        """
        Determine if blockchain is valid.
        """

        prev_block = chain[0]
        current_index = 1

        while current_index < len(chain):
            block = chain[current_index]
            print(f'{prev_block}')
            print(f'{block}')
            print("\n-----------\n")
            #Check that the hash of the block is correct
            prev_block_hash = self.hash(prev_block)
            if block['prev_hash'] != prev_block_hash:
                return False

            #Check that the Proof of Work is correct
            if not self.valid_proof(prev_block['proof'], block['proof'], prev_block_hash):
                return False

            prev_block = block
            current_index += 1

        return True

    def conflict_resolution(self):
        """
        Resolves conflicts by replacing current chain with the longest one in the network.
        """

        neighbours = self.nodes
        new_chain = None

        #Identifying long chains
        max_length = len(self.chain)

        #Grab and verify the chains from all the nodes in the network
        for node in neighbours:
            response = requests.get(f'http://{node}/chain')

            if response.status_code == 200:
                length = response.json()['length']
                chain = response.json()['chain']

                #Check if the length is longer and the chain is valid
                if length > max_length and self.valid_chain(chain):
                    max_length = length
                    new_chain = chain

        #Replace chain if a valid longer chain is discovered
        if new_chain:
            self.chain = new_chain
            return True

        return False

    def new_block(self, proof, prev_hash):

        block = {
            'index': len(self.chain) + 1,
            'timestamp': time(),
            'transactions': self.current_trans,
            'proof': proof,
            'prev_hash': prev_hash or self.hash(self.chain[-1]),
        }

        #Reset the current list of transactions
        self.current_trans = []

        self.chain.append(block)
        return block

    def new_trans(self, sender, recipient, amount):
        """
        Creates a new transaction to go into the next mined Block.
        """
        self.current_trans.append({
            'sender': sender,
            'recipient': recipient,
            'amount': amount,
        })

        return self.prev_block['index'] + 1

    @property
    def prev_block(self):
        return self.chain[-1]

    @staticmethod
    def hash(block):
        """
        SHA-256 encryption
        """

        #Ensure that dictionary is ordered, to avoid inconsistent hashes.
        block_str = json.dumps(block, sort_keys=True).encode()
        return hashlib.sha256(block_str).hexdigest()

    def proof_of_work(self, prev_block):
        
         #Proof of Work Algorithm:
         #- Find a number p' such that hash(pp') contains leading 4 zeroes
         #- Where p is the previous proof, and p' is the new proof

        prev_proof = prev_block['proof']
        prev_hash = self.hash(prev_block)

        proof = 0
        while self.valid_proof(prev_proof, proof, prev_hash) is False:
            proof += 1

        return proof

    @staticmethod
    def valid_proof(prev_proof, proof, prev_hash):

        #Validates Proof

        guess = f'{prev_proof}{proof}{prev_hash}'.encode()
        guess_hash = hashlib.sha256(guess).hexdigest()
        return guess_hash[:4] == "0000"





### Integration of XGBoosted KMeans with Blockchain

In [None]:
#Instantiate the Node
app = Flask(__name__)

#Generate a globally unique address for this node
node_id = str(uuid4()).replace('-', '')

#Instantiate the Blockchain
blockchain = Blockchain()

data = pd.read_csv("lpetrocelli-czech-financial-dataset-real-anonymized-transactions/creditcard.csv" , sep=",") 
X = data.drop(['Class'], axis = 1) 
Y = data["Class"]
xData = X.values 
yData = Y.values
fraud = data[data['Class'] == 1] 
valid = data[data['Class'] == 0] 


xTrain, xTest, yTrain, yTest = train_test_split( xData, yData, test_size = 0.2, random_state = 42)
print(xTest[0])
print("1")

rfc = RandomForestClassifier() 
rfc.fit(xTrain, yTrain) 
# predictions
print("-1")
yPred = rfc.predict(xTest)

n_outliers = len(fraud) 
n_errors = (yPred != yTest).sum() 
print("The model used is Random Forest classifier") 
  
acc = accuracy_score(yTest, yPred) 
print("The accuracy is {}".format(acc)) 
  
prec = precision_score(yTest, yPred) 
print("The precision is {}".format(prec)) 
  
rec = recall_score(yTest, yPred) 
print("The recall is {}".format(rec)) 
  
f1 = f1_score(yTest, yPred) 
print("The F1-Score is {}".format(f1)) 
  
MCC = matthews_corrcoef(yTest, yPred) 
print("The Matthews correlation coefficient is{}".format(MCC)) 



@app.route('/mine', methods=['GET'])
def mine():
    #Run the proof of work algorithm to get the next proof...
    prev_block = blockchain.prev_block
    proof = blockchain.proof_of_work(prev_block)

    #Receive a reward for finding the proof.
    #The sender is "0" to signify a new transaction.
    blockchain.new_trans(
        sender="0",
        recipient=node_id,
        amount=1,
    )

    #Forge the new Block by adding it to the chain
    prev_hash = blockchain.hash(prev_block)
    block = blockchain.new_block(proof, prev_hash)

    response = {
        'message': "New Block Forged",
        'index': block['index'],
        'transactions': block['transactions'],
        'proof': block['proof'],
        'prev_hash': block['prev_hash'],
    }
    return jsonify(response), 200

@app.route('/transactions/new', methods=['POST'])
def new_trans():
    values = request.get_json()
    print(values)
    #Check that the required fields are in the POST'ed data
    required = ["sender","recipient","Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount"]
    if not all(k in values for k in required):
        return 'Missing values', 400
#    data_dic = json.loads(values)
    data = values.values()
    
    xy = []
    for l in range(2,len(data)):
        xy.append(list(data)[l])
    prediction=rfc.predict([xy])
    if prediction == 0 :
        index = blockchain.new_trans(values['sender'], values['recipient'], values['Amount'])
        response = {'message': f'Transaction will be added to Block {index}'}
    else :
        response = {'message': f'Transaction will  not added to Block because it is fraud activity'}
    return jsonify(response), 201
    
    #Kmeans clustering is implemented on the newly formed chain
    #dividing the data into train and test sets for the k-means model
#    dataset=blockchain.chain
#    print(dataset)
#    data = pd.read_csv("lpetrocelli-czech-financial-dataset-real-anonymized-transactions/credit.csv" , sep=",") 
#    X = dataset[:,0:8]
#    Y = dataset[:,8]
#    #split data into train and test sets
#    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=7)
#    #fit model no training data
#    model = XGBClassifier()

    #Building the k-means model

#    kmeans = KMeans(n_clusters=2)
#    kmeans.fit(X_train)
#    KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=30000,
#    n_clusters=2, n_init=10, n_jobs=1, precompute_distances='auto',
#    random_state=None, tol=0.0001, verbose=0)
#    correct = 0
#    for i in range(len(X_test)):
#        predict_me = np.array(X_test[i].astype(float))
#        predict_me = predict_me.reshape(-1, len(predict_me))
#        prediction = kmeans.predict(predict_me)
#        if prediction[0] == y[i]:
#            correct += 1

#        print(correct/len(X))
    

@app.route('/chain', methods=['GET'])
def full_chain():
    response = {
        'chain': blockchain.chain,
        'length': len(blockchain.chain),
    }
    return jsonify(response), 200
    
@app.route('/nodes/register', methods=['POST'])
def new_nodes():
    values = request.get_json()

    nodes = values.get('nodes')
    if nodes is None:
        return "Error: Please supply a valid list of nodes", 400

    for node in nodes:
        blockchain.new_node(node)

    response = {
        'message': 'New nodes have been added',
        'total_nodes': list(blockchain.nodes),
    }
    return jsonify(response), 201


@app.route('/nodes/resolve', methods=['GET'])
def consensus():
    replaced = blockchain.conflict_resolution()

    if replaced:
        response = {
            'message': 'Our chain was replaced',
            'new_chain': blockchain.chain
        }
    else:
        response = {
            'message': 'Our chain is authoritative',
            'chain': blockchain.chain
        }

    return jsonify(response), 200


if __name__ == '__main__':
    from argparse import ArgumentParser

  #  parser = ArgumentParser()
  #  parser.add_argument('-p', '--port', default=5000, type=int, help='port to listen on')
  #  args = parser.parse_args()
  #  port = args.port

    app.run(host='0.0.0.0', port=5000)
    

[ 4.15050000e+04 -1.65265066e+01  8.58497180e+00 -1.86498532e+01
  9.50559352e+00 -1.37938185e+01 -2.83240430e+00 -1.67016943e+01
  7.51734390e+00 -8.50705864e+00 -1.41101844e+01  5.29923635e+00
 -1.08340065e+01  1.67112025e+00 -9.37385858e+00  3.60805642e-01
 -9.89924654e+00 -1.92362924e+01 -8.39855199e+00  3.10173537e+00
 -1.51492344e+00  1.19073869e+00 -1.12767001e+00 -2.35857877e+00
  6.73461329e-01 -1.41369967e+00 -4.62762361e-01 -2.01857525e+00
 -1.04280417e+00  3.64190000e+02]
1
-1
The model used is Random Forest classifier
The accuracy is 0.9995786664794073
The precision is 0.9743589743589743
The recall is 0.7755102040816326
The F1-Score is 0.8636363636363635
The Matthews correlation coefficient is0.8690748763736589
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [11/Aug/2020 11:16:56] "[37mGET /chain HTTP/1.1[0m" 200 -
127.0.0.1 - - [11/Aug/2020 11:16:57] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [11/Aug/2020 13:22:25] "[37mPOST //transactions/new HTTP/1.1[0m" 201 -


{'sender': 'abc', 'recipient': 'bcd', 'Time': 406, 'V1': -2.3122265423263, 'V2': 1.95199201064158, 'V3': -1.60985073229769, 'V4': 3.9979055875468, 'V5': -0.522187864667764, 'V6': -1.42654531920595, 'V7': -2.53738730624579, 'V8': 1.39165724829804, 'V9': -2.77008927719433, 'V10': -2.77227214465915, 'V11': 3.20203320709635, 'V12': -2.89990738849473, 'V13': -0.595221881324605, 'V14': -4.28925378244217, 'V15': 0.389724120274487, 'V16': -1.14074717980657, 'V17': -2.83005567450437, 'V18': -0.0168224681808257, 'V19': 0.416955705037907, 'V20': 0.126910559061474, 'V21': 0.517232370861764, 'V22': -0.0350493686052974, 'V23': -0.465211076182388, 'V24': 0.320198198514526, 'V25': 0.0445191674731724, 'V26': 0.177839798284401, 'V27': 0.261145002567677, 'V28': -0.143275874698919, 'Amount': 0}


127.0.0.1 - - [11/Aug/2020 13:23:06] "[37mPOST //transactions/new HTTP/1.1[0m" 201 -


{'sender': 'abc', 'recipient': 'bcd', 'Time': 11, 'V1': -1.3598071336738, 'V2': -0.0727811733098497, 'V3': 2.53634673796914, 'V4': 1.37815522427443, 'V5': -0.338320769942518, 'V6': 0.462387777762292, 'V7': 0.239598554061257, 'V8': 0.0986979012610507, 'V9': 0.363786969611213, 'V10': 0.0907941719789316, 'V11': -0.551599533260813, 'V12': -0.617800855762348, 'V13': -0.991389847235408, 'V14': -0.311169353699879, 'V15': 1.46817697209427, 'V16': -0.470400525259478, 'V17': 0.207971241929242, 'V18': 0.0257905801985591, 'V19': 0.403992960255733, 'V20': 0.251412098239705, 'V21': -0.018306777944153, 'V22': 0.277837575558899, 'V23': -0.110473910188767, 'V24': 0.0669280749146731, 'V25': 0.128539358273528, 'V26': -0.189114843888824, 'V27': 0.133558376740387, 'V28': -0.0210530534538215, 'Amount': 149.62}


127.0.0.1 - - [11/Aug/2020 13:23:19] "[37mGET /mine HTTP/1.1[0m" 200 -
127.0.0.1 - - [11/Aug/2020 13:23:34] "[37mGET /chain HTTP/1.1[0m" 200 -


In [None]:
%tb