# Storing Machine Learning Models on Blockchain
_Andrew Cachia, Feb 2019_

#### Imports

In [1]:
import pandas as pd
import numpy as np
import json
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from flask import Flask, Response, request, jsonify
from web3 import Web3

# Section 0: Shared configuration

This section describes the configuration that should be used by both owner and client, as well as splitting the data to be used by each.

#### Data

In [26]:
input = pd.read_csv('Iris Data Set/iris.csv')
input.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Splitting between training and testing
X_train, X_test, y_train, y_test = train_test_split(input.loc[:,input.columns != 'class'], input['class'], test_size=0.2)

#### Setting up configurations to access smart contract

In [8]:
w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:8545"))
        
contract_abi = '[{"constant": false,"inputs": [{"name": "_encoded_parameters","type": "string"}],"name": "setModelParameters","outputs": [],"payable": false,"stateMutability": "nonpayable","type": "function"},{"inputs": [],"payable": false,"stateMutability": "nonpayable","type": "constructor"},{"constant": true,"inputs": [],"name": "getModelParameters","outputs": [{"name": "_encoded_parameters","type": "string"}],"payable": false,"stateMutability": "view","type": "function"}]'
contract_address = Web3.toChecksumAddress('0x748b9e15ac55f06bdd4b605fa6d1be5d1e7c2ed7')

ml_contract = w3.eth.contract(address=contract_address, abi=contract_abi)

# Section 1: Organization

This is the part that will be used by the organisation. Here, the model is trained on private data, and the model parameters are uploaded to the smart contract. It must be ensured that the address of the user uploading this data is the same as the owner of the smart contract.

### Training

In [9]:
cls = LogisticRegression()
f = cls.fit(X_train,y_train)
cls.coef_



array([[ 0.40043029,  1.37794159, -2.15749759, -0.95856915],
       [ 0.49646784, -1.65299042,  0.55981371, -1.48152142],
       [-1.58557039, -1.31086355,  2.20823646,  2.44341041]])

### Encode model parameters to json

In [10]:
encoded = json.dumps((cls.coef_.tolist(), cls.intercept_.tolist(), cls.classes_.tolist()))
encoded

'[[[0.40043028547787896, 1.377941589045666, -2.1574975875300777, -0.9585691538326847], [0.49646783655567767, -1.6529904209609068, 0.559813705284916, -1.4815214212935361], [-1.5855703912796488, -1.3108635533360338, 2.2082364626024265, 2.4434104076295045]], [0.23609827350167537, 0.8695054088949591, -1.0751280708940156], ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]]'

### Upload to Smart Contract

In [21]:
w3.eth.defaultAccount = w3.eth.accounts[0]
    
tx_hash = ml_contract.functions.setModelParameters(
    encoded
)
tx_hash = tx_hash.transact()

# Wait for transaction to be mined...
w3.eth.waitForTransactionReceipt(tx_hash)

AttributeDict({'transactionHash': HexBytes('0xaad2afbe441f5c83998668b1b84f3f05f1b7e96de35def1ad2e91e15c16785f2'),
 'transactionIndex': 0,
 'blockHash': HexBytes('0x1bdc023923767ea5e2497c02db655c3f842591c46038d8211ac00caf9ec13364'),
 'blockNumber': 5,
 'from': '0x6256b974e3200fdb2d299beb29f566777cb25b7b',
 'to': '0x748b9e15ac55f06bdd4b605fa6d1be5d1e7c2ed7',
 'gasUsed': 294304,
 'cumulativeGasUsed': 294304,
 'contractAddress': None,
 'logs': [],
 'status': 1,
 'logsBloom': HexBytes('0x00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

# Section 2: Client

This section shows how the client can retrieve the model parameters from the smart contract, and create a logistic regression model based on them. The client may then test out the model on unseen data.

### Retrieving data from smart contract and decoding

In [22]:
w3.eth.defaultAccount = w3.eth.accounts[1]

encoded_parameters = ml_contract.functions.getModelParameters().call()
decoded_parameters = json.loads(encoded_parameters)
decoded_parameters

[[[0.40043028547787896,
   1.377941589045666,
   -2.1574975875300777,
   -0.9585691538326847],
  [0.49646783655567767,
   -1.6529904209609068,
   0.559813705284916,
   -1.4815214212935361],
  [-1.5855703912796488,
   -1.3108635533360338,
   2.2082364626024265,
   2.4434104076295045]],
 [0.23609827350167537, 0.8695054088949591, -1.0751280708940156],
 ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']]

### Creating model based on parameters retrieved

In [23]:
cls = LogisticRegression()
cls.coef_ = np.array(decoded_parameters[0])
cls.intercept_ = np.array(decoded_parameters[1])
cls.classes_ = np.array(decoded_parameters[2])

### Performing Prediction

In [24]:
predictions = cls.predict(X_test);
result = pd.DataFrame(np.vstack((predictions, y_test)).T,columns=['Predicted Outcomes','Actual Outcomes'])
result.head()

Unnamed: 0,Predicted Outcomes,Actual Outcomes
0,Iris-setosa,Iris-setosa
1,Iris-virginica,Iris-virginica
2,Iris-setosa,Iris-setosa
3,Iris-versicolor,Iris-versicolor
4,Iris-virginica,Iris-virginica


### Showing resulting metrics

In [25]:
accuracy = np.sum(predictions == y_test) / y_test.shape[0] * 100
conf_matrix = confusion_matrix(predictions, y_test)
precision = conf_matrix[0,0] / (conf_matrix[0,0] + conf_matrix[0,1]) * 100

print(conf_matrix)
print("Accuracy: {0:.2f}%".format(accuracy))
print("Precision: {0:.2f}%".format(precision))

[[11  0  0]
 [ 0 10  1]
 [ 0  2  6]]
Accuracy: 90.00%
Precision: 100.00%
