# Hackathing 2: Pysyft and Federated Learning

### Toy Federated Learning Example

In [1]:
import torch
from torch import nn
from torch import optim

In [3]:
data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]])
target = torch.tensor([[0],[0],[1],[1.]])

# A Toy Model
model = nn.Linear(2,1)

def train():
    # Training Logic
    opt = optim.SGD(params=model.parameters(),lr=0.1)
    for iter in range(20):

        # 1) erase previous gradients (if they exist)
        opt.zero_grad()

        # 2) make a prediction
        pred = model(data)

        # 3) calculate how much we missed
        loss = ((pred - target)**2).sum()

        # 4) figure out which weights caused us to miss
        loss.backward()

        # 5) change those weights
        opt.step()

        # 6) print our progress
        print(loss.data)

In [4]:
# train the model
train()

tensor(2.8703)
tensor(1.0659)
tensor(0.6785)
tensor(0.4755)
tensor(0.3393)
tensor(0.2441)
tensor(0.1768)
tensor(0.1289)
tensor(0.0946)
tensor(0.0698)
tensor(0.0517)
tensor(0.0385)
tensor(0.0288)
tensor(0.0216)
tensor(0.0162)
tensor(0.0122)
tensor(0.0092)
tensor(0.0070)
tensor(0.0053)
tensor(0.0040)


In [3]:
# We've trained a basic model in the normal way - now we must train it in a federated way!
# This is a multi step process
# 1. Create a couple workers
# 2. Get pointers to training data on each worker
# 3. Updated training logic to do federated learning
# Training Steps:
# 1. Send model to correct worker
# 2. Train on the data located there
# 3. Get the model back and repeat with next worker

In [2]:
import syft as sy
hook = sy.TorchHook(torch)

In [4]:
# create a couple workers

jake = sy.VirtualWorker(hook, id="jake")
toby = sy.VirtualWorker(hook, id="toby")

In [5]:

# A Toy Dataset
data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]], requires_grad=True)
target = torch.tensor([[0],[0],[1],[1.]], requires_grad=True)

# get pointers to training data on each worker by
# sending some training data to jake and toby
data_jake = data[0:2]
target_jake = target[0:2]

data_toby = data[2:]
target_toby = target[2:]

# Iniitalize A Toy Model
model = nn.Linear(2,1)

data_jake = data_jake.send(jake)
data_toby = data_toby.send(toby)
target_jake = target_jake.send(jake)
target_toby = target_toby.send(toby)

# organize pointers into a list
datasets = [(data_jake,target_jake),(data_toby,target_toby)]

In [6]:
from syft.federated.floptimizer import Optims
workers = ['jake', 'toby']
optims = Optims(workers, optim=optim.Adam(params=model.parameters(),lr=0.1))

In [12]:
def train():
    # Training Logic
    for iter in range(10):
        
        # NEW) iterate through each worker's dataset
        for data,target in datasets:
            
            # NEW) send model to correct worker
            model.send(data.location)
            
            #Call the optimizer for the worker using get_optim
            opt = optims.get_optim(data.location.id)
            #print(data.location.id)

            # 1) erase previous gradients (if they exist)
            opt.zero_grad()

            # 2) make a prediction
            pred = model(data)

            # 3) calculate how much we missed
            loss = ((pred - target)**2).sum()

            # 4) figure out which weights caused us to miss
            loss.backward()

            # 5) change those weights
            opt.step()
            
            # NEW) get model (with gradients)
            model.get()

            # 6) print our progress
            print(loss.get().data) # NEW) slight edit... need to call .get() on loss\
    
# federated averaging

In [13]:
train()

tensor(0.4928)
tensor(2.7715)
tensor(0.1333)
tensor(1.3551)
tensor(0.1435)
tensor(0.6209)
tensor(0.3279)
tensor(0.3036)
tensor(0.4963)
tensor(0.1868)
tensor(0.5739)
tensor(0.1468)
tensor(0.5562)
tensor(0.1228)
tensor(0.4679)
tensor(0.0933)
tensor(0.3429)
tensor(0.0592)
tensor(0.2141)
tensor(0.0305)


In [14]:
#shortcomings of this application -> we call model.get() and recieve the updated model from Toby or Jake,
#we learn alot about Jake & Toby by looking at their gradients
#sometimes can perfectly restore their training data
#Strategies?
#1. Average the gradient across multiple individuals bf uploading to central server

### Advanced Remote Execution Tools

We want to average the gradients before calling .get() so we don't ever see anyone's exact gradient.

To do this must:
- use a pointer to send a Tensor directly to another worker

In [9]:
jake = sy.VirtualWorker(hook, id='jake')
toby = sy.VirtualWorker(hook, id='toby')

In [19]:
jake.clear_objects()
toby.clear_objects()

<VirtualWorker id:toby #objects:0>

In [20]:
# this is a local tensor
x = torch.tensor([1,2,3,4])
x

tensor([1, 2, 3, 4])

In [21]:
# this sends the local tensor to Jake
x_ptr = x.send(jake)

# this is now a pointer
x_ptr

(Wrapper)>[PointerTensor | me:47705114154 -> jake:35365737396]

In [22]:
# now we can SEND THE POINTER to toby!!!
pointer_to_x_ptr = x_ptr.send(toby)

pointer_to_x_ptr

(Wrapper)>[PointerTensor | me:50971181403 -> toby:47705114154]

We created a tensor called x and sent it to Jake, creating a pointer on our local machine (x_ptr)

Then called x_prt.send(toby) which sent the pointer to Toby. This did not move the data, it moved the pointer to the data

In [10]:
# As you can see above, Bob still has the actual data (data is always stored in a LocalTensor type). 
jake._objects


{}

In [13]:
# Toby, on the other hand, has x_ptr!! (notice how it points at bob)
toby._objects

{16200693430: (Wrapper)>[PointerTensor | toby:16200693430 -> jake:74887559502]}

In [14]:
# and we can use .get() to get x_ptr back from Toby

x_ptr = pointer_to_x_ptr.get()
x_ptr

(Wrapper)>[PointerTensor | me:16200693430 -> jake:74887559502]

In [15]:
 #and then we can use x_ptr to get x back from Jake!

x = x_ptr.get()
x

tensor([1, 2, 3, 4])

Just like normal pointers - can perform arbitrary PyTorch operations across Tensors

In [16]:
jake._objects

{}

In [17]:
toby._objects

{}

In [25]:
p2p2x = torch.tensor([1,2,3,4,5]).send(jake).send(toby)

In [26]:
y = p2p2x + p2p2x

In [27]:
jake._objects

{3886126330: tensor([1, 2, 3, 4, 5]),
 62481850534: tensor([ 2,  4,  6,  8, 10])}

In [28]:
toby._objects

{11194324781: (Wrapper)>[PointerTensor | toby:11194324781 -> jake:3886126330],
 14342832401: (Wrapper)>[PointerTensor | toby:14342832401 -> jake:62481850534]}

In [29]:
y.get().get()

tensor([ 2,  4,  6,  8, 10])

In [30]:
jake._objects

{3886126330: tensor([1, 2, 3, 4, 5])}

In [31]:
toby._objects

{11194324781: (Wrapper)>[PointerTensor | toby:11194324781 -> jake:3886126330]}

In [32]:
p2p2x.get().get()

tensor([1, 2, 3, 4, 5])

In [33]:
jake._objects

{}

In [34]:
toby._objects

{}

**Pointer Chain Operations**
Whenever we called a .send() or a .get() operation, it called that operation directly on the tensor on our local machine. However, it we have a chain of pointers, sometimes want to call operations like .get() or .send() on the last pointer in the chain (such as sending data directly from one worker to another). To accomplish this, use functions designed for this privacy preserving operation

In [35]:
# x is now a pointer to the data which lives on jake's machine
x = torch.tensor([1,2,3,4,5]).send(jake)

In [36]:
print('  jake:', jake._objects)
print('toby:',toby._objects)

  jake: {20104235647: tensor([1, 2, 3, 4, 5])}
toby: {}


In [37]:
x = x.move(toby)

In [38]:
print('jake:', jake._objects)
print('toby:',toby._objects)

jake: {}
toby: {20104235647: tensor([1, 2, 3, 4, 5])}


In [39]:
x

(Wrapper)>[PointerTensor | me:61497175931 -> toby:20104235647]

### Federated Learning with Model Averaging

In previous part, trained model with simplified federated learning, where we had to trust the mode owner to be able to see worker's gradients.

In this tutorial, use advanced aggregation tools to allow the weights to be aggregated by a trusted "secure worker" before the final resulting model is sent back to the model owner (us)

only secure worker can see whose weights came from whom. May be able to tell which parts of model changed, but don't know which worker caused change

In [41]:
# create a couple workers

bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
secure_worker = sy.VirtualWorker(hook, id="secure_worker")


# A Toy Dataset
data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]], requires_grad=True)
target = torch.tensor([[0],[0],[1],[1.]], requires_grad=True)

# get pointers to training data on each worker by
# sending some training data to bob and alice
bobs_data = data[0:2].send(bob)
bobs_target = target[0:2].send(bob)

alices_data = data[2:].send(alice)
alices_target = target[2:].send(alice)


In [42]:
# Iniitalize A Toy Model
model = nn.Linear(2,1)

In [43]:
#send a copy of current model to Alice & Bob so that they can perform steps of learning on their own dataset
bobs_model = model.copy().send(bob)
alices_model = model.copy().send(alice)

bobs_opt = optim.SGD(params=bobs_model.parameters(),lr=0.1)
alices_opt = optim.SGD(params=alices_model.parameters(),lr=0.1)

In [44]:
#Federated Learning via Secure Averaging - each data owner first trains their model for
#several iterations locally before the models are average together
for i in range(10):

    # Train Bob's Model
    bobs_opt.zero_grad()
    bobs_pred = bobs_model(bobs_data)
    bobs_loss = ((bobs_pred - bobs_target)**2).sum()
    bobs_loss.backward()

    bobs_opt.step()
    bobs_loss = bobs_loss.get().data

    # Train Alice's Model
    alices_opt.zero_grad()
    alices_pred = alices_model(alices_data)
    alices_loss = ((alices_pred - alices_target)**2).sum()
    alices_loss.backward()

    alices_opt.step()
    alices_loss = alices_loss.get().data
    
    print("Bob:" + str(bobs_loss) + " Alice:" + str(alices_loss))

Bob:tensor(1.4295) Alice:tensor(11.6062)
Bob:tensor(0.3257) Alice:tensor(0.1180)
Bob:tensor(0.0750) Alice:tensor(0.0248)
Bob:tensor(0.0180) Alice:tensor(0.0201)
Bob:tensor(0.0049) Alice:tensor(0.0167)
Bob:tensor(0.0018) Alice:tensor(0.0139)
Bob:tensor(0.0010) Alice:tensor(0.0116)
Bob:tensor(0.0007) Alice:tensor(0.0096)
Bob:tensor(0.0006) Alice:tensor(0.0080)
Bob:tensor(0.0005) Alice:tensor(0.0067)


Now that each data owner has partially trained model, time to average them together in a secure way. Must instruct Alice & Bob to send their model to the secure server

In [45]:
alices_model.move(secure_worker)

In [46]:
bobs_model.move(secure_worker)

In [47]:
# Last step is to average Bob and Alice's trained models together
# and use this to set the values for our global "model"

In [48]:
with torch.no_grad():
    model.weight.set_(((alices_model.weight.data + bobs_model.weight.data) / 2).get())
    model.bias.set_(((alices_model.bias.data + bobs_model.bias.data) / 2).get())

In [49]:
#iterate this multiple times

In [50]:
iterations = 10
worker_iters = 5

for a_iter in range(iterations):
    
    bobs_model = model.copy().send(bob)
    alices_model = model.copy().send(alice)

    bobs_opt = optim.SGD(params=bobs_model.parameters(),lr=0.1)
    alices_opt = optim.SGD(params=alices_model.parameters(),lr=0.1)

    for wi in range(worker_iters):

        # Train Bob's Model
        bobs_opt.zero_grad()
        bobs_pred = bobs_model(bobs_data)
        bobs_loss = ((bobs_pred - bobs_target)**2).sum()
        bobs_loss.backward()

        bobs_opt.step()
        bobs_loss = bobs_loss.get().data

        # Train Alice's Model
        alices_opt.zero_grad()
        alices_pred = alices_model(alices_data)
        alices_loss = ((alices_pred - alices_target)**2).sum()
        alices_loss.backward()

        alices_opt.step()
        alices_loss = alices_loss.get().data
    
    alices_model.move(secure_worker)
    bobs_model.move(secure_worker)
    with torch.no_grad():
        model.weight.set_(((alices_model.weight.data + bobs_model.weight.data) / 2).get())
        model.bias.set_(((alices_model.bias.data + bobs_model.bias.data) / 2).get())
    
    print("Bob:" + str(bobs_loss) + " Alice:" + str(alices_loss))

Bob:tensor(0.0008) Alice:tensor(0.0161)
Bob:tensor(0.0025) Alice:tensor(0.0090)
Bob:tensor(0.0035) Alice:tensor(0.0045)
Bob:tensor(0.0039) Alice:tensor(0.0023)
Bob:tensor(0.0038) Alice:tensor(0.0012)
Bob:tensor(0.0034) Alice:tensor(0.0006)
Bob:tensor(0.0029) Alice:tensor(0.0003)
Bob:tensor(0.0025) Alice:tensor(0.0002)
Bob:tensor(0.0020) Alice:tensor(0.0001)
Bob:tensor(0.0016) Alice:tensor(7.7089e-05)


In [51]:
preds = model(data)
loss = ((preds - target) ** 2).sum()

In [53]:
print(preds)
print(target)
print(loss.data)

tensor([[0.1177],
        [0.0991],
        [0.8744],
        [0.8558]], grad_fn=<AddmmBackward>)
tensor([[0.],
        [0.],
        [1.],
        [1.]], requires_grad=True)
tensor(0.0602)


### Pysyft Sandbox

In [54]:
sy.create_sandbox(globals())



Setting up Sandbox...
	- Hooking PyTorch
	- Creating Virtual Workers:
		- bob
		- theo
		- jason
		- alice
		- andy
		- jon
	Storing hook and workers as global variables...
	Loading datasets from SciKit Learn...
		- Boston Housing Dataset
		- Diabetes Dataset
		- Breast Cancer Dataset
	- Digits Dataset
		- Iris Dataset
		- Wine Dataset
		- Linnerud Dataset
	Distributing Datasets Amongst Workers...
	Collecting workers into a VirtualGrid...
Done!


In [55]:
workers

[<VirtualWorker id:bob #objects:17>,
 <VirtualWorker id:theo #objects:14>,
 <VirtualWorker id:jason #objects:14>,
 <VirtualWorker id:alice #objects:17>,
 <VirtualWorker id:andy #objects:14>,
 <VirtualWorker id:jon #objects:14>]

In [56]:
hook

<syft.frameworks.torch.hook.hook.TorchHook at 0x7f7a2635f520>

In [57]:
bob

<VirtualWorker id:bob #objects:17>

In [None]:
bob._objects

We want to be able to search for datasets on a remote machine. In this example, a research lab wants to query hospitals for a "radio" dataset

In [59]:
x = torch.tensor([1,2,3,4,5]).tag("#radio", "#hospital1").describe("The input datapoints to the hospital1 dataset.")
y = torch.tensor([5,4,3,2,1]).tag("#radio", "#hospital2").describe("The input datapoints to the hospital2 dataset.")
z = torch.tensor([1,2,3,4,5]).tag("#fun", "#mnist",).describe("The images in the MNIST training dataset.")

In [60]:
x

tensor([1, 2, 3, 4, 5])
	Tags: #hospital1 #radio 
	Description: The input datapoints to the hospital1 dataset....
	Shape: torch.Size([5])

In [61]:
x = x.send(bob)
y = y.send(bob)
z = z.send(bob)

# this searches for exact match within a tag or within the description
results = bob.search(["#radio"])

In [62]:
results

[tensor([1, 2, 3, 4, 5])
 	Tags: #hospital1 #radio 
 	Description: The input datapoints to the hospital1 dataset....
 	Shape: torch.Size([5]),
 tensor([5, 4, 3, 2, 1])
 	Tags: #radio #hospital2 
 	Description: The input datapoints to the hospital2 dataset....
 	Shape: torch.Size([5])]

In [63]:
print(results[0].description)

The input datapoints to the hospital1 dataset.


In [64]:
# search for datasets that are pre-populated on the sandbox workers
boston_housing_results = bob.search(["#boston", "#housing"])

In [None]:
boston_housing_results

A Grid is a collection of workers that gives some convenienve functions for when you want to put together a dataset

In [66]:
grid = sy.PrivateGridNetwork(*workers)

In [67]:
results = grid.search("#boston")

In [68]:
boston_data = grid.search("#boston","#data")

In [69]:
boston_target = grid.search("#boston","#target")

### Federated Learning on MNIST using a CNN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class Arguments():
    def __init__(self):
        self.batch_size = 64
        self.test_batch_size = 1000
        self.epochs = 100
        self.lr = 0.01
        self.momentum = 0.5
        self.no_cuda = False
        self.seed = 1
        self.log_interval = 30
        self.save_model = False

args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()

### Multiclass Classification Using Pytorch

In [2]:
import pandas as pd
from sklearn.feature_selection import SelectFromModel, SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler    
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import numpy as np

import torch
from torch import nn
from torch import optim
import syft as sy

In [3]:
stock_data = pd.read_csv('../pytorch/grid1.csv')
stock_data

Unnamed: 0,Ticker,Short Name,EBITDA to Net Sales:Q,GM:Q,Net D/E LF,Total Market Value LF,ROE LF,ROA LF,Curr Liab LF,Net Int Cov,...,CFO T12M,Net Debt to EBITDA LF,Debt/EBITDA LF,ST Brrwng LF,Market Cap,LT Brrwng LF,Net Debt LF,Moody's Issuer Rtg,Fitch Issr ST Rtg,S&P LT LC
0,TXMC US Equity,TIREX CORP,,,,,,45801300.00%,4.98M,,...,-192.63k,,,675.75k,670.05k,,,,,
1,EUBG US Equity,ENTREPRENEUR UNI,,,,1.10M,,1847.86%,182.03k,,...,-45.02k,0.39,0.39,176.88k,170.98k,0,176.41k,,,
2,SPQS US Equity,SPORTSQUEST INC,,,,1.16M,,800.34%,340.00k,,...,-165,0,0,0,1.38k,0,-96,,,
3,SCRH US Equity,SCORES HOLDING C,-120.82%,,,882.56k,,591.33%,325.16k,,...,-449.80k,,,22.08k,330.37k,0,-3.38k,,,
4,SBR US Equity,SABINE ROYALTY,,,-167.83,394.61M,732.52%,488.27%,3.56M,,...,41.57M,,,0,401.66M,0,-8.36M,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,MDKM US Equity,MDECHEM INC,,,,,,,,,...,,,,,32.11k,,,,,
2996,IHGP US Equity,INTERACT HOLDING,,,,,,,,,...,,,,,40.67,,,,,
2997,AVOT US Equity,AMER VIDEO TELEC,,,,,,,,,...,,,,,,,,,,
2998,MMTS US Equity,MULTI-MEDIA TUTL,-59.34%,86.25%,,2.89M,,,6.97M,,...,,,,2.24M,32.53k,538.02k,2.78M,,,


In [4]:
#rename columns
stock_data = stock_data.rename(columns={"Ticker": "ticker", "Short Name": "full_name", "EBITDA to Net Sales:Q":"ebitda/net_sales", "GM:Q":"gm", "Net D/E LF":"net_debt/e", "Total Market Value LF": "mv", "ROE LF": "roe", "ROA LF": "roa", "Curr Liab LF":"liabilities", "Net Int Cov":"net_interest_coverage", "CFO T12M": "cfo", "Net Debt to EBITDA LF":"net_debt/ebitda", "Debt/EBITDA LF":"debt/ebitda", "ST Brrwng LF":"st_borrowing", "LT Brrwng LF":"lt_borrowing", "Net Debt LF":"net_debt","Moody's Issuer Rtg":"m_rtg", "Fitch Issr ST Rtg":"f_rtg", "S&P LT LC":"sp_rtg", "Market Cap":"mc", "CFO/Debt LF":"cfo/debt"})

In [5]:
stock_data = stock_data.fillna('-1')

In [6]:
#covert T to trillions, B to billions, M to millions, K to thousands. Thanks for truncating Bloomberg! We're using regex expressions here
stock_data['mv'] = stock_data['mv'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['liabilities'] = stock_data['liabilities'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['cfo'] = stock_data['cfo'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['st_borrowing'] = stock_data['st_borrowing'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['lt_borrowing'] = stock_data['lt_borrowing'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['net_debt'] = stock_data['net_debt'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['mc'] = stock_data['mc'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['net_debt/ebitda'] = stock_data['net_debt/ebitda'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)
stock_data['debt/ebitda'] = stock_data['debt/ebitda'].replace({'[kK]': '*1e3', '[mM]': '*1e6', '[bB]': '*1e9', '[tT]': '*1e12'}, regex=True).map(pd.eval).astype(int)

In [7]:
stock_data

Unnamed: 0,ticker,full_name,ebitda/net_sales,gm,net_debt/e,mv,roe,roa,liabilities,net_interest_coverage,...,cfo,net_debt/ebitda,debt/ebitda,st_borrowing,mc,lt_borrowing,net_debt,m_rtg,f_rtg,sp_rtg
0,TXMC US Equity,TIREX CORP,-1,-1,-1,-1,-1,45801300.00%,4980000,-1,...,-192630,-1,-1,675750,670050,-1,-1,-1,-1,-1
1,EUBG US Equity,ENTREPRENEUR UNI,-1,-1,-1,1100000,-1,1847.86%,182030,-1,...,-45020,0,0,176880,170980,0,176410,-1,-1,-1
2,SPQS US Equity,SPORTSQUEST INC,-1,-1,-1,1160000,-1,800.34%,340000,-1,...,-165,0,0,0,1380,0,-96,-1,-1,-1
3,SCRH US Equity,SCORES HOLDING C,-120.82%,-1,-1,882560,-1,591.33%,325160,-1,...,-449800,-1,-1,22080,330370,0,-3380,-1,-1,-1
4,SBR US Equity,SABINE ROYALTY,-1,-1,-167.83,394610000,732.52%,488.27%,3560000,-1,...,41570000,-1,-1,0,401660000,0,-8359999,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,MDKM US Equity,MDECHEM INC,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,32110,-1,-1,-1,-1,-1
2996,IHGP US Equity,INTERACT HOLDING,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,40,-1,-1,-1,-1,-1
2997,AVOT US Equity,AMER VIDEO TELEC,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2998,MMTS US Equity,MULTI-MEDIA TUTL,-59.34%,86.25%,-1,2890000,-1,-1,6970000,-1,...,-1,-1,-1,2240000,32530,538020,2780000,-1,-1,-1


In [8]:
#convert % to decimal
#first, we use regex to replace '-' with -1* and to remove the % sign
stock_data['ebitda/net_sales'] = stock_data['ebitda/net_sales'].replace({'-':'-1*', '%':''}, regex=True).map(pd.eval).astype(int)
stock_data['roe'] = stock_data['roe'].replace({'-':'-1*', '%':''}, regex=True).map(pd.eval).astype(int)
stock_data['roa'] = stock_data['roa'].replace({'-':'-1*', '%':''}, regex=True).map(pd.eval).astype(int)
stock_data['gm'] = stock_data['gm'].replace({'-':'-1*', '%':''}, regex=True).map(pd.eval).astype(int)

In [9]:
#next, we must divide by 100 to convert the percents to decimal
stock_data['ebitda/net_sales'] = stock_data[stock_data['ebitda/net_sales'] != -1]['ebitda/net_sales'].div(100)
stock_data['roe'] = stock_data[stock_data['roe'] != -1]['roe'].div(100)
stock_data['roa'] = stock_data[stock_data['roa'] != -1]['roa'].div(100)
stock_data['gm'] = stock_data[stock_data['gm'] != -1]['gm'].div(100)

In [10]:
#refill all NaN values with our placeholder, -1
stock_data = stock_data.fillna('-1')

In [11]:
pd.set_option('display.max_rows', 20)

In [12]:
#eliminate any rows that don't have ratings from any agencies (moody's, fitch, nor s&p)
#this is a pretty cheeky way to accomplish this problem using one line - check if all the data isn't the same (the min and the max are not =). If min(x) and max(x) are
#the same, x must be -1
#this works in this case because the ratings system for each agency is different
stock_data = stock_data[stock_data.iloc[:,18:22].apply(lambda x: min(x) != max(x), 1)]

In [13]:
global temp
temp = stock_data

In [14]:
#make a dict of how we'll interpret ratings. For entries
#with multiple entries, we'll take the average
ratings_dict = {'AAA':1, 'AA+':2, 'AA':3, 'AA-':4, 'A+':5, 'A':6, 'A-':7, 'BBB+':8, 'BBB':9, 'BBB-':10, 'BB+':11, 'BB':12, 'BB-':13, 'B+':14, 'B':15, 'B-':16, 'CCC+':17, 'CCC':18, 'CCC-':19, 'CC':20, 'C':21, 'SD':22, 'D':23, 'Aaa':1, 'Aa1':2, 'Aa2':3, 'Aa3':3, 'A1':4, 'A2':5, 'A3':6, 'Baa1':7, 'Baa2':8, 'Baa3':9, 'Ba1':10, 'Ba2':11, 'Ba3':12, 'F1+':3, 'F1':6, 'F2':8, 'F3':10}

In [15]:
#this data is sparse - we need to fill in the empty unknown features. Some typical approaches are regression of the column
#the mean of the column, or stochastic regression of the column. We will try the column mean for all others
#with the same rating for now. We also have to remember to ignore WRs (withdrawn ratings)

#find the average value to replace missing data given a rating and column
def find_class_mean(data):
    col = len(data)
    col_names = list(temp.columns)
    rating = []
    count = 0
    data = np.array(data)
    for i in range(col-3, col):
        if data[i] != '-1' and data[i] != 'WR':
            rating.append(data[i])
            
            if count == 0:
                to_avg_df = temp[temp[col_names[i]] == data[i]] 
                new_rating = ratings_dict[data[i]]
            else:
                to_avg_df.append(temp[temp[col_names[i]] == data[i]])
                new_rating = new_rating + ratings_dict[data[i]]
            
            count = count + 1
    new_rating = new_rating/count
    for i in range(2, col-3):
        if data[i] == '-1' or data[i] == -1:
            calc = to_avg_df[col_names[i]]
            calc = pd.to_numeric(calc)
            men = calc.mean()
            data[i] = men
    
    data = np.append(data, new_rating)
    
    if (new_rating <= 10):
        data = np.append(data, 1)
    else:
        data = np.append(data, 0)

    return data

In [16]:
#apply our empty data averager to the entire dataframe
col_names = list(stock_data.columns)
new_data = stock_data.apply(find_class_mean, axis=1)

In [17]:
#make the returned data into a numpy matrix
new_data = np.vstack(new_data)
col_names = np.append(col_names, 'label')
col_names = np.append(col_names, 'i/g')

In [18]:
#reconstruct the dataframe
data = pd.DataFrame(new_data, columns = col_names)

In [19]:
data

Unnamed: 0,ticker,full_name,ebitda/net_sales,gm,net_debt/e,mv,roe,roa,liabilities,net_interest_coverage,...,debt/ebitda,st_borrowing,mc,lt_borrowing,net_debt,m_rtg,f_rtg,sp_rtg,label,i/g
0,ASCS US Equity,AMERICAN CR-PREF,0.08,-0.04,59.14,8.45358e+10,2.47,0.85,351400000,50.4,...,0,118800000,2.60923e+10,128060000,246730000,-1,-1,BBB+,8,1
1,IHRT US Equity,IHEARTMEDIA-CL A,-2.09,0.61,490.1,7370000000,-0.316889,0.82,645840000,43.69,...,19.4333,112590000,1040000000,6710000000,6170000000,-1,-1,B,15,0
2,WFTLF US Equity,WEATHERFORD INTE,-0.52,0.24,98.93,2560000000,-0.670909,0.49,1640000000,11.1,...,4.27273,116000000,140030000,2400000000,1840000000,-1,-1,CCC,18,0
3,GBL US Equity,GAMCO INVESTO-A,0.37,-0.177857,-82.08,328670000,1.58,0.48,2.62101e+09,3.73664,...,0,0,347510000,24200000,-53640000,-1,-1,BBB-,10,1
4,EAF US Equity,GRAFTECH INTERNA,0.55,0.56,213.171,4010000000,-0.0308772,0.43,222540000,7.87,...,1,138000,1720000000,1810000000,1660000000,-1,-1,BB-,13,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1289,NNHE US Equity,NEENAH ENTERPRIS,-0.769211,0.138158,624.248,2.31264e+09,-0.708421,-0.171579,4.32593e+08,-0.300263,...,9.63158,1.4386e+08,6.05584e+08,1.57946e+09,1.62882e+09,-1,-1,CCC+,17,0
1290,AMK US Equity,ASSETMARK FINANC,0.1,0.0904808,9.27,1800000000,0.0194231,-0.0101923,3.70428e+09,3.55029,...,3.67308,2910000,1930000000,158380000,81120000,-1,-1,BB+,11,0
1291,OCESO US Equity,OCEAN SPRAY CRAN,-0.0928571,-0.177857,99.9571,2.14083e+10,0.0287143,-0.0105714,2.62101e+09,3.73664,...,4.22857,1.09516e+09,1.08925e+10,5.24545e+09,5.04906e+09,-1,-1,BBB-,10,1
1292,VKSC US Equity,VISKASE COS I,0.15,0.2,723.67,453970000,-0.670909,-0.18,351670000,0.520909,...,4.27273,269150000,54140000,31950000,288240000,-1,-1,CCC,18,0


In [20]:
#construct a dictionary of all of the credit tranches so we can give our companies a single label
#right now, there are 3, moody's, fitch, and S&P
m_rtg = np.unique(data['m_rtg'])
f_rtg = np.unique(data['f_rtg'])
sp_rtg = np.unique(data['sp_rtg'])

In [21]:
f_rtg

array(['-1', 'B', 'F1', 'F1+', 'F2', 'F3'], dtype=object)

In [22]:
#This is a multiclass classifier.
#Given credit quality feature it will predict which tranche the bond belongs to

In [23]:
#as alot of our features are likely co-linear (only differ by a consant, it is prudent
#to try to figure out which are most valuable using a feature selection technique.
#The choice of algorithm isn't that important as long as it is skillful and consistent
#To keep things simple, we'll use Univariate Selection
#Univariate selection selects features that have the strongest relationship with the output variable
#In this case, we can pretty easily use sckit learn's SelectKBest Implementation

In [24]:
m_data = data.drop(['m_rtg', 'f_rtg', 'label', 'i/g'], axis=1)

In [25]:
m_data = m_data[m_data['sp_rtg'] != '-1']

In [26]:
#find top 4 features
test = SelectKBest(score_func=f_classif, k=5)
fit = test.fit(m_data.iloc[:,2:-1],m_data.iloc[:,-1])
scores = pd.DataFrame(fit.scores_, index=m_data.iloc[:,2:-1].columns)
features = fit.transform(m_data.iloc[:,2:-1])

In [27]:
scores

Unnamed: 0,0
ebitda/net_sales,4.265572
gm,5.493388
net_debt/e,1.784069
mv,11.548956
roe,19.380237
roa,11.798887
liabilities,25.030727
net_interest_coverage,27.692001
cfo/debt,4.175923
cfo,24.725785


In [28]:
multi_data = m_data.drop(['ebitda/net_sales', 'gm', 'net_debt/e', 'mv', 'roa', 'cfo/debt', 'debt/ebitda', 'net_debt/e', 'st_borrowing', 'lt_borrowing'], axis=1)

In [29]:
# get a one-hot-encoded representation of s&p ratings
sp_labels = pd.get_dummies(multi_data['sp_rtg'])
sp_labels.columns

Index(['A', 'A+', 'A-', 'AA', 'AA+', 'AA-', 'AAA', 'B', 'B+', 'B-', 'BB',
       'BB+', 'BB-', 'BBB', 'BBB+', 'BBB-', 'CC', 'CCC', 'CCC+', 'CCC-', 'D',
       'SD'],
      dtype='object')

In [30]:
class2idx = {
    'A+':0,
    'A':1,
    'A-':2,
    'AA+':3,
    'AA':4,
    'AA-':5,
    'AAA':6,
    'B+':7,
    'B':8,
    'B-':9,
    'BB+':10,
    'BB':11,
    'BB-':12,
    'BBB+':21,
    'BBB':13,
    'BBB-':14,
    'CC':15,
    'CCC+':16,
    'CCC':17,
    'CCC-':18,
    'D':19,
    'SD':20
}
idx2class = {v: k for k, v in class2idx.items()}
multi_data['sp_rtg'].replace(class2idx, inplace=True)

In [31]:
sp_labels=multi_data['sp_rtg']

In [32]:
multi_data = multi_data.drop(['sp_rtg'], axis=1)

In [33]:
multi_data = multi_data.iloc[:,2:-1]

In [34]:
multi_data

Unnamed: 0,roe,liabilities,net_interest_coverage,cfo,net_debt/ebitda,mc
0,2.47,351400000,50.4,113570000,0,2.60923e+10
1,-0.316889,645840000,43.69,431270000,16.1444,1040000000
2,-0.670909,1640000000,11.1,-407000000,3.81818,140030000
3,1.58,2.62101e+09,3.73664,89990000,0,347510000
4,-0.0308772,222540000,7.87,787780000,1,1720000000
...,...,...,...,...,...,...
1289,-0.708421,4.32593e+08,-0.300263,1.49124e+08,8.68421,6.05584e+08
1290,0.0194231,3.70428e+09,3.55029,1.16786e+09,2.55769,1930000000
1291,0.0287143,2.62101e+09,3.73664,1.08829e+09,3.40714,1.08925e+10
1292,-0.670909,351670000,0.520909,4.96609e+07,3.81818,54140000


In [52]:
from sklearn.preprocessing import StandardScaler
model=StandardScaler()

In [53]:
X = model.fit_transform(multi_data)
Y = np.array(sp_labels)
dtype=torch.float
device=torch.device("cpu")

In [54]:
data = torch.from_numpy(X)
target = torch.from_numpy(Y)
data,target=data.type(torch.FloatTensor),target.type(torch.LongTensor)
target=target.view(-1)
data_length,data_width=data.shape

In [55]:
hook = sy.TorchHook(torch)
import torch.nn.functional as F
from torch import optim



In [56]:
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
secure_worker = sy.VirtualWorker(hook, id="secure_worker")

bob.add_workers([alice, secure_worker])
alice.add_workers([bob, secure_worker])
secure_worker.add_workers([alice, bob])



<VirtualWorker id:secure_worker #objects:46>

In [57]:
#Sending Data
bobs_data = data[0:int(data_length/2)].send(bob)
bobs_target = target[0:int(data_length/2)].send(bob)

In [58]:
alices_data = data[int(data_length/2):].send(alice)
alices_target = target[int(data_length/2):].send(alice)

In [59]:
EPOCHS = 300
BATCH_SIZE = 16
LEARNING_RATE = 0.0007
NUM_FEATURES = 6
NUM_CLASSES = 22

In [60]:
class MulticlassClassification(torch.nn.Module):
    def __init__(self, num_feature, num_class):
        super(MulticlassClassification, self).__init__()
        
        self.fc1 = nn.Linear(num_feature, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_class) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.batchnorm1(x)
        x = self.relu(x)
        
        x = self.fc2(x)
        x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc4(x)
        
        return x

In [62]:
model = MulticlassClassification(num_feature = NUM_FEATURES, num_class=NUM_CLASSES)
loss = torch.nn.CrossEntropyLoss()
l = nn.CrossEntropyLoss()
print(model)

MulticlassClassification(
  (fc1): Linear(in_features=6, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=22, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [63]:
bobs_model = model.copy().send(bob)
alices_model = model.copy().send(alice)
bobs_opt = optim.SGD(params=bobs_model.parameters(),lr=0.1)
alices_opt = optim.SGD(params=alices_model.parameters(),lr=0.1)

In [64]:
iterations = 10
worker_iters = 200

for a_iter in range(iterations):
    
    bobs_model = model.copy().send(bob)
    alices_model = model.copy().send(alice)

    bobs_opt = optim.Adam(params=bobs_model.parameters(),lr=0.01)
    alices_opt = optim.Adam(params=alices_model.parameters(),lr=0.01)

    for wi in range(worker_iters):

        # Train Bob's Model
        bobs_opt.zero_grad()
        bobs_pred = bobs_model.forward(bobs_data)
        bobs_loss = loss(bobs_pred,bobs_target)
        bobs_loss.backward()        
        bobs_opt.step()
        bobs_loss = bobs_loss.get().data

        # Train Alice's Model
        alices_opt.zero_grad()
        alices_pred = alices_model.forward(alices_data)
        alices_loss = loss(alices_pred,alices_target)
        alices_loss.backward()
        alices_opt.step()
        alices_loss = alices_loss.get().data

    
    alices_model.move(secure_worker)
    bobs_model.move(secure_worker)
    with torch.no_grad():
        model.fc1.weight.set_(((alices_model.fc1.weight.data + bobs_model.fc1.weight.data) / 2).get())
        model.fc1.bias.set_(((alices_model.fc1.bias.data + bobs_model.fc1.bias.data) / 2).get())
        model.fc2.weight.set_(((alices_model.fc2.weight.data + bobs_model.fc2.weight.data) / 2).get())
        model.fc2.bias.set_(((alices_model.fc2.bias.data + bobs_model.fc2.bias.data) / 2).get())
        model.fc3.weight.set_(((alices_model.fc3.weight.data + bobs_model.fc3.weight.data) / 2).get())
        model.fc3.bias.set_(((alices_model.fc3.bias.data + bobs_model.fc3.bias.data) / 2).get())
        model.fc4.weight.set_(((alices_model.fc4.weight.data + bobs_model.fc4.weight.data) / 2).get())
        model.fc4.bias.set_(((alices_model.fc4.bias.data + bobs_model.fc4.bias.data) / 2).get())
    
    print("Bob:" + str(bobs_loss) + " Alice:" + str(alices_loss))

Bob:tensor(0.8220) Alice:tensor(0.6484)
Bob:tensor(0.7057) Alice:tensor(0.5315)
Bob:tensor(0.5467) Alice:tensor(0.3993)
Bob:tensor(0.4298) Alice:tensor(0.3529)
Bob:tensor(0.4863) Alice:tensor(0.3325)
Bob:tensor(0.4574) Alice:tensor(0.3056)
Bob:tensor(0.4074) Alice:tensor(0.3481)
Bob:tensor(0.3518) Alice:tensor(0.2987)
Bob:tensor(0.3652) Alice:tensor(0.2641)
Bob:tensor(0.3630) Alice:tensor(0.2898)
