## 라이브러리 임포트

In [1]:
import torch
from torchvision.datasets import MNIST
from torch.utils.data import random_split, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
%matplotlib inline
plt.rcParams['figure.figsize'] = [5, 5]

## 데이터 세트 로드

In [2]:
# Load the integrated dataset (UTF-8 encoded CSV) into a DataFrame.
df = pd.read_csv(
    "../Integrated_data/all_dataset2.csv",
    encoding="utf-8",
)

In [None]:
# Split the training data ~83% / ~17% into train and dev subsets.
# The dev size is computed as the remainder so the two lengths always sum to
# len(train_dataset); truncating both products independently can fall short
# of the total, which makes random_split raise.
# NOTE(review): `train_dataset` is not defined anywhere in the visible notebook
# before this cell — confirm where it is meant to come from.
_n_total = len(train_dataset)
_n_train = int(_n_total * 0.83)
train_dataset, dev_dataset = random_split(train_dataset, [_n_train, _n_total - _n_train])

In [3]:
# Report how many distinct IDs the dataset contains.
unique_ids = df['ID'].unique()
print("ID 개수: ", len(unique_ids))

# Report how many rows belong to one sample ID.
sample_id_rows = df[df['ID'] == '#AAGQKY']
print("Record 개수: ", len(sample_id_rows))

ID 개수:  327
Record 개수:  182


In [4]:
# Preview the first rows of the loaded DataFrame.
df.head()

Unnamed: 0,ID,collect_datetime,gender,grade,height,weight,step_count,burned calory,eat_calory,Sleep_time,before_height,before_weight,before_waist,after_height,after_weight,after_waist,bmi,waist_bmi
0,#AAGQKY,2022-07-14,2.0,4.0,148.0,45.15,72.0,8.877,2000.0,12.0,148.0,45.3,-,148.0,45.0,,20.612673,0.306081
1,#AAGQKY,2022-07-15,2.0,4.0,148.0,45.15,72.0,8.877,2000.0,12.0,148.0,45.3,-,148.0,45.0,,20.612673,0.306081
2,#AAGQKY,2022-07-16,2.0,4.0,148.0,45.15,72.0,8.877,2000.0,12.0,148.0,45.3,-,148.0,45.0,,20.612673,0.306081
3,#AAGQKY,2022-07-17,2.0,4.0,148.0,45.15,72.0,8.877,2000.0,12.0,148.0,45.3,-,148.0,45.0,,20.612673,0.306081
4,#AAGQKY,2022-07-18,2.0,4.0,148.0,45.15,72.0,8.877,2000.0,12.0,148.0,45.3,-,148.0,45.0,,20.612673,0.306081


In [None]:
# Sizes of the three dataset splits. total_train_size is used later to
# partition the training data across clients and to weight each client's update.
# NOTE(review): `test_dataset` is not defined in the visible notebook — confirm.
total_train_size = len(train_dataset)
total_test_size = len(test_dataset)
total_dev_size = len(dev_dataset)

# NOTE(review): these two values are not referenced elsewhere in the visible
# notebook and do not match DNNModel's layer sizes (6 inputs, 3 outputs) —
# presumably leftovers from an MNIST version; confirm before relying on them.
classes = 10
input_dim = 784

# Federated-training settings read as globals by the client split,
# Client.train and the server loop.
num_clients = 8
rounds = 30
batch_size = 128
epochs_per_client = 3
learning_rate = 2e-2

## GPU 설정

In [5]:
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

def to_device(data, device):
    """Move ``data`` to ``device``.

    A list or tuple is processed element by element (recursively) and the
    result is always returned as a list; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader(DataLoader):
    """Wrap an existing data loader and move each batch to a device on iteration.

    Only ``dl`` and ``device`` are stored; ``DataLoader.__init__`` is not
    called, so iteration and length are delegated entirely to ``dl``.
    """

    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        # Batches are produced lazily, one per batch of the wrapped loader.
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        # Number of batches in the wrapped loader.
        return len(self.dl)

# Device chosen once at module level; DNNModel.fit/evaluate and Client.train read this global.
device = get_device()

## 딥러닝 메인 모델정의

In [6]:
class DNNModel(torch.nn.Module):
    """Fully connected classifier (6 input features -> 3 output logits).

    Besides the forward pass, the class offers helpers to export and import
    layer parameters (used for federated averaging) and simple ``fit`` /
    ``evaluate`` loops. ``fit`` and ``evaluate`` rely on the module-level
    ``DeviceDataLoader`` wrapper and the global ``device``.
    """

    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(6, 128)
        self.hidden_layer1 = nn.Linear(128, 256)
        self.hidden_layer2 = nn.Linear(256, 128)
        self.output_layer = nn.Linear(128, 3)
        self.relu = nn.ReLU()
        # Layers whose weights/biases are exchanged via get_parameters /
        # apply_parameters. Every trainable layer is listed so that no layer
        # is left out of the parameter exchange.
        self.track_layers = {
            'input_layer': self.input_layer,
            'hidden_layer1': self.hidden_layer1,
            'hidden_layer2': self.hidden_layer2,
            'output_layer': self.output_layer,
        }

    def forward(self, x):
        """Return raw class logits for a batch ``x`` whose last dimension is 6."""
        out = self.relu(self.input_layer(x))
        out = self.relu(self.hidden_layer1(out))
        out = self.relu(self.hidden_layer2(out))
        # No activation on the output: cross_entropy expects unnormalized logits.
        return self.output_layer(out)

    def get_track_layer(self):
        """Return the mapping of layer name -> tracked layer module."""
        return self.track_layers

    def apply_parameters(self, parameters_dict):
        """Overwrite tracked layers with the values in ``parameters_dict``.

        ``parameters_dict`` maps a tracked layer name to a dict with
        ``'weight'`` and ``'bias'`` tensors. Values are copied in place, so
        the model never shares storage with the tensors that were passed in.

        Raises:
            KeyError: if a layer name is not a tracked layer.
        """
        with torch.no_grad():
            for layer_name, layer_params in parameters_dict.items():
                layer = self.track_layers[layer_name]
                layer.weight.copy_(layer_params['weight'])
                layer.bias.copy_(layer_params['bias'])

    def get_parameters(self):
        """Return a snapshot ``{layer_name: {'weight': ..., 'bias': ...}}``.

        The tensors are detached copies, so later training of this model does
        not change a snapshot that was taken earlier.
        """
        return {
            layer_name: {
                'weight': layer.weight.detach().clone(),
                'bias': layer.bias.detach().clone(),
            }
            for layer_name, layer in self.track_layers.items()
        }

    def batch_accuracy(self, outputs, labels):
        """Return the fraction of rows whose arg-max prediction equals the label."""
        with torch.no_grad():
            _, predictions = torch.max(outputs, dim=1)
            return torch.tensor(torch.sum(predictions == labels).item() / len(predictions))

    def _process_batch(self, batch):
        """Run one ``(inputs, labels)`` batch and return ``(loss, accuracy)``."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        accuracy = self.batch_accuracy(outputs, labels)
        return (loss, accuracy)

    def fit(self, dataset, epochs, lr, batch_size=128, opt=torch.optim.SGD):
        """Train on ``dataset`` and return per-epoch ``(avg_loss, avg_acc)`` tuples.

        Args:
            dataset: dataset yielding ``(inputs, labels)`` pairs.
            epochs: number of passes over the dataset.
            lr: learning rate handed to the optimizer.
            batch_size: mini-batch size.
            opt: optimizer class, called as ``opt(self.parameters(), lr)``.
        """
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size, shuffle=True), device)
        optimizer = opt(self.parameters(), lr)
        history = []
        for _ in range(epochs):
            losses = []
            accs = []
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                # Store the detached value so the autograd graph of every
                # batch is not kept alive until the end of the epoch.
                losses.append(loss.detach())
                accs.append(acc)
            avg_loss = torch.stack(losses).mean().item()
            avg_acc = torch.stack(accs).mean().item()
            history.append((avg_loss, avg_acc))
        return history

    def evaluate(self, dataset, batch_size=128):
        """Return ``(avg_loss, avg_acc)`` over ``dataset`` without tracking gradients."""
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size), device)
        losses = []
        accs = []
        with torch.no_grad():
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                losses.append(loss)
                accs.append(acc)
        avg_loss = torch.stack(losses).mean().item()
        avg_acc = torch.stack(accs).mean().item()
        return (avg_loss, avg_acc)

In [7]:
class Client:
    """A federated-learning participant that owns a local dataset.

    ``train`` reads the module-level ``device``, ``epochs_per_client``,
    ``learning_rate`` and ``batch_size`` settings.
    """

    def __init__(self, client_id, dataset):
        self.client_id = client_id
        self.dataset = dataset

    def get_dataset_size(self):
        """Return the number of samples held by this client."""
        return len(self.dataset)

    def get_client_id(self):
        """Return this client's identifier."""
        return self.client_id

    def train(self, parameters_dict):
        """Train a fresh local model starting from ``parameters_dict``.

        A new ``DNNModel`` is created, loaded with the given parameters,
        fitted on the client's dataset, and its resulting parameters are
        returned. The last epoch's loss and accuracy are printed.
        """
        # DNNModel is the model class defined in this notebook; the previously
        # referenced FederatedNet is not defined anywhere in it.
        net = to_device(DNNModel(), device)
        net.apply_parameters(parameters_dict)
        train_history = net.fit(self.dataset, epochs_per_client, learning_rate, batch_size)
        last_loss, last_acc = train_history[-1]
        print('{}: Loss = {}, Accuracy = {}'.format(self.client_id, round(last_loss, 4), round(last_acc, 4)))
        return net.get_parameters()

In [8]:
# Partition the training data into exactly `num_clients` subsets.
# The remainder of the integer division is spread over the first clients so
# that every sample is assigned to some client and the per-client fractions
# (dataset size / total_train_size) used during aggregation sum to 1.
examples_per_client, _remainder = divmod(total_train_size, num_clients)
_client_sizes = [
    examples_per_client + (1 if i < _remainder else 0)
    for i in range(num_clients)
]
client_datasets = random_split(train_dataset, _client_sizes)
clients = [
    Client('client_' + str(i), subset)
    for i, subset in enumerate(client_datasets)
]

NameError: name 'total_train_size' is not defined

In [None]:
# Server-side federated averaging: in every round each client trains from the
# current global parameters, and the global model is replaced by the average
# of the client results weighted by each client's share of the training data.
# DNNModel is the model class defined in this notebook; the previously
# referenced FederatedNet is not defined anywhere in it.
global_net = to_device(DNNModel(), device)
history = []
for i in range(rounds):
    print('Start Round {} ...'.format(i + 1))
    curr_parameters = global_net.get_parameters()
    # Accumulators start at 0 and become tensors on the first `+=`.
    new_parameters = {layer_name: {'weight': 0, 'bias': 0} for layer_name in curr_parameters}
    for client in clients:
        client_parameters = client.train(curr_parameters)
        fraction = client.get_dataset_size() / total_train_size
        for layer_name, layer_params in client_parameters.items():
            new_parameters[layer_name]['weight'] += fraction * layer_params['weight']
            new_parameters[layer_name]['bias'] += fraction * layer_params['bias']
    global_net.apply_parameters(new_parameters)

    train_loss, train_acc = global_net.evaluate(train_dataset)
    dev_loss, dev_acc = global_net.evaluate(dev_dataset)
    print('After round {}, train_loss = {}, dev_loss = {}, dev_acc = {}\n'.format(i + 1, round(train_loss, 4),
            round(dev_loss, 4), round(dev_acc, 4)))
    # Only the two losses are kept per round (they are what gets plotted).
    history.append((train_loss, dev_loss))

In [None]:
# Plot train and dev loss per round; rounds are numbered from 1 on the x-axis.
round_numbers = list(range(1, len(history) + 1))
train_losses = [entry[0] for entry in history]
dev_losses = [entry[1] for entry in history]

plt.plot(round_numbers, train_losses, color='r', label='train loss')
plt.plot(round_numbers, dev_losses, color='b', label='dev loss')
plt.legend()
plt.title('Training history')
plt.show()