In [None]:
import os
import numpy as np
import pandas as pd
import random
from const import *


### 数据准备

In [None]:
class DatasetBase(object):
    """Loader for one of the four raw dataset files.

    The dataset kind is fixed at construction time and must be one of
    ``"rt"``, ``"tp"``, ``"user"`` or ``"service"``.
    """

    def __init__(self,type_) -> None:
        super().__init__()

        self.type = type_
        is_known_kind = self.type in ["rt","tp","user","service"]
        assert is_known_kind,f"类型不符，请在{['rt','tp','user','service']}中选择"

    def get_row_data(self):
        """Read the file that belongs to ``self.type`` and return its content.

        ``"rt"`` and ``"tp"`` are read with ``np.loadtxt`` from ``RT_MATRIX_DIR``
        and ``TP_MATRIX_DIR``; ``"user"`` and ``"service"`` are read as
        tab-separated files with ``pd.read_csv`` from ``USER_DIR`` and ``WS_DIR``.
        """
        kind = self.type
        if kind in ("rt","tp"):
            # The two matrix files are plain numeric text.
            matrix_path = RT_MATRIX_DIR if kind == "rt" else TP_MATRIX_DIR
            data = np.loadtxt(matrix_path)
        elif kind in ("user","service"):
            # The two info tables are tab-separated.
            table_path = USER_DIR if kind == "user" else WS_DIR
            data = pd.read_csv(table_path,sep="\t")
        return data

class MatrixDataset(DatasetBase):
    """Two-dimensional rating matrix with helpers that build (uid, iid, rate) triads."""

    def __init__(self,type_) -> None:
        super().__init__(type_)

    def get_row_data(self):
        """Load the raw data and record its shape in ``self.row_n`` / ``self.col_n``."""
        data = super().get_row_data()
        self.row_n,self.col_n = data.shape
        return data

    def get_triad(self,nan_symbol = -1):
        """Build the (uid, iid, rate) triads of every observed entry.

        Entries equal to ``nan_symbol`` are set to 0 first, then every
        non-zero entry becomes one triad.

        Args:
            nan_symbol (int, optional): value that marks a missing entry in
                the data set. Defaults to -1.

        Returns:
            np.ndarray: array of shape (k, 3) whose rows are (uid, iid, rate);
            the shape is (0, 3) when no entry is observed.
        """
        row_data = self.get_row_data()
        if isinstance(row_data,pd.DataFrame):
            row_data = row_data.to_numpy()
        row_data[row_data == nan_symbol] = 0
        uids,iids = np.nonzero(row_data)
        # Vectorised equivalent of appending [uid, iid, value] row by row; it
        # also keeps the result two-dimensional when there are no entries.
        return np.column_stack((uids,iids,row_data[uids,iids]))

    @staticmethod
    def _shuffle_rows(triad_data):
        """Return the rows of ``triad_data`` in random order.

        A shuffled index list is used because ``random.shuffle`` applied to a
        2-D ndarray swaps row views and can duplicate rows.
        """
        order = list(range(len(triad_data)))
        random.shuffle(order)
        return triad_data[order]

    def split_train_test(self,density,nan_symbol = -1,shuffle=True):
        """Split all triads into a training part and a test part.

        Args:
            density (float): fraction of the full ``row_n * col_n`` matrix
                used as the number of training triads.
            nan_symbol (int, optional): missing-value marker. Defaults to -1.
            shuffle (bool, optional): shuffle the triads before splitting.
                Defaults to True.

        Returns:
            tuple[np.ndarray, np.ndarray]: (train_data, test_data).
        """
        triad_data = self.get_triad(nan_symbol)

        if shuffle:
            # The original passed the boolean flag to random.shuffle, which raised.
            triad_data = self._shuffle_rows(triad_data)

        train_n = int(self.row_n * self.col_n * density)  # number of training triads
        return triad_data[:train_n,:],triad_data[train_n:,:]

    def get_mini_triad(self,nan_symbol = -1, sample_nums=200):
        """Return a random subset of at most ``sample_nums`` triads as an ndarray."""
        total_triad_data = self.get_triad(nan_symbol)
        # random.sample rejects ndarrays, so sample row indices instead; the
        # size is capped so a small data set does not raise ValueError.
        picked_n = min(sample_nums,len(total_triad_data))
        picked = random.sample(range(len(total_triad_data)),picked_n)
        return total_triad_data[picked]

    def mini_split_train_test(self,density,nan_symbol = -1,shuffle=True):
        """Split a small random sample of the triads into train and test parts.

        The training size is ``density`` times the number of sampled triads.
        Sizing it from the full matrix, as ``split_train_test`` does, would
        exceed the sample and leave the test part empty.

        Returns:
            tuple[np.ndarray, np.ndarray]: (train_data, test_data).
        """
        triad_data = self.get_mini_triad(nan_symbol)

        if shuffle:
            triad_data = self._shuffle_rows(triad_data)

        train_n = int(len(triad_data) * density)  # number of training triads
        return triad_data[:train_n,:],triad_data[train_n:,:]

import pandas as pd
from const import *
# Write the latitude / longitude columns of the user table to ./test2.csv.
db = DatasetBase("user")
user_coordinates = db.get_row_data()[["[Latitude]","[Longitude]"]]
user_coordinates.to_csv("./test2.csv",index=False)

### 模型准备

In [None]:
import torch
from torch import nn

def _mlp_layer(n,dim,layers=[32,16,8]):
    embedding_layer = nn.Embedding(n,dim)
    fc_layers = []
    for in_size,out_size in zip(layers[:-1],layers[1:]):
        fc_layers.append(nn.Linear(in_size,out_size))
    return nn.Sequential(
        embedding_layer,
        *fc_layers
    )


class MLPModel(nn.Module):
    """Embedding lookup followed by a ReLU multi-layer perceptron."""

    def __init__(self,n,dim,layers=[32,16,8],output_dim=1) -> None:
        """Create the embedding table, the hidden layers and the output layer.

        Args:
            n: number of users or items (rows of the embedding table).
            dim: dimension of the latent feature space; it feeds the first
                hidden layer, which expects ``layers[0]`` inputs.
            layers (list, optional): sizes of the hidden stack; consecutive
                entries form one linear layer. Defaults to [32,16,8].
            output_dim (int, optional): size of the final output. Defaults to 1.
        """
        super(MLPModel,self).__init__()
        self.n = n
        self.latent_dim = dim

        self.embedding = nn.Embedding(num_embeddings=self.n, embedding_dim=self.latent_dim)

        hidden = [
            nn.Linear(fan_in,fan_out)
            for fan_in,fan_out in zip(layers,layers[1:])
        ]
        self.fc_layers = nn.ModuleList(hidden)

        self.fc_output = nn.Linear(layers[-1],output_dim)

    def forward(self,idx):
        """Map indices to embeddings, apply every hidden layer with ReLU, then the output layer."""
        relu = nn.ReLU()
        hidden_state = self.embedding(idx)
        for layer in self.fc_layers:
            hidden_state = relu(layer(hidden_state))
        return self.fc_output(hidden_state)



class QoSNet(nn.Module):
    """Two-tower network: user and service features are concatenated and regressed."""

    def __init__(self,n_user,n_item,dim,mlp_layers=[32,16,8],qos_layers=[8],output_dims=1) -> None:
        """Build both feature towers and the prediction head.

        Args:
            n_user: number of users (rows of the user embedding).
            n_item: number of services (rows of the service embedding).
            dim: embedding width handed to ``_mlp_layer`` for both towers.
            mlp_layers (list, optional): layer sizes of each tower.
                Defaults to [32,16,8].
            qos_layers (list, optional): hidden sizes of the head that follows
                the concatenation. Defaults to [8]; may be empty.
            output_dims (int, optional): size of the prediction. Defaults to 1.
        """
        super(QoSNet,self).__init__()
        self.u_layers = _mlp_layer(n_user,dim,mlp_layers)
        self.s_layers = _mlp_layer(n_item,dim,mlp_layers)
        # Both towers end with mlp_layers[-1] features and are concatenated.
        d = mlp_layers[-1] * 2
        layers = [d] + list(qos_layers)
        self.fc_layers = nn.ModuleList()
        for in_size,out_size in zip(layers[:-1],layers[1:]):
            self.fc_layers.append(nn.Linear(in_size,out_size))
        # layers[-1] equals qos_layers[-1] whenever qos_layers is non-empty and
        # falls back to d otherwise, instead of raising IndexError.
        self.fc_output = nn.Linear(layers[-1],output_dims)

    def forward(self,n_idx,s_idx):
        """Predict from user indices ``n_idx`` and service indices ``s_idx``.

        Returns:
            tuple: (prediction, user feature, service feature).
        """
        u_feature = self.u_layers(n_idx)
        s_feature = self.s_layers(s_idx)
        # Concatenate on the feature axis. dim=-1 is the same as dim=1 for
        # 2-D features and also works for index tensors of other ranks.
        x = torch.cat([u_feature,s_feature],dim=-1)
        activation = nn.ReLU()
        for fc_layer in self.fc_layers:
            x = activation(fc_layer(x))
        x = self.fc_output(x)
        return x,u_feature,s_feature






## 训练准备

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.optim import SGD,Adam
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Module-level TensorBoard writer; train() below logs its loss scalars through it.
tb = SummaryWriter()


def train(model,epochs,train_loader,eval_loader,loss_fn,optimizer):
    """Train ``model`` and evaluate it on ``eval_loader`` every 10th epoch.

    Losses are also written to the module-level TensorBoard writer ``tb``.

    Args:
        model: called as ``model(user, item)``. It may return the prediction
            tensor, or a tuple/list whose first element is the prediction
            (as ``QoSNet.forward`` in this notebook does).
        epochs (int): number of passes over ``train_loader``.
        train_loader: sized iterable of batches read as ``batch[0]`` (user),
            ``batch[1]`` (item) and ``batch[2]`` (rating).
        eval_loader: sized iterable with the same batch layout.
        loss_fn: called as ``loss_fn(prediction, target)``.
        optimizer: object providing ``zero_grad()`` and ``step()``.

    Returns:
        tuple[list, list]: the mean training loss of every epoch, and the mean
        evaluation loss of every evaluated epoch.
    """

    def _predict(user,item):
        # Keep only the prediction when the model also returns extra features.
        output = model(user,item)
        if isinstance(output,(tuple,list)):
            return output[0]
        return output

    train_loss_list = []
    eval_loss_list = []
    for epoch in tqdm(range(epochs)):
        # Re-enter training mode every epoch: the evaluation below switches the
        # model to eval mode, which would otherwise persist for later epochs.
        model.train()
        train_total_loss = 0
        for batch in tqdm(train_loader):
            user,item,rating = batch[0],batch[1],batch[2]
            y_real = rating.reshape(-1,1)

            optimizer.zero_grad()
            y_pred = _predict(user,item)
            loss = loss_fn(y_pred,y_real) # must be (1. nn output, 2. target)
            loss.backward()
            optimizer.step()

            train_total_loss += loss.item()

        loss_per_epoch = train_total_loss/len(train_loader)
        train_loss_list.append(loss_per_epoch)
        print(f"Training Epoch:[{epoch}/{epochs}] Loss:{loss_per_epoch:.4f}")

        tb.add_scalar("Train Loss",loss_per_epoch,epoch)

        if (epoch+1) % 10 == 0:
            eval_total_loss = 0
            model.eval()
            with torch.no_grad():
                for batch in tqdm(eval_loader):
                    user,item,rating = batch[0],batch[1],batch[2]
                    y_pred = _predict(user,item)
                    y_real = rating.reshape(-1,1)
                    # The original called an undefined name, loss_func.
                    loss = loss_fn(y_pred,y_real)
                    eval_total_loss += loss.item()
            eval_loss_per_epoch = eval_total_loss/len(eval_loader)
            eval_loss_list.append(eval_loss_per_epoch)
            print(f"Test loss:",eval_loss_per_epoch)

            tb.add_scalar("Eval loss",eval_loss_per_epoch,epoch)
    return train_loss_list,eval_loss_list

## 处理用户服务信息

In [None]:
from data import MatrixDataset,InfoDataset,ToTorchDataset
from tqdm import tqdm
from models.FedXXX.model import FedXXXModel,Embedding,FedXXXLaunch,FedXXX
import numpy as np
import torch
from torch import nn
from models.FedXXX.utils import ResNetBasicBlock
from torch.optim import Adam
from torch.utils.data import DataLoader
from functools import partial

In [None]:
"""
RESULT MODEL:
"""

# Run configuration for the FedXXX experiment in this cell.
# NOTE(review): IS_FED is not read anywhere in the visible cells - confirm it is still needed.
IS_FED = True

epochs = 10  # passed to model.fit below
desnity = 0.05  # passed to split_train_test below; the name is a misspelling of "density"
type_ = "rt"  # dataset type given to MatrixDataset

# Column lists handed to InfoDataset for the user table and the service table.
u_enable_columns = ["[User ID]", "[Country]"]
i_enable_columns = ["[Service ID]", "[Country]"]

def data_preprocess(traid,u_info_obj:InfoDataset,i_info_obj:InfoDataset,is_dtraid=False):
    """Convert raw triads into plain Python records, optionally paired with info.

    Args:
        traid: iterable of rows read as ``row[0]`` (uid), ``row[1]`` (iid)
            and ``row[2]`` (rate).
        u_info_obj (InfoDataset): queried with every uid.
        i_info_obj (InfoDataset): queried with every iid.
        is_dtraid (bool, optional): when True each record is
            ``[[uid, iid, rate], [u, i, rate]]``, where ``u`` and ``i`` are the
            query results; otherwise it is ``[uid, iid, rate]``.
            Defaults to False.

    Returns:
        list: one record per input row, in input order.
    """
    records = []
    for entry in tqdm(traid,desc="Data preprocess"):
        uid = int(entry[0])
        iid = int(entry[1])
        rate = float(entry[2])
        # Both lookups run even when their results are not stored.
        u = u_info_obj.query(uid)
        i = i_info_obj.query(iid)
        if is_dtraid:
            records.append([[uid,iid,rate],[u,i,rate]])
        else:
            records.append([uid,iid,rate])
    return records

# data_preprocess variant that always produces the paired [[uid,iid,rate],[u,i,rate]] records.
fed_data_preprocess = partial(data_preprocess,is_dtraid=True)



# Load the matrix and the user / service info tables, then split into train and test triads.
# NOTE(review): the name `train` assigned here rebinds the train() function defined in an
# earlier cell of this notebook.
md = MatrixDataset(type_)
u_info = InfoDataset("user",u_enable_columns)
i_info = InfoDataset("service",i_enable_columns)
train,test = md.split_train_test(desnity)

# Settings for the user-side network.
user_params = {
    "type_":"stack", # how the embedding outputs are combined: stack or cat
    "embedding_nums":u_info.embedding_nums,# number of distinct values of each feature to embed
    "embedding_dims":[16,16],
    "in_size":16, # a fully connected layer follows the embedding before entering the ResNet
    "blocks_sizes":[16,8], # the final output size is 8
    "deepths":[2],
    "activation":nn.ReLU,
    "block":ResNetBasicBlock
}

# Settings for the service-side network.
item_params = {
    "type_":"stack", # how the embedding outputs are combined: stack or cat
    "embedding_nums":i_info.embedding_nums,# number of distinct values of each feature to embed
    "embedding_dims":[16,16],
    "in_size":16,
    "blocks_sizes":[16,8], # the final item output size is 8
    "deepths":[2],
    "activation":nn.ReLU,
    "block":ResNetBasicBlock
}

loss_fn = nn.SmoothL1Loss()


# Pair every triad with its user / service info for both splits.
train_data = fed_data_preprocess(train,u_info,i_info)
test_data = fed_data_preprocess(test,u_info,i_info)

# Build the launcher and train it, passing the test split to fit().
model = FedXXXLaunch(train_data,user_params,item_params,[16],loss_fn,1,nn.ReLU)
# model = FedXXX(user_params, item_params, [16], output_dim=1, activation=nn.ReLU)
model.fit(epochs, test_d_traid=test_data, lr=0.001)

# from utils.model_util import traid_to_matrix
# traid_to_matrix()


In [None]:
import numpy as np
# Scratch check of np.stack on two equal-length 1-D ranges; the stacked result is not assigned.
a = np.arange(1,10)
b = np.arange(11,20)
np.stack([a,b])

class B:
    """Holder that stores the vector it is given; no copy is made."""
    def __init__(self,vec) -> None:
        self.vec = vec

class A:
    """Owns a small NumPy vector and can hand it to a B instance."""
    def __init__(self) -> None:
        self.user_vec = np.array([1,2,3,4,5])
    
    def to_b(self):
        """Wrap ``self.user_vec`` in a B; the array object itself is passed, not a copy."""
        return B(self.user_vec)

# Aliasing experiment: B keeps the array object owned by A, so the write through
# b.vec is visible when a.user_vec is inspected afterwards.
a = A()
b = a.to_b()
b.vec[0] = 10

a.user_vec

In [None]:
# Scratch check: reshape a flat list into a single row; -1 lets NumPy infer the column count.
np.reshape([1,2,3],(1,-1))

In [None]:
from sklearn import preprocessing
import numpy as np
from utils.preprocess import min_max_scaler,l2_norm,z_score


# Build a small feature matrix: each row is one sample, each column is one feature.
X_train = np.array([[ 1., -1.,  2.],
                    [ 2.,  0.,  0.],
                    [ 0.,  1., -1.]])
# NOTE(review): the second return value is bound to `_`, which IPython also uses
# for the last cell output; a descriptive name would avoid the clash.
min_max,_ = z_score(X_train)
lis = [1,2,3,4]
# np.float was only an alias of the builtin float and was removed in NumPy 1.24,
# so the builtin is used directly.
test = np.array(lis,dtype=float)
# data = _.transform(test)
# print(data)
print(test)
# test = np.reshape([1,2,3],(1,-1))
# NOTE(review): `test` is 1-D with 4 values while X_train has 3 columns; if the
# returned object is a scikit-learn scaler it presumably expects a
# (n_samples, 3) array - confirm against z_score.
_.inverse_transform(test)


In [None]:
from data import MatrixDataset
from utils.evaluation import mae, mse, rmse
from utils.model_util import freeze_random

from models.FedMF import Clients, Server
from models.FedMF.model import FedMF
"""
RESULT FedMF: 
1000epoch
Density:0.05,type:rt,mae:0.6090604947629363,mse:2.3006280931616536,rmse:1.5167821508580768
Density:0.1,type:rt,mae:0.5071641017174705,mse:1.7203263210368958,rmse:1.3116121076891962
Density:0.15,type:rt,mae:0.46475316376452325,mse:1.4854062714808631,rmse:1.2187724445034287
Density:0.2,type:rt,mae:0.43765304163567553,mse:1.3690546770840173,rmse:1.1700660994508034

"""

freeze_random()  # freeze the random state so results are consistent between runs

for density in [0.05]:

    # 1
    type_ = "tp"
    latent_dim = 8
    lr = 0.001
    lambda_ = 0.1
    epochs = 1000

    # 2
    # type_ = "rt"
    # latent_dim = 12
    # lr = 0.0005
    # lambda_ = 0.1
    # epochs = 2000

    # MatrixDataset here is the one imported from the `data` module in this cell.
    md_data = MatrixDataset(type_)
    train_data, test_data = md_data.split_train_test(density,
                                                     normalize_type="z_score")
    clients = Clients(train_data, md_data.row_n, latent_dim)

    server = Server(md_data.col_n, latent_dim)

    print(md_data.scaler)
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24, so the builtin is used directly.
    test = np.array([1,2,3,4,5],dtype=float)
    # NOTE(review): a scikit-learn style scaler presumably expects a 2-D array
    # here - confirm what md_data.scaler is.
    md_data.scaler.inverse_transform(test)
    # mf = FedMF(server, clients)
    # mf.fit(epochs, lambda_, lr, test_data, scaler=md_data.scaler)
    # y, y_pred = mf.predict(test_data)

    # mae_ = mae(y, y_pred)
    # mse_ = mse(y, y_pred)
    # rmse_ = rmse(y, y_pred)

    # print(f"Density:{density},type:{type_},mae:{mae_},mse:{mse_},rmse:{rmse_}")


In [None]:
import numpy as np


# Build a 40 x 10 matrix as the product of two random factors with inner size 5.
Wtrue = np.random.rand(40, 5)
Htrue = np.random.rand(5, 10)

V0 = Wtrue @ Htrue
print(V0.shape)

# Keep the row / column counts of V0; the next cell uses m and n.
m,n = V0.shape


In [None]:
# Random factors of inner size r for a matrix with m rows and n columns
# (m and n are defined in the previous cell).
r = 6
W = np.random.rand(m,r)
H = np.random.rand(r,n)
# Length of W along axis 0.
np.size(W,0)


In [None]:
from scipy.stats import pearsonr
import numpy as np

# Scratch check: correlation-coefficient matrix of two 2-element vectors.
np.corrcoef([0.5,0.4],[0.5,0.3])


In [None]:
from typing import OrderedDict
import torch
from models.base import FedModelBase
from torch import nn
from tqdm import tqdm
from utils.evaluation import mae, mse, rmse
from utils.model_util import load_checkpoint, save_checkpoint
from utils.mylogger import TNLog

class MLP(nn.Module):
    """User / item embedding model with a named ``my_layer`` block before the output."""

    def __init__(self,
                 n_user,
                 n_item,
                 dim,
                 layers=[8],
                 output_dim=1) -> None:
        """
        Args:
            n_user: number of users.
            n_item: number of items.
            dim: dimension of the latent feature space of each embedding.
            layers (list, optional): sizes of the hidden layers applied to the
                concatenated embeddings. Defaults to [8].
            output_dim (int, optional): size of the final output. Defaults to 1.
        """
        super(MLP, self).__init__()
        self.num_users = n_user
        self.num_items = n_item
        self.latent_dim = dim

        self.embedding_user = nn.Embedding(num_embeddings=self.num_users,
                                           embedding_dim=self.latent_dim)
        self.embedding_item = nn.Embedding(num_embeddings=self.num_items,
                                           embedding_dim=self.latent_dim)

        # forward() iterates self.fc_layers, which the original never created.
        # The stack starts at 2 * dim (user and item embeddings concatenated)
        # and ends at layers[-1], the input size of my_layer.
        sizes = [2 * self.latent_dim] + list(layers)
        self.fc_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(sizes[:-1], sizes[1:])
        ])

        # Named sub-module so its parameters can be selected by the substring
        # "my_layer" in state_dict keys.
        self.my_layer = nn.Sequential(OrderedDict({
            "my_layer":nn.Linear(layers[-1],layers[-1])
        }))

        self.fc_output = nn.Linear(layers[-1], output_dim)

    def forward(self, user_idx, item_idx):
        """Return the prediction for the given user and item indices."""
        user_embedding = self.embedding_user(user_idx)
        item_embedding = self.embedding_item(item_idx)
        x = torch.cat([user_embedding, item_embedding], dim=-1)
        activation = nn.ReLU()
        for fc_layer in self.fc_layers:
            x = activation(fc_layer(x))
        x = self.my_layer(x)
        x = self.fc_output(x)
        return x


def show_params(model):
    """Print each parameter yielded by ``model.named_parameters()``; names are not shown."""
    for _name,tensor in model.named_parameters():
        print(tensor)


# Two independently initialised models: a "central" one and a "local" one.
mlp_center = MLP(10,10,8)
mlp1 = MLP(10,10,8)


show_params(mlp1)
# show_params(mlp_center)

print("=====")

state_dic = mlp_center.state_dict()


# Take the central model's parameters and replace the "my_layer" entries with
# the local model's own parameters.

for name,param in mlp1.named_parameters():
    if "my_layer" in name:
        state_dic[name] = param
        print(param)

# Load the merged state into the local model: central values everywhere except
# the "my_layer" entries, which keep the local values.
mlp1.load_state_dict(state_dic)

show_params(mlp1)



In [None]:
import torch
from torch import nn
# Scratch check of nn.CosineSimilarity on two random (100, 128) tensors, reduced along dim=1.
input1 = torch.randn(100, 128)
input2 = torch.randn(100, 128)
cos = nn.CosineSimilarity(dim=1, eps=1e-6)
output = cos(input1, input2)
# Inspect the shape of the result.
output.shape

In [14]:
import pandas as pd
import json
# Load a JSON file and write it, transposed, to test.xlsx.
# NOTE(review): the input path is an absolute path on one developer's machine;
# the cell will not run elsewhere without changing it.
with open("/Users/wenzhuo/Desktop/test.json") as f:
    data = json.load(f)

# index=[1] builds a single-row frame (presumably `data` is a flat dict of
# scalars - confirm); .T then turns the keys into rows.
df = pd.DataFrame(data, index=[1]).T
df.to_excel("test.xlsx")