## 提取MTA的feature

### 导入相应的包

In [1]:
import sys
import torch
sys.path.append('..')
from torch.utils.data import DataLoader
import pandas as pd
from Preprocess.util_ccc import concordance_correlation_coefficient
import os
from os.path import join as pjoin
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### 模型进行实例化，并将最优权重进行读取

In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np



# parameters
## input_channels = [256,512,1024,2048]
## attention_channels = 2048

class NonLocalBlock(nn.Module):
    """Non-local attention block that mixes self- and mutual-attention.

    The block owns two parallel sets of 1x1 ``Conv1d`` projections, ``R_*``
    and ``F_*``, plus two BatchNorm+ReLU stems.  ``forward`` refines
    ``self_fea`` with an attention map built from both ``self_fea`` and
    ``mutual_fea``; ``selfImage`` selects which parameter set plays the
    "self" role and which plays the "mutual" role.

    Attribute names and registration order are kept stable on purpose so
    that previously saved ``state_dict`` checkpoints still load.
    """

    def __init__(self, in_channels):
        """Build both projection sets.

        Args:
            in_channels: Channel count of both inputs; the internal
                projections use ``in_channels // 2`` channels.
        """
        super(NonLocalBlock, self).__init__()

        conv_nd = nn.Conv1d

        self.in_channels = in_channels
        self.inter_channels = self.in_channels // 2

        self.ImageAfterASPP_bnRelu = nn.Sequential(
            nn.BatchNorm1d(self.in_channels),
            nn.ReLU(inplace=True),
        )

        self.DepthAfterASPP_bnRelu = nn.Sequential(
            nn.BatchNorm1d(self.in_channels),
            nn.ReLU(inplace=True),
        )

        self.R_g = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                           kernel_size=1, stride=1, padding=0)
        self.R_theta = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                               kernel_size=1, stride=1, padding=0)
        self.R_phi = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                             kernel_size=1, stride=1, padding=0)
        self.R_W = conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels,
                           kernel_size=1, stride=1, padding=0)

        self.F_g = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                           kernel_size=1, stride=1, padding=0)
        self.F_theta = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                               kernel_size=1, stride=1, padding=0)
        self.F_phi = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels,
                             kernel_size=1, stride=1, padding=0)
        self.F_W = conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels,
                           kernel_size=1, stride=1, padding=0)

    def forward(self, self_fea, mutual_fea, alpha, selfImage):
        """Refine ``self_fea`` using attention from itself and ``mutual_fea``.

        Args:
            self_fea: Tensor of shape ``(batch, in_channels, length)``.
            mutual_fea: Tensor with the same shape as ``self_fea``.
            alpha: Weight multiplied onto the mutual attention logits before
                they are added to the self attention logits; it must
                broadcast against a ``(batch, length, length)`` tensor.
            selfImage: If true, the ``R_*`` projections and the
                ``ImageAfterASPP`` stem process ``self_fea`` while ``F_*``
                and the ``DepthAfterASPP`` stem process ``mutual_fea``;
                otherwise the roles are swapped.

        Returns:
            Tensor with the same shape as ``self_fea`` (residual output).
        """
        if selfImage:
            self_stem, mutual_stem = self.ImageAfterASPP_bnRelu, self.DepthAfterASPP_bnRelu
            g_conv, self_theta, self_phi, out_conv = self.R_g, self.R_theta, self.R_phi, self.R_W
            mutual_theta, mutual_phi = self.F_theta, self.F_phi
        else:
            self_stem, mutual_stem = self.DepthAfterASPP_bnRelu, self.ImageAfterASPP_bnRelu
            g_conv, self_theta, self_phi, out_conv = self.F_g, self.F_theta, self.F_phi, self.F_W
            mutual_theta, mutual_phi = self.R_theta, self.R_phi

        selfNonLocal_fea = self_stem(self_fea)
        mutualNonLocal_fea = mutual_stem(mutual_fea)
        batch_size = selfNonLocal_fea.size(0)

        # Values always come from the self feature: (batch, length, inter).
        g_x = g_conv(selfNonLocal_fea).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # Attention logits from the mutual feature and from the self feature.
        f = self._affinity(mutualNonLocal_fea, mutual_theta, mutual_phi, batch_size)
        self_f = self._affinity(selfNonLocal_fea, self_theta, self_phi, batch_size)

        # Blend both logit maps, then normalise over the last axis.
        f_div_C = F.softmax(alpha * f + self_f, dim=-1)

        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *selfNonLocal_fea.size()[2:])

        # Project back to in_channels and add the (un-normalised) input.
        return out_conv(y) + self_fea

    def _affinity(self, fea, theta_conv, phi_conv, batch_size):
        """Return the ``(batch, length, length)`` product theta(fea)^T @ phi(fea)."""
        theta_x = theta_conv(fea).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = phi_conv(fea).view(batch_size, self.inter_channels, -1)
        return torch.matmul(theta_x, phi_x)

class MTA(nn.Module):
    """Multi-branch 1x1-conv feature extractor with pairwise attention.

    The input is sent through one 1x1-conv branch per entry of
    ``input_channels`` (``conv1``), each branch is aligned to
    ``attention_channels`` (``conv2``), neighbouring branches are fused by a
    shared ``NonLocalBlock`` (only when there are 3 or 4 branches), every
    branch is reduced to ``outchannels`` (``conv4``), and the flattened
    concatenation is regressed to a single value by ``reg``.

    Attribute names are kept stable so existing checkpoints still load.
    """

    def __init__(self, in_channel, input_channels, attention_channels, outchannels, seq_len=30):
        """Create all branches and the regression head.

        Args:
            in_channel: Channel count of the input tensor.
            input_channels: Output channel count of each first-stage branch;
                its length is the number of branches.
            attention_channels: Common channel width used for attention.
            outchannels: Channel count of each branch after attention.
            seq_len: Length of the last input axis.  It fixes the input size
                of the first linear layer; the default of 30 is the value
                that used to be hard-coded.
        """
        super(MTA, self).__init__()
        self.input_channels = input_channels

        # One branch per requested width -> multi-scale features.
        self.conv1 = nn.ModuleList()
        for i in input_channels:
            temp_part = nn.Sequential(
                nn.Conv1d(in_channels=in_channel, out_channels=i, kernel_size=1),
                nn.BatchNorm1d(i),
                nn.ReLU(inplace=True)
            )
            self.conv1.append(temp_part)

        # Attention needs every branch at the same channel width.
        self.conv2 = nn.ModuleList()
        for i in input_channels:
            temp_part_2 = nn.Sequential(
                nn.Conv1d(in_channels=i, out_channels=attention_channels, kernel_size=1),
                nn.BatchNorm1d(attention_channels),
                nn.ReLU(inplace=True)
            )
            self.conv2.append(temp_part_2)

        # conv3 maps a concatenated branch pair to 2 logits per position;
        # the softmax weight of the second logit becomes the attention alpha.
        self.conv3 = nn.Conv1d(in_channels=attention_channels * 2, out_channels=2, kernel_size=1)
        self.nonblock = NonLocalBlock(in_channels=attention_channels)

        self.conv4 = nn.ModuleList()
        for _ in input_channels:
            temp_part_4 = nn.Sequential(
                nn.Conv1d(in_channels=attention_channels, out_channels=outchannels, kernel_size=1),
                nn.BatchNorm1d(outchannels),
                nn.ReLU(inplace=True)
            )
            self.conv4.append(temp_part_4)

        self.reg = nn.Sequential(
            nn.Linear(in_features=outchannels * len(self.input_channels) * seq_len, out_features=2048),
            nn.Dropout(p=0.1),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 1)
        )

    def _attend_pair(self, self_fea, mutual_fea):
        """Fuse ``mutual_fea`` into ``self_fea`` with a learned per-position alpha."""
        pair_logits = self.conv3(torch.cat([self_fea, mutual_fea], dim=1))
        alpha = F.softmax(pair_logits, dim=1)[:, 1, :].unsqueeze(dim=2)
        return self.nonblock(self_fea, mutual_fea, alpha, True)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape ``(batch, in_channel, seq_len)``.

        Returns:
            A tuple ``(prediction, flat_feature, reg)`` where ``prediction``
            is the output of the regression head, ``flat_feature`` is the
            flattened tensor that was fed into it, and ``reg`` is the
            regression head module itself.
        """
        outs = [branch(x) for branch in self.conv1]
        outs = [align(out) for align, out in zip(self.conv2, outs)]

        # Ring pairing: branch i attends to branch (i + 1) mod n, all computed
        # from the pre-attention features.  As before, attention is only
        # applied for 3 or 4 branches; other counts pass through unchanged.
        # The 3-branch path previously called the ModuleList ``conv2`` instead
        # of ``conv3`` for its last pair and could never run.
        n_branches = len(self.input_channels)
        if n_branches in (3, 4):
            outs = [
                self._attend_pair(outs[idx], outs[(idx + 1) % n_branches])
                for idx in range(n_branches)
            ]

        outs = [reduce(out) for reduce, out in zip(self.conv4, outs)]

        input_feature = torch.cat(outs, dim=1)
        input_feature = input_feature.view(input_feature.shape[0], -1)
        outs = self.reg(input_feature)

        return outs, input_feature, self.reg


####　测试部分
## 现在我们得到了多个尺度的特征
#
# x1 = torch.rand([30,2048,1])
# x2 = torch.rand([30,2048,1])
# x3 = torch.rand([30,2048,1])
# x4 = torch.rand([30,2048,1])
# outs = [x1,x2,x3,x4]
# ### 1. concat feature
#
# conncat_tensor_01 = torch.cat([outs[0], outs[1]], dim=1)
# print(conncat_tensor_01.shape)
# conv1 = nn.Conv1d(in_channels=2048 *2 , out_channels=2, kernel_size=1)
# conncat_tensor_01_conv = conv1(conncat_tensor_01)
# alpha_01 = F.softmax(conncat_tensor_01_conv,dim=1)
# alpha_0 = alpha_01[:,0,:]
# alpha_1 = alpha_01[:,1,:].unsqueeze(dim=2)






# nonblock = NonLocalBlock(in_channels= 2048)
# temp_feature_0 = nonblock(outs[0], outs[1], alpha_1, False)



# conv1 = nn.Conv1d(in_channels=2048 * 2 ,out_channels=2,kernel_size=1)
# print(conv1(conncat_tensor_01).shape)

# feature = torch.rand([2,2048,30])
# in_channel = 2048
# input_channels = [256,512,1024,2048]
# attention_channels = 2048
# outchannels = 1024
# model = MTA(in_channel = in_channel, input_channels=input_channels,attention_channels= attention_channels,outchannels=outchannels)
# print(model)
#
# results = model(feature)
#
# print(results.shape)






In [24]:
# Hyper-parameters of the MTA network instantiated for feature extraction.
in_channel, attention_channels, outchannels = 2048, 2048, 512
input_channels = [256, 512, 1024, 2048]

model = MTA(
    in_channel=in_channel,
    input_channels=input_channels,
    attention_channels=attention_channels,
    outchannels=outchannels,
)

In [25]:
# Map the checkpoint onto the CPU while loading so it can be restored even on
# a machine without the GPU it was saved from; the model is moved to the
# target device later.
model.load_state_dict(torch.load('/hy-tmp/Code/Best_weights/Resnet-MTA/Epcoh_159_Rmse_6.803299427032471_PCC_0.2584606433517226_CCC_0.15121600196380586.pth', map_location='cpu'))

<All keys matched successfully>

### 数据集进行载入

- [x] 这里发现一个问题：原始的 dataset 只返回 feature 和 label。为了便于按样本保存，还需要同时提取并返回样本 id 和文件名。

In [26]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import scipy.io as scio

class DepressDataset(Dataset):
    """Dataset over ``.mat`` files listed one path per line in a text file.

    Every ``.mat`` file is expected to hold a ``'feature'`` array and a
    ``'label'`` array.  Paths are split on ``'/'``: the last component is
    used as the file name and the one before it as the sample id.
    """

    def __init__(self, txt_path):
        """Read the list of ``.mat`` paths.

        Args:
            txt_path: UTF-8 text file containing one ``.mat`` path per line.
        """
        self.txt_path = txt_path

        with open(self.txt_path, 'r', encoding='utf-8') as f:
            # Drop blank lines (e.g. a trailing newline at the end of the
            # file); otherwise they would become empty-path samples that
            # inflate __len__ and crash in loadmat.
            stripped = (line.strip() for line in f)
            self.features = [path for path in stripped if path]

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(feature, label, sample_id, feature_name)`` where ``feature``
            is the ``'feature'`` array as a tensor, ``label`` is the
            ``'label'`` array flattened to a 1-D float tensor, ``sample_id``
            is the parent directory name and ``feature_name`` is the file
            name.
        """
        data_file = self.features[index]

        # e.g. ".../302/1.mat" -> sample_id "302", feature_name "1.mat"
        path_parts = data_file.split('/')
        sample_id = path_parts[-2]
        feature_name = path_parts[-1]

        data = scio.loadmat(data_file)  # dict keyed by variable name
        feature_th = torch.from_numpy(data['feature'])
        label_th = torch.from_numpy(data['label']).reshape(-1).float()

        return feature_th, label_th, sample_id, feature_name

    def __len__(self):
        """Return the number of listed ``.mat`` files."""
        return len(self.features)


In [27]:
#测试部分

# Smoke test: pull a single batch and show the file names it contains.
train_dataset = DepressDataset('../../train_data.txt')
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=10)

for data in train_dataloader:
    # Each batch unpacks to (feature, label, sample_id, feature_name).
    feature, label, sample_id, feature_name = data
    print(feature_name)
    break

('1.mat', '2.mat', '3.mat', '4.mat', '5.mat', '6.mat', '7.mat', '8.mat', '9.mat', '10.mat')


### 开始模型的提取

由于每个样本对应的特征数量不一致，因此先对每个样本单独保存，之后再进行聚合。

In [33]:
# Loader used for extraction: one listed .mat file per batch, in file order.
train_file = '../../train_valid.txt'
train_batch_size = 1

train_dataset = DepressDataset(train_file)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=False,
    batch_size=train_batch_size,
)


In [34]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [35]:
save_path = '/hy-tmp/Feature_save_Resnet/'
# exist_ok avoids the separate exists() check and also creates missing parents.
os.makedirs(save_path, exist_ok=True)

model.to(device)
model.eval()

# The saved feature is the regression head without its final Linear layer.
# model(...) returns this same head as its third output, so the slice is
# loop-invariant and built once.
temp_model = model.reg[:-1]

# Pure inference: no_grad stops autograd from recording a graph per sample.
with torch.no_grad():
    for data in tqdm(train_loader):

        feature, label, sample_id, feature_name = data

        # The model's Conv1d layers take channels on axis 1, so swap the last
        # two axes of the loaded feature.
        feature = feature.permute(0, 2, 1).to(device)

        _, mta_feature, _ = model(feature)

        temp_feature = temp_model(mta_feature).cpu().numpy()
        label = label.numpy()

        # One sub-folder per sample id; the loader yields batches of size 1,
        # so element 0 is the only entry.
        save_dir = pjoin(save_path, sample_id[0])
        os.makedirs(save_dir, exist_ok=True)

        scio.savemat(pjoin(save_dir, feature_name[0]), {'feature': temp_feature, 'label': label})
    
    

100%|██████████| 213322/213322 [42:35<00:00, 83.46it/s]


In [36]:
# Sanity check: reload one saved file and display the shape of its feature.
mat_file  = '/hy-tmp/Feature_save_Resnet/302/1.mat'

mat_data = scio.loadmat(mat_file)

mat_data['feature'].shape

(1, 2048)

### 将每个文件夹的特征进行融合

#### numpy 对空数组进行拼接比较麻烦，因此先用 tensor 进行拼接，再转化为 numpy

In [37]:
### 1. List the per-sample folders written by the extraction step.

mat_path = '/hy-tmp/Feature_save_Resnet/'

# Only directories hold per-sample files; sorting makes the run reproducible.
mat_file = sorted(i for i in os.listdir(mat_path) if os.path.isdir(pjoin(mat_path, i)))


### 1.1 Output directory for the fused per-sample features.

mta_fusion_dir = '/hy-tmp/Feature_fusion_resnet_mta_save/'

os.makedirs(mta_fusion_dir, exist_ok=True)


def _frame_sort_key(file_name):
    """Sort '2.mat' before '10.mat'; non-numeric names go last, alphabetically."""
    stem = os.path.splitext(file_name)[0]
    if stem.isdecimal():
        return (0, int(stem), '')
    return (1, 0, file_name)


### 2. Read every file's feature & label and fuse them per sample.

for file in tqdm(mat_file):

    mat_full_path = pjoin(mat_path, file)

    # os.listdir returns names in arbitrary order; sort numerically so the
    # fused rows follow the file numbering (1.mat, 2.mat, ..., 10.mat, ...).
    mat_data = [
        pjoin(mat_full_path, temp_file)
        for temp_file in sorted(os.listdir(mat_full_path), key=_frame_sort_key)
    ]

    if not mat_data:
        # Nothing to fuse. Without this guard the label of the previously
        # processed folder would be written for this sample.
        continue

    feature_parts = []
    label_parts = []

    for data_file in mat_data:

        data = scio.loadmat(data_file)

        feature_parts.append(torch.from_numpy(data['feature']))
        label_parts.append(torch.from_numpy(data['label']))

    # Concatenate once instead of growing a tensor inside the loop.
    total_feature = torch.cat(feature_parts, dim=0)
    total_label = torch.cat(label_parts, dim=0)

    # 'label' stores the label of the last file read; 'check_label' keeps one
    # label per row so the per-sample consistency can be verified.
    mat_label = label_parts[-1].numpy()

    save_name = file + '.mat'

    scio.savemat(pjoin(mta_fusion_dir, save_name), {'feature': total_feature.numpy(), 'label': mat_label, 'check_label': total_label.numpy()})
    
    


100%|██████████| 275/275 [01:35<00:00,  2.88it/s]


####  检验特征的使用

In [38]:
# Reload one fused file and display its full contents for inspection.
check_mat_file = '/hy-tmp/Feature_fusion_resnet_mta_save/300.mat'

check_mat = scio.loadmat(check_mat_file)

check_mat

{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Mon Aug 29 18:06:43 2022',
 '__version__': '1.0',
 '__globals__': [],
 'feature': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 'label': array([[2.]], dtype=float32),
 'check_label': array([[2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
        [2.],
  

In [89]:
# One label row per fused feature row.
check_mat['check_label'].shape

(648, 1)

#### 后续使用方便，将训练&验证，测试的样本id，进行保存

In [79]:
# Table with one row per sample; the 'file' column is read below as the sample id.
full_df = pd.read_csv('/hy-tmp/file_count.csv')

full_df

Unnamed: 0.1,Unnamed: 0,file,num
0,0,300,648
1,1,301,824
2,2,302,758
3,3,303,985
4,4,304,792
...,...,...,...
270,270,713,790
271,271,715,1303
272,272,716,969
273,273,717,979


In [81]:
# Same table layout, restricted to the held-out test samples.
test_df = pd.read_csv('/hy-tmp/test_count.csv')
                      
test_df

Unnamed: 0.1,Unnamed: 0,file,num
0,189,600,685
1,191,602,851
2,193,604,627
3,194,605,808
4,195,606,529
5,196,607,792
6,198,609,1160
7,200,615,1226
8,202,618,890
9,203,619,1168


In [82]:
# Sample ids of the test split, taken from the 'file' column.
test_index = test_df['file'].values

# np.save appends the '.npy' extension when the file name does not have one.
np.save('/hy-tmp/test_index',test_index)

In [86]:
# Every sample id that is not in the test split goes into train + valid.
held_out = full_df['file'].isin(test_index)
train_valid_index = full_df.loc[~held_out, 'file'].values

np.save('/hy-tmp/train_valid_index', train_valid_index)