In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.experimental import enable_iterative_imputer
from sklearn.model_selection import train_test_split
from sklearn.impute import *
from torch.utils.data import Dataset, DataLoader
from sklearn import impute
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_result(test_true_1array, test_pred_1array):
    """
    Score imputed values against the ground truth.

    Both arguments are forwarded unchanged to the ``sklearn.metrics`` scorers,
    ground truth first and predictions second.

    Returns:
        list: ``[mae, mse, mape, r2]`` in that order.
    """
    scorers = (
        metrics.mean_absolute_error,
        metrics.mean_squared_error,
        metrics.mean_absolute_percentage_error,
        metrics.r2_score,
    )
    return [score(test_true_1array, test_pred_1array) for score in scorers]

# Shared accumulators filled in by each model cell below, keyed by model name.
# result_dict:    name -> [mae, mse, mape, r2, seconds spent on the timed test step]
result_dict={}
# predict_dict:   name -> {'true': ..., 'pred': ...} values taken at the masked test cells
predict_dict={}
# mask_pred_dict: name -> {'true': DataFrame, 'pred': DataFrame} with observed cells blanked out
mask_pred_dict={}

# 1、生成掩码数据

In [3]:
np.random.seed(42)  # fix NumPy's global RNG so the generated mask is reproducible
mask_rate=0.5  # probability that any single cell is replaced by NaN
file_path=os.listdir('./data')
# `DataFrame.to_excel` fails when the target directory is missing, so make sure
# the output folder exists before writing into it.
os.makedirs('./mask_data',exist_ok=True)
for data_name in file_path:
    print(data_name)
    data=pd.read_excel(f'./data/{data_name}',index_col=0)  # complete (ground-truth) data
    # Blank out cells wherever the uniform random draw falls below `mask_rate`.
    df_missing = data.mask(np.random.random(data.shape) < mask_rate)
    df_missing.to_excel(f'./mask_data/{data_name}')  # persist the masked copy

    # Only the first listed file is processed.
    break

16、代表年数据 - 副本.xlsx


In [4]:
#---------- Select the file to work on ----------
data_name=file_path[0]
df_missing=pd.read_excel(f'./mask_data/{data_name}',index_col=0)  # masked copy (contains NaN)
data=pd.read_excel(f'./data/{data_name}',index_col=0)  # complete ground truth

#---------- Calendar features derived from the index ----------
# Read the components straight from the DatetimeIndex instead of calling
# `Series.apply(func, 1)`: the stray positional `1` is not an axis there (it is
# bound to a different, deprecated Series.apply parameter), and the vectorised
# attributes give the same values without a Python-level loop.
timestamps=pd.to_datetime(df_missing.index)
df_stamp=pd.DataFrame(
    {
        'month':np.asarray(timestamps.month),
        'day':np.asarray(timestamps.day),
        'weekday':np.asarray(timestamps.weekday),  # Monday == 0, same as Timestamp.weekday()
        'hour':np.asarray(timestamps.hour),
    },
    index=df_missing.index,
)
data_stamp = df_stamp.values  # columns: month, day, weekday, hour

#---------- Train / test split ----------
# shuffle=False keeps row order, so the first 70% of rows form the training
# split and the remainder the test split, identically for all three inputs.
train_miss,test_miss,train_data,test_data,train_stamp,test_stamp=train_test_split(df_missing,
                                                                                  data,data_stamp,
                                                                                  train_size=0.7,shuffle=False)

# 2、MICE插值

In [5]:
# MICE-style imputation using scikit-learn's IterativeImputer.
mice_imputer = impute.IterativeImputer(max_iter=10, random_state=42)

# Fit on the masked training split only, then fill both splits.
mice_imputer.fit(train_miss)
train_imputed = mice_imputer.transform(train_miss)

# Only the transform of the test split is timed.
start_time=time.time()
test_imputed = mice_imputer.transform(test_miss)
end_time=time.time()

# Boolean grids flagging the cells that are NaN in each masked split.
train_nan_mask=np.isnan(train_miss)
test_nan_mask=np.isnan(test_miss)

train_true_1array=train_data.values[train_nan_mask]  # ground truth at the masked training cells
train_pred_1array=train_imputed[train_nan_mask]  # imputed values at the masked training cells
test_true_1array=test_data.values[test_nan_mask]  # ground truth at the masked test cells
test_pred_1array=test_imputed[test_nan_mask]  # imputed values at the masked test cells

# Metrics on the masked test cells, followed by the elapsed transform time.
mice_result=[*get_result(test_true_1array, test_pred_1array), end_time-start_time]
result_dict['MICE']=mice_result
predict_dict['MICE']={'true':test_true_1array,'pred':test_pred_1array}

# Frames that keep values only where the test data was masked (observed cells become NaN).
test_imputed_df=pd.DataFrame(test_imputed,index=test_miss.index,columns=test_miss.columns)
observed_cells=test_miss.notna()
true_mask_df=test_data.mask(observed_cells)
pred_mask_df=test_imputed_df.mask(observed_cells)
mask_pred_dict['MICE']={'true':true_mask_df,'pred':pred_mask_df}

# 3、KNNI插值

In [6]:
# K-nearest-neighbours imputer (10 neighbours).
knni_imputer = impute.KNNImputer(n_neighbors=10)

# Fit on the masked training split, then fill both splits.
knni_imputer.fit(train_miss)
train_imputed = knni_imputer.transform(train_miss)

# Only the transform of the test split is timed.
start_time=time.time()
test_imputed = knni_imputer.transform(test_miss)
end_time=time.time()

# Boolean grids flagging the cells that are NaN in each masked split.
train_nan_mask=np.isnan(train_miss)
test_nan_mask=np.isnan(test_miss)

train_true_1array=train_data.values[train_nan_mask]  # ground truth at the masked training cells
train_pred_1array=train_imputed[train_nan_mask]  # imputed values at the masked training cells
test_true_1array=test_data.values[test_nan_mask]  # ground truth at the masked test cells
test_pred_1array=test_imputed[test_nan_mask]  # imputed values at the masked test cells

# Metrics on the masked test cells, followed by the elapsed transform time.
knni_result=[*get_result(test_true_1array, test_pred_1array), end_time-start_time]
result_dict['KNNI']=knni_result
predict_dict['KNNI']={'true':test_true_1array,'pred':test_pred_1array}

# Frames that keep values only where the test data was masked (observed cells become NaN).
test_imputed_df=pd.DataFrame(test_imputed,index=test_miss.index,columns=test_miss.columns)
observed_cells=test_miss.notna()
true_mask_df=test_data.mask(observed_cells)
pred_mask_df=test_imputed_df.mask(observed_cells)
mask_pred_dict['KNNI']={'true':true_mask_df,'pred':pred_mask_df}

# 4、SimpleImputer插值

In [7]:
# Baseline: replace every NaN with the mean of its column.
simp_imputer = SimpleImputer(strategy='mean', missing_values=np.nan)

# Fit on the masked training split, then fill both splits.
simp_imputer.fit(train_miss)
train_imputed = simp_imputer.transform(train_miss)

# Only the transform of the test split is timed.
start_time=time.time()
test_imputed = simp_imputer.transform(test_miss)
end_time=time.time()

# Boolean grids flagging the cells that are NaN in each masked split.
train_nan_mask=np.isnan(train_miss)
test_nan_mask=np.isnan(test_miss)

train_true_1array=train_data.values[train_nan_mask]  # ground truth at the masked training cells
train_pred_1array=train_imputed[train_nan_mask]  # imputed values at the masked training cells
test_true_1array=test_data.values[test_nan_mask]  # ground truth at the masked test cells
test_pred_1array=test_imputed[test_nan_mask]  # imputed values at the masked test cells

# Metrics on the masked test cells, followed by the elapsed transform time.
simp_result=[*get_result(test_true_1array, test_pred_1array), end_time-start_time]
result_dict['SimpleImputer']=simp_result
predict_dict['SimpleImputer']={'true':test_true_1array,'pred':test_pred_1array}

# Frames that keep values only where the test data was masked (observed cells become NaN).
test_imputed_df=pd.DataFrame(test_imputed,index=test_miss.index,columns=test_miss.columns)
observed_cells=test_miss.notna()
true_mask_df=test_data.mask(observed_cells)
pred_mask_df=test_imputed_df.mask(observed_cells)
mask_pred_dict['SimpleImputer']={'true':true_mask_df,'pred':pred_mask_df}

# 5、线性插值

In [8]:
# Linear interpolation down each column; limit_direction='both' also fills
# NaN runs at the start and end of a column.
train_imputed=train_miss.interpolate(method='linear',limit_direction='both')
# Only the interpolation of the test split is timed.
start_time=time.time()
test_imputed=test_miss.interpolate(method='linear',limit_direction='both')
end_time=time.time()

train_true_1array=train_data.values[np.isnan(train_miss)]  # ground truth at the masked training cells
# `interpolate` returns a DataFrame, so go through `.values` to obtain the same
# flat array of masked-cell values that the other imputer cells produce
# (indexing the frame directly yields a NaN-masked DataFrame instead).
train_pred_1array=train_imputed.values[np.isnan(train_miss)]  # imputed values at the masked training cells
test_true_1array=test_data.values[np.isnan(test_miss)]  # ground truth at the masked test cells
test_pred_1array=test_imputed.values[np.isnan(test_miss)]  # imputed values at the masked test cells

# Metrics on the masked test cells, followed by the elapsed interpolation time.
line_result=get_result(test_true_1array, test_pred_1array)+[end_time-start_time]
result_dict['Linear']=line_result
predict_dict['Linear']={'true':test_true_1array,'pred':test_pred_1array}

# Frames that keep values only where the test data was masked (observed cells become NaN).
test_imputed_df=pd.DataFrame(test_imputed,index=test_miss.index,columns=test_miss.columns)
true_mask_df=test_data.mask(test_miss.isna()==False)
pred_mask_df=test_imputed_df.mask(test_miss.isna()==False)
mask_pred_dict['Linear']={'true':true_mask_df,'pred':pred_mask_df}

# 6、深度学习模型插值

# 6.1、Transformer

In [9]:
def get_debatch_data(preds):
    """
    Collapse overlapping sliding windows back into a single sequence.

    Window ``k`` is taken to cover time steps ``k .. k + window_size - 1``
    (consecutive windows shifted by exactly one step). Each time step is taken
    from the first window that contains it, i.e. the whole first window
    followed by the last row of every later window.

    Args:
        preds: array-like of shape ``(n_windows, window_size, n_features)``.
            A 2-D ``(n_windows, window_size)`` input is treated as having a
            single feature.

    Returns:
        numpy.ndarray of shape ``(n_windows + window_size - 1, n_features)``
        with the dtype of the input; an empty ``(0, n_features)`` array when
        there are no windows or the windows are empty.

    Raises:
        ValueError: if ``preds`` has fewer than two dimensions.
    """
    preds = np.asarray(preds)
    if preds.ndim < 2:
        raise ValueError('preds must have shape (n_windows, window_size[, n_features])')
    if preds.ndim == 2:
        # One value per time step: add an explicit feature axis.
        preds = preds[:, :, np.newaxis]
    if preds.shape[0] == 0 or preds.shape[1] == 0:
        return np.empty((0, preds.shape[-1]), dtype=preds.dtype)
    # The first window supplies steps 0..window_size-1; every later window only
    # contributes its final step, the single one not covered by earlier windows.
    return np.concatenate([preds[0], preds[1:, -1]], axis=0)

import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding
import numpy as np

# from data_provider.data_factory import data_provider
# from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
# import torch
# import torch.nn as nn
from torch import optim
import os
import time
import warnings
# import numpy as np
import argparse

class Model(nn.Module):
    """
    Encoder-only Transformer that maps an input window to a same-length output window.

    The network is: ``DataEmbedding`` -> stack of ``configs.e_layers``
    full-attention ``EncoderLayer`` blocks (closed by a ``LayerNorm``) ->
    linear projection from ``d_model`` to ``c_out`` channels.

    ``configs`` must expose: enc_in, d_model, embed, freq, dropout, factor,
    output_attention, n_heads, d_ff, activation, e_layers, c_out.
    """

    def __init__(self, configs):
        super().__init__()

        # Value + time-mark embedding (implementation lives in layers.Embed).
        self.enc_embedding = DataEmbedding(
            configs.enc_in,
            configs.d_model,
            configs.embed,
            configs.freq,
            configs.dropout,
        )

        # Build the encoder blocks one by one, each with its own attention module.
        encoder_layers = []
        for _ in range(configs.e_layers):
            attention = AttentionLayer(
                FullAttention(
                    False,
                    configs.factor,
                    attention_dropout=configs.dropout,
                    output_attention=configs.output_attention,
                ),
                configs.d_model,
                configs.n_heads,
            )
            encoder_layers.append(
                EncoderLayer(
                    attention,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            encoder_layers,
            norm_layer=torch.nn.LayerNorm(configs.d_model),
        )

        # Output head: project each encoded step back to the data channels.
        self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Embed, encode and project ``x_enc``; ``x_dec``, ``x_mark_dec`` and ``mask`` are not used."""
        embedded = self.enc_embedding(x_enc, x_mark_enc)
        encoded, attns = self.encoder(embedded, attn_mask=None)
        return self.projection(encoded)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the imputed window produced by :meth:`imputation`."""
        # [B, L, D]
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
    
class Exp_Imputation():
    """
    Train / validate / test harness for masked-value imputation models.

    Each batch from the loaders is a triple ``(batch_x_mask, batch_x,
    batch_x_mark)``: the series with NaN at the masked cells, the complete
    series, and the time features. The model receives the NaN-free
    (zero-filled) masked series and is scored only on the masked cells.

    Args:
        args: configuration namespace; this class reads ``use_gpu``,
            ``learning_rate`` and ``train_epochs`` and passes the whole object
            to ``Model`` / ``adjust_learning_rate``.
        train_loader: iterable of training batches.
        test_loader: iterable of evaluation batches.
        model: optional ``nn.Module``; when omitted a ``Model(args)`` is built.
    """

    def __init__(self, args,train_loader,test_loader,model=None):
        self.test_loader=test_loader
        self.train_loader=train_loader
        self.args=args

        self.device=torch.device('cuda' if args.use_gpu else 'cpu')
        # Set by `train` once a checkpoint has been written.
        self.best_model_path=None

        if model is None:
            self.model=Model(self.args).to(self.device)
        else:
            self.model=model.to(self.device)

    def _select_optimizer(self):
        """Return an Adam optimizer over the model parameters."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Return the training / validation loss (mean squared error)."""
        criterion = nn.MSELoss()
        return criterion

    def _prepare_batch(self, batch_x_mask, batch_x, batch_x_mark):
        """
        Move one batch to ``self.device`` and derive the observation mask.

        Returns:
            tuple ``(inputs, target, marks, mask)`` where ``inputs`` is
            ``batch_x_mask`` with NaN replaced by 0, ``target`` is the complete
            series, ``marks`` are the time features and ``mask`` is an int
            tensor holding 1 at observed cells and 0 at masked (NaN) cells.
        """
        target = batch_x.float().to(self.device)
        marks = batch_x_mark.float().to(self.device)
        missing = batch_x_mask.isnan()
        mask = (~missing).int().to(self.device)
        # Out-of-place fill so the batch handed in by the loader is left untouched.
        inputs = batch_x_mask.masked_fill(missing, 0.0).to(torch.float32).to(self.device)
        return inputs, target, marks, mask

    def vali(self, test_loader, criterion):
        """
        Average ``criterion`` over ``test_loader``, evaluated on masked cells only.

        The model is switched to eval mode for the pass and back to train mode
        before returning.
        """
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for batch_x_mask, batch_x, batch_x_mark in test_loader:
                inputs, target, marks, mask = self._prepare_batch(batch_x_mask, batch_x, batch_x_mark)

                outputs = self.model(inputs, marks, None, None, mask)
                pred = outputs.detach().cpu()
                true = target.detach().cpu()
                mask = mask.detach().cpu()

                loss = criterion(pred[mask == 0], true[mask == 0])
                total_loss.append(loss.item())
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """
        Train the model for up to ``args.train_epochs`` epochs.

        After every epoch the model is scored on ``self.test_loader``; the
        per-epoch ``[train_loss, test_loss]`` pairs are stored in
        ``self.loss_dict``. Whenever the test loss improves on the best value
        seen so far, the weights are written to
        ``./result/<setting>/checkpoint.pth`` and the path is remembered in
        ``self.best_model_path``. ``EarlyStopping`` and
        ``adjust_learning_rate`` come from ``utils.tools`` (not shown here).
        """
        time_now = time.time()
        train_steps = len(self.train_loader)
        early_stopping = EarlyStopping(patience=10, verbose=True)
        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        path = os.path.join('./result/', setting)
        os.makedirs(path, exist_ok=True)

        self.loss_dict={}
        self.best_loss=np.inf
        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()

            epoch_time = time.time()
            for i, (batch_x_mask, batch_x, batch_x_mark) in enumerate(self.train_loader):
                iter_count += 1
                model_optim.zero_grad()

                inputs, target, marks, mask = self._prepare_batch(batch_x_mask, batch_x, batch_x_mark)
                outputs = self.model(inputs, marks, None, None, mask)

                # The loss only looks at the cells the model could not see.
                loss = criterion(outputs[mask == 0], target[mask == 0])
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)

            test_loss = self.vali(self.test_loader, criterion)

            self.loss_dict[epoch]=[train_loss,test_loss]

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f}  Test Loss: {3:.7f}".format(
                epoch + 1, train_steps, train_loss, test_loss))
            early_stopping(test_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

            if self.best_loss>test_loss:
                # Record the new best so later, worse epochs cannot overwrite
                # the checkpoint with inferior weights.
                self.best_loss=test_loss
                best_model_path = path + '/' + 'checkpoint.pth'
                torch.save(self.model.state_dict(), best_model_path)
                self.best_model_path=best_model_path

    def test(self, setting, test=0):
        """
        Run the model over ``self.test_loader`` and persist the results.

        Args:
            setting: run name used for the output folders.
            test: when truthy, reload the checkpoint at ``self.best_model_path``
                before evaluating.

        Returns:
            tuple ``(preds, trues, masks)`` of numpy arrays concatenated over
            all batches; ``masks`` holds 1 at observed cells and 0 at masked ones.

        Raises:
            RuntimeError: if ``test`` is truthy but no checkpoint path is known.

        Side effects: writes a preview plot for every 20th batch under
        ``./test_results/<setting>/`` (via ``visual``), appends the mse / mae to
        ``result_imputation.txt`` and saves ``metrics.npy``, ``pred.npy`` and
        ``true.npy`` under ``./results/<setting>/``.
        """
        if test:
            print('loading model')
            if self.best_model_path is None:
                raise RuntimeError('No checkpoint available: call train() before test(test=1).')
            # map_location keeps the load working when the checkpoint was
            # written from a different device than the current one.
            self.model.load_state_dict(torch.load(self.best_model_path, map_location=self.device))

        preds = []
        trues = []
        masks = []

        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x_mask, batch_x, batch_x_mark) in enumerate(self.test_loader):
                inputs, target, marks, mask = self._prepare_batch(batch_x_mask, batch_x, batch_x_mark)

                outputs = self.model(inputs, marks, None, None, mask)

                pred = outputs.detach().cpu().numpy()
                true = target.detach().cpu().numpy()
                mask_np = mask.detach().cpu().numpy()
                preds.append(pred)
                trues.append(true)
                masks.append(mask_np)

                if i % 20 == 0:
                    # Preview for the last channel of the first window:
                    # observed cells keep the truth, masked cells show the prediction.
                    observed = mask_np[0, :, -1]
                    filled = true[0, :, -1] * observed + pred[0, :, -1] * (1 - observed)
                    visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.concatenate(preds, 0)
        trues = np.concatenate(trues, 0)
        masks = np.concatenate(masks, 0)
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        # Metrics are computed on the masked cells only.
        mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])

        print('mse:{}, mae:{}'.format(mse, mae))
        with open("result_imputation.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write('mse:{}, mae:{}'.format(mse, mae))
            f.write('\n')
            f.write('\n')

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return preds,trues,masks


class Dataset_me(Dataset):
    """
    Sliding-window dataset over three row-aligned sequences.

    Item ``i`` is the window of rows ``i .. i + windows_size - 1`` taken from
    the masked data, the complete data and the time features, each returned
    as a ``torch.FloatTensor``.
    """

    def __init__(self,miss_data,full_data,date,windows_size=96):
        super().__init__()
        self.miss_data=miss_data  # series with missing (masked) cells
        self.full_data=full_data  # complete series
        self.date=date  # time features
        self.windows_size=windows_size

    def __len__(self):
        # Number of full windows that fit when sliding one row at a time.
        return len(self.miss_data) - self.windows_size + 1

    def __getitem__(self, index):
        window = slice(index, index + self.windows_size)
        sources = (self.miss_data, self.full_data, self.date)
        # Order of the returned tuple: masked window, complete window, time features.
        return tuple(torch.FloatTensor(source[window]) for source in sources)

In [10]:
# Train the Transformer imputer, evaluate it on the test split and record its
# results next to the classical imputers.
windows_size=96
batch_size=128

#----------------- Standardisation -----------------
# The scaler is fitted on the complete training split and then applied to the
# masked and complete versions of both splits.
scolar=StandardScaler()
scolar.fit(train_data)
train_miss_normal=scolar.transform(train_miss)
train_data_normal=scolar.transform(train_data)
test_miss_normal=scolar.transform(test_miss)
test_data_normal=scolar.transform(test_data)

#-------------- Wrap the data in datasets / loaders --------------------
train_dataset=Dataset_me(train_miss_normal,train_data_normal,train_stamp,windows_size=windows_size)
test_dataset=Dataset_me(test_miss_normal,test_data_normal,test_stamp,windows_size=windows_size)

# No shuffle argument is passed for the training loader, so the DataLoader default applies.
train_loader=DataLoader(train_dataset,batch_size=batch_size)
test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

#--------------------- Hyper-parameters --------------------
# argparse is used only as a namespace factory: parse_args(args=[]) below
# ignores the command line, so every option takes its default.
parser = argparse.ArgumentParser(description='Transformer')
parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
parser.add_argument('--d_model', type=int, default=256, help='dimension of model')
parser.add_argument('--embed', type=str, default='timeF',help='time features encoding, options:[timeF, fixed, learned]')

parser.add_argument('--freq', type=str, default='h',
                    help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
parser.add_argument('--dropout', type=float, default=0.1, help='dropout')

parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--d_ff', type=int, default=256, help='dimension of fcn')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--c_out', type=int, default=7, help='output size')
parser.add_argument('--train_epochs', type=int, default=20, help='output train_epochs')
# NOTE(review): type=bool calls bool() on the raw string, so any non-empty value
# (even 'False') would parse as True; harmless here because only the default is used.
parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning_rate')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
args = parser.parse_args(args=[])

# Only keep the GPU flag when CUDA is actually available.
args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
args.enc_in=train_data_normal.shape[1]# override: number of input feature columns
args.c_out=train_data_normal.shape[1]# override: number of output feature columns

# Torch device selected from the resolved use_gpu flag.
device=torch.device('cuda' if args.use_gpu else 'cpu')

#-------------------- Build the model, train and test -----------------
setting = 'transformer'
model=Model(args)
exp=Exp_Imputation(args,train_loader,test_loader,model)
exp.train(setting)# training

#--------- Inference, window merging and inverse scaling ----------
# Only the exp.test(...) call is timed.
start_time=time.time()
preds,trues,masks=exp.test(setting, test=1)# evaluate on the test loader
end_time=time.time()
# Merge the overlapping windows back into one row per time step.
trues=get_debatch_data(trues)
preds=get_debatch_data(preds)
masks=get_debatch_data(masks)

# Undo the standardisation so metrics are in the original units.
preds_anti=scolar.inverse_transform(preds)
trues_anti=scolar.inverse_transform(trues)

# Metrics on the masked cells (masks == 0), followed by the elapsed test time.
tran_result=get_result(trues_anti[masks == 0], preds_anti[masks == 0])+[end_time-start_time]
result_dict['Transformer']=tran_result
predict_dict['Transformer']={'true':trues_anti[masks == 0],'pred':preds_anti[masks == 0]}

# Frames that keep values only at the masked cells (observed cells become NaN).
trues_anti_df=pd.DataFrame(trues_anti,index=test_miss.index,columns=test_miss.columns)
preds_anti_df=pd.DataFrame(preds_anti,index=test_miss.index,columns=test_miss.columns)
true_mask_df=trues_anti_df.mask(masks==1)
pred_mask_df=preds_anti_df.mask(masks==1)
mask_pred_dict['Transformer']={'true':true_mask_df,'pred':pred_mask_df}

Epoch: 1 cost time: 3.3104100227355957
Epoch: 1, Steps: 48 | Train Loss: 1.0391156  Test Loss: 1.4832276
Validation loss decreased (inf --> 1.483228).  Saving model ...
Updating learning rate to 0.001
Epoch: 2 cost time: 2.7999377250671387
Epoch: 2, Steps: 48 | Train Loss: 0.6968239  Test Loss: 1.3600604
Validation loss decreased (1.483228 --> 1.360060).  Saving model ...
Updating learning rate to 0.0005
Epoch: 3 cost time: 2.8275134563446045
Epoch: 3, Steps: 48 | Train Loss: 0.6311453  Test Loss: 0.5401893
Validation loss decreased (1.360060 --> 0.540189).  Saving model ...
Updating learning rate to 0.00025
Epoch: 4 cost time: 2.8958332538604736
Epoch: 4, Steps: 48 | Train Loss: 0.5106380  Test Loss: 0.3605473
Validation loss decreased (0.540189 --> 0.360547).  Saving model ...
Updating learning rate to 0.000125
Epoch: 5 cost time: 2.8622918128967285
Epoch: 5, Steps: 48 | Train Loss: 0.4604556  Test Loss: 0.3255971
Validation loss decreased (0.360547 --> 0.325597).  Saving model ...
U

AttributeError: module 'numpy.core.multiarray' has no attribute 'generic'

In [30]:
        # Debug scratch cell: an inlined copy of the body of Exp_Imputation.test,
        # run against the notebook-level `exp`, `test_loader`, `device` and
        # `setting`. Tensors are converted with `.tolist()` + `np.array(...)`
        # instead of `.numpy()`.
        # NOTE(review): presumably an attempt to work around the AttributeError
        # shown in this cell's output - confirm, and remove the cell once resolved.
        if 1:
            print('loading model')
            exp.model.load_state_dict(torch.load(exp.best_model_path))
            
        preds = []
        trues = []
        masks = []
        
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
            
        exp.model.eval()
        with torch.no_grad():
            for i, (batch_x_mask, batch_x, batch_x_mark) in enumerate(test_loader):
                batch_x=batch_x.float().to(device)
                batch_x_mark = batch_x_mark.float().to(device)
                # 1 at observed cells, 0 at masked (NaN) cells.
                mask=(batch_x_mask.isnan()==False).int().to(device)
                batch_x_mask[batch_x_mask.isnan()]=0.0# fill the missing cells with 0
                batch_x_mask=batch_x_mask.to(torch.float32).to(device)
                
                outputs = exp.model(batch_x_mask, batch_x_mark, None, None, mask)
                
                outputs = np.array(outputs.detach().cpu().tolist())
                pred = outputs
                true = np.array(batch_x.detach().cpu().tolist())
                # print(true)
                preds.append(pred)
                trues.append(true)
                mask=np.array(mask.detach().cpu().tolist())
                masks.append(mask)
                
                if i % 20 == 0:
                    # Preview for the last channel of the first window:
                    # truth at observed cells, prediction at masked cells.
                    filled = true[0, :, -1].copy()
                    filled = filled * mask[0, :, -1] + \
                             pred[0, :, -1] * (1 - mask[0, :, -1])
                    visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.concatenate(preds, 0)
        trues = np.concatenate(trues, 0)
        masks = np.concatenate(masks, 0)
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        # Metrics on the masked cells only.
        mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])

        print('mse:{}, mae:{}'.format(mse, mae))
        f = open("result_imputation.txt", 'a')
        f.write(setting + "  \n")
        f.write('mse:{}, mae:{}'.format(mse, mae))
        f.write('\n')
        f.write('\n')
        f.close()

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)

loading model


AttributeError: module 'numpy.core.multiarray' has no attribute 'generic'

In [31]:
# Debug inspection: display the current value of `true` (assigned inside the
# batch loop of the preceding debug cell).
true

array([[[-0.98985839, -1.04402041, -1.07619405, ...,  1.2530396 ,
          0.89157671,  0.05124401],
        [-1.25333214, -1.33691347, -1.24807525, ...,  0.81658381,
          0.3181479 , -0.27584717],
        [-0.52877933, -0.50573051, -0.49097961, ...,  0.15944654,
         -0.09434149, -0.39960593],
        ...,
        [ 0.16090201,  0.19483794,  0.16789822, ...,  0.68367016,
          0.48492375,  0.00261005],
        [-0.61402082, -0.5809328 , -0.52371889, ...,  0.67175281,
          0.43706506, -0.03945175],
        [-0.24980709, -0.23262756, -0.15540209, ...,  0.53729695,
          0.31347874, -0.10739774]],

       [[-1.25333214, -1.33691347, -1.24807525, ...,  0.81658381,
          0.3181479 , -0.27584717],
        [-0.52877933, -0.50573051, -0.49097961, ...,  0.15944654,
         -0.09434149, -0.39960593],
        [-0.35829628, -0.35136798, -0.28226677, ..., -0.22779757,
         -0.5256533 , -0.67482764],
        ...,
        [-0.61402082, -0.5809328 , -0.52371889, ...,  

In [29]:
# Debug inspection: display the current value of `mask`.
# NOTE(review): this cell's execution count (29) is lower than that of the cells
# above it, so the stored output comes from an earlier kernel state.
mask

tensor([[[1, 1, 0,  ..., 1, 1, 0],
         [1, 0, 1,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 1, 1, 0],
         ...,
         [1, 0, 0,  ..., 1, 1, 1],
         [0, 1, 1,  ..., 0, 0, 1],
         [0, 0, 0,  ..., 0, 1, 0]],

        [[1, 0, 1,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 1, 1, 0],
         [0, 0, 0,  ..., 1, 1, 0],
         ...,
         [0, 1, 1,  ..., 0, 0, 1],
         [0, 0, 0,  ..., 0, 1, 0],
         [1, 1, 0,  ..., 1, 0, 1]],

        [[0, 0, 0,  ..., 1, 1, 0],
         [0, 0, 0,  ..., 1, 1, 0],
         [1, 0, 0,  ..., 1, 1, 1],
         ...,
         [0, 0, 0,  ..., 0, 1, 0],
         [1, 1, 0,  ..., 1, 0, 1],
         [1, 1, 1,  ..., 1, 0, 0]],

        ...,

        [[0, 0, 0,  ..., 0, 1, 0],
         [1, 0, 1,  ..., 1, 1, 0],
         [1, 0, 0,  ..., 0, 1, 1],
         ...,
         [1, 1, 1,  ..., 1, 1, 1],
         [0, 0, 0,  ..., 1, 0, 0],
         [0, 1, 1,  ..., 0, 1, 1]],

        [[1, 0, 1,  ..., 1, 1, 0],
         [1, 0, 0,  ..., 0, 1, 1],
         [1,

# 6.2、Autoformer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
import math
import numpy as np


class AutoformerNet(nn.Module):
    """
    Encoder-only Autoformer that maps an input window to a same-length output window.

    The network is: ``DataEmbedding_wo_pos`` -> stack of ``configs.e_layers``
    auto-correlation ``EncoderLayer`` blocks (closed by ``my_Layernorm``) ->
    linear projection from ``d_model`` to ``c_out`` channels.

    ``configs`` must expose: seq_len, output_attention, enc_in, d_model,
    embed, freq, dropout, factor, n_heads, d_ff, moving_avg, activation,
    e_layers, c_out.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.output_attention = configs.output_attention

        # Value + time-mark embedding (implementation lives in layers.Embed).
        self.enc_embedding = DataEmbedding_wo_pos(
            configs.enc_in,
            configs.d_model,
            configs.embed,
            configs.freq,
            configs.dropout,
        )

        # Build the encoder blocks one by one, each with its own auto-correlation module.
        encoder_layers = []
        for _ in range(configs.e_layers):
            correlation = AutoCorrelationLayer(
                AutoCorrelation(
                    False,
                    configs.factor,
                    attention_dropout=configs.dropout,
                    output_attention=configs.output_attention,
                ),
                configs.d_model,
                configs.n_heads,
            )
            encoder_layers.append(
                EncoderLayer(
                    correlation,
                    configs.d_model,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            encoder_layers,
            norm_layer=my_Layernorm(configs.d_model),
        )

        # Output head: project each encoded step back to the data channels.
        self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Embed, encode and project ``x_enc``; ``x_dec``, ``x_mark_dec`` and ``mask`` are not used."""
        embedded = self.enc_embedding(x_enc, x_mark_enc)
        encoded, attns = self.encoder(embedded, attn_mask=None)
        return self.projection(encoded)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the imputed window produced by :meth:`imputation`."""
        # [B, L, D]
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)

In [None]:
# Train the Autoformer imputer, evaluate it on the test split and record its
# results next to the other models.
windows_size=96
batch_size=128

#----------------- Standardisation -----------------
# The scaler is fitted on the complete training split and then applied to the
# masked and complete versions of both splits.
scolar=StandardScaler()
scolar.fit(train_data)
train_miss_normal=scolar.transform(train_miss)
train_data_normal=scolar.transform(train_data)
test_miss_normal=scolar.transform(test_miss)
test_data_normal=scolar.transform(test_data)

#-------------- Wrap the data in datasets / loaders --------------------
train_dataset=Dataset_me(train_miss_normal,train_data_normal,train_stamp,windows_size=windows_size)
test_dataset=Dataset_me(test_miss_normal,test_data_normal,test_stamp,windows_size=windows_size)

# No shuffle argument is passed for the training loader, so the DataLoader default applies.
train_loader=DataLoader(train_dataset,batch_size=batch_size)
test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

#--------------------- Hyper-parameters --------------------
# argparse is used only as a namespace factory: parse_args(args=[]) below
# ignores the command line, so every option takes its default.
parser = argparse.ArgumentParser(description='AutoformerNet')
parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
parser.add_argument('--d_model', type=int, default=256, help='dimension of model')
parser.add_argument('--embed', type=str, default='timeF',help='time features encoding, options:[timeF, fixed, learned]')

parser.add_argument('--freq', type=str, default='h',
                    help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
parser.add_argument('--dropout', type=float, default=0.1, help='dropout')

parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--d_ff', type=int, default=512, help='dimension of fcn')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--c_out', type=int, default=7, help='output size')
parser.add_argument('--train_epochs', type=int, default=20, help='output train_epochs')
# NOTE(review): type=bool calls bool() on the raw string, so any non-empty value
# (even 'False') would parse as True; harmless here because only the default is used.
parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning_rate')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
# NOTE(review): pred_len, top_k and num_kernels are not read by the AutoformerNet
# or Exp_Imputation code in this notebook - confirm whether they are still needed.
parser.add_argument('--pred_len', type=int, default=0, help='prediction sequence length')
parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
parser.add_argument('--num_kernels', type=int, default=3, help='for Inception')
parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
args = parser.parse_args(args=[])

# Only keep the GPU flag when CUDA is actually available.
args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
args.enc_in=train_data_normal.shape[1]# override: number of input feature columns
args.c_out=train_data_normal.shape[1]# override: number of output feature columns
args.seq_len=windows_size# length of each input window


# Torch device selected from the resolved use_gpu flag.
device=torch.device('cuda' if args.use_gpu else 'cpu')

#-------------------- Build the model, train and test -----------------
setting = 'AutoformerNet'
model=AutoformerNet(args)
exp=Exp_Imputation(args,train_loader,test_loader,model)
exp.train(setting)# training

#--------- Inference, window merging and inverse scaling ----------
# Only the exp.test(...) call is timed.
start_time=time.time()
preds,trues,masks=exp.test(setting, test=1)# evaluate on the test loader
end_time=time.time()
# Merge the overlapping windows back into one row per time step.
trues=get_debatch_data(trues)
preds=get_debatch_data(preds)
masks=get_debatch_data(masks)

# Undo the standardisation so metrics are in the original units.
preds_anti=scolar.inverse_transform(preds)
trues_anti=scolar.inverse_transform(trues)

# Metrics on the masked cells (masks == 0), followed by the elapsed test time.
AutoformerNet_result=get_result(trues_anti[masks == 0], preds_anti[masks == 0])+[end_time-start_time]
result_dict['AutoformerNet']=AutoformerNet_result
predict_dict['AutoformerNet']={'true':trues_anti[masks == 0],'pred':preds_anti[masks == 0]}

# Frames that keep values only at the masked cells (observed cells become NaN).
trues_anti_df=pd.DataFrame(trues_anti,index=test_miss.index,columns=test_miss.columns)
preds_anti_df=pd.DataFrame(preds_anti,index=test_miss.index,columns=test_miss.columns)
true_mask_df=trues_anti_df.mask(masks==1)
pred_mask_df=preds_anti_df.mask(masks==1)
mask_pred_dict['AutoformerNet']={'true':true_mask_df,'pred':pred_mask_df}

# Show this model's [mae, mse, mape, r2, time] as the cell output.
AutoformerNet_result

In [None]:
#------------ Save the metric table ----------------------
# One row per model, one column per metric (the last one is the timed test step).
metric_names=['mae','mse','mape','r2','time']
result_dict_df=pd.DataFrame(result_dict,index=metric_names).transpose()
result_dict_df.to_csv('./所有模型的性能指标.csv',encoding='utf_8_sig')
result_dict_df

In [None]:
# Unbind the notebook-level name `plt` and import pyplot again under the same name.
# NOTE(review): looks like a leftover from debugging the plotting setup - confirm
# whether this cell is still required.
del plt
import matplotlib.pyplot as plt

In [None]:
# The ground truth used to be read from loop variables left over after the
# `for` loops; with empty result dicts that either failed with a NameError or
# silently picked up a stale `true` / `values` from an earlier cell. Fail
# early with a clear message and select the reference entry explicitly.
if not predict_dict or not mask_pred_dict:
    raise RuntimeError('No imputation results collected; run the model cells first.')

#---------------- Table of every model's imputed values at the masked test cells --------------
pred_columns={model_name:entry['pred'] for model_name,entry in predict_dict.items()}
# Assumes every model stored the same ground-truth vector (same masked cells in
# the same order); the entry of the last model is used, as before.
pred_columns['True']=list(predict_dict.values())[-1]['true']
new_pred_true_df=pd.DataFrame(pred_columns)
new_pred_true_df.to_csv('./所有模型的预测.csv',encoding='utf_8_sig')

import matplotlib
# NOTE(review): TkAgg is a GUI backend, so this presumably needs a display and a
# Tk installation - confirm it is wanted when running headless.
matplotlib.use('TkAgg')

import matplotlib.pyplot as plt
# First 100 masked cells, one line per model plus the ground truth.
new_pred_true_df[:100].plot(figsize=(16,8))
plt.xlabel('data id')
plt.ylabel('value')
plt.savefig('./所有模型的部分预测结果展示.png',dpi=400)
plt.show()


#---------------- Imputation quality on a single column --------------
columns_id=0  # position of the column to display
target_column=test_miss.columns[columns_id]
columnd_data={model_name:entry['pred'][target_column] for model_name,entry in mask_pred_dict.items()}
# Ground truth at the masked cells, taken from the last model's entry.
columnd_data['True']=list(mask_pred_dict.values())[-1]['true'][target_column]
columnd_data['Raw']=test_data[target_column]  # complete, unmasked series
columnd_data_df=pd.DataFrame(columnd_data)[:100]  # first 100 rows only
plt.figure(figsize=(16,6))
plt.plot(columnd_data_df['Raw'],label='Raw')
plt.plot(columnd_data_df['True'],'^',label='Mask')
# One marker series per model; the two reference series were drawn above.
model_columns=[column for column in columnd_data_df.columns if column not in ('Raw','True')]
for column in model_columns:
    plt.plot(columnd_data_df[column],'*',label=column)
plt.legend(loc='best')
plt.xlabel('Time')
plt.ylabel('Values')
plt.savefig('./所有模型的某一列预测结果展示.png',dpi=400)
plt.show()