In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torchvision 
import pandas as pd

In [11]:
train_data=pd.read_csv(r'F:\study\ml\HonyiLee2021\Lhy_Machine_Learning-main\01 Introduction\dataset\covid.train.csv',)
test_data=pd.read_csv(r'F:\study\ml\HonyiLee2021\Lhy_Machine_Learning-main\01 Introduction\dataset\covid.test.csv')

In [12]:
train_data.shape

(2700, 95)

In [13]:
test_data.shape

(893, 94)

In [14]:
all_features=pd.concat((train_data.iloc[:,1:-1],test_data.iloc[:,1:]))
all_features.shape

(3593, 93)

In [16]:
all_features.isnull().sum().sum()

0

In [17]:
all_features.iloc[:,40:]=all_features.iloc[:,40:].apply(lambda x: (x-x.mean()) / x.std())

In [18]:
all_features.shape

(3593, 93)

In [19]:
n_train=train_data.shape[0]
train_features=torch.Tensor(all_features.iloc[:n_train,:].values)
test_features=torch.Tensor(all_features.iloc[n_train,:].values)
train_labels=torch.Tensor(train_data.tested_positive.values).view(-1,1)

In [20]:
loss=nn.MSELoss()

In [27]:
def get_net(num_features):
    net=nn.Linear(num_features,1)
    for p in net.parameters():
        nn.init.normal_(p,mean=0,std=0.01)
    return net

In [28]:
def rmse(net,features,labels):
    net.eval()
    with torch.no_grad():
        rmse=torch.sqrt(2*loss(net(features),labels).mean()).item()
    net.train()
    return rmse

In [39]:
def train(net,train_features,train_labels,test_features,test_labels,
         num_epochs,learning_rate,weight_decay,batch_size):
    train_ls,test_ls=[],[]
    dataset=torch.utils.data.TensorDataset(train_features,train_labels)
    train_iter=torch.utils.data.DataLoader(dataset,batch_size,shuffle=True)
    optimizer=torch.optim.Adam(net.parameters(),lr=learning_rate,weight_decay=weight_decay)
    min_rmse=1000.
    for e in range(num_epochs):
        for x,y in train_iter:
            l=loss(net(x),y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

        
        train_ls.append(rmse(net,train_features,train_labels))
        if test_labels is not None:
            test_ls.append(rmse(net,test_features,test_labels))
            dev_rmse=test_ls[-1]    
            if dev_rmse<min_rmse:
                min_rmse=dev_rmse
                print('save model (epoch %d) train rmse %f, test rmse %f ' %(
                e+1,train_ls[-1],min_rmse))
                torch.save(net.state_dict(),r'F:\study\ml\HonyiLee2021\Lhy_Machine_Learning-main\01 Introduction\models\model.pht')
                
        
    return train_ls,test_ls

In [40]:
def get_k_fold_data(k,i,x,y):
    assert k>1
    fold_size=x.shape[0]//k
    x_train,y_train=None,None
    index_list=list(range(x.shape[0]))
    np.random.shuffle(index_list)
    for j in range(k):
        idx=slice(j*fold_size,(j+1)*fold_size)
        x_part,y_part=x[index_list[idx],:],y[index_list[idx]]
        
        if j==i:
            x_valid,y_valid=x_part,y_part
        elif x_train is None:
            x_train,y_train=x_part,y_part
        else:
            x_train=torch.cat((x_train,x_part))
            y_train=torch.cat((y_train,y_part))
    
    return x_train,y_train,x_valid,y_valid

In [41]:
def k_fold(k,x_train,y_train,num_epochs,learning_rate,weight_decay,batch_size):
    train_l_sum,valid_l_sum=0,0
    for i in range(k):
        data=get_k_fold_data(k,i,x_train,y_train)
        net=get_net(x_train.shape[1])
        train_ls,valid_ls=train(net,*data,num_epochs,learning_rate,
                           weight_decay,batch_size)
        train_l_sum+=train_ls[-1]
        valid_l_sum+=valid_ls[-1]
        
        print('fold %d,train rmse %f,valid %f' % (
            i,train_ls[-1],valid_ls[-1]))
    return train_l_sum/k,valid_l_sum/k
        

In [43]:
k, num_epochs, lr, weight_decay, batch_size = 5, 100, 0.05, 0, 64
train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs, lr,
                          weight_decay, batch_size)
print('\n %d-fold validation: avg train rmse %f, avg valid rmse %f' %
(k, train_l, valid_l))

save model (epoch 1) train rmse 19.131882, test rmse 19.218702 
save model (epoch 2) train rmse 15.250222, test rmse 15.356034 
save model (epoch 3) train rmse 11.813848, test rmse 11.929383 
save model (epoch 4) train rmse 9.114777, test rmse 9.146272 
save model (epoch 5) train rmse 6.615634, test rmse 6.719354 
save model (epoch 6) train rmse 4.711744, test rmse 4.809836 
save model (epoch 7) train rmse 3.283542, test rmse 3.332742 
save model (epoch 8) train rmse 2.280216, test rmse 2.347567 
save model (epoch 9) train rmse 1.566052, test rmse 1.599944 
save model (epoch 10) train rmse 1.124855, test rmse 1.164639 
save model (epoch 11) train rmse 0.880633, test rmse 0.906301 
save model (epoch 12) train rmse 0.761269, test rmse 0.775518 
save model (epoch 13) train rmse 0.699326, test rmse 0.709686 
save model (epoch 14) train rmse 0.669431, test rmse 0.673521 
save model (epoch 15) train rmse 0.649473, test rmse 0.654517 
save model (epoch 16) train rmse 0.635771, test rmse 0.635

save model (epoch 50) train rmse 0.250259, test rmse 0.277415 
save model (epoch 51) train rmse 0.236703, test rmse 0.263002 
save model (epoch 52) train rmse 0.236130, test rmse 0.260262 
save model (epoch 53) train rmse 0.229869, test rmse 0.251389 
save model (epoch 54) train rmse 0.211965, test rmse 0.235701 
save model (epoch 55) train rmse 0.203458, test rmse 0.226054 
save model (epoch 56) train rmse 0.197531, test rmse 0.218132 
save model (epoch 57) train rmse 0.195088, test rmse 0.217495 
save model (epoch 58) train rmse 0.188109, test rmse 0.207406 
save model (epoch 59) train rmse 0.172499, test rmse 0.192508 
save model (epoch 60) train rmse 0.171820, test rmse 0.191502 
save model (epoch 61) train rmse 0.161288, test rmse 0.178928 
save model (epoch 62) train rmse 0.157645, test rmse 0.175462 
save model (epoch 63) train rmse 0.155158, test rmse 0.171611 
save model (epoch 64) train rmse 0.143977, test rmse 0.159591 
save model (epoch 65) train rmse 0.134800, test rmse 0.

save model (epoch 99) train rmse 0.025559, test rmse 0.028114 
save model (epoch 100) train rmse 0.024628, test rmse 0.027343 
fold 2,train rmse 0.024628,valid 0.027343
save model (epoch 1) train rmse 19.095234, test rmse 19.192610 
save model (epoch 2) train rmse 15.194485, test rmse 15.303797 
save model (epoch 3) train rmse 11.858932, test rmse 11.967269 
save model (epoch 4) train rmse 8.917784, test rmse 9.018898 
save model (epoch 5) train rmse 6.573620, test rmse 6.676973 
save model (epoch 6) train rmse 4.717829, test rmse 4.802364 
save model (epoch 7) train rmse 3.275779, test rmse 3.357151 
save model (epoch 8) train rmse 2.250414, test rmse 2.332033 
save model (epoch 9) train rmse 1.569803, test rmse 1.649834 
save model (epoch 10) train rmse 1.119719, test rmse 1.180998 
save model (epoch 11) train rmse 0.867639, test rmse 0.928741 
save model (epoch 12) train rmse 0.758910, test rmse 0.794339 
save model (epoch 13) train rmse 0.677517, test rmse 0.715866 
save model (epo

save model (epoch 46) train rmse 0.319647, test rmse 0.323650 
save model (epoch 47) train rmse 0.308325, test rmse 0.307956 
save model (epoch 49) train rmse 0.295806, test rmse 0.297502 
save model (epoch 50) train rmse 0.286238, test rmse 0.284464 
save model (epoch 51) train rmse 0.273456, test rmse 0.277542 
save model (epoch 52) train rmse 0.267523, test rmse 0.267478 
save model (epoch 53) train rmse 0.257387, test rmse 0.257619 
save model (epoch 54) train rmse 0.251622, test rmse 0.253017 
save model (epoch 55) train rmse 0.246947, test rmse 0.248737 
save model (epoch 56) train rmse 0.238553, test rmse 0.238436 
save model (epoch 57) train rmse 0.221081, test rmse 0.222191 
save model (epoch 58) train rmse 0.220903, test rmse 0.221877 
save model (epoch 59) train rmse 0.210121, test rmse 0.214887 
save model (epoch 60) train rmse 0.201314, test rmse 0.204541 
save model (epoch 61) train rmse 0.191621, test rmse 0.189174 
save model (epoch 63) train rmse 0.180978, test rmse 0.