### Data processing

In [37]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

def encode_categorical(df):
    label_encoder = LabelEncoder()
    for column in df.columns:
        if df[column].dtype == 'object':
            df[column] = label_encoder.fit_transform(df[column])
        elif df[column].dtype == 'float64' or df[column].dtype == 'int64':
            df[column] = df[column].fillna(df[column].mean())
    return df

df1 = pd.read_csv('train.csv')
df1 = encode_categorical(df1)


### Building dataset

In [38]:
from sklearn.model_selection import train_test_split
import torch.utils.data as Data
import torch


batch_sizes = 32

x = df1.iloc[:,1:-1]
y= df1.iloc[:,-1:]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2004)

x_train_tensor = torch.FloatTensor(x_train.values)
y_train_tensor = torch.FloatTensor(y_train.values)
x_test_tensor = torch.FloatTensor(x_test.values)
y_test_tensor = torch.FloatTensor(y_test.values)

train_dataset = Data.TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = Data.TensorDataset(x_test_tensor, y_test_tensor)

train_dataloader = Data.DataLoader(train_dataset,batch_sizes,shuffle=True)
test_dataloader = Data.DataLoader(test_dataset,batch_sizes,shuffle=True)
len(train_dataset)

1168

### Model building

In [39]:
import torch.nn as nn

class LinearNet(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear_1 = nn.Linear(in_features, 40)  # nn.Module
        self.linear_2 = nn.Linear(40, 10)
        self.linear_3 = nn.Linear(10, out_features)
        self.sigmoid = nn.Sigmoid()  
        
    def forward(self, x):
        x = self.linear_1(x)
#         x = self.sigmoid(x)  
        x = self.linear_2(x)
#         x = self.sigmoid(x)
        x = self.linear_3(x)
        return x


### Parameter determination


In [40]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LinearNet(79,1).to(device)
loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)


### Model training

In [41]:

def train_model(model,dataloader):
    model.train()
    
    total_loss = 0
    for idx,(x,y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        cur_loss = loss(y_pred,y)
        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()
        
        total_loss += cur_loss.item()
    print(f"train loss:{total_loss/len(train_dataset)}")
    

# Model testing

In [42]:

def test_model(model, dataloader):
    model.eval()

    total_loss = 0.
    for idx, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        cur_loss = loss(y_pred, y)
        total_loss += cur_loss.item()
    print(f"Test loss: {total_loss/len(test_dataset)}")

In [43]:
num_epoch = 50
for i in range(num_epoch):
    print(f"==== Epoch {i} ====")
    train_model(model, train_dataloader)
    test_model(model, test_dataloader)

==== Epoch 0 ====
train loss:1186687838.6849315
Test loss: 1331636728.9863014
==== Epoch 1 ====
train loss:1090496941.589041
Test loss: 1162788976.2191782
==== Epoch 2 ====
train loss:815934922.520548
Test loss: 730908437.0410959
==== Epoch 3 ====
train loss:408323300.82191783
Test loss: 302773237.4794521
==== Epoch 4 ====
train loss:223581846.3561644
Test loss: 245777296.65753424
==== Epoch 5 ====
train loss:206090857.42465752
Test loss: 262005866.95890412
==== Epoch 6 ====
train loss:187409255.67123288
Test loss: 227957047.23287672
==== Epoch 7 ====
train loss:174027556.2739726
Test loss: 194766521.31506848
==== Epoch 8 ====
train loss:166786495.34246576
Test loss: 192137730.630137
==== Epoch 9 ====
train loss:152256171.06849316
Test loss: 190870518.79452056
==== Epoch 10 ====
train loss:144467848.10958904
Test loss: 176281371.61643836
==== Epoch 11 ====
train loss:137272158.41095892
Test loss: 172699956.60273972
==== Epoch 12 ====
train loss:128359959.34246576
Test loss: 161801737.2