In [1]:
import warnings

warnings.filterwarnings("ignore")

import os
from torch.utils import data
from dataset import*
from config import settings
from torch.utils.tensorboard import SummaryWriter
from model import*
from train_test import *


## Load training data

In [None]:
# Load the training split into `df`.
# NOTE(review): `random_sample=settings.totalN` presumably sets how many rows are
# randomly sampled and `total_sample=None` leaves any fixed cap off — confirm
# against the definition of `load_data`.
df = load_data('train', total_sample=None, random_sample=settings.totalN)

In [None]:
# Preview the first rows. Ending the cell with the bare expression lets Jupyter
# render the frame with its rich (table) display instead of the plain-text
# output produced by print().
df.head()

In [None]:
# Show the dimensions of the loaded frame as a quick sanity check on the sample size.
print(df.shape)

## Training and validation data loader

In [None]:
# Mini-batch size shared by the training, validation and test loaders.
BATCH_SIZE = 64

# Training split built from the loaded frame.
train_dataset = DataFolder(split='train', df=df)

# Reshuffle every epoch, keep the final partial batch, load with 4 workers.
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
    num_workers=4,
)

In [None]:
# Validation split built from the same loaded frame.
valid_dataset = DataFolder(split='valid', df=df)

# Fixed order (no shuffling), keep the final partial batch, load in the main process.
valid_dataloader = data.DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

## Mode selection: train a new model or load a saved one

In [None]:
# True  -> build a fresh model and run the training / saving cells below.
# False -> skip training; load the weights from `save_model_name` and the
#          stored loss curve from `loss_filename` instead.
Mode_train = True

## Device

In [None]:
# GPU index to use when it exists on this machine.
PREFERRED_GPU = 2

if torch.cuda.is_available():
    # "cuda:2" is only valid on machines with at least three GPUs; fall back to
    # the first GPU otherwise, so that `.to(device)` does not fail later.
    gpu_index = PREFERRED_GPU if torch.cuda.device_count() > PREFERRED_GPU else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

## Model selection

In [None]:
# Human-readable experiment name; it is also the stem of every output file.
model_name = 'FCNN32 with BN'

# Where the trained weights and the per-epoch loss history are stored.
save_model_name = f'./model/{model_name}.pth'
loss_filename = f'./loss curve/{model_name}.txt'

In [None]:
# The model's input width is the size of axis 1 of the training feature array.
input_dim = np.size(train_dataset.features, 1)
print(f'feature dimension = {input_dim}')

model = FCNN(input_dim=input_dim)
if not Mode_train:
    # map_location moves the stored tensors onto `device`, so a checkpoint
    # saved on a GPU that does not exist on this machine still loads.
    model.load_state_dict(torch.load(save_model_name, map_location=device))
    # Inference-only run: use eval mode (matters for layers such as batch
    # normalisation, which behave differently while training).
    model.eval()
model = model.to(device)
print(model)

## Loss function and optimizer

In [None]:
# Mean-squared-error objective.
loss_fn = nn.MSELoss()

# Adam over all model parameters, with weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)
# Alternative optimizer kept for reference:
# optimizer2 = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=0, momentum=0.9)

## Training epoch and stop condition

In [None]:
# Flipped to True by the training cell. While it is False, that cell starts a
# fresh `loss_record` instead of appending to the existing one.
# Re-run this cell to reset the history before training from scratch.
trained = False

In [None]:
# TensorBoard event files for this experiment go to ./logs/<model_name>.
writer = SummaryWriter(f'./logs/{model_name}')

In [None]:
if Mode_train:
    epochs = 50
    if not trained:
        # Fresh run: start an empty per-epoch training-loss history. No
        # sentinel padding is used, so an interrupted run can never leave
        # placeholder values in the history that is later plotted and saved.
        loss_record = []
    # Mark the model as (at least partially) trained *before* the loop, so that
    # re-running this cell after an interruption continues the history rather
    # than restarting it while the weights keep their progress.
    trained = True
    # Continue epoch numbering from the previous run of this cell, so that
    # TensorBoard steps do not collide when training is resumed.
    first_epoch = len(loss_record) + 1
    try:
        for epoch in range(first_epoch, first_epoch + epochs):
            train_loss = train(train_dataloader, model, loss_fn, optimizer, writer=writer, record_batches=200)
            val_loss = val(valid_dataloader, model, loss_fn)
            writer.add_scalar("loss/training", train_loss, epoch)
            writer.add_scalar("loss/validation", val_loss, epoch)
            print(f"Epoch {epoch:2d}: Loss = {train_loss:.4f}")
            print(f'valid loss = {val_loss:.4f}')
            loss_record.append(train_loss)
            # Early stopping is intentionally disabled. The rule that was
            # sketched here stopped when train_loss exceeded 1.05x the mean of
            # the last five recorded losses, or dropped below 0.1.
    finally:
        # Push buffered scalars to disk even if training is interrupted.
        writer.flush()
    print("Done!")

## Plot training loss curve

In [None]:
import matplotlib.pyplot as plt

def plot_loss_curve(loss_list):
    """Plot one loss value per epoch as a line chart and show it.

    Args:
        loss_list: Sized sequence of per-epoch loss values, in training order.

    The x-axis is numbered from 1 so that it agrees with the 1-based epoch
    numbers used when the losses are printed and logged during training.
    """
    epoch_numbers = range(1, len(loss_list) + 1)
    plt.plot(epoch_numbers, loss_list)
    plt.title('Loss Curve')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()

In [None]:
if Mode_train:
    # The history was produced in this session by the training cell.
    plot_loss_curve(loss_record)
else:
    print('Auto load loss curve')
    with open(loss_filename) as fh:
        # The file holds the text form of a list of floats, e.g. "[0.5, 0.25]".
        raw_text = fh.read().strip()
    # Drop the surrounding brackets and skip empty tokens, so that a trailing
    # newline or an empty list ("[]") does not make float() raise.
    loss_record = [
        float(token)
        for token in raw_text.strip('[]').split(',')
        if token.strip()
    ]
    plot_loss_curve(loss_record)

## Evaluate

In [None]:
# Final loss on the validation loader for whichever model is in memory
# (freshly trained, or loaded from disk when Mode_train is False).
# The equivalent check on the training loader is left disabled:
#final_train_loss = val(train_dataloader, model, loss_fn)
final_val_loss = val(valid_dataloader, model, loss_fn)
print(f'final valid loss = {final_val_loss:.4f}')

## Save model

In [None]:
if Mode_train:
    # Create the output folders first; torch.save() and open(..., 'w') both
    # fail if the parent directory does not exist.
    for out_path in (save_model_name, loss_filename):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Persist only the weights (state_dict), not the whole model object.
    torch.save(model.state_dict(), save_model_name)

    # Store the loss history as the text form of the list, e.g. "[0.5, 0.25]",
    # which is the format the loss-curve loading cell parses.
    with open(loss_filename, 'w') as fh:
        fh.write(str(loss_record))

## Output test result

In [None]:
# Test split; unlike the train/valid datasets, no frame is passed in here.
test_dataset = DataFolder(split='test')

# Fixed order (no shuffling), keep the final partial batch, load in the main process.
test_dataloader = data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

In [None]:
# One row per test sample, identified by the dataset's `key_list`.
df_test = pd.DataFrame(test_dataset.key_list, columns=["key"])

# Run the model over the test loader.
# NOTE(review): assumes `test` returns exactly one prediction per key, in the
# same order as `key_list` (the loader does not shuffle) — confirm.
predictions = test(test_dataloader, model)

# Attach the predictions as the `fare_amount` column.
df_test["fare_amount"] = predictions

# Save the dataframe as a CSV file (without the index column).
df_test.to_csv("predictions.csv", index=False)