In [1]:
import pandas as pd
import numpy as np
import torch

In [2]:
# set seed
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(1)
if device == "cuda":
    torch.cuda.manual_seed(1)

In [3]:
# load data
train_df = pd.read_csv("./data/train_data.csv")
test_df = pd.read_csv("./data/test_data.csv")
submission_df = pd.read_csv("./data/submission.csv")

In [4]:
# preprocessing
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
area_name = np.array(pd.concat([train_df["area_name"], test_df["area_name"]], axis=0))
le.fit(area_name)
train_df["area_name"] = le.transform(train_df["area_name"])
test_df["area_name"] = le.transform(test_df["area_name"])

In [5]:
# set data in torch
train_x = np.array(train_df.drop(["gas_usage", "year"], axis=1))
test_x = np.array(test_df.drop(["year"], axis=1))
train_y = np.array(train_df["gas_usage"])

train_x = torch.Tensor(train_x).to(device)
test_x = torch.Tensor(test_x).to(device)
train_y = torch.Tensor(train_y).to(device)

print("data shape :", train_x.shape, train_y.shape)

data shape : torch.Size([1314, 2]) torch.Size([1314])


In [6]:
# define and init layer (Use NN)
layer1 = torch.nn.Linear(2, 16).to(device)
layer2 = torch.nn.Linear(16, 1).to(device)
relu = torch.nn.ReLU()

torch.nn.init.xavier_normal_(layer1.weight)
torch.nn.init.xavier_normal_(layer2.weight)

# define model
model = torch.nn.Sequential(layer1, relu, layer2).to(device)

In [7]:
# set learning param
epochs = 2000
lr = 1e-4
loss = torch.nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=lr)

# learning
for epoch in range(epochs + 1):
    output = model(train_x)
    cost = loss(output, train_y.unsqueeze(1))

    optim.zero_grad()
    cost.backward()
    optim.step()
    if epoch % (epochs / 10) == 0:
        print(epoch, cost.item())

0 358.75384521484375
100 199.37811279296875
200 121.95584106445312
300 91.63488006591797
400 82.23683166503906
500 79.76009368896484
600 78.880615234375
700 77.50785827636719
800 76.72615051269531
900 76.03026580810547
1000 75.36429595947266


In [8]:
# submission
with torch.no_grad():
    predict = model(test_x)
    predict = predict.cpu().detach()
    submission_df['gas_usage']= predict
print(submission_df)
submission_df.to_csv("submission.csv",index=False)

tensor([[ 8.7245],
        [12.4296],
        [13.4512],
        [11.8854],
        [13.6616],
        [ 8.8028],
        [ 8.8991],
        [12.7524],
        [12.6161],
        [11.9095],
        [11.5753],
        [12.2359],
        [11.8757],
        [ 4.3376],
        [11.2203],
        [ 8.1669],
        [ 8.8412],
        [ 9.0851],
        [ 4.3920],
        [ 8.7004],
        [11.4731],
        [10.7619],
        [ 4.8279],
        [ 3.0848],
        [11.6978],
        [10.9154],
        [ 5.7104],
        [ 9.5915],
        [ 2.7892],
        [11.3978],
        [ 3.8849],
        [12.2400],
        [ 7.3951],
        [ 7.5878],
        [ 7.1061],
        [12.2270],
        [ 3.7601],
        [ 5.1836],
        [ 6.0265],
        [10.9846],
        [13.3027],
        [ 5.9814],
        [13.0829],
        [ 9.1092],
        [10.6956],
        [12.8390],
        [11.5867],
        [ 3.4566],
        [ 8.8653],
        [ 9.5674],
        [12.1406],
        [13.5016],
        [ 5.