In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of each file.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/2022-ai-w6p2/X_test.csv
/kaggle/input/2022-ai-w6p2/train.csv
/kaggle/input/2022-ai-w6p2/sample_submit.csv


In [2]:
import random
import torch
import torch.optim as optim
import torch.nn as nn

# Seed every random number generator used in this notebook (Python, NumPy, PyTorch CPU/CUDA)
# with the same value.
seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# GPU

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


# Dataset

In [4]:
# Load the training split (feature columns plus the TARGET column) and display it.
train_csv_path = '../input/2022-ai-w6p2/train.csv'
train = pd.read_csv(train_csv_path)
train

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T,TARGET
0,0,0,0,0,0,1.5,69.08,-12,0.000000
1,0,0,30,0,0,1.5,69.06,-12,0.000000
2,0,1,0,0,0,1.6,71.78,-12,0.000000
3,0,1,30,0,0,1.6,71.75,-12,0.000000
4,0,2,0,0,0,1.6,75.20,-12,0.000000
...,...,...,...,...,...,...,...,...,...
9994,208,5,0,0,0,0.5,64.12,12,0.000000
9995,208,5,30,26,383,0.8,56.18,13,5.630068
9996,208,6,0,41,578,1.1,47.46,15,13.887196
9997,208,6,30,52,699,1.4,44.51,17,23.269925


In [5]:
# Regression target. Selected by column label rather than by position so that it
# stays correct if the column order of the CSV changes, and so that it matches the
# label-based drop of 'TARGET' used to build the feature matrix.
y_train = train['TARGET']
y_train

0        0.000000
1        0.000000
2        0.000000
3        0.000000
4        0.000000
          ...    
9994     0.000000
9995     5.630068
9996    13.887196
9997    23.269925
9998    33.027555
Name: TARGET, Length: 9999, dtype: float64

In [6]:
# Feature matrix: every column of the training frame except the regression target.
X_train = train.drop(columns=['TARGET'])
X_train

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T
0,0,0,0,0,0,1.5,69.08,-12
1,0,0,30,0,0,1.5,69.06,-12
2,0,1,0,0,0,1.6,71.78,-12
3,0,1,30,0,0,1.6,71.75,-12
4,0,2,0,0,0,1.6,75.20,-12
...,...,...,...,...,...,...,...,...
9994,208,5,0,0,0,0.5,64.12,12
9995,208,5,30,26,383,0.8,56.18,13
9996,208,6,0,41,578,1.1,47.46,15
9997,208,6,30,52,699,1.4,44.51,17


In [7]:
# Load the test features and discard the 'ID' column, which is a row identifier
# and not a model input.
X_test = pd.read_csv('../input/2022-ai-w6p2/X_test.csv').drop(columns=['ID'])
X_test

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T
0,1043,7,30,34,584,2.0,63.02,3
1,1043,8,0,45,730,2.3,61.54,5
2,1043,8,30,52,817,2.4,57.34,6
3,1043,9,0,57,878,2.6,52.46,8
4,1043,9,30,61,917,2.2,45.74,10
...,...,...,...,...,...,...,...,...
2476,1094,21,30,0,0,2.4,70.70,-4
2477,1094,22,0,0,0,2.4,66.79,-4
2478,1094,22,30,0,0,2.2,66.78,-4
2479,1094,23,0,0,0,2.1,67.72,-4


## StandardScaler

In [8]:
from sklearn.preprocessing import StandardScaler

# Standardise the training features. The scaler's statistics are learned from
# X_train only and the same fitted scaler is reused for the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_train

array([[-1.72373894, -1.65890442, -0.99989999, ..., -0.64111658,
         0.22429055, -2.11395557],
       [-1.72373894, -1.65890442,  1.00010002, ..., -0.64111658,
         0.22338068, -2.11395557],
       [-1.72373894, -1.51449677, -0.99989999, ..., -0.56156771,
         0.34712225, -2.11395557],
       ...,
       [ 1.73515443, -0.79245849, -0.99989999, ..., -0.95931203,
        -0.75927291,  0.70334046],
       [ 1.73515443, -0.79245849,  1.00010002, ..., -0.72066544,
        -0.89347792,  0.91202906],
       [ 1.73515443, -0.64805084, -0.99989999, ..., -0.48201885,
        -1.19873744,  1.12071765]])

In [9]:
# Scale the test features with the scaler that was fitted on X_train
# (transform only -- the scaler is not re-fitted on test data).
# NOTE(review): in the displayed result the first column is roughly 15.6-16.5,
# i.e. far outside the range seen during training. This looks like the `Day`
# column (about 1043-1094 in the test preview vs 0-208 in the train preview), so
# the network has to extrapolate on it -- consider dropping or re-encoding it.
X_test = scaler.transform(X_test)
X_test

array([[ 1.56206158e+01, -6.48050836e-01,  1.00010002e+00, ...,
        -2.43372264e-01, -5.13983779e-02, -5.48791105e-01],
       [ 1.56206158e+01, -5.03643181e-01, -9.99899995e-01, ...,
        -4.72567502e-03, -1.18728347e-01, -3.40102510e-01],
       [ 1.56206158e+01, -5.03643181e-01,  1.00010002e+00, ...,
         7.48231878e-02, -3.09799880e-01, -2.35758212e-01],
       ...,
       [ 1.64687098e+01,  1.51806399e+00,  1.00010002e+00, ...,
        -8.42745379e-02,  1.19656137e-01, -1.27920119e+00],
       [ 1.64687098e+01,  1.66247165e+00, -9.99899995e-01, ...,
        -1.63823401e-01,  1.62419766e-01, -1.27920119e+00],
       [ 1.64687098e+01,  1.66247165e+00,  1.00010002e+00, ...,
        -1.63823401e-01,  1.61509902e-01, -1.27920119e+00]])

## Tensor & GPU

In [10]:
# Convert the scaled training features to a float32 tensor on `device`.
# torch.as_tensor accepts NumPy arrays as well as existing tensors, so this cell
# can be re-run after X_train has already been converted.
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_train

tensor([[-1.7237, -1.6589, -0.9999,  ..., -0.6411,  0.2243, -2.1140],
        [-1.7237, -1.6589,  1.0001,  ..., -0.6411,  0.2234, -2.1140],
        [-1.7237, -1.5145, -0.9999,  ..., -0.5616,  0.3471, -2.1140],
        ...,
        [ 1.7352, -0.7925, -0.9999,  ..., -0.9593, -0.7593,  0.7033],
        [ 1.7352, -0.7925,  1.0001,  ..., -0.7207, -0.8935,  0.9120],
        [ 1.7352, -0.6481, -0.9999,  ..., -0.4820, -1.1987,  1.1207]],
       device='cuda:0')

In [11]:
# Convert the target to a float32 tensor on `device`.
# A pandas Series is first turned into a NumPy array so the conversion does not
# depend on the Series index labels; an existing tensor is passed through
# unchanged so the cell can be re-run safely.
y_train = torch.as_tensor(
    y_train if torch.is_tensor(y_train) else np.asarray(y_train, dtype=np.float32),
    dtype=torch.float32,
    device=device,
)
y_train

tensor([ 0.0000,  0.0000,  0.0000,  ..., 13.8872, 23.2699, 33.0276],
       device='cuda:0')

In [12]:
# Convert the scaled test features to a float32 tensor on `device`.
# torch.as_tensor accepts NumPy arrays as well as existing tensors, so this cell
# can be re-run after X_test has already been converted.
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)
X_test

tensor([[ 1.5621e+01, -6.4805e-01,  1.0001e+00,  ..., -2.4337e-01,
         -5.1398e-02, -5.4879e-01],
        [ 1.5621e+01, -5.0364e-01, -9.9990e-01,  ..., -4.7257e-03,
         -1.1873e-01, -3.4010e-01],
        [ 1.5621e+01, -5.0364e-01,  1.0001e+00,  ...,  7.4823e-02,
         -3.0980e-01, -2.3576e-01],
        ...,
        [ 1.6469e+01,  1.5181e+00,  1.0001e+00,  ..., -8.4275e-02,
          1.1966e-01, -1.2792e+00],
        [ 1.6469e+01,  1.6625e+00, -9.9990e-01,  ..., -1.6382e-01,
          1.6242e-01, -1.2792e+00],
        [ 1.6469e+01,  1.6625e+00,  1.0001e+00,  ..., -1.6382e-01,
          1.6151e-01, -1.2792e+00]], device='cuda:0')

## Shape check

In [13]:
# Sanity check: print the shape of the training features, the training target
# and the test features, in that order.
for tensor in (X_train, y_train, X_test):
    print(tensor.shape)

torch.Size([9999, 8])
torch.Size([9999])
torch.Size([2481, 8])


# Model Train

In [14]:
# Building blocks of a fully connected 8 -> 64 -> 64 -> 1 network
# (bias terms are enabled, which is the nn.Linear default).
linear1 = nn.Linear(in_features=8, out_features=64)
linear2 = nn.Linear(in_features=64, out_features=64)
linear3 = nn.Linear(in_features=64, out_features=1)
# NOTE: despite its name, `sigmoid` holds a ReLU activation. The name is kept
# because the model-assembly cell refers to it.
sigmoid = nn.ReLU()

In [15]:
# Stack the layers into one module and move its parameters to `device`.
# The activation stored in `sigmoid` (a ReLU) follows every linear layer,
# including the last one, so the network output is never negative.
layers = [linear1, sigmoid, linear2, sigmoid, linear3, sigmoid]
model = nn.Sequential(*layers)
model = model.to(device)
model

Sequential(
  (0): Linear(in_features=8, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=1, bias=True)
  (5): ReLU()
)

In [16]:
# Mean-squared-error objective, optimised with Adam at a learning rate of 0.01.
loss = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [17]:
# Full-batch training: each iteration runs one forward/backward pass over all of
# X_train followed by a single optimizer step.
target_column = y_train.unsqueeze(1)  # (N,) -> (N, 1) to match the model output shape
for epoch in range(10000):
    hypothesis = model(X_train)
    cost = loss(hypothesis, target_column)

    # Clear gradients left over from the previous iteration before back-propagating.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report the training loss every 1000 iterations.
    if not epoch % 1000:
        print(epoch, cost.item())

0 1022.19921875
1000 0.31719306111335754
2000 0.1696276217699051
3000 0.11988723278045654
4000 0.09617777913808823
5000 0.08305584639310837
6000 0.07502429187297821
7000 0.06726711988449097
8000 0.09244856238365173
9000 0.059859927743673325


In [18]:
# Display the training-set predictions left over from the last loop iteration.
# They come from the forward pass at the top of that iteration, i.e. they were
# computed before that iteration's optimizer step.
hypothesis

tensor([[ 0.0000],
        [ 0.0000],
        [ 0.0000],
        ...,
        [14.3895],
        [24.2998],
        [34.4765]], device='cuda:0', grad_fn=<ReluBackward0>)

# Model Test

In [19]:
# Predict on the test features.
# model.eval() puts the module into evaluation mode. This network contains only
# Linear and ReLU layers, so predictions are unchanged, but the cell stays
# correct if dropout or batch-norm layers are added later.
model.eval()
# Gradients are not needed for inference, so autograd tracking is disabled.
with torch.no_grad():
    predicted = model(X_test)
print(predicted)

tensor([[ 0.8167],
        [ 7.7958],
        [12.0954],
        ...,
        [ 7.9950],
        [ 8.6693],
        [ 9.7318]], device='cuda:0')


In [20]:
# Bring the predictions back to host memory and expose them as a NumPy array.
predicted = predicted.cpu().numpy()
predicted

array([[ 0.8166812],
       [ 7.795754 ],
       [12.095368 ],
       ...,
       [ 7.994978 ],
       [ 8.669312 ],
       [ 9.731771 ]], dtype=float32)

# Submit

In [21]:
# Load the submission template (columns ID and TARGET) and display it.
sample_submit_path = '../input/2022-ai-w6p2/sample_submit.csv'
submit = pd.read_csv(sample_submit_path)
submit

Unnamed: 0,ID,TARGET
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
...,...,...
2476,2476,0
2477,2477,0
2478,2478,0
2479,2479,0


In [22]:
# Write the model predictions into the TARGET column of the submission frame.
# The model emits one value per row in an (N, 1) array; flatten it to 1-D so a
# single column is assigned explicitly instead of relying on pandas accepting
# a 2-D value for one column.
submit['TARGET'] = np.ravel(predicted)
submit

Unnamed: 0,ID,TARGET
0,0,0.816681
1,1,7.795754
2,2,12.095368
3,3,17.313595
4,4,21.066521
...,...,...
2476,2476,5.902294
2477,2477,6.614272
2478,2478,7.994978
2479,2479,8.669312


In [23]:
# Save the submission to the working directory without the DataFrame index,
# then display the frame that was written.
submit.to_csv(path_or_buf='submit.csv', index=False)
submit

Unnamed: 0,ID,TARGET
0,0,0.816681
1,1,7.795754
2,2,12.095368
3,3,17.313595
4,4,21.066521
...,...,...
2476,2476,5.902294
2477,2477,6.614272
2478,2478,7.994978
2479,2479,8.669312
