# 평균판매량

In [1]:
import pandas as pd
import numpy as np
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import warnings
import torch.nn as nn
warnings.filterwarnings('ignore')

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.float32)

In [3]:
def create_sequences(data, seq_length):
  xs = [] 
  ys = []
  for i in range(len(data.T)-seq_length-10):
    x = data[:, i:(i+seq_length)]
    y = data[:, (i+seq_length):(i+seq_length+10)]
    xs.append(x)
    ys.append(y)
  return np.array(xs), np.array(ys)

In [4]:
import torch.nn as nn
import torch

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):

        x=x.permute(0, 2, 1) 
        # Initialize hidden state with zeros
        h0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Initialize cell state with zeros
        c0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0)) 
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])

        return out

In [5]:
data = pd.read_csv('./data.csv')
data = pd.DataFrame(data.iloc[0])

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data).reshape(1, -1)

X, y =create_sequences(scaled_data, seq_length=30)

# 스케일링된 데이터로 데이터셋 재구성
train_size = int(0.64 * len(X))
val_size = int(0.16 * len(X))
test_size = int(0.20 * len(X))

X_train, X_val, X_test = X[:train_size], X[train_size:(train_size+val_size)], X[(train_size+val_size):]
y_train, y_val, y_test = y[:train_size], y[train_size:(train_size+val_size)], y[(train_size+val_size):]

train_dataset = TimeSeriesDataset(X_train,y_train)
train_loader = DataLoader(train_dataset, batch_size=32)
val_dataset = TimeSeriesDataset(X_val,y_val)
val_loader = DataLoader(val_dataset, batch_size=32)
test_dataset = TimeSeriesDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

# 모델 정의
model = LSTM(input_dim=1, hidden_dim=32, output_dim=10)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [6]:
epochs = 280  # 에포크 수

for i, epoch in enumerate(range(epochs)):
    
    model.train()
    train_losses = []
    for seq, labels in (train_loader):

        y_pred = model(seq)
        loss = criterion(y_pred, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

        model.eval()
        with torch.no_grad():
            val_losses = []
            for seq, labels in test_loader:
                y_pred  = model(seq)
                val_loss = criterion(y_pred, labels)
                val_losses.append(val_loss.item()) 
    if i%10==0:
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {np.mean(train_losses):.4f}, Validation Loss: {np.mean(val_losses):.4f}')


Epoch 1/280, Train Loss: 0.1433, Validation Loss: 0.0622
Epoch 11/280, Train Loss: 0.0126, Validation Loss: 0.0430
Epoch 21/280, Train Loss: 0.0126, Validation Loss: 0.0426
Epoch 31/280, Train Loss: 0.0126, Validation Loss: 0.0421
Epoch 41/280, Train Loss: 0.0125, Validation Loss: 0.0415
Epoch 51/280, Train Loss: 0.0125, Validation Loss: 0.0411
Epoch 61/280, Train Loss: 0.0125, Validation Loss: 0.0406
Epoch 71/280, Train Loss: 0.0125, Validation Loss: 0.0403
Epoch 81/280, Train Loss: 0.0125, Validation Loss: 0.0399
Epoch 91/280, Train Loss: 0.0124, Validation Loss: 0.0396
Epoch 101/280, Train Loss: 0.0124, Validation Loss: 0.0393
Epoch 111/280, Train Loss: 0.0124, Validation Loss: 0.0391
Epoch 121/280, Train Loss: 0.0124, Validation Loss: 0.0388
Epoch 131/280, Train Loss: 0.0124, Validation Loss: 0.0386
Epoch 141/280, Train Loss: 0.0124, Validation Loss: 0.0385
Epoch 151/280, Train Loss: 0.0124, Validation Loss: 0.0383
Epoch 161/280, Train Loss: 0.0124, Validation Loss: 0.0381
Epoch 17

In [7]:
model.eval()
with torch.no_grad():
  for seq, labels in test_loader:
     y_pred  = model(seq)
     val_loss = criterion(y_pred, labels)
  print(val_loss)

tensor(0.0371)


# 평균판매금액

In [8]:
import pandas as pd
import numpy as np
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import warnings
import torch.nn as nn
warnings.filterwarnings('ignore')

In [9]:
import torch
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.float32)

In [10]:
def create_sequences(data, seq_length):
  xs = [] 
  ys = []
  for i in range(len(data.T)-seq_length-10):
    x = data[:, i:(i+seq_length)]
    y = data[:, (i+seq_length):(i+seq_length+10)]
    xs.append(x)
    ys.append(y)
  return np.array(xs), np.array(ys)

In [11]:
import torch.nn as nn
import torch

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):

        x=x.permute(0, 2, 1) 
        # Initialize hidden state with zeros
        h0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Initialize cell state with zeros
        c0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0)) 
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])

        return out

In [12]:
data = pd.read_csv('./data.csv')
data = pd.DataFrame(data.iloc[1])

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data).reshape(1, -1)

X, y =create_sequences(scaled_data, seq_length=30)

# 스케일링된 데이터로 데이터셋 재구성
train_size = int(0.64 * len(X))
val_size = int(0.16 * len(X))
test_size = int(0.20 * len(X))

X_train, X_val, X_test = X[:train_size], X[train_size:(train_size+val_size)], X[(train_size+val_size):]
y_train, y_val, y_test = y[:train_size], y[train_size:(train_size+val_size)], y[(train_size+val_size):]

train_dataset = TimeSeriesDataset(X_train,y_train)
train_loader = DataLoader(train_dataset, batch_size=32)
val_dataset = TimeSeriesDataset(X_val,y_val)
val_loader = DataLoader(val_dataset, batch_size=32)
test_dataset = TimeSeriesDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

# 모델 정의
model = LSTM(input_dim=1, hidden_dim=32, output_dim=10)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [13]:
epochs = 230  # 에포크 수

for i, epoch in enumerate(range(epochs)):
    
    model.train()
    train_losses = []
    for seq, labels in (train_loader):

        y_pred = model(seq)
        loss = criterion(y_pred, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

        model.eval()
        with torch.no_grad():
            val_losses = []
            for seq, labels in test_loader:
                y_pred  = model(seq)
                val_loss = criterion(y_pred, labels)
                val_losses.append(val_loss.item()) 
    if i%10==0:
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {np.mean(train_losses):.4f}, Validation Loss: {np.mean(val_losses):.4f}')


Epoch 1/230, Train Loss: 0.3988, Validation Loss: 0.2645
Epoch 11/230, Train Loss: 0.0150, Validation Loss: 0.1616
Epoch 21/230, Train Loss: 0.0149, Validation Loss: 0.1627
Epoch 31/230, Train Loss: 0.0149, Validation Loss: 0.1624
Epoch 41/230, Train Loss: 0.0148, Validation Loss: 0.1621
Epoch 51/230, Train Loss: 0.0147, Validation Loss: 0.1620
Epoch 61/230, Train Loss: 0.0146, Validation Loss: 0.1622
Epoch 71/230, Train Loss: 0.0146, Validation Loss: 0.1627
Epoch 81/230, Train Loss: 0.0145, Validation Loss: 0.1634
Epoch 91/230, Train Loss: 0.0144, Validation Loss: 0.1644
Epoch 101/230, Train Loss: 0.0144, Validation Loss: 0.1653
Epoch 111/230, Train Loss: 0.0143, Validation Loss: 0.1660
Epoch 121/230, Train Loss: 0.0143, Validation Loss: 0.1666
Epoch 131/230, Train Loss: 0.0143, Validation Loss: 0.1669
Epoch 141/230, Train Loss: 0.0143, Validation Loss: 0.1672
Epoch 151/230, Train Loss: 0.0142, Validation Loss: 0.1673
Epoch 161/230, Train Loss: 0.0142, Validation Loss: 0.1675
Epoch 17

In [14]:
model.eval()
with torch.no_grad():
  for seq, labels in test_loader:
     y_pred  = model(seq)
     val_loss = criterion(y_pred, labels)
  print(val_loss)

tensor(0.1679)


# 평균 언급량

In [15]:
import pandas as pd
import numpy as np
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import warnings
import torch.nn as nn
warnings.filterwarnings('ignore')

In [16]:
import torch
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.float32)

In [17]:
def create_sequences(data, seq_length):
  xs = [] 
  ys = []
  for i in range(len(data.T)-seq_length-10):
    x = data[:, i:(i+seq_length)]
    y = data[:, (i+seq_length):(i+seq_length+10)]
    xs.append(x)
    ys.append(y)
  return np.array(xs), np.array(ys)

In [18]:
import torch.nn as nn
import torch

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):

        x=x.permute(0, 2, 1) 
        # Initialize hidden state with zeros
        h0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Initialize cell state with zeros
        c0 = torch.zeros(1, x.shape[0], self.hidden_dim)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0)) 
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])

        return out

In [19]:
data = pd.read_csv('./data.csv')
data = pd.DataFrame(data.iloc[2])

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data).reshape(1, -1)

X, y =create_sequences(scaled_data, seq_length=30)

# 스케일링된 데이터로 데이터셋 재구성
train_size = int(0.64 * len(X))
val_size = int(0.16 * len(X))
test_size = int(0.20 * len(X))

X_train, X_val, X_test = X[:train_size], X[train_size:(train_size+val_size)], X[(train_size+val_size):]
y_train, y_val, y_test = y[:train_size], y[train_size:(train_size+val_size)], y[(train_size+val_size):]

train_dataset = TimeSeriesDataset(X_train,y_train)
train_loader = DataLoader(train_dataset, batch_size=32)
val_dataset = TimeSeriesDataset(X_val,y_val)
val_loader = DataLoader(val_dataset, batch_size=32)
test_dataset = TimeSeriesDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

# 모델 정의
model = LSTM(input_dim=1, hidden_dim=32, output_dim=10)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [20]:
epochs = 50  # 에포크 수

for i, epoch in enumerate(range(epochs)):
    
    model.train()
    train_losses = []
    for seq, labels in (train_loader):

        y_pred = model(seq)
        loss = criterion(y_pred, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

        model.eval()
        with torch.no_grad():
            val_losses = []
            for seq, labels in test_loader:
                y_pred  = model(seq)
                val_loss = criterion(y_pred, labels)
                val_losses.append(val_loss.item()) 
    if i%10==0:
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {np.mean(train_losses):.4f}, Validation Loss: {np.mean(val_losses):.4f}')


Epoch 1/50, Train Loss: 0.0767, Validation Loss: 0.0591
Epoch 11/50, Train Loss: 0.0090, Validation Loss: 0.0100
Epoch 21/50, Train Loss: 0.0089, Validation Loss: 0.0100
Epoch 31/50, Train Loss: 0.0089, Validation Loss: 0.0100
Epoch 41/50, Train Loss: 0.0088, Validation Loss: 0.0100


In [21]:
model.eval()
with torch.no_grad():
  for seq, labels in test_loader:
     y_pred  = model(seq)
     val_loss = criterion(y_pred, labels)
  print(val_loss)

tensor(0.0100)


In [22]:
import matplotlib.pyplot as plt
# 임의의 훈련 샘플 선택 (여기서는 마지막 샘플을 사용)
sample_seq, sample_label = test_dataset[-500]

# 모델을 평가 모드로 설정
model.eval()

# 예측 수행
with torch.no_grad():
    sample_pred, _ = model(sample_seq.unsqueeze(0))  # 배치 차원 추가

# 실제 값과 예측 값을 numpy 배열로 변환
sample_seq = sample_seq.numpy()
sample_label = sample_label.numpy()
sample_pred = sample_pred.numpy()

# 시각화
plt.figure(figsize=(15, 5))

# 훈련 데이터 부분 (노란색)
plt.plot(np.arange(len(sample_seq)), sample_seq, label='Train Data', color='black')

# 실제 레이블 부분 (빨간색) — 마지막 훈련 데이터 포인트 다음에 위치해야 함
# 훈련 데이터 길이 + 예측 지점
actual_indices = np.arange(len(sample_seq), len(sample_seq) + len(sample_label))
plt.plot(actual_indices, sample_label, label='Actual', color='red')

# 예측 부분 (초록색) — 마지막 훈련 데이터 포인트 다음에 위치해야 함
# 훈련 데이터 길이 + 예측 지점
pred_indices = np.arange(len(sample_seq), len(sample_seq) + len(sample_pred[0]))
plt.plot(pred_indices, sample_pred[0], label='Prediction', color='green')

plt.title('Sample Train Data, Actual and Predicted Values')
plt.xlabel('Time Steps')
plt.ylabel('Value')
plt.legend()
plt.show()


IndexError: index -500 is out of bounds for axis 0 with size 84

In [None]:
len(sample_seq) + len(sample_label)

33

In [None]:
sample_label

array([0.50807816, 0.36243066, 0.35278514], dtype=float32)

In [None]:
predicted_test[:, 0].shape

NameError: name 'predicted_test' is not defined