In [4]:
import pandas as pd
features = pd.read_csv('features.csv')
targets = pd.read_csv('train_y.csv')
print(f"len_targets : {len(targets)} len_features : {len(features)}")

len_targets : 77152 len_features : 77152


In [5]:
train_test_split_ratio = 0.9

n = int(len(features)*train_test_split_ratio)

train_features = features.iloc[:n]
train_targets = targets.iloc[:n]

test_features = features.iloc[n:]
test_targets = targets.iloc[n:]

print(f"len_train_features : {len(train_features)}  len_test_features : {len(test_features)}")

len_train_features : 69436  len_test_features : 7716


In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        sample = self.features.iloc[idx]
        target = self.targets.iloc[idx]
        feature = torch.tensor(sample.values, dtype=torch.float32)
        feature = torch.reshape(feature, (6,7))
        target = torch.tensor(target, dtype=torch.float32)
        return feature, target

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 7
hidden_size = 256
num_layers = 1
learning_rate = 0.0001
batch_size = 64
num_epochs = 50


class RNN(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
    self.fc1 = nn.Linear(hidden_size, 32)
    # self.fc2 = nn.Linear(128, 64)
    # self.fc3 = nn.Linear(64,32)
    self.fc4 = nn.Linear(32,1)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

    _, h_n = self.rnn(x,h0)    # h_n.shape = (1, batch_size, hidden_size)
    h_n = torch.squeeze(h_n)
    out = self.fc1(h_n)
    # out = self.fc2(out)
    # out = self.fc3(out)
    out = self.fc4(out)

    return out

In [9]:
train_dataset = CustomDataset(train_features, train_targets)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_features, train_targets)
test_dataloader = DataLoader(test_dataset, batch_size=len(test_features), shuffle=False)

In [10]:
model = RNN(input_size, hidden_size, num_layers).to(device)

In [11]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
import matplotlib.pyplot as plt

In [16]:
#Training
train_losses = []
for epoch in range(num_epochs):
  running_loss = 0.0
  for batch_idx, (data, targets) in enumerate(train_dataloader):
    data = data.to(device=device)
    targets = targets.to(device=device)

    scores = model(data)
    loss = criterion(scores, targets)
    train_losses.append(loss)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()
    running_loss += loss.item()

  # Calculate average loss for the epoch
  epoch_loss = running_loss / len(train_dataloader)
  train_losses.append(epoch_loss)

  print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [None]:

for batch_idx, (data, targets) in enumerate(test_dataloader):
    data = data.to(device=device)
    targets = targets.to(device=device)
    scores = model(data)
    loss = criterion(scores, targets)

print(loss)