In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
import random
import os

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

from statsmodels.stats.outliers_influence import variance_inflation_factor


In [None]:
# Use a Korean-capable font so Hangul text in plot titles/labels renders.
# NOTE(review): 'Malgun Gothic' is presumably only installed on Windows; on
# other systems matplotlib will fall back to its default font - confirm the
# target environment.
mpl.rc('font', family='Malgun Gothic')
# With a non-default font the Unicode minus sign (U+2212) used for negative
# tick labels is often missing and renders as an empty box; fall back to the
# ASCII hyphen-minus instead.
mpl.rc('axes', unicode_minus=False)

In [None]:
# Read both input tables from the working directory: `df` is the training
# table and `test` is the table to predict on. Files are read in that order.
df, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test_.csv"))

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started after this
    # point; the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this call is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick a different algorithm from run to run,
    # which defeats the determinism requested above, so it must be disabled.
    torch.backends.cudnn.benchmark = False

In [None]:
# Fix all RNG seeds before any model or data-loader is created.
seed_everything(0)

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
class Custom_Dataset(Dataset):
    """Map-style dataset that serves rows of a pandas DataFrame as float tensors.

    In training mode the last column of the frame is treated as the target and
    every other column as a feature; items are ``(features, target)`` pairs.
    Otherwise the whole frame is treated as features and items are a single
    feature tensor.
    """

    def __init__(self, Dataset, trainset=True):
        """Split the frame into feature and target tables.

        Args:
            Dataset: Source DataFrame. (The parameter name shadows the
                imported ``Dataset`` base class inside this method only.)
            trainset: When True, the last column is split off as the target.
        """
        self.Dataset = Dataset
        self.trainset = trainset

        if not trainset:
            # Inference mode: there is no target column to separate.
            self.target = None
            self.data = pd.DataFrame(Dataset)
            return

        label_column = Dataset.columns[-1]
        feature_columns = Dataset.columns[:-1]
        # Rebuilding the frames from raw values discards the original column
        # labels and index.
        self.target = pd.DataFrame(Dataset[label_column].values)
        self.data = pd.DataFrame(Dataset[feature_columns].values)

    def __len__(self):
        """Return the number of rows in the feature table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the row at position ``idx`` as float tensor(s).

        Returns:
            ``(features, target)`` when a target table exists, otherwise just
            ``features``.
        """
        features = torch.FloatTensor(self.data.iloc[idx].values)

        if self.target is None:
            return features

        label = torch.FloatTensor(self.target.iloc[idx].values)
        return features, label

In [None]:
# Wrap both tables: the training dataset yields (features, target) pairs,
# the test dataset yields feature tensors only.
train_dataset = Custom_Dataset(df)
test_dataset = Custom_Dataset(test, trainset=False)

# The input width of the model is read off the first training sample.
num_features_train = train_dataset[0][0].shape[0]

# Single linear layer mapping all features to one output value.
model = nn.Linear(num_features_train, 1).to(device)

# Mini-batches of two rows, served in dataset order (no shuffling).
dataloader = DataLoader(dataset=train_dataset, batch_size=2)
criterion = nn.MSELoss()

In [None]:
nb_epochs = 25

# NOTE(review): no earlier cell defines an optimizer, so on a fresh kernel the
# loop below failed with a NameError. Plain SGD with a small learning rate is
# an assumed choice - confirm or tune the optimizer type and `lr`.
optimizer = optim.SGD(model.parameters(), lr=1e-5)

# Make sure the model is in training mode, e.g. when this cell is re-run after
# the inference cell has called model.eval().
model.train()

# The range is inclusive of nb_epochs, so this performs nb_epochs + 1 passes
# over the data (epochs 0..nb_epochs).
for epoch in range(nb_epochs + 1):
    for x_train, y_train in dataloader:
        # Move the batch to the same device as the model.
        x_train, y_train = x_train.to(device), y_train.to(device)

        prediction = model(x_train)
        # Use the MSE criterion created alongside the model instead of a
        # second, separately spelled loss.
        cost = criterion(prediction, y_train)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

In [None]:

# Switch to evaluation mode before predicting.
model.eval()

# Inference needs no gradients, so larger batches are safe; rows are served in
# dataset order, which keeps predictions aligned with the test table.
test_dataloader = DataLoader(test_dataset, batch_size=256)
predictions = []

with torch.no_grad():
    for x_test in test_dataloader:
        # The test dataset yields bare feature tensors, so each batch is
        # already a (batch, n_features) tensor. Indexing it with [0] would
        # select only its first row and silently drop the rest of the batch.
        x_test = x_test.to(device)
        prediction = model(x_test)
        # Flatten the (batch, 1) output into plain Python floats.
        predictions.extend(prediction.reshape(-1).cpu().tolist())

pred_df = pd.DataFrame(predictions, columns=['Predicted_Target'])

# NOTE(review): `teid` is not defined in any visible cell of this notebook
# (presumably the identifiers of the test rows, created in a removed cell).
# It must be defined before this line or the concat raises a NameError and
# `result_df` is never created.
result_df = pd.concat([teid, pred_df], axis=1)

In [1]:
# Write the predictions to disk. The positional row index is not part of the
# result, so it is left out rather than written as an extra unnamed column.
# NOTE(review): confirm against the expected submission format.
result_df.to_csv('submission_linear.csv', index=False)

NameError: name 'result_df' is not defined