In [3]:
import random
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [39]:
# Run configuration shared by the cells below.
EPOCHS = 1_000   # number of epochs Trainer.fit iterates over
LR = 0.01        # learning rate handed to the Adam optimizer
BS = 2 ** 14     # batch size (16384) used by every DataLoader
SEED = 41        # value passed to seed_everything

In [6]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects Python processes started after this point (e.g. worker
    # subprocesses); the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; the call is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # defeats the deterministic setting above, so it must be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(seed=SEED)  # Fix the random seeds

In [8]:
# Read the training and validation tables and discard the 'ID' column,
# which is not used as a model input.
# NOTE(review): these are absolute, machine-specific paths.
train_df = pd.read_csv('C:/Users/김승우/Desktop/파이썬/train.csv').drop(columns=['ID'])
val_df = pd.read_csv('C:/Users/김승우/Desktop/파이썬/val.csv').drop(columns=['ID'])

In [9]:
class MyDataset(Dataset):
    """Map-style dataset that serves the rows of a pandas DataFrame as tensors.

    Args:
        df: table with one sample per row. When ``eval_mode`` is true it must
            contain a 'Class' column, which is split off and used as the label.
        eval_mode: if true, items are ``(features, label)`` pairs; otherwise
            every column is treated as a feature and items are the feature
            tensor alone.

    Attributes:
        df: 2-D NumPy array of feature values (labels removed in eval mode).
        labels: NumPy array of 'Class' values; only set in eval mode.
    """

    def __init__(self, df, eval_mode):
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = df['Class'].values
            # drop() returns a new frame, so the caller's DataFrame is untouched.
            self.df = df.drop(columns=['Class']).values
        else:
            self.df = df.values

    def __getitem__(self, index):
        # The sample is built from local variables only. Storing it on the
        # instance would leave stale per-sample state on the dataset after
        # every access.
        features = torch.Tensor(self.df[index])
        if self.eval_mode:
            return features, self.labels[index]
        return features

    def __len__(self):
        return len(self.df)

In [19]:
# Training data carries no labels (eval_mode=False) and is reshuffled each epoch.
train_dataset = MyDataset(df=train_df, eval_mode=False)
train_loader = DataLoader(
    train_dataset,
    batch_size=BS,
    shuffle=True,
    num_workers=0,
)

# Validation data keeps its 'Class' labels (eval_mode=True) and a fixed order.
val_dataset = MyDataset(df=val_df, eval_mode=True)
val_loader = DataLoader(
    val_dataset,
    batch_size=BS,
    shuffle=False,
    num_workers=0,
)

In [35]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder with layer widths 30 -> 64 -> 128 -> 64 -> 30.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU; the
    final Linear layer that maps back to 30 features has no activation.

    NOTE(review): several notebook cells define a class with this same name;
    whichever cell was executed last is the one in effect.
    """

    def __init__(self):
        super().__init__()
        encoder_layers = [
            nn.Linear(30, 64), nn.BatchNorm1d(64), nn.LeakyReLU(),
            nn.Linear(64, 128), nn.BatchNorm1d(128), nn.LeakyReLU(),
        ]
        decoder_layers = [
            nn.Linear(128, 64), nn.BatchNorm1d(64), nn.LeakyReLU(),
            nn.Linear(64, 30),
        ]
        self.Encoder = nn.Sequential(*encoder_layers)
        self.Decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.Decoder(self.Encoder(x))

In [33]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder with layer widths 30 -> 15 -> 3 -> 15 -> 30.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU; the
    final Linear layer that maps back to 30 features has no activation.

    NOTE(review): several notebook cells define a class with this same name;
    whichever cell was executed last is the one in effect.
    """

    def __init__(self):
        super().__init__()
        encoder_layers = [
            nn.Linear(30, 15), nn.BatchNorm1d(15), nn.LeakyReLU(),
            nn.Linear(15, 3), nn.BatchNorm1d(3), nn.LeakyReLU(),
        ]
        decoder_layers = [
            nn.Linear(3, 15), nn.BatchNorm1d(15), nn.LeakyReLU(),
            nn.Linear(15, 30),
        ]
        self.Encoder = nn.Sequential(*encoder_layers)
        self.Decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.Decoder(self.Encoder(x))

In [37]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder with layer widths 30 -> 20 -> 10 -> 20 -> 30.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU; the
    final Linear layer that maps back to 30 features has no activation.

    NOTE(review): several notebook cells define a class with this same name;
    whichever cell was executed last is the one in effect.
    """

    def __init__(self):
        super().__init__()
        encoder_layers = [
            nn.Linear(30, 20), nn.BatchNorm1d(20), nn.LeakyReLU(),
            nn.Linear(20, 10), nn.BatchNorm1d(10), nn.LeakyReLU(),
        ]
        decoder_layers = [
            nn.Linear(10, 20), nn.BatchNorm1d(20), nn.LeakyReLU(),
            nn.Linear(20, 30),
        ]
        self.Encoder = nn.Sequential(*encoder_layers)
        self.Decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.Decoder(self.Encoder(x))

# Score: ~29%

In [41]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder, 30 -> 60 -> 120 -> 240 -> 120 -> 60 -> 30.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU; the
    final Linear layer that maps back to 30 features has no activation.

    NOTE(review): several notebook cells define a class with this same name;
    whichever cell was executed last is the one in effect.
    """

    def __init__(self):
        super().__init__()
        widths = (30, 60, 120, 240)

        # Encoder: one Linear/BatchNorm/LeakyReLU triple per consecutive width pair.
        encoder_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU()]

        # Decoder: the same widths walked backwards; the last step is a bare Linear.
        mirrored = widths[::-1]
        decoder_layers = []
        for n_in, n_out in zip(mirrored[:-2], mirrored[1:-1]):
            decoder_layers += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU()]
        decoder_layers.append(nn.Linear(mirrored[-2], mirrored[-1]))

        self.Encoder = nn.Sequential(*encoder_layers)
        self.Decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.Decoder(self.Encoder(x))
    
# Score: ~77%

In [43]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder, 30 -> 40 -> 50 -> 60 -> 50 -> 40 -> 30.

    Every hidden Linear layer is followed by BatchNorm1d and LeakyReLU; the
    final Linear layer that maps back to 30 features has no activation.

    NOTE(review): several notebook cells define a class with this same name;
    whichever cell was executed last is the one in effect.
    """

    def __init__(self):
        super().__init__()
        widths = (30, 40, 50, 60)

        # Encoder: one Linear/BatchNorm/LeakyReLU triple per consecutive width pair.
        encoder_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU()]

        # Decoder: the same widths walked backwards; the last step is a bare Linear.
        mirrored = widths[::-1]
        decoder_layers = []
        for n_in, n_out in zip(mirrored[:-2], mirrored[1:-1]):
            decoder_layers += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU()]
        decoder_layers.append(nn.Linear(mirrored[-2], mirrored[-1]))

        self.Encoder = nn.Sequential(*encoder_layers)
        self.Decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.Decoder(self.Encoder(x))
    
# Score: ~50%

In [21]:
class Trainer():
    """Train an autoencoder on reconstruction loss and score it as an anomaly detector.

    Training minimises the L1 distance between each input batch and its
    reconstruction. Validation marks a sample as class 1 when the cosine
    similarity between the sample and its reconstruction is below a threshold,
    and reports the macro-averaged F1 score against the true labels.

    Args:
        model: network to train; may be wrapped in ``nn.DataParallel``.
        optimizer: optimizer built over ``model``'s parameters.
        train_loader: iterable yielding feature batches.
        val_loader: iterable yielding ``(features, labels)`` batches.
        scheduler: optional scheduler whose ``step`` receives the validation
            score after every epoch; pass ``None`` to disable.
        device: ``torch.device`` the model and batches are moved to.
    """

    def __init__(self, model, optimizer, train_loader, val_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.device = device
        # Loss Function
        self.criterion = nn.L1Loss().to(self.device)

    @staticmethod
    def _unwrap_model(model):
        """Return the underlying module when ``model`` is a (Distributed)DataParallel wrapper."""
        if isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel)):
            return model.module
        return model

    def fit(self, epochs=None, thr=0.95, save_path='./best_model.pth'):
        """Run the training loop and checkpoint the best-scoring weights.

        Args:
            epochs: number of epochs to run; ``None`` uses the notebook-level
                ``EPOCHS`` constant.
            thr: cosine-similarity threshold forwarded to ``validation``.
            save_path: file that receives the state dict each time the
                validation score improves on the best seen so far.
        """
        if epochs is None:
            epochs = EPOCHS
        self.model.to(self.device)
        best_score = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = []
            for x in self.train_loader:
                x = x.float().to(self.device)
                self.optimizer.zero_grad()

                _x = self.model(x)
                # (prediction, target) is the conventional argument order; L1
                # is symmetric, so the loss value is unaffected.
                loss = self.criterion(_x, x)

                loss.backward()
                self.optimizer.step()

                train_loss.append(loss.item())

            score = self.validation(self.model, thr)
            print(f'Epoch : [{epoch}] Train loss : [{np.mean(train_loss)}] Val Score : [{score}])')

            if self.scheduler is not None:
                self.scheduler.step(score)

            if best_score < score:
                best_score = score
                # Save the trainer's own model, not a notebook global, and
                # strip any DataParallel wrapper so the checkpoint keys carry
                # no 'module.' prefix and load into a bare network.
                torch.save(
                    self._unwrap_model(self.model).state_dict(),
                    save_path,
                    _use_new_zipfile_serialization=False,
                )

    def validation(self, eval_model, thr):
        """Score ``eval_model`` on the validation loader.

        Args:
            eval_model: model to evaluate; it is switched to eval mode.
            thr: samples whose input/reconstruction cosine similarity is below
                this value are predicted as class 1, all others as class 0.

        Returns:
            Macro-averaged F1 score of the predictions against the labels
            yielded by the validation loader.
        """
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        eval_model.eval()
        pred = []
        true = []
        with torch.no_grad():
            for x, y in self.val_loader:
                x = x.float().to(self.device)

                # Evaluate the model that was passed in, not self.model.
                _x = eval_model(x)
                diff = cos(x, _x).cpu().tolist()
                batch_pred = np.where(np.array(diff) < thr, 1, 0).tolist()
                pred += batch_pred
                true += y.tolist()

        return f1_score(true, pred, average='macro')

In [44]:
# Build the network, optimizer and LR scheduler, then start training.
# No train/eval mode is set here: Trainer.fit switches the model to train
# mode at the start of every epoch, so an eval() call before training had
# no effect and only obscured the intent.
model = nn.DataParallel(AutoEncoder())
optimizer = torch.optim.Adam(params = model.parameters(), lr = LR)
# mode='max' because the scheduler is stepped with the validation score,
# where higher is better; the learning rate is halved after 10 epochs
# without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs', min_lr=1e-8, verbose=True)

trainer = Trainer(model, optimizer, train_loader, val_loader, scheduler, device)
trainer.fit()

Epoch : [0] Train loss : [0.627136835030147] Val Score : [0.0010529271374420891])
Epoch : [1] Train loss : [0.5143784710339138] Val Score : [0.0014752523784167275])
Epoch : [2] Train loss : [0.44464688641684397] Val Score : [0.0036866388447829718])
Epoch : [3] Train loss : [0.39273429768426077] Val Score : [0.0300469750844658])
Epoch : [4] Train loss : [0.346460155078343] Val Score : [0.04674926570980915])
Epoch : [5] Train loss : [0.3074759244918823] Val Score : [0.08154858949951575])
Epoch : [6] Train loss : [0.2792639264038631] Val Score : [0.12313211409590935])
Epoch : [7] Train loss : [0.25706655638558523] Val Score : [0.1967026311655492])
Epoch : [8] Train loss : [0.24170141347817012] Val Score : [0.22882626241160542])
Epoch : [9] Train loss : [0.2285268349306924] Val Score : [0.28072243527164215])
Epoch : [10] Train loss : [0.21754797654492514] Val Score : [0.3114867275878908])
Epoch : [11] Train loss : [0.2074135967663356] Val Score : [0.3445639369092003])
Epoch : [12] Train lo

Epoch : [100] Train loss : [0.07311975849526269] Val Score : [0.5044963554870168])
Epoch : [101] Train loss : [0.07374451948063714] Val Score : [0.5042033980974364])
Epoch : [102] Train loss : [0.0737408207995551] Val Score : [0.5041410174273849])
Epoch : [103] Train loss : [0.07344902519668851] Val Score : [0.5041202545351772])
Epoch : [104] Train loss : [0.07440844923257828] Val Score : [0.5043076744515074])
Epoch : [105] Train loss : [0.07369480282068253] Val Score : [0.5044753279074133])
Epoch : [106] Train loss : [0.0739289043205125] Val Score : [0.5046439946618249])
Epoch : [107] Train loss : [0.07156032643147878] Val Score : [0.5041202545351772])
Epoch : [108] Train loss : [0.07260386858667646] Val Score : [0.5048562793580369])
Epoch : [109] Train loss : [0.07270460575819016] Val Score : [0.5048989325478999])
Epoch : [110] Train loss : [0.0745554119348526] Val Score : [0.5047924228377401])
Epoch : [111] Train loss : [0.0764068141579628] Val Score : [0.5048562793580369])
Epoch : 

KeyboardInterrupt: 

In [23]:
# Rebuild the network and restore the best checkpoint written during training.
# map_location='cpu' lets a checkpoint holding CUDA tensors load on a machine
# without a GPU; prediction() moves the model to the target device afterwards.
# NOTE(review): the AutoEncoder definition in effect must match the
# architecture that produced './best_model.pth', or load_state_dict will fail.
model = AutoEncoder()
model.load_state_dict(torch.load('./best_model.pth', map_location='cpu'))
model = nn.DataParallel(model)
model.eval()

DataParallel(
  (module): AutoEncoder(
    (Encoder): Sequential(
      (0): Linear(in_features=30, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
      (3): Linear(in_features=64, out_features=128, bias=True)
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): LeakyReLU(negative_slope=0.01)
    )
    (Decoder): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
      (3): Linear(in_features=64, out_features=30, bias=True)
    )
  )
)

In [25]:
# Read the test table and discard the 'ID' column, as was done for train/val.
# NOTE(review): this is an absolute, machine-specific path.
test_df = pd.read_csv('C:/Users/김승우/Desktop/파이썬/test.csv').drop(columns=['ID'])

In [27]:
# Test rows carry no labels (eval_mode=False); order is kept fixed so the
# predictions line up with the rows of the test file.
test_dataset = MyDataset(df=test_df, eval_mode=False)
test_loader = DataLoader(
    test_dataset,
    batch_size=BS,
    shuffle=False,
    num_workers=0,
)

In [28]:
def prediction(model, thr, test_loader, device):
    """Label every sample from ``test_loader`` as 0 or 1.

    A sample gets label 1 when the cosine similarity between it and the
    model's reconstruction of it is below ``thr``, and label 0 otherwise.

    Args:
        model: network mapping a feature batch to its reconstruction; it is
            moved to ``device`` and switched to eval mode.
        thr: cosine-similarity threshold.
        test_loader: iterable yielding feature batches.
        device: ``torch.device`` used for inference.

    Returns:
        Flat list of integer labels in the order the samples were yielded.
    """
    similarity = nn.CosineSimilarity(dim=1, eps=1e-6)
    model.to(device)
    model.eval()

    labels = []
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.float().to(device)
            reconstructed = model(batch)
            scores = np.array(similarity(batch, reconstructed).cpu().tolist())
            labels.extend(np.where(scores < thr, 1, 0).tolist())
    return labels

In [29]:
# Label the test set with the same 0.95 threshold used for validation during training.
preds = prediction(model=model, thr=0.95, test_loader=test_loader, device=device)

In [31]:
# Put the predictions into the 'Class' column of the sample submission and
# write the result next to the notebook.
# NOTE(review): the input is an absolute, machine-specific path.
submit = pd.read_csv('C:/Users/김승우/Desktop/파이썬/sample_submission.csv').assign(Class=preds)
submit.to_csv('./submit_autoencoder.csv', index=False)