In [1]:
import numpy as np
import pandas as pd
import torch
# import torchvision.transforms as transforms

from PIL import Image
from torch.optim import SGD
from tqdm import tqdm

from dataset_class import get_dataloader
# from cnn_class import CnnModel
# from torch.utils.data import DataLoader
# from test_model import CnnModel
from torch.nn.functional import softmax
from torch.utils.tensorboard import SummaryWriter

In [7]:
train_data = pd.read_csv("./data/train1.txt", sep=',')

validation_data = pd.read_csv("./data/validation1.txt", sep=',')

test_data = pd.read_csv("./data/test.txt", sep=',')

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
len(validation_data.id)

1173

In [6]:
def return_pixel_rgb_values(img_name, path = "./data/train_validation/") :
    return np.array(list(Image.open(path + img_name).getdata())).reshape((16, 16, 3))


train_images = np.array([return_pixel_rgb_values(x) for x in train_data.id])/255

validation_images = np.array([return_pixel_rgb_values(x) for x in validation_data.id])/255

test_images = np.array([return_pixel_rgb_values(x, "./data/test/") for x in test_data.id])/255



train_mean_image = np.mean(train_images, axis=(0,1,2))

validation_mean_image = np.mean(validation_images, axis=(0,1,2))

test_mean_image = np.mean(test_images, axis=(0,1,2))


train_std = np.std(train_images, axis=(0,1,2))

validation_std = np.std(validation_images, axis=(0,1,2))

test_std = np.std(test_images, axis=(0,1,2))

In [15]:

train_dataloader = get_dataloader("./data/train_validation/", "./data/train_txt.txt", "train", tuple(train_mean_image), tuple(train_mean_image), batch_size=7)
validation_dataloader = get_dataloader("./data/train_validation/", "./data/validation_txt.txt", "validation", tuple(validation_mean_image), tuple(validation_std), batch_size=7)
# test_dataloader = get_dataloader("./data/test/", "./data/sample_submission.txt", "test", test_mean_image, test_std)


In [8]:
def train_model(model, optimizer, criterion, train_dataloader, epoch, writer) :

    model.train()

    running_loss = 0.0

    for images,labels in tqdm(train_dataloader, "Train"):

        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        loss = criterion(outputs, labels)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        running_loss += loss.item() * len(images)

    running_loss /= len(train_dataloader.dataset)

    writer.add_scalar(tag="Loss/Train", scalar_value=running_loss, global_step=epoch)
    writer.flush()

In [9]:
def eval_model(model, validation_dataloader, criterion, epoch, writer, best_loss, second_loss, best_accuracy, second_accuracy, nr, validation = True):
    model.eval()
    running_loss = 0.0
    true_preds = 0

    with torch.no_grad():
        for images,labels in tqdm(validation_dataloader, "Validation"):

            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)

            # Compute the loss
            loss = criterion(logits, labels)

            # Compute accuracy
            scores = softmax(logits, dim=1)

            # Get the prediction labels
            pred_label = torch.argmax(scores, dim=1)

            # Get the acc
            true_preds += (pred_label == labels).sum()

            running_loss += loss.item() * len(images)

        accuracy = true_preds / len(validation_dataloader.dataset)
        print(f"acc is {accuracy}")
        if accuracy > 0.62:
            save_model(model,"./models/", "greater_62")
        if accuracy > 0.605:
            save_model(model,"./models/", "greater_60")
        running_loss = running_loss / len(validation_dataloader.dataset)

        if validation :
            writer.add_scalar(scalar_value=accuracy, tag="Acc/Valid", global_step=epoch)
            writer.add_scalar(scalar_value=running_loss, tag="Loss/Valid", global_step=epoch)
        else :
             writer.add_scalar(scalar_value=accuracy, tag="Acc/Train", global_step=epoch)

        writer.flush()

        if running_loss < best_loss :
             torch.save(model.state_dict(), "./models/best_loss.pth")
             best_loss = running_loss
             print(" am salvat modelul cu best loss")
        elif running_loss < second_loss :
             torch.save(model.state_dict(), "./models/second_loss.pth")
             second_loss = running_loss
             print(" am salvat modelul cu second loss")

        if accuracy > best_accuracy :
             torch.save(model.state_dict(), "./models/best_acc.pth")
             best_accuracy = accuracy
             print(" am salvat modelul cu best acc")
        elif accuracy > second_accuracy :
             torch.save(model.state_dict(), "./models/second_acc.pth")
             second_accuracy = accuracy
             print(" am salvat modelul cu second acc")

In [10]:
def save_model(model, path, model_no):
    torch.save(model.state_dict(), path + f"model{model_no}.pth")
    print("am salvat modelul")


In [11]:
 def run_model(lr, epochs, name_log, nr):
    best_loss = 99999.0
    second_loss = 99999.0
    best_accuracy = 0.0
    second_accuracy = 0.0

    model = CnnModel()

    model = model.to(device)

    optimizer = SGD(model.parameters(), lr = lr)

    criterion = torch.nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir="./logs/model_" + name_log)

    for epoch in range(epochs) :
        train_model(model, optimizer, criterion, train_dataloader, epoch, writer)
        # train_model(model, optimizer, criterion, validation_dataloader, epoch, writer)
        eval_model(model, validation_dataloader, criterion, epoch, writer, best_loss, second_loss, best_accuracy, second_accuracy, nr, validation=True)
        # eval_model(model, validation_dataloader, criterion, epoch, writer, best_loss, second_loss, best_accuracy, second_accuracy, nr, validation=False)


In [12]:
dataloader_test = get_dataloader("./data/test/", "./data/sample_submission.txt","dani", test_mean_image, test_std)

In [16]:
k = 17

for lr in [0.01, 0.1] :
    epoch = 400
    run_model(lr, epoch, "test_nr" + str(k), k)
    k += 1


Train: 100%|██████████| 1143/1143 [00:07<00:00, 153.51it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 286.53it/s]


acc is 0.44075021147727966
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 157.32it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 287.54it/s]


acc is 0.47229325771331787
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.85it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 286.46it/s]


acc is 0.5072463750839233
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.30it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 286.85it/s]


acc is 0.5191816091537476
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 160.27it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 278.40it/s]


acc is 0.5609548091888428
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 154.06it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 283.17it/s]


acc is 0.5524296760559082
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 157.74it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 284.17it/s]


acc is 0.5575447678565979
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.56it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 282.54it/s]


acc is 0.5907928347587585
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 156.99it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 287.57it/s]


acc is 0.5907928347587585
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 154.24it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 278.00it/s]


acc is 0.5848252177238464
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.13it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 288.86it/s]


acc is 0.6035805940628052
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.59it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 286.09it/s]


acc is 0.6044330596923828
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 155.36it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 285.54it/s]


acc is 0.6121057271957397
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 157.31it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 274.95it/s]


acc is 0.5942029356956482
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.95it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 285.31it/s]


acc is 0.6010230183601379
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 155.96it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 283.00it/s]


acc is 0.6052855849266052
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.87it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 282.25it/s]


acc is 0.6104006767272949
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.23it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 280.67it/s]


acc is 0.6172208189964294
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.61it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 289.42it/s]


acc is 0.6086956858634949
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.58it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 287.20it/s]


acc is 0.6223359107971191
am salvat modelul
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 158.97it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 287.11it/s]


acc is 0.6214833855628967
am salvat modelul
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 156.63it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 284.34it/s]


acc is 0.6044330596923828
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 156.79it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 284.39it/s]


acc is 0.6010230183601379
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.60it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 287.60it/s]


acc is 0.5873827934265137
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 156.42it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 286.54it/s]


acc is 0.6265984773635864
am salvat modelul
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 155.96it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 275.72it/s]


acc is 0.5882353186607361
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 152.77it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 271.99it/s]


acc is 0.5976129770278931
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 149.55it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 267.44it/s]


acc is 0.6138107776641846
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 152.92it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 273.85it/s]


acc is 0.5720375180244446
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 153.74it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 279.93it/s]


acc is 0.6095481514930725
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 157.75it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 281.29it/s]


acc is 0.6035805940628052
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.99it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 276.26it/s]


acc is 0.6121057271957397
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.72it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 279.47it/s]


acc is 0.5933504104614258
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 157.53it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 288.66it/s]


acc is 0.5805626511573792
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.24it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 280.18it/s]


acc is 0.6061381101608276
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 161.44it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 288.61it/s]


acc is 0.6052855849266052
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 160.31it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 280.84it/s]


acc is 0.6138107776641846
am salvat modelul
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train: 100%|██████████| 1143/1143 [00:07<00:00, 159.63it/s]
Validation: 100%|██████████| 168/168 [00:00<00:00, 288.27it/s]


acc is 0.5899403095245361
 am salvat modelul cu best loss
 am salvat modelul cu best acc


Train:  21%|██        | 237/1143 [00:01<00:05, 153.40it/s]


KeyboardInterrupt: 

In [17]:
f = open("cnn_62_acc_best_acc.txt", 'w')
f.write("id,label\n")
i = 0

d = {}

model = CnnModel()
model.to(device)
model.load_state_dict(torch.load("./models/modelgreater_62.pth"))

with torch.no_grad():
    for data in dataloader_test:
        imgs, labels = data
        imgs = imgs.to(device)
        labels = labels.to(device)
        output = model(imgs)
        _, predicteed = torch.max(output.data, 1)

        for label in predicteed :
            f.write(f"{test_data.id[i]},{label}\n")
            i += 1

f.close()

In [None]:
f = open("cnn_6th_try.txt", 'w')
f.write("id,label\n")
i = 0
net = CnnModel()
net.load_state_dict(torch.load("./model1.pth"))
with torch.no_grad():
    for data in dataloader:
        imgs, labels = data
        output = net(imgs)
        _, predicteed = torch.max(output.data, 1)

        for label in predicteed :
            f.write(f"{test_data.id[i]},{label}\n")
            i+=1

f.close()


In [None]:
lr = 0.01
epochs = 100

model = CnnModel()
model.to(device)

optimizer = SGD(model.parameters(), lr = lr)

criterion = torch.nn.CrossEntropyLoss()

for epoch in range(epochs) :

    running_loss = 0.0

    for i,data in enumerate(train_dataloader, 0):

        images,labels = data
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        if i % 100 == 99:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0


In [None]:
f = open("cnn_third_try.txt", 'w')
f.write("id,label\n")
i = 0

with torch.no_grad():
    for data in dataloader:
        imgs, labels = data
        output = model(imgs)
        _, predicteed = torch.max(output.data, 1)

        for label in predicteed :
            f.write(f"{test_data.id[i]},{label}\n")
            i += 1

f.close()