In [16]:
import torch
import os
import numpy as np
import cv2
from tqdm import tqdm
from pathlib import Path
from torch import nn
import torchvision
from torchvision import datasets, models, transforms
from skimage import io 
from skimage import transform
import shutil
from sklearn.model_selection import train_test_split
import shutil
from os import walk

In [17]:

os.makedirs("test_yerevan", exist_ok=True)
os.makedirs("test_london", exist_ok=True)
os.makedirs("yerevan", exist_ok=True)
os.makedirs("london", exist_ok=True)
os.makedirs("data/train", exist_ok=True)
os.makedirs("data/valid", exist_ok=True)
os.makedirs("data/train/yerevan", exist_ok=True)
os.makedirs("data/valid/yerevan", exist_ok=True)
os.makedirs("data/train/london", exist_ok=True)
os.makedirs("data/valid/london", exist_ok=True)
os.makedirs("data/test/yerevan", exist_ok=True)
os.makedirs("data/test/london", exist_ok=True)


In [21]:
f = []
yerevan = "/test_yerevan"
london = "/test_london"
path = os.getcwd()
for i in range(1,7):
    for (dirpath, dirnames, filenames) in walk(path + "/data/" + str(i)):
        for file in filenames:
            f.append((dirpath + "/" + file, file))
        break
for i in f:
    if "london" in i[0]:
        shutil.copy(i[0], path + london + "/" + i[1])
    else:
        shutil.copy(i[0], path + yerevan + "/" + i[1])

In [22]:
f = []
for (dirpath, dirnames, filenames) in walk(path + "/data/test"):
    for file in filenames:
        f.append((dirpath + "/" + file, file))
    break
for i in f:
    if "london" in i[0]:
        shutil.copy(i[0], path + london + "/" + i[1])
    else:
        shutil.copy(i[0], path + yerevan + "/" + i[1])

In [23]:
def resizer(src, dis, size):
    os.makedirs(dis, exist_ok=True)
    for file in tqdm(os.listdir(src)):
        try:
            img = io.imread(os.path.join(src, file))
            img = transform.resize(img, (size, size),
                                            order=1, mode='constant',
                                            cval=0, clip=True,
                                            preserve_range=True,
                                            anti_aliasing=True)
            io.imsave(os.path.join(dis,file),img.astype(np.uint8))
        except:
            print("coudn't do it for ", file)

In [24]:
resizer("./yerevan", "./yerevan_resized", 100)
resizer("./london", "./london_resized", 100)
resizer("./test_yerevan", "./test_yerevan_resized", 100)
resizer("./test_london", "./test_london_resized", 100)

 76%|███████▌  | 53/70 [01:04<00:04,  3.88it/s]

coudn't do it for  9_yerevan.12


100%|██████████| 70/70 [01:34<00:00,  1.34s/it]
 25%|██▌       | 19/76 [00:14<00:18,  3.04it/s]

coudn't do it for  176_london.12


 36%|███▌      | 27/76 [00:17<00:13,  3.69it/s]

coudn't do it for  142_london.$_


 71%|███████   | 54/76 [01:00<00:20,  1.08it/s]

coudn't do it for  158_london.-jpg


 76%|███████▋  | 58/76 [01:02<00:11,  1.56it/s]

coudn't do it for  90_london.Be


100%|██████████| 76/76 [02:07<00:00,  1.68s/it]
100%|██████████| 30/30 [00:44<00:00,  1.47s/it]
 10%|█         | 1/10 [00:00<00:01,  7.56it/s]

coudn't do it for  242_yerevan.jpg


100%|██████████| 10/10 [00:04<00:00,  2.42it/s]


In [25]:
yerevan = list(Path("yerevan_resized").glob("*"))
london = list(Path("london_resized").glob("*"))
test_yerevan = list(Path("test_yerevan_resized").glob("*"))
test_london = list(Path("test_london_resized").glob("*"))
yerevan_train, yerevan_valid = train_test_split(yerevan)
london_train, london_valid = train_test_split(london)

y_train = [0]*len(yerevan_train) + [1]*len(london_train)
y_valid = [0]*len(yerevan_valid) + [1]*len(london_valid)
y_test = [0]*len(test_yerevan) + [1]*len(test_london)


In [26]:
def copy_(src, dist):
    for x in src:
        shutil.copy(str(x), os.path.join(dist, x.parts[-1]))

In [27]:
copy_(yerevan_train, "data/train/yerevan")
copy_(yerevan_valid, "data/valid/yerevan")
copy_(london_train, "data/train/london")
copy_(london_valid, "data/valid/london")
copy_(test_yerevan, "data/test/yerevan")
copy_(test_london, "data/test/london")

In [8]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(100),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(100),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(100),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}
data_dir = './data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'valid', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'valid', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid', 'test']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [9]:
class DNet(nn.Module):
    def __init__(self, layer_sizes):
        super(DNet, self).__init__()
        self.layer1 = nn.Linear(layer_sizes[0], layer_sizes[1])
        self.layer2 = nn.Linear(layer_sizes[1], layer_sizes[2])
        self.layer3 = nn.Linear(layer_sizes[2], layer_sizes[3])
        self.activation1 = nn.functional.relu
        self.activation2 = nn.functional.softmax
    
    def forward(self, x):
        x = self.activation1(self.layer1(x))
        x = self.activation1(self.layer2(x))
        x = self.activation2(self.layer3(x))
        return x

In [10]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, dense=True):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
#                 print(inputs.shape)
                if dense:
                    inputs = inputs.flatten(1)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [11]:
batchsize = 3
batch_count = len(y_train)//batchsize + 1

In [12]:
import torch.optim as optim
import time, copy
criterion = nn.CrossEntropyLoss()
net = DNet([30000, 6000, 600, 2])

optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [17]:
net = train_model(net, criterion, optimizer, exp_lr_scheduler,
                       num_epochs=26)

Epoch 0/25
----------


  del sys.path[0]


train Loss: 0.7073 Acc: 0.4095
valid Loss: 0.6086 Acc: 0.7222

Epoch 1/25
----------
train Loss: 0.6738 Acc: 0.5524
valid Loss: 0.5486 Acc: 0.8889

Epoch 2/25
----------
train Loss: 0.7094 Acc: 0.5143
valid Loss: 0.5803 Acc: 0.6944

Epoch 3/25
----------
train Loss: 0.6346 Acc: 0.7048
valid Loss: 0.5773 Acc: 0.6667

Epoch 4/25
----------
train Loss: 0.6187 Acc: 0.7429
valid Loss: 0.5341 Acc: 0.7222

Epoch 5/25
----------
train Loss: 0.5917 Acc: 0.7238
valid Loss: 0.4712 Acc: 0.8889

Epoch 6/25
----------
train Loss: 0.6119 Acc: 0.6571
valid Loss: 0.5020 Acc: 0.8056

Epoch 7/25
----------
train Loss: 0.6093 Acc: 0.6952
valid Loss: 0.4899 Acc: 0.8056

Epoch 8/25
----------
train Loss: 0.5820 Acc: 0.7333
valid Loss: 0.4858 Acc: 0.8056

Epoch 9/25
----------
train Loss: 0.6001 Acc: 0.6762
valid Loss: 0.4773 Acc: 0.8056

Epoch 10/25
----------
train Loss: 0.5894 Acc: 0.7524
valid Loss: 0.4889 Acc: 0.8056

Epoch 11/25
----------
train Loss: 0.6091 Acc: 0.6952
valid Loss: 0.4986 Acc: 0.8056



In [40]:
phase ="test"

cnet.eval()   # Set model to evaluate mode

running_loss = 0.0
running_corrects = 0

# Iterate over data.
for inputs, labels in dataloaders[phase]:
#                 print(inputs.shape)
#     inputs = inputs.flatten(1)
    inputs = inputs.to(device)
    labels = labels.to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward
    # track history if only in train
    with torch.set_grad_enabled(phase == 'train'):
        outputs = cnet(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    # statistics
    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels.data)

loss = running_loss / dataset_sizes[phase]
acc = running_corrects.double() / dataset_sizes[phase]

print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    phase, loss, acc))

test Loss: 0.3904 Acc: 0.9744




In [37]:
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
class CNet(Module):   
    def __init__(self, out_channel1=4, out_channel2=4):
        super(CNet, self).__init__()

        self.cnn_layers = Sequential(
            # Defining a 2D convolution layer
            Conv2d(3, out_channel1, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(out_channel1),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Defining another 2D convolution layer
            Conv2d(out_channel1, out_channel2, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(out_channel2),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
        )

        self.linear_layers = Sequential(
            Linear(2500, 2)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = nn.functional.softmax(self.linear_layers(x))
        return x

In [38]:
cnet=CNet()

In [39]:
optimizer_cn = optim.SGD(cnet.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_cn, step_size=7, gamma=0.1)
cnet=train_model(cnet, criterion, optimizer_cn, exp_lr_scheduler, 50, False)

Epoch 0/49
----------




train Loss: 0.8020 Acc: 0.4891
valid Loss: 0.8123 Acc: 0.5000

Epoch 1/49
----------
train Loss: 0.8016 Acc: 0.5109
valid Loss: 0.8128 Acc: 0.5000

Epoch 2/49
----------
train Loss: 0.8017 Acc: 0.5109
valid Loss: 0.8126 Acc: 0.5000

Epoch 3/49
----------
train Loss: 0.7995 Acc: 0.5109
valid Loss: 0.8096 Acc: 0.5000

Epoch 4/49
----------
train Loss: 0.7584 Acc: 0.5182
valid Loss: 0.5562 Acc: 0.7500

Epoch 5/49
----------
train Loss: 0.6977 Acc: 0.5474
valid Loss: 0.5422 Acc: 0.7647

Epoch 6/49
----------
train Loss: 0.6710 Acc: 0.5912
valid Loss: 0.6471 Acc: 0.6029

Epoch 7/49
----------
train Loss: 0.6875 Acc: 0.5839
valid Loss: 0.5551 Acc: 0.7353

Epoch 8/49
----------
train Loss: 0.6389 Acc: 0.6204
valid Loss: 0.4201 Acc: 0.8971

Epoch 9/49
----------
train Loss: 0.6134 Acc: 0.6934
valid Loss: 0.3920 Acc: 0.8971

Epoch 10/49
----------
train Loss: 0.6369 Acc: 0.6496
valid Loss: 0.3936 Acc: 0.9118

Epoch 11/49
----------
train Loss: 0.5952 Acc: 0.7153
valid Loss: 0.3928 Acc: 0.9118



In [51]:
from sklearn.linear_model import LogisticRegression
import numpy as np 
from sklearn.metrics import accuracy_score

In [43]:
image_size = 100
def data(pase):
    data_ = []
    path = os.getcwd()
    for city in ["yerevan", "london"]:
        for image in os.listdir(f'data/{pase}/{city}'): 
            path_ = os.path.join(path + f'/data/{pase}/{city}', image)
            img = cv2.imread(path_ , cv2.IMREAD_GRAYSCALE) 
            data_.append(img.flatten() / 255)
        if city == "yerevan":
            y = np.ones(len(data_))
    y = np.append(y, np.zeros(len(data_) - len(y)))
    data_ = np.array(data_)
    indices = np.arange(data_.shape[0])
    np.random.shuffle(indices)
    return data_[indices], y[indices]

In [44]:
train_x, train_y = data("train")
test_x, test_y = data("test")
val_x, val_y = data("valid")

In [46]:
logisticRegr = LogisticRegression()

In [47]:
logisticRegr.fit(train_x, train_y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [52]:
val_pred = logisticRegr.predict(val_x)
accuracy_score(val_y, val_pred)

1.0

In [53]:
test_pred = logisticRegr.predict(test_x)
accuracy_score(test_y, test_pred)

1.0