In [None]:
import torch
import os
import numpy as np
import cv2
from tqdm import tqdm
from pathlib import Path
from torch import nn
import torchvision
from torchvision import datasets, models, transforms
from skimage import io 
from skimage import transform
import shutil
from sklearn.model_selection import train_test_split
import shutil
from os import walk

In [None]:

# Create every working folder up front: the raw per-city folders first, then
# the train/valid/test tree under data/ that ImageFolder reads later on.
for _directory in (
    "test_yerevan",
    "test_london",
    "yerevan",
    "london",
    "data/train",
    "data/valid",
    "data/train/yerevan",
    "data/valid/yerevan",
    "data/train/london",
    "data/valid/london",
    "data/test/yerevan",
    "data/test/london",
):
    os.makedirs(_directory, exist_ok=True)


In [None]:
f = []
yerevan = "/test_yerevan"
london = "/test_london"
path = os.getcwd()

# Gather (full path, file name) pairs for the files sitting directly inside
# data/1 ... data/6; sub-directories are not descended into.
for folder_no in range(1, 7):
    top_level = next(walk(path + "/data/" + str(folder_no)), None)
    if top_level is None:
        # Folder missing or unreadable: walk() yields nothing for it.
        continue
    parent, _, names = top_level
    f.extend((parent + "/" + name, name) for name in names)

# Route each file by whether "london" occurs anywhere in its full path;
# everything else is treated as a Yerevan image.
# NOTE(review): both this cell and the next one copy into the test_* folders,
# and nothing visible here fills ./yerevan and ./london — confirm the training
# images are supplied outside this notebook.
for full_path, name in f:
    destination = london if "london" in full_path else yerevan
    shutil.copy(full_path, path + destination + "/" + name)

In [None]:
# Same collection step for the files directly inside data/test
# (sub-directories are not descended into).
f = []
top_level = next(walk(path + "/data/test"), None)
if top_level is not None:
    parent, _, names = top_level
    f.extend((parent + "/" + name, name) for name in names)

# Files whose full path mentions "london" go to the London test folder,
# all remaining ones to the Yerevan test folder.
for full_path, name in f:
    destination = london if "london" in full_path else yerevan
    shutil.copy(full_path, path + destination + "/" + name)

In [None]:
def resizer(src, dis, size):
    """Resize every image in ``src`` to ``size`` x ``size`` and save it in ``dis``.

    Each entry of ``src`` is read with ``skimage.io.imread``, resized with
    ``skimage.transform.resize`` (value range preserved, anti-aliasing on) and
    written under the same file name into ``dis`` as ``uint8``.

    Args:
        src: Directory holding the source images.
        dis: Output directory; created if it does not exist.
        size: Edge length in pixels of the square output.

    Returns:
        None. Entries that cannot be processed are reported on stdout and
        skipped, so one bad file does not stop the whole run.
    """
    os.makedirs(dis, exist_ok=True)
    for file in tqdm(os.listdir(src)):
        try:
            img = io.imread(os.path.join(src, file))
            img = transform.resize(img, (size, size),
                                   order=1, mode='constant',
                                   cval=0, clip=True,
                                   preserve_range=True,
                                   anti_aliasing=True)
            io.imsave(os.path.join(dis, file), img.astype(np.uint8))
        except Exception as exc:
            # Catch Exception rather than everything: a bare ``except`` also
            # swallows KeyboardInterrupt/SystemExit, which made the loop
            # impossible to interrupt. The reason is printed so failures can
            # be diagnosed.
            print("coudn't do it for ", file, "-", repr(exc))

In [None]:
# Write a 100x100 copy of each raw folder into a sibling "<name>_resized" folder.
for folder in ("yerevan", "london", "test_yerevan", "test_london"):
    resizer("./" + folder, "./" + folder + "_resized", 100)

In [None]:
# Fixed seed for the train/valid split. copy_ below only ever adds files to
# data/train and data/valid, so an unseeded split would, on a re-run, leave
# images from the previous split behind and mix training and validation data.
SPLIT_SEED = 42

# Sorted listings: glob order depends on the file system, and the seeded split
# is only reproducible when its input order is.
yerevan = sorted(Path("yerevan_resized").glob("*"))
london = sorted(Path("london_resized").glob("*"))
test_yerevan = sorted(Path("test_yerevan_resized").glob("*"))
test_london = sorted(Path("test_london_resized").glob("*"))
yerevan_train, yerevan_valid = train_test_split(yerevan, random_state=SPLIT_SEED)
london_train, london_valid = train_test_split(london, random_state=SPLIT_SEED)

# Label lists in "all Yerevan, then all London" order: Yerevan = 0, London = 1.
y_train = [0]*len(yerevan_train) + [1]*len(london_train)
y_valid = [0]*len(yerevan_valid) + [1]*len(london_valid)
y_test = [0]*len(test_yerevan) + [1]*len(test_london)


In [None]:
def copy_(src, dist):
    """Copy each path in ``src`` into directory ``dist``, keeping its file name.

    Args:
        src: Iterable of path objects exposing ``.parts`` (e.g. ``pathlib.Path``).
        dist: Existing destination directory.
    """
    for item in src:
        target = os.path.join(dist, item.parts[-1])
        shutil.copy(str(item), target)

In [None]:
# Fill the data/<split>/<city> tree from the path lists built above.
for paths, target in (
    (yerevan_train, "data/train/yerevan"),
    (yerevan_valid, "data/valid/yerevan"),
    (london_train, "data/train/london"),
    (london_valid, "data/valid/london"),
    (test_yerevan, "data/test/yerevan"),
    (test_london, "data/test/london"),
):
    copy_(paths, target)

In [None]:
def _eval_transform():
    """Build the non-random pipeline used for the 'valid' and 'test' splits."""
    return transforms.Compose([
        transforms.Resize(100),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


# Only the training split gets random augmentation (random crop + flip).
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(100),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'valid': _eval_transform(),
    'test': _eval_transform(),
}

data_dir = './data'
_splits = ('train', 'valid', 'test')

# One ImageFolder dataset and one DataLoader per split, keyed by split name.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in _splits
}
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split], batch_size=4, shuffle=True, num_workers=4
    )
    for split in _splits
}
dataset_sizes = {split: len(image_datasets[split]) for split in _splits}
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
class DNet(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        layer_sizes: Four integers ``[in, hidden1, hidden2, out]`` giving the
            width of the input, the two hidden layers and the output.

    ``forward`` returns raw, unnormalised class scores (logits). The model is
    trained with ``nn.CrossEntropyLoss``, which applies log-softmax itself;
    squashing the scores through a sigmoid first confines them to (0, 1) and
    weakens the training signal. Because sigmoid is monotonic, the arg-max
    prediction is the same with or without it.
    """

    def __init__(self, layer_sizes):
        super(DNet, self).__init__()
        self.layer1 = nn.Linear(layer_sizes[0], layer_sizes[1])
        self.layer2 = nn.Linear(layer_sizes[1], layer_sizes[2])
        self.layer3 = nn.Linear(layer_sizes[2], layer_sizes[3])
        self.activation1 = nn.functional.relu

    def forward(self, x):
        """Map a ``(batch, layer_sizes[0])`` tensor to ``(batch, layer_sizes[3])`` logits."""
        x = self.activation1(self.layer1(x))
        x = self.activation1(self.layer2(x))
        # No output activation: the loss expects logits.
        return self.layer3(x)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, dense=True):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'valid' phase over the
    module-level ``dataloaders``; per-phase averages are computed with the
    module-level ``dataset_sizes`` and tensors are placed on the module-level
    ``device``.

    Args:
        model: Network to train; moved to ``device`` before training starts.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: Number of epochs to run.
        dense: When True each batch is flattened to ``(batch, -1)`` before it
            is fed to the model.

    Returns:
        ``model`` with the weights of the epoch that reached the highest
        validation accuracy.
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having been executed first.
    import copy
    import time

    since = time.time()

    # The batches are moved to `device` below, so the model has to live there
    # too. Module.to() moves the parameters in place, so an optimizer that was
    # built from them beforehand keeps working.
    model = model.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Plain int accumulator, so the average below also works when a
            # loader yields no batches.
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                if dense:
                    inputs = inputs.flatten(1)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).item()
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # keep a copy of the weights with the best validation accuracy so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Batch size and the batch count derived from the number of training labels.
# NOTE(review): neither name is referenced elsewhere in the visible cells; the
# DataLoaders above use batch_size=4.
batchsize = 3
batch_count = 1 + len(y_train) // batchsize

In [None]:
import torch.optim as optim
import time, copy
# Cross-entropy loss; the same criterion object is passed to every training run.
criterion = nn.CrossEntropyLoss()

# Input width 30000 = 3 * 100 * 100, the length of a flattened batch item.
net = DNet([3 * 100 * 100, 6000, 600, 2])

# SGD with momentum; the learning rate is multiplied by 0.1 every 7 scheduler steps.
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

In [None]:
# Train the dense network for 26 epochs; train_model's default dense=True
# flattens each batch before the forward pass.
net = train_model(
    model=net,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=26,
)

In [None]:
def evaluate_model(model, phase="test", dense=False):
    """Return ``(mean loss, accuracy)`` of ``model`` over ``dataloaders[phase]``.

    Uses the module-level ``dataloaders``, ``dataset_sizes``, ``criterion`` and
    ``device``. No gradients are tracked and no optimizer is touched.

    Args:
        model: Trained network to evaluate; moved to ``device`` and put in
            eval mode.
        phase: Key of the split to evaluate.
        dense: When True each batch is flattened to ``(batch, -1)`` first
            (needed for the fully connected network).
    """
    model = model.to(device)
    model.eval()   # Set model to evaluate mode

    running_loss = 0.0
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in dataloaders[phase]:
            if dense:
                inputs = inputs.flatten(1)
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            batch_loss = criterion(outputs, labels)

            # The loss is a batch mean, so weight it by the batch size.
            running_loss += batch_loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data).item()

    return (running_loss / dataset_sizes[phase],
            running_corrects / dataset_sizes[phase])


phase = "test"

# Evaluate the network trained in the cells above. The original cell used
# `cnet`, which is only created further down, so a fresh top-to-bottom run
# failed here with a NameError. Once `cnet` has been trained it can be scored
# the same way with evaluate_model(cnet, "test", dense=False).
loss, acc = evaluate_model(net, phase, dense=True)

print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    phase, loss, acc))

In [None]:
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
class CNet(Module):
    """Small CNN: two conv / batch-norm / ReLU / max-pool stages and a linear head.

    Args:
        out_channel1: Number of channels produced by the first convolution.
        out_channel2: Number of channels produced by the second convolution.
        input_size: Edge length of the square input images. Each of the two
            pooling stages halves it, so the classifier sees
            ``out_channel2 * (input_size // 4) ** 2`` features (2500 with the
            defaults).

    ``forward`` returns raw class scores (logits) for the two classes; the
    model is trained with ``nn.CrossEntropyLoss``, which applies log-softmax
    itself, so no softmax is applied here.
    """

    def __init__(self, out_channel1=4, out_channel2=4, input_size=100):
        # The constructor must be named __init__: with a plain `init` method
        # the layers were never created and the module had no parameters.
        super(CNet, self).__init__()
        self.cnn = Sequential(
            Conv2d(3, out_channel1, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(out_channel1),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),

            Conv2d(out_channel1, out_channel2, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(out_channel2),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
        )
        # The padded 3x3 convolutions keep the spatial size; only the two
        # stride-2 pools shrink it.
        pooled_side = input_size // 4
        self.linear = Sequential(
            Linear(out_channel2 * pooled_side * pooled_side, 2)
        )

    def forward(self, x):
        """Map a ``(batch, 3, input_size, input_size)`` tensor to ``(batch, 2)`` logits."""
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        return self.linear(x)

In [None]:
# Convolutional model built with CNet's default arguments.
cnet = CNet()

In [None]:
# Separate optimizer and scheduler for the CNN (this rebinds exp_lr_scheduler).
optimizer_cn = optim.SGD(params=cnet.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_cn, step_size=7, gamma=0.1)

# 50 epochs; dense=False keeps the batches as image tensors instead of flattening them.
cnet = train_model(
    cnet,
    criterion,
    optimizer_cn,
    exp_lr_scheduler,
    num_epochs=50,
    dense=False,
)

In [None]:
from sklearn.linear_model import LogisticRegression
import numpy as np 
from sklearn.metrics import accuracy_score

In [None]:
image_size = 100
def data(pase):
    """Load one split as flattened grayscale vectors with binary labels.

    Reads every file in ``data/<pase>/yerevan`` and ``data/<pase>/london``
    (relative to the current working directory) as a grayscale image, flattens
    it and scales it by 1/255. Yerevan images are labelled 1, London images 0.

    Args:
        pase: Name of the split folder under ``data/`` (e.g. "train").

    Returns:
        ``(features, labels)`` as NumPy arrays, shuffled with the same random
        permutation (drawn from NumPy's global random state).
    """
    features = []
    labels = []
    path = os.getcwd()
    for city, label in (("yerevan", 1.0), ("london", 0.0)):
        for image in os.listdir(f'data/{pase}/{city}'):
            path_ = os.path.join(path + f'/data/{pase}/{city}', image)
            img = cv2.imread(path_, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising; skip it rather than crash on .flatten().
                print("couldn't read ", path_)
                continue
            features.append(img.flatten() / 255)
            # The label is recorded next to its sample, so labels can never
            # drift out of step with the feature rows.
            labels.append(label)
    data_ = np.array(features)
    y = np.array(labels)
    indices = np.arange(data_.shape[0])
    np.random.shuffle(indices)
    return data_[indices], y[indices]

In [None]:
# Load the three splits in the original order (train, test, valid); each call
# draws its own shuffle from NumPy's global random state.
(train_x, train_y), (test_x, test_y), (val_x, val_y) = (
    data(split) for split in ("train", "test", "valid")
)

In [None]:
# Logistic-regression model on the flattened grayscale pixels,
# created with scikit-learn's default settings.
logisticRegr = LogisticRegression()

In [None]:
# Fit on the training split only; the validation and test splits are scored below.
logisticRegr.fit(train_x, train_y)

In [None]:
# Accuracy on the validation split (shown as the cell output).
val_pred = logisticRegr.predict(val_x)
accuracy_score(val_y, val_pred)

In [None]:
# Accuracy on the test split (shown as the cell output).
test_pred = logisticRegr.predict(test_x)
accuracy_score(test_y, test_pred)