In [None]:
# Import libraries

import os
import random
import re
import shutil
import tarfile
import time

import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
from six.moves import urllib
from sklearn import model_selection

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler

import torchvision.transforms as T
import torchvision.models as models
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder

In [None]:
# Script to download dataset

DOWNLOAD_ROOT = "https://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz"
DATA_PATH = os.path.join("datasets", "flowers")
IMAGES_PATH = os.path.join(DATA_PATH, "jpg")


def fetch_housing_data(data_url=DOWNLOAD_ROOT, data_path=DATA_PATH):
    """Download a gzipped tar archive and unpack it into ``data_path``.

    Despite its name, the defaults of this function fetch the 17-flowers
    archive referenced by ``DOWNLOAD_ROOT``.

    Parameters
    ----------
    data_url : str
        URL of the ``.tgz`` archive to download.
    data_path : str
        Directory that receives both the downloaded archive (stored as
        ``17flowers.tgz``) and its extracted contents. Created if missing.

    Raises
    ------
    Whatever ``urlretrieve`` or ``tarfile`` raise on a failed download or a
    broken archive; nothing is swallowed here.
    """
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(data_path, exist_ok=True)

    tgz_path = os.path.join(data_path, "17flowers.tgz")
    urllib.request.urlretrieve(data_url, tgz_path)

    # The context manager closes the archive even when extraction fails.
    with tarfile.open(tgz_path) as data_tgz:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse members that would escape
            # data_path (absolute paths, "..", links pointing outside).
            data_tgz.extractall(path=data_path, filter="data")
        else:
            data_tgz.extractall(path=data_path)

In [None]:
# Download and unpack the archive using the notebook-level URL and target folder.
fetch_housing_data(DOWNLOAD_ROOT, DATA_PATH)

In [None]:
### PREPARING TRAINING AND TEST DATA

# Entries of the extracted image folder.
# NOTE(review): this lists whatever IMAGES_PATH contains (not class names);
# `classes` is assigned again from the training folder in the
# "Exploring Dataset" cell - confirm this first value is still needed.
classes = os.listdir(IMAGES_PATH)

# prepare training, validation, test data PATHS

# Text file, expected inside the image folder, with one image file name per line
LIST_FILE_PATH = os.path.join(IMAGES_PATH, "files.txt")
DATASET_PATH = os.path.join(DATA_PATH, "prepared_dataset_1")

TRAINING_PATH = os.path.join(DATASET_PATH, "training")
VALIDATION_PATH = os.path.join(DATASET_PATH, "validation")
TEST_PATH = os.path.join(DATASET_PATH, "test")

# Create the split folders up front: the dataset-creation cell makes one
# sub-folder per class inside each of them. makedirs(..., exist_ok=True) also
# creates DATASET_PATH itself and is safe to re-run.
for _split_path in (TRAINING_PATH, VALIDATION_PATH, TEST_PATH):
    os.makedirs(_split_path, exist_ok=True)

In [None]:
# Load the image file names (one per line), dropping trailing whitespace/newlines.

with open(LIST_FILE_PATH) as listing:
    all_files_list = [entry.rstrip() for entry in listing]

In [None]:
# resize images

DATASET_PATH = os.path.join(DATA_PATH, "prepared_dataset_1")

# Folder that receives the resized copies. The dataset-creation cell reads its
# images from datasets/flowers/jpg1, so the copies must be written there and
# not into the current working directory.
RESIZED_IMAGES_PATH = os.path.join(DATA_PATH, "jpg1")
RESIZED_IMAGE_SIZE = (224, 224)  # size tuple handed to PIL's Image.resize

os.makedirs(RESIZED_IMAGES_PATH, exist_ok=True)

resized_count = 0
for file_name in all_files_list:

    full_file_name = os.path.join(IMAGES_PATH, file_name)

    # Entries of the list that do not exist on disk (including blank lines)
    # are skipped, as before.
    if not os.path.isfile(full_file_name):
        continue

    # The context manager releases the source file handle after each image.
    with Image.open(full_file_name) as im:
        im.resize(RESIZED_IMAGE_SIZE).save(os.path.join(RESIZED_IMAGES_PATH, file_name))
    resized_count += 1

# One summary line instead of printing and displaying every single image.
print("Resized images written:", resized_count, "->", RESIZED_IMAGES_PATH)

In [None]:
# create list of files

# Blank lines are dropped so every entry is a real file name.
with open(LIST_FILE_PATH, "r") as file:
    file_list = [line.rstrip() for line in file if line.strip()]

# 1-based positions that are multiples of the class size (80 images per class)
# NOTE(review): range() stops before len(file_list), so a boundary equal to the
# list length itself is not included - confirm that this is intended.

list_classes = [i for i in range(1, len(file_list)) if i % 80 == 0]

# create datasets

#- prepared_dataset
#-- training
#--- flower_1
#--- flower_2 ...

# declare location of the resized images
# NOTE(review): this rebinds the notebook-wide IMAGES_PATH; cells that are
# re-run after this one will see the "jpg1" folder instead of "jpg".

IMAGES_PATH = os.path.join("datasets", "flowers", "jpg1")

NUM_CLASSES = 17            # class folders flower_1 ... flower_17
IMAGES_PER_CLASS = 80       # consecutive image numbers that form one class
HOLDOUT_PER_CLASS = 20      # images per class kept out of the training set
VALIDATION_PER_CLASS = 10   # part of the hold-out that becomes the validation set
SPLIT_SEED = 42             # fixed seed: re-running the cell reproduces the same split


def copy_images(file_names, source_dir, target_dir):
    """Copy the named files from ``source_dir`` into ``target_dir``.

    Names that do not exist in ``source_dir`` are skipped.

    Returns
    -------
    int
        Number of files that were actually copied.
    """
    copied = 0
    for file_name in file_names:
        full_file_name = os.path.join(source_dir, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, target_dir)
            copied += 1
    return copied


for i in range(1, NUM_CLASSES + 1):

    TRAIN_DIR = os.path.join(TRAINING_PATH, "flower_{}".format(i))
    VAL_DIR = os.path.join(VALIDATION_PATH, "flower_{}".format(i))
    TEST_DIR = os.path.join(TEST_PATH, "flower_{}".format(i))
    # makedirs(..., exist_ok=True) also creates missing parent folders and
    # does not fail when the cell is executed a second time.
    for class_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
        os.makedirs(class_dir, exist_ok=True)

    # Image numbers belonging to class i: (upper - IMAGES_PER_CLASS, upper]
    upper = IMAGES_PER_CLASS * i
    lower = upper - IMAGES_PER_CLASS

    # list of all images in one class (selected by the first number in the name)
    list_total = []
    for entry in file_list:
        name = entry.strip()
        number = re.search(r'\d+', name)
        if number and lower < int(number.group()) <= upper:
            list_total.append(name)

    # train_test_split shuffles by itself. The fixed random_state keeps the
    # split identical between runs, so a re-run cannot copy a different split
    # on top of the folders written earlier.
    training_dataset, holdout_dataset = model_selection.train_test_split(
        list_total, test_size=HOLDOUT_PER_CLASS, random_state=SPLIT_SEED
    )
    test_dataset, valid_dataset = model_selection.train_test_split(
        holdout_dataset, test_size=VALIDATION_PER_CLASS, random_state=SPLIT_SEED
    )

    # copying files into subsequent datasets
    copied = (
        copy_images(training_dataset, IMAGES_PATH, TRAIN_DIR)
        + copy_images(valid_dataset, IMAGES_PATH, VAL_DIR)
        + copy_images(test_dataset, IMAGES_PATH, TEST_DIR)
    )
    if copied != len(list_total):
        # Missing source files are skipped, but no longer silently.
        print("WARNING: flower_{}: copied {} of {} listed images from {}".format(
            i, copied, len(list_total), IMAGES_PATH))

In [None]:
### Exploring Dataset

# os.listdir returns entries in arbitrary order, so the class folders are
# sorted to get the same class order on every run; entries that are not
# directories are not counted as classes.
classes = sorted(
    entry for entry in os.listdir(TRAINING_PATH)
    if os.path.isdir(os.path.join(TRAINING_PATH, entry))
)
print("Total Classes: ",len(classes))

#Counting total train, valid & test images

def count_images(split_path, class_names):
    """Return the total number of entries in the class sub-folders of ``split_path``."""
    # os.path.join instead of a hard-coded "\\" keeps the paths valid on every OS.
    return sum(len(os.listdir(os.path.join(split_path, name))) for name in class_names)

train_count = count_images(TRAINING_PATH, classes)
valid_count = count_images(VALIDATION_PATH, classes)
test_count = count_images(TEST_PATH, classes)

print("Total train images: ",train_count)
print("Total valid images: ",valid_count)
print("Total test images: ",test_count)

In [None]:
# Full paths of every image, one list per split.
train_imgs = []
valid_imgs = []
test_imgs = []

for _class in classes:

    for split_path, split_imgs in (
        (TRAINING_PATH, train_imgs),
        (VALIDATION_PATH, valid_imgs),
        (TEST_PATH, test_imgs),
    ):
        # os.path.join uses the separator of the running OS instead of a
        # hard-coded "\\", which only produced valid paths on Windows.
        class_dir = os.path.join(split_path, _class)
        # Sorted because os.listdir gives no ordering guarantee.
        for img in sorted(os.listdir(class_dir)):
            split_imgs.append(os.path.join(class_dir, img))

# Integer label of a class = its position in `classes`.
class_to_int = {class_name: index for index, class_name in enumerate(classes)}

In [None]:
### Loading Classification Dataset - FOR METHOD 2: For multi-class data, by inheriting Dataset class

def get_transform():
    """Build the image pipeline: a ``Compose`` holding a single ``ToTensor`` step."""
    steps = [T.ToTensor()]
    return T.Compose(steps)

class FlowerDataset(Dataset):
    """Dataset over a list of image paths whose parent folder names the class.

    Parameters
    ----------
    imgs_list : sequence of str
        Paths of the image files; the folder directly containing each file
        is used as its class name.
    class_to_int : dict
        Maps a class (folder) name to its integer label.
    transforms : callable, optional
        Applied to the loaded image before it is returned.
    """

    def __init__(self, imgs_list, class_to_int, transforms = None):

        super().__init__()
        self.imgs_list = imgs_list
        self.class_to_int = class_to_int
        self.transforms = transforms

    @staticmethod
    def _class_name_from_path(image_path):
        """Return the name of the folder that directly contains ``image_path``.

        Both "/" and "\\" are accepted as separators, so the lookup works no
        matter which of the two was used to build the path.
        """
        return image_path.replace("\\", "/").split("/")[-2]

    def __getitem__(self, index):
        """Load one sample and return ``(image, label)``.

        The image is read with OpenCV, converted from BGR to RGB, cast to
        float32 and divided by 255 before the optional transforms run.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the image file.
        KeyError
            If the parent folder name is not a key of ``class_to_int``.
        """
        image_path = self.imgs_list[index]

        #Reading image
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/unreadable file with None; fail
            # here with the offending path instead of inside cvtColor.
            raise FileNotFoundError("Could not read image: {}".format(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        #Retrieving class label
        label = self.class_to_int[self._class_name_from_path(image_path)]

        #Applying transforms on image
        if self.transforms:
            image = self.transforms(image)

        return image, label

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.imgs_list)