In [22]:
import os
import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split

In [34]:
def get_samples(path, train_size=0.8, prop_size=0.02):
    files = []
    for f in os.listdir(path):
        if (".png" in f) or (".jpg" in f) or (".jpeg" in f):
                if random.random() <= prop_size:
                    files.append(os.path.join(path, f))
    train, test = train_test_split(files, train_size=train_size)
    return train, test



def blur_and_save(files, save_name, blur_level):
    precise_data = np.ndarray(shape=(len(files), 256, 256, 3), dtype=np.float32)
    blurred_data = np.ndarray(shape=(len(files), 256, 256, 3), dtype=np.float32)
    for i in tqdm(range(len(files))):
        try:        
            img = np.array(Image.open(files[i]))
            img = img[(img.shape[0] // 2 - 256 // 2):(img.shape[0] // 2 + 256 // 2),
                      (img.shape[1] // 2 - 256 // 2):(img.shape[1] // 2 + 256 // 2), :]
            blurred = cv2.blur(img, (blur_level, blur_level))
                        
            precise_data[i] = img
            blurred_data[i] = blurred
        except Exception as e:
            print(e) 
    
    np.save(f"data/{save_name}_precise_{len(files)}.npy", precise_data)
    np.save(f"data/{save_name}_blurred_{len(files)}.npy", blurred_data)

In [35]:
train, test = get_samples("/home/ruslan/place_dataset/test_256")
len(train), len(test)

(5350, 1338)

In [21]:
blur_and_save("/home/ruslan/place_dataset/test_256", "", 1)

4922


In [36]:
blur_and_save(train, "train_data", blur_level=14)
blur_and_save(test, "test_data", blur_level=14)

  1%|          | 57/5350 [00:00<01:05, 80.67it/s]

too many indices for array


 14%|█▍        | 748/5350 [00:04<00:52, 88.18it/s] 

too many indices for array


 15%|█▍        | 799/5350 [00:05<01:18, 57.67it/s]

too many indices for array


 26%|██▌       | 1399/5350 [00:23<02:13, 29.55it/s]

too many indices for array


 40%|████      | 2143/5350 [00:50<03:25, 15.62it/s]

too many indices for array


 41%|████▏     | 2212/5350 [00:53<02:20, 22.37it/s]

too many indices for array


 44%|████▎     | 2331/5350 [00:57<01:21, 36.87it/s]

too many indices for array


 44%|████▍     | 2357/5350 [00:57<01:45, 28.34it/s]

too many indices for array


 52%|█████▏    | 2787/5350 [01:17<01:52, 22.71it/s]

too many indices for array


 56%|█████▌    | 2989/5350 [01:23<01:11, 32.93it/s]

too many indices for array


 60%|█████▉    | 3202/5350 [01:30<00:57, 37.61it/s]

too many indices for array


 60%|██████    | 3214/5350 [01:31<00:56, 37.88it/s]

too many indices for array


 75%|███████▌  | 4024/5350 [02:01<00:40, 33.07it/s]

too many indices for array


 82%|████████▏ | 4391/5350 [02:16<00:51, 18.73it/s]

too many indices for array


 89%|████████▉ | 4754/5350 [02:30<00:19, 29.86it/s]

too many indices for array


 94%|█████████▎| 5008/5350 [02:41<00:14, 22.90it/s]

too many indices for array


100%|██████████| 5350/5350 [02:54<00:00, 30.70it/s]
  5%|▍         | 64/1338 [00:00<00:25, 50.61it/s]

too many indices for array


 38%|███▊      | 507/1338 [00:01<00:02, 294.41it/s]

too many indices for array


 78%|███████▊  | 1040/1338 [00:03<00:00, 301.20it/s]

too many indices for array


100%|██████████| 1338/1338 [00:05<00:00, 257.35it/s]
