In [4]:
import cv2
import os
from pathlib import Path
import sys
sys.path.append('..')
from augmentations import CenterCrop

In [5]:
# Upper bound (pixels) for each side of the center crop; also part of the output directory names.
CROP = 2048
# Images whose height or width is below this many pixels are skipped by the Flickr loops.
MIN_SIZE = 512

### FLICKR

In [6]:
# Source tree of Flickr images; the crop loop reads files one directory level down.
flickr_root = Path('/mnt/data/kaggle_camera/flickr_images_')
# Destination tree for the cropped copies; the crop size is part of the directory name.
flickr_crop_root = Path('../data/external/flickr_images_{}'.format(CROP))

try:
    # parents=True so the cell also works when ../data/external does not exist yet.
    flickr_crop_root.mkdir(parents=True)
except FileExistsError:
    print('!!! Will overwrite files, is it OK?')

# Mirror the source sub-directories in the destination tree.
# Use `name`, not `stem`: `stem` drops everything after the last dot, so a directory
# such as "a.b" would be created as "a" while the crop loop writes into "a.b/".
for d in flickr_root.iterdir():
    if d.is_dir():
        (flickr_crop_root / d.name).mkdir(exist_ok=True)

In [7]:
# When True, images whose cropped output already exists are not processed again.
skip_exists = True

# Crop every Flickr JPEG (lower- and upper-case extension) and store it as TIFF.
for s_path in list(flickr_root.glob('*/*.jpg')) + list(flickr_root.glob('*/*.JPG')):
    # TIF does not lose quality.
    # with_suffix() swaps only the extension. The previous str.replace('jpg', 'tif')
    # left '.JPG' names untouched (so those were re-encoded as JPEG) and would also
    # rewrite a 'jpg' occurring anywhere else in the path.
    d_path = (flickr_crop_root / s_path.relative_to(flickr_root)).with_suffix('.tif')
    if skip_exists and d_path.exists():
        continue
    img = cv2.imread(str(s_path))
    assert img is not None, s_path
    if img.ndim != 3:
        print('{} dim {}, skip'.format(s_path, img.ndim))
        # The message announces a skip, so actually skip the image.
        continue
    h, w = img.shape[:2]
    if h < MIN_SIZE or w < MIN_SIZE:
        print('{} size {}, skip'.format(s_path, (w,h)))
        continue
    # Crop size as (height, width), each side capped at CROP.
    sz = min(h, CROP), min(w, CROP)
    img = CenterCrop(sz)(img)
    # Make sure the target directory exists; imwrite does not create it.
    d_path.parent.mkdir(parents=True, exist_ok=True)
    # imwrite reports failure through its return value; surface it instead of losing the image silently.
    if not cv2.imwrite(str(d_path), img):
        print('{} write failed'.format(d_path))

### FLICKR NEW

In [8]:
# Source tree of the newer Flickr download and the destination tree for its crops.
flickr_new_root = Path('/mnt/data/kaggle_camera/flickr_images_new')
flickr_new_crop_root = Path('../data/external/flickr_new_images_{}'.format(CROP))

try:
    # parents=True so the cell also works when ../data/external does not exist yet.
    flickr_new_crop_root.mkdir(parents=True)
except FileExistsError:
    print('!!! Will overwrite files, is it OK?')

# Mirror the source sub-directories. Use `name`, not `stem`: `stem` drops everything
# after the last dot and would create a differently named directory for "a.b".
for d in flickr_new_root.iterdir():
    if d.is_dir():
        (flickr_new_crop_root / d.name).mkdir(exist_ok=True)

# When True, images whose cropped output already exists are not processed again.
skip_exists = True

# 'good_jpgs_refined' lists one image path per line, relative to flickr_new_root.
# Blank lines are dropped: they would otherwise resolve to the root directory itself.
good_lines = (flickr_new_root/'good_jpgs_refined').read_text().splitlines()

for s_path in [flickr_new_root/p for p in good_lines if p.strip()]:
    # TIF does not lose quality.
    # with_suffix() swaps only the extension. The previous str.replace('jpg', 'tif')
    # left '.JPG' names untouched (so those were re-encoded as JPEG) and would also
    # rewrite a 'jpg' occurring anywhere else in the path.
    d_path = (flickr_new_crop_root / s_path.relative_to(flickr_new_root)).with_suffix('.tif')
    if skip_exists and d_path.exists():
        continue
    img = cv2.imread(str(s_path))
    assert img is not None, s_path
    if img.ndim != 3:
        print('{} dim {}, skip'.format(s_path, img.ndim))
        # The message announces a skip, so actually skip the image.
        continue
    h, w = img.shape[:2]
    if h < MIN_SIZE or w < MIN_SIZE:
        print('{} size {}, skip'.format(s_path, (w,h)))
        continue
    # Crop size as (height, width), each side capped at CROP.
    sz = min(h, CROP), min(w, CROP)
    img = CenterCrop(sz)(img)
    # Listed paths may sit deeper than the first-level directories created above.
    d_path.parent.mkdir(parents=True, exist_ok=True)
    # imwrite reports failure through its return value; surface it instead of losing the image silently.
    if not cv2.imwrite(str(d_path), img):
        print('{} write failed'.format(d_path))

### REVIEWS

In [4]:
# Source tree of review-site images and the destination tree for their crops.
# NOTE(review): the saved output of the crop cell below lists paths under
# 'reviews_images_orig', not 'reviews_images' — confirm which directory is current.
reviews_root = Path('../data/external/reviews_images')
reviews_crop_root = Path('../data/external/reviews_images_{}'.format(CROP))

try:
    # parents=True so the cell also works when ../data/external does not exist yet.
    reviews_crop_root.mkdir(parents=True)
except FileExistsError:
    print('!!! Will overwrite files, is it OK?')

# Mirror the source sub-directories. Use `name`, not `stem`: `stem` drops everything
# after the last dot, while the crop loop addresses the output by `class_dir.name`.
for d in reviews_root.iterdir():
    if d.is_dir():
        (reviews_crop_root / d.name).mkdir(exist_ok=True)

!!! Will overwrite files, is it OK?


In [8]:
# Center-crop every review image, one sub-directory of reviews_root at a time,
# and store the result as TIFF under the same sub-directory name.
for class_dir in reviews_root.iterdir():
    # Skip plain files: globbing them yields nothing, but the mkdir below would
    # still create an empty output directory named after the file.
    if not class_dir.is_dir():
        continue
    print(class_dir)
    o_paths = list(class_dir.glob('*.jpg')) + list(class_dir.glob('*.JPG'))

    out_dir = reviews_crop_root / class_dir.name
    out_dir.mkdir(parents=True, exist_ok=True)
    for o_path in o_paths:
        # TIF does not lose quality
        d_path = out_dir / (o_path.stem + '.tif')
        img = cv2.imread(str(o_path))
        # imread returns None for unreadable files; fail with the offending path
        # instead of an AttributeError on img.shape.
        assert img is not None, o_path
        # Crop size as (height, width), each side capped at CROP.
        sz = min(img.shape[0], CROP), min(img.shape[1], CROP)
        img = CenterCrop(sz)(img)
        # imwrite reports failure through its return value; surface it instead of losing the image silently.
        if not cv2.imwrite(str(d_path), img):
            print('{} write failed'.format(d_path))

../data/external/reviews_images_orig/iphone_6
../data/external/reviews_images_orig/sony_nex7
../data/external/reviews_images_orig/moto_x
../data/external/reviews_images_orig/samsung_note3
../data/external/reviews_images_orig/nexus_6
../data/external/reviews_images_orig/samsung_s4
../data/external/reviews_images_orig/htc_m7
../data/external/reviews_images_orig/nexus_5x
../data/external/reviews_images_orig/moto_maxx
../data/external/reviews_images_orig/iphone_4s


### ORG