In [27]:
import glob
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torch
from torch.nn import functional as F
import torchvision
from torch.autograd import Variable
import torch.optim as optim
from torch import nn
import requests
import skimage
import wget
from PIL import Image
from pathlib import Path
import cv2
from pylab import *
from urllib import request
from io import BytesIO
from os import makedirs

In [28]:
#Functions
def getImage(name, namesDL, dls, timeout=30):
    """Download the image registered under ``name`` and decode it to an array.

    Parameters
    ----------
    name : str
        Image identifier to look up in ``namesDL``.
    namesDL : list of str
        Known image identifiers; ``namesDL[k]`` belongs to ``dls[k]``.
    dls : list of str
        Download URLs, parallel to ``namesDL``.
    timeout : float, optional
        Seconds a blocking socket operation may take before the download is
        abandoned. Without a timeout a single stalled server would block the
        caller indefinitely.

    Returns
    -------
    (img, failed) : tuple
        On success ``img`` is the decoded image array and ``failed`` is False.
        On any failure (unknown name, bad URL, network error, undecodable
        data) ``img`` is a (300, 300, 3) array of zeros and ``failed`` is True.
    """
    # Some hosts reject urllib's default user agent, so present a browser one.
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
    }
    try:
        dl = dls[namesDL.index(name)]
        req = request.Request(dl, headers=headers)
        with request.urlopen(req, timeout=timeout) as response:
            data = BytesIO(response.read())
        img = plt.imread(data, format='jpg')
        return img, False
    except Exception:
        # Deliberately best-effort: every failure is reported through the flag
        # so the caller can count it and continue with the next image.
        return np.zeros((300,300,3)), True

def img_resize_and_flatten(a, z_shape, y_shape, x_shape, interpolation=cv2.INTER_LINEAR):
    """Resize an image to ``x_shape`` rows by ``y_shape`` columns, force three
    channels and return it as a flat 1-D array.

    Parameters
    ----------
    a : numpy.ndarray
        Image of shape (rows, cols) or (rows, cols, channels).
    z_shape : int
        Number of channels used for the final flattening; must be 3 because
        the output always has three channels.
    y_shape : int
        Number of columns of the resized image.
    x_shape : int
        Number of rows of the resized image.
    interpolation : int, optional
        OpenCV interpolation flag passed to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``x_shape * y_shape * z_shape`` with the dtype of
        ``a``, laid out row by row with the channel index varying fastest.

    Raises
    ------
    ValueError
        If ``z_shape`` is not 3 or ``a`` is neither 2-D nor 3-D.
    """
    if z_shape != 3:
        raise ValueError("z_shape must be 3, got {}".format(z_shape))

    if a.ndim == 2:
        # grayscale -> replicate the single plane into three channels
        a = np.repeat(a[:, :, np.newaxis], 3, axis=2)
    elif a.ndim != 3:
        raise ValueError("expected a 2-D or 3-D image, got {} dimensions".format(a.ndim))

    channels = a.shape[2]
    if channels > 3:
        # e.g. RGBA: keep the first three planes. Interpolating along the
        # channel axis would blend the extra plane into the colours.
        a = a[:, :, :3]
    elif channels < 3:
        # one or two planes: treat the first plane as gray and replicate it
        a = np.repeat(a[:, :, :1], 3, axis=2)

    # Hand OpenCV a contiguous buffer; the slicing above may produce a view.
    a = np.ascontiguousarray(a)

    # cv2.resize takes dsize as (width, height) == (columns, rows).
    b = cv2.resize(a, (y_shape, x_shape), interpolation=interpolation)
    return b.reshape(x_shape*y_shape*z_shape)


def readDataReturnAsNumpy(sizeImage, test, numImages = 0):
    """Download one split of the dataset and return it as numpy arrays.

    The split listing is read from 'Yoga-82/yoga_test.txt' when ``test`` is
    truthy and from 'Yoga-82/yoga_train.txt' otherwise; every non-blank line
    must have the form ``name,label1,label2,label3``. Download URLs are taken
    from all 'Yoga-82/yoga_dataset_links/*.txt' files, whose non-blank lines
    must start with ``name,url``.

    Parameters
    ----------
    sizeImage : int
        Side length every downloaded image is resized to.
    test : bool
        Selects the test listing (True) or the train listing (False).
    numImages : int, optional
        If non-zero, only the first ``numImages`` listing lines are processed.

    Returns
    -------
    (data, labels) : tuple of numpy.ndarray
        ``data`` has one row of ``sizeImage*sizeImage*3`` values per image that
        was downloaded successfully; ``labels`` has the matching three label
        values per row. Images whose download failed are left out of both.

    Raises
    ------
    ValueError
        If a listing or link line does not have the expected comma-separated
        fields, or a label is not numeric.
    """
    #read train or test file
    split_path = 'Yoga-82/yoga_test.txt' if test else 'Yoga-82/yoga_train.txt'
    with open(split_path, 'r') as split_file:
        # strip newlines and drop blank lines so field unpacking cannot trip on them
        LinesData = [line.strip() for line in split_file if line.strip()]

    #read dl files
    namesDL = []
    dls = []
    for links_path in glob.glob('Yoga-82/yoga_dataset_links/*.txt'):
        with open(links_path, 'r') as links_file:
            for line in links_file:
                line = line.strip()
                if not line:
                    continue
                name, dl, *_ = line.split(',')
                namesDL.append(name)
                # a URL with stray whitespace/newline is rejected by urlopen
                dls.append(dl.strip())

    #only dl part of the data
    if numImages != 0:
        LinesData = LinesData[:numImages]

    #get numpy arrays for data and labels
    numImages = len(LinesData)
    data = np.zeros((numImages,sizeImage*sizeImage*3))
    labels = np.zeros((numImages,3))
    stored = 0          # next free row; successful images are packed contiguously
    countFailedDls = 0
    for count, line in enumerate(LinesData, start=1):
        name, label1, label2, label3 = line.split(',')
        image, checkFailed = getImage(name,namesDL,dls)
        if checkFailed:
            countFailedDls += 1
        else:
            data[stored] = img_resize_and_flatten(image,3,sizeImage,sizeImage)
            labels[stored] = [float(label1), float(label2), float(label3)]
            stored += 1
        if count%10 == 0:
            print("count dls = {}, count failed dls = {}".format(count, countFailedDls))

    # rows past `stored` were reserved for downloads that failed
    return data[:stored], labels[:stored]

def dlImagesFromNumpy(array, numImages, train = True):
    """Write rows of a flattened image array as PNG files under './images'.

    Each row is interpreted as a square RGB image whose side length is derived
    from the row length (``side * side * 3`` values) with pixel values on a
    0-255 scale. Files are named '<index>_train.png' or '<index>_test.png'.

    Parameters
    ----------
    array : numpy.ndarray
        2-D array with one flattened image per row.
    numImages : int
        Number of leading rows to write; 0 means all rows. Values larger than
        the number of rows are clamped to it.
    train : bool, optional
        Selects the '_train' (True) or '_test' (False) file-name suffix.

    Raises
    ------
    ValueError
        If a row length is not of the form ``side * side * 3``.
    """
    makedirs('./images', exist_ok=True)
    name = '_train' if train else '_test'

    available = len(array)
    count = available if numImages == 0 else min(numImages, available)

    for i in range(count):
        flat = array[i]
        # recover the side length instead of assuming a fixed image size
        side = int(round((flat.size / 3) ** 0.5))
        a = flat.reshape((side, side, 3))
        # imsave expects float RGB values in [0, 1]
        matplotlib.image.imsave('./images/' + str(i) + name +'.png', a/255)

            
            
def dlData(savePath, sizeImageInterpolated = 100, numPlotImages = 400, numImages = 0):
    """Download the train and test splits and store them under ``savePath``.

    For each split the image rows and labels are written as text
    ('data_*.txt', 'labels_*.txt') and as torch tensors ('data_*.pt',
    'labels_*.pt'). Optionally some rows are also written back as PNG
    previews via ``dlImagesFromNumpy`` so they can be compared with the
    original images.

    Parameters
    ----------
    savePath : str or path-like
        Output directory; created if it does not exist.
    sizeImageInterpolated : int, optional
        Side length the images are resized to.
    numPlotImages : int, optional
        Maximum number of preview images to write per split; 0 or less
        disables the previews.
    numImages : int, optional
        If non-zero, only the first ``numImages`` entries of each split are
        downloaded.

    Returns
    -------
    int
        Always 0.
    """
    # Create the output directory first so a missing folder is reported
    # before the download starts rather than after it finishes.
    if savePath:
        makedirs(savePath, exist_ok=True)

    data_train, labels_train = readDataReturnAsNumpy(sizeImageInterpolated, test = False, numImages = numImages)
    data_test, labels_test = readDataReturnAsNumpy(sizeImageInterpolated, test = True, numImages = numImages)

    #save data as numpy
    np.savetxt(os.path.join(savePath, 'data_train.txt'), data_train, fmt='%d')
    np.savetxt(os.path.join(savePath, 'labels_train.txt'), labels_train)
    np.savetxt(os.path.join(savePath, 'data_test.txt'), data_test, fmt='%d')
    np.savetxt(os.path.join(savePath, 'labels_test.txt'), labels_test)

    #construct some images from array to check if they look like the original image
    if numPlotImages > 0:
        for split_data, is_train in ((data_train, True), (data_test, False)):
            # never request more previews than rows exist; skip empty splits,
            # because a count of 0 means "all rows" to dlImagesFromNumpy
            n_preview = min(numPlotImages, len(split_data))
            if n_preview > 0:
                dlImagesFromNumpy(split_data, n_preview, is_train)

    #to Tensor
    data_train = torch.from_numpy(data_train)
    labels_train = torch.from_numpy(labels_train)
    data_test = torch.from_numpy(data_test)
    labels_test = torch.from_numpy(labels_test)

    #save data as tensor
    torch.save(data_train, os.path.join(savePath, 'data_train.pt'))
    torch.save(labels_train, os.path.join(savePath, 'labels_train.pt'))
    torch.save(data_test, os.path.join(savePath, 'data_test.pt'))
    torch.save(labels_test, os.path.join(savePath, 'labels_test.pt'))
    return 0

In [None]:
# Download both splits (no image limit) at 100x100 and store them under ./allData.
# This performs one network request per listed image.
dlData("./allData", sizeImageInterpolated=100)