# Data Preparation

## 1. Libraries

In [1]:
import os
import pandas as pd
import glob
import numpy as np
from PIL import Image
from scipy.misc import imresize

## 2. Loading images

In [2]:
# Percentage of the original width and height kept when downscaling each image
# (30 -> each side is scaled to 30% of its original length).
imagequality = 30

phototexture    = []

folder=os.getcwd()+'/photos/'+'texture/' #Location of texture images


def texture_index_from_path(path):
    """Return the integer texture index encoded in an image file name.

    The file name without directory and extension must be an integer, e.g.
    '/some/dir/1126.png' -> 1126. Uses os.path so that the separator returned
    by glob on the current platform is handled correctly.

    Raises:
        ValueError: if the file name (without extension) is not an integer.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    return int(stem)


def load_scaled_rgba(path, percent):
    """Load an image as RGBA and scale both sides to `percent` % of the original.

    Mirrors what scipy.misc.imresize(array, percent) did (bilinear resampling,
    target size truncated to whole pixels); that function was removed in
    SciPy 1.3, so the resize is done with Pillow directly.

    Returns:
        numpy array built from the resized RGBA image.
    """
    with Image.open(path) as img:
        rgba = img.convert('RGBA')
    scale = percent / 100.0
    width, height = rgba.size
    new_size = (int(width * scale), int(height * scale))
    return np.array(rgba.resize(new_size, resample=Image.BILINEAR))


# Names of the image files. Sorted because glob returns files in arbitrary
# order; sorting makes the order of the prepared data reproducible.
filelist = sorted(glob.glob(os.path.join(folder, '*.png')))

# Import images with reduced size to the textureimages variable
textureimages = np.array([load_scaled_rgba(fname, imagequality) for fname in filelist])

# Turn the name of each image into a number to be used as key for assigning the correct texture index.
# Same order as `textureimages`, because both are built from `filelist`.
texturefiles = [texture_index_from_path(fname) for fname in filelist]

## 3. Importing the texture file and assigning the correct image to each texture index

In [3]:
# Load the texture measurements. The original file is in Brazilian format
# (comma as decimal separator), so the `texture` column is cleaned as text and
# then converted to a numerical format. `inx` is a copy of the row index and
# serves as the join key below.
texture = (
    pd.read_csv('texture.csv')
    .drop(['Unnamed: 0'], axis=1)
    .assign(
        texture=lambda frame: pd.to_numeric(frame['texture'].str.replace(',', '.')),
        inx=lambda frame: frame.index,
    )
)

# One-column frame holding the image indices, then an inner join with the
# texture table on `inx`.
texturefiles = pd.DataFrame(texturefiles, columns=['inx'])
texturedata = pd.merge(texturefiles, texture, on='inx')

## 4. Removing erroneous images

In [4]:
# Image indices (file names) that are known to be errors and must be removed.
errorimages = [1126,1132,1143]

# `texturefiles.inx` lists the image indices in the same order as the rows of
# `textureimages`. Build the image mask from it instead of reusing row labels
# of the merged frame as positions: the inner merge drops images that have no
# row in the texture table, which would shift positions and silently misalign
# images and labels.
assert len(textureimages) == len(texturefiles), \
    "textureimages and texturefiles must describe the same images"
keepimage = (
    texturefiles.inx.isin(texturedata.inx) & ~texturefiles.inx.isin(errorimages)
).values

errorindex  = texturedata[texturedata.inx.isin(errorimages)].index.values
texturedata = texturedata.drop(errorindex)

texturedata = texturedata.drop(['filename','empresa','gravel_size','image','inx'],axis=1)
# Bin the texture value into 5 classes: (-0.01,165], (165,275], (275,350],
# (350,460], (460,20000]. Values outside these bins become NaN.
texturedata['category'] = pd.cut(texturedata.texture,bins=[-0.01,165,275,350,460,20000],labels=[0, 1, 2, 3, 4])

# Keep only the images that still have a row in `texturedata`.
textureimages = textureimages[keepimage]
assert len(textureimages) == len(texturedata), \
    "images and texture rows are no longer aligned"

## 5. Saving prepared data

In [5]:
# Binary data: the file name carries the scale percentage used when loading
# the images, so it stays correct if `imagequality` is changed.
np.save('textureimagequality{}.npy'.format(imagequality), textureimages)

In [6]:
# Tabular data: written with the row index as the first (unnamed) column.
texturedata.to_csv("texturedataprepared.csv", index=True)