# Introduction

Notebook para fazer o download de datasets do TFDS (TensorFlow Datasets) e gravá-los em disco como arquivos de imagem e CSVs de rótulos.

Datasets baixados:
* MNIST
* FASHION_MNIST
* CIFAR 10
* CelebA

Esses datasets serão usados para verificar a corretude do algoritmo REINFORCE no contexto de NAS (Neural Architecture Search).

# Import Libraries

In [1]:
import os
import cv2 
import shutil
import sklearn

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

2022-07-12 23:12:26.901052: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


# Datasets Root Directory Path

In [2]:
# Root directory under which every exported dataset is written.
# Can be overridden with the DATASETS_ROOT_DIR environment variable so the
# notebook runs on other machines; the default keeps the original location.
DATASETS_ROOT_DIR = os.environ.get(
    'DATASETS_ROOT_DIR',
    '/home/guilherme/data1/Dropbox/Link to Desktop/Doutorado/Datasets/',
)

# Utility Functions

In [3]:
def format_fashion_mnist_split(ds):
    """Turn a TFDS split into a DataFrame with one-hot label columns.

    Parameters
    ----------
    ds : tf.data.Dataset
        A split loaded through ``tfds.load``; it must expose a ``label``
        feature.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``tfds.as_dataframe`` plus:
        * one ``n_<class>`` column per label value present in the split,
          holding 0/1 integers;
        * an ``img_name`` column with the values ``image_0``, ``image_1``, ...
          in row order.
    """
    df = tfds.as_dataframe(ds)

    # prefix='n' names the columns n_<class> directly. The explicit dtype keeps
    # 0/1 integers: pandas >= 2.0 would otherwise emit bool columns, which end
    # up as True/False in the exported CSV.
    df_labels = pd.get_dummies(df['label'], prefix='n', dtype='uint8')
    df = pd.concat([df, df_labels], axis=1)

    # Number rows by the materialised frame length; ds.cardinality() may be an
    # unknown/infinite sentinel for some pipelines.
    df['img_name'] = [f'image_{x}' for x in range(len(df))]
    return df


def record_dataset(df, ds_split, split_name, ds_name):
    """Write one split to disk as JPEG files plus a labels CSV.

    Layout produced under ``DATASETS_ROOT_DIR``::

        <ds_name>/<split_name>/<img_name>.jpg   one file per row of ``df``
        <ds_name>/<split_name>_data.csv         img_name (full path) + n_0..n_9

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``image``, ``img_name`` and the columns ``n_0`` .. ``n_9``.
        It is not modified, so the call can be repeated safely.
    ds_split :
        Unused; kept so existing callers keep working.
    split_name : str
        Name of the split (used for the image directory and the CSV name).
    ds_name : str
        Name of the dataset directory.

    Raises
    ------
    IOError
        If OpenCV reports that an image could not be written.
    """
    dataset_dir = os.path.join(DATASETS_ROOT_DIR, ds_name)
    images_dir = os.path.join(dataset_dir, split_name)

    # Start from an empty image directory so stale files never survive.
    shutil.rmtree(images_dir, ignore_errors=True)
    os.makedirs(images_dir, exist_ok=True)

    img_paths = []
    for img_name, image in zip(df['img_name'], df['image']):
        img_path = os.path.join(images_dir, img_name + '.jpg')

        # TFDS decodes colour images as RGB while cv2.imwrite expects BGR;
        # single-channel images are left untouched.
        if getattr(image, 'ndim', 0) == 3 and image.shape[-1] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # cv2.imwrite signals failure through its return value only.
        if not cv2.imwrite(img_path, image):
            raise IOError(f'Failed to write image file: {img_path}')
        img_paths.append(img_path)

    # Work on a copy: the caller's img_name column keeps the bare names, which
    # keeps a second call from producing paths such as "...jpg.jpg".
    split_labels_df = df[['img_name'] + [f'n_{x}' for x in range(10)]].copy()
    split_labels_df['img_name'] = img_paths

    labels_file_path = os.path.join(dataset_dir, split_name + '_data.csv')
    split_labels_df.to_csv(labels_file_path, index=False)
                    

# Fashion-MNIST Dataset

In [4]:
# Load Fashion-MNIST: the official train split is divided 80/20 into
# train/validation; the official test split is used as is.
ds_train, ds_valid, ds_test = tfds.load(
    'fashion_mnist',
    split=['train[:80%]', 'train[80%:]', 'test'],
    shuffle_files=True,
)

# One DataFrame per split, with one-hot label columns and image names.
df_train, df_valid, df_test = map(
    format_fashion_mnist_split, (ds_train, ds_valid, ds_test)
)

# Preview the first rows of each split (train, valid, test — in that order).
display(df_train.head(), df_valid.head(), df_test.head())

2022-07-12 23:12:27.956422: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2022-07-12 23:12:28.001407: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-12 23:12:28.001869: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:06:00.0 name: NVIDIA GeForce RTX 2070 SUPER computeCapability: 7.5
coreClock: 1.785GHz coreCount: 40 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s
2022-07-12 23:12:28.001898: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2022-07-12 23:12:28.004112: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2022-07-12 23:12:28.004208: I tensorflow/stream_execut

Unnamed: 0,image,label,n_0,n_1,n_2,n_3,n_4,n_5,n_6,n_7,n_8,n_9,img_name
0,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",2,0,0,1,0,0,0,0,0,0,0,image_0
1,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",1,0,1,0,0,0,0,0,0,0,0,image_1
2,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",8,0,0,0,0,0,0,0,0,1,0,image_2
3,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",4,0,0,0,0,1,0,0,0,0,0,image_3
4,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",1,0,1,0,0,0,0,0,0,0,0,image_4


Unnamed: 0,image,label,n_0,n_1,n_2,n_3,n_4,n_5,n_6,n_7,n_8,n_9,img_name
0,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",7,0,0,0,0,0,0,0,1,0,0,image_0
1,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",6,0,0,0,0,0,0,1,0,0,0,image_1
2,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",5,0,0,0,0,0,1,0,0,0,0,image_2
3,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",7,0,0,0,0,0,0,0,1,0,0,image_3
4,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",2,0,0,1,0,0,0,0,0,0,0,image_4


Unnamed: 0,image,label,n_0,n_1,n_2,n_3,n_4,n_5,n_6,n_7,n_8,n_9,img_name
0,"[[[0], [0], [0], [0], [0], [0], [0], [0], [3],...",4,0,0,0,0,1,0,0,0,0,0,image_0
1,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",9,0,0,0,0,0,0,0,0,0,1,image_1
2,"[[[0], [0], [0], [0], [0], [0], [0], [0], [1],...",4,0,0,0,0,1,0,0,0,0,0,image_2
3,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",7,0,0,0,0,0,0,0,1,0,0,image_3
4,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",5,0,0,0,0,0,1,0,0,0,0,image_4


## Record Images Files

In [5]:
# Export every Fashion-MNIST split (images + labels CSV), in train/valid/test order.
for _split_name, _split_df, _split_ds in (
    ('train', df_train, ds_train),
    ('valid', df_valid, ds_valid),
    ('test', df_test, ds_test),
):
    record_dataset(_split_df, _split_ds, _split_name, 'fashion_mnist')