# Introduction

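Notebook para fazer o download de datasets do TFDS (TensorFlow Datasets) e gravá-los em disco como imagens JPEG acompanhadas de um arquivo CSV com os rótulos em formato one-hot.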

Datasets baixados:
* MNIST
* Fashion-MNIST
* CIFAR-10

Esses datasets serão usados para verificar a corretude do algoritmo REINFORCE no contexto de NAS (Neural Architecture Search).

# Import Libraries

In [1]:
import os
import cv2 
import sklearn

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

2021-11-18 22:23:58.303119: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


# Datasets Root Directory Path

In [2]:
# Root directory under which every dataset is written (one sub-directory per dataset).
# Set the DATASETS_ROOT_DIR environment variable to point the notebook at another
# location; when it is unset, the original hardcoded path is used.
DATASETS_ROOT_DIR = os.environ.get(
    'DATASETS_ROOT_DIR',
    '/home/guilherme/data1/Dropbox/Link to Desktop/Doutorado/Datasets/',
)

# Utility Functions

In [3]:
def format_mnist_split(ds):
    """Convert a TFDS split into a DataFrame with one-hot labels and image names.

    Args:
        ds: Dataset split accepted by ``tfds.as_dataframe``. The resulting frame
            must contain a ``label`` column.

    Returns:
        The frame produced by ``tfds.as_dataframe`` with two additions:

        * one ``n_<label>`` indicator column (0/1, ``uint8``) for every label
          value present in the split;
        * an ``img_name`` column holding ``image_<row position>``.

        The original ``label`` column is kept alongside the indicator columns.
    """
    df = tfds.as_dataframe(ds)

    # `prefix='n'` names the indicator columns `n_<label>` directly, so the result
    # does not depend on the labels being plain Python ints. The dtype is pinned
    # because the pandas default differs between versions (bool vs. uint8), which
    # would change the 0/1 values later written to CSV.
    df_labels = pd.get_dummies(df.label, prefix='n', dtype='uint8')
    df = pd.concat([df, df_labels], axis=1)

    # Number the images by row position using the frame's own length rather than
    # ds.cardinality(), which can be a negative sentinel when the size is unknown.
    df['img_name'] = [f'image_{x}' for x in range(len(df))]
    return df


def record_dataset(df, ds_split, split_name, ds_name):
    """Write one split's images as JPEG files plus a CSV of one-hot labels.

    Images go to ``<DATASETS_ROOT_DIR>/<ds_name>/<split_name>/<img_name>.jpg`` and
    the labels to ``<DATASETS_ROOT_DIR>/<ds_name>/<split_name>_data.csv``. The CSV
    holds the ``img_name`` column followed by ``n_0`` .. ``n_9``.

    Args:
        df: DataFrame with an ``image`` column (arrays passed to ``cv2.imwrite``),
            an ``img_name`` column (file name without extension) and the
            indicator columns ``n_0`` .. ``n_9``.
        ds_split: Unused; kept so existing calls keep working.
        split_name: Name of the split; used as sub-directory and CSV prefix.
        ds_name: Name of the dataset; used as the directory under the root.

    Raises:
        IOError: If ``cv2.imwrite`` reports that an image could not be written.

    Note:
        ``df`` is modified in place: its ``img_name`` column is replaced by the
        full path of each written image. Calling this twice on the same frame
        therefore appends ``.jpg`` to already-expanded paths; rebuild the frame
        before re-running.
    """
    dataset_dir = os.path.join(DATASETS_ROOT_DIR, ds_name)
    split_dir = os.path.join(dataset_dir, split_name)
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(split_dir, exist_ok=True)

    img_paths = []
    for img_name, image in zip(df['img_name'], df['image']):
        img_path = os.path.join(split_dir, img_name + '.jpg')
        if not cv2.imwrite(img_path, image):
            raise IOError(f'cv2.imwrite failed to write {img_path}')
        img_paths.append(img_path)
    # Single column assignment instead of one df.loc write per row.
    df['img_name'] = img_paths

    label_columns = [f'n_{x}' for x in range(10)]
    split_labels_df = df[['img_name'] + label_columns]
    labels_file_path = os.path.join(dataset_dir, split_name + '_data.csv')
    split_labels_df.to_csv(labels_file_path, index=False)
                    

# MNIST Dataset

In [4]:
# Carve the official MNIST training set into 80% train / 20% validation and
# keep the official test set untouched.
mnist_splits = ['train[:80%]', 'train[80%:]', 'test']
ds_train, ds_valid, ds_test = tfds.load('mnist', split=mnist_splits, shuffle_files=True)

# One formatted DataFrame (one-hot labels + image names) per split.
df_train, df_valid, df_test = (
    format_mnist_split(split) for split in (ds_train, ds_valid, ds_test)
)

df_train.head()

2021-11-18 22:23:59.344270: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-11-18 22:23:59.417397: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-18 22:23:59.418071: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:06:00.0 name: GeForce RTX 2070 SUPER computeCapability: 7.5
coreClock: 1.785GHz coreCount: 40 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s
2021-11-18 22:23:59.418109: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2021-11-18 22:23:59.421302: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2021-11-18 22:23:59.421369: I tensorflow/stream_executor/plat

Unnamed: 0,image,label,n_0,n_1,n_2,n_3,n_4,n_5,n_6,n_7,n_8,n_9,img_name
0,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",4,0,0,0,0,1,0,0,0,0,0,image_0
1,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",0,1,0,0,0,0,0,0,0,0,0,image_1
2,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",1,0,1,0,0,0,0,0,0,0,0,image_2
3,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",7,0,0,0,0,0,0,0,1,0,0,image_3
4,"[[[0], [0], [0], [0], [0], [0], [0], [0], [0],...",8,0,0,0,0,0,0,0,0,1,0,image_4


## Record Image Files

In [5]:
# Write every MNIST split to disk: JPEG images plus the per-split label CSV.
for split_name, split_df, split_ds in (
    ('train', df_train, ds_train),
    ('valid', df_valid, ds_valid),
    ('test', df_test, ds_test),
):
    record_dataset(split_df, split_ds, split_name, 'mnist')