# TFRecord visualization
This ipynb file shows the breakdown of our data by class. It should be used to see the raw number of images and objects in each class as well as the relative percentages of each class.

In [1]:
import os.path as op

import tensorflow_data_validation as tfdv

from config_data import tf_train_params as TTP

# Define stats_options to only look at classes. We need to exclude the raw image data
stats_options = tfdv.StatsOptions(feature_whitelist=['image/object/class/text'])

  'Running the Apache Beam SDK on Python 3 is not yet fully supported. '


### Building Parts

In [2]:
stats_dict_parts = dict(train=[], val=[])

# Loop over train/val datasets
for set_name in stats_dict_parts.keys():
    set_full_key = f'{set_name}_fnames'
    print(f'\nSet: {set_name}')
    
    # Loop over each TFRecord filename
    for tfrecord_fname in TTP['parts'][set_full_key]:
        print(f'Analyzing {tfrecord_fname}')

        tf_record_fpath = op.join(TTP['tfrecord_dir'], tfrecord_fname)
        
        # Compute and store stats
        stats = tfdv.generate_statistics_from_tfrecord(data_location=tf_record_fpath, stats_options=stats_options)
        stats_dict_parts[set_name].append(stats)


Set: train
Analyzing 4_Door - garage - window - disaster mitigation (Right images) - Lima.tfrecord




Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Analyzing 14_Door - garage - window - disaster mitigation (Right images) - Cartagena_t.tfrecord
Analyzing 19_Door - garage - window - disaster mitigation (Left images) - Neiva.tfrecord
Analyzing 10_Door - garage - window - disaster mitigation (Left images) - St. Maarten.tfrecord

Set: val
Analyzing 2_Door - garage - window - disaster mitigation (Left images) - Lima.tfrecord
Analyzing 13_Door - garage - window - disaster mitigation (Left images) - Cartagena_t.tfrecord
Analyzing 20_Door - garage - window - disaster mitigation (Right images) - Neiva.tfrecord
Analyzing 23_Door - garage - window - disaster mitigation (Right images) - St. Maarten.tfrecord


In [3]:
print('Building Parts Analysis\n')

# Visualize train and val data with TFDV
for ind in range(len(TTP['parts']['train_fnames'])):

    print(f'Train fname: {TTP["parts"]["train_fnames"][ind]}')
    print(f'Test  fname: {TTP["parts"]["val_fnames"][ind]}')
          
    temp = tfdv.visualize_statistics(stats_dict_parts['train'][ind], 
                              stats_dict_parts['val'][ind],
                              lhs_name='Train', 
                              rhs_name='Validate')

Building Parts Analysis

Train fname: 4_Door - garage - window - disaster mitigation (Right images) - Lima.tfrecord
Test  fname: 2_Door - garage - window - disaster mitigation (Left images) - Lima.tfrecord


Train fname: 14_Door - garage - window - disaster mitigation (Right images) - Cartagena_t.tfrecord
Test  fname: 13_Door - garage - window - disaster mitigation (Left images) - Cartagena_t.tfrecord


Train fname: 19_Door - garage - window - disaster mitigation (Left images) - Neiva.tfrecord
Test  fname: 20_Door - garage - window - disaster mitigation (Right images) - Neiva.tfrecord


Train fname: 10_Door - garage - window - disaster mitigation (Left images) - St. Maarten.tfrecord
Test  fname: 23_Door - garage - window - disaster mitigation (Right images) - St. Maarten.tfrecord


### Building Properties

In [4]:
stats_dict_properties = dict(train=[], val=[])

# Loop over train/val datasets
for set_name in stats_dict_properties.keys():
    set_full_key = f'{set_name}_fnames'
    print(f'\nSet: {set_name}')
    
    # Loop over each TFRecord filename
    for tfrecord_fname in TTP['properties'][set_full_key]:
        print(f'Analyzing {tfrecord_fname}')

        tf_record_fpath = op.join(TTP['tfrecord_dir'], tfrecord_fname)
        
        # Compute and store stats
        stats = tfdv.generate_statistics_from_tfrecord(data_location=tf_record_fpath, stats_options=stats_options)
        stats_dict_properties[set_name].append(stats)


Set: train
Analyzing 1_Building classification (Left images) - Lima.tfrecord
Analyzing 9_Building classification (Left images) - St. Maarten.tfrecord
Analyzing 24_Building classification (Left images) - St. Maarten.tfrecord
Analyzing 15_Building classification (Left images) - Cartagena_t.tfrecord
Analyzing 21_Building classification (Left images) - Neiva.tfrecord

Set: val
Analyzing 3_Building classification (Right images) - Lima.tfrecord
Analyzing 11_Building classification (Right images) - St. Maarten.tfrecord
Analyzing 16_Building classification (Right images) - Cartagena_t.tfrecord
Analyzing 22_Building classification (Right images) - Neiva.tfrecord


In [5]:
print('Building Properties Analysis\n')

# Visualize train and val data with TFDV
for ind in range(len(TTP['properties']['train_fnames'])):

    print(f'Train fname: {TTP["properties"]["train_fnames"][ind]}')
    print(f'Test  fname: {TTP["properties"]["val_fnames"][ind]}')
          
    temp = tfdv.visualize_statistics(stats_dict_properties['train'][ind], 
                              stats_dict_properties['val'][ind],
                              lhs_name='Train', 
                              rhs_name='Validate')

Building Properties Analysis

Train fname: 1_Building classification (Left images) - Lima.tfrecord
Test  fname: 3_Building classification (Right images) - Lima.tfrecord


Train fname: 9_Building classification (Left images) - St. Maarten.tfrecord
Test  fname: 11_Building classification (Right images) - St. Maarten.tfrecord


Train fname: 24_Building classification (Left images) - St. Maarten.tfrecord
Test  fname: 16_Building classification (Right images) - Cartagena_t.tfrecord


Train fname: 15_Building classification (Left images) - Cartagena_t.tfrecord
Test  fname: 22_Building classification (Right images) - Neiva.tfrecord


Train fname: 21_Building classification (Left images) - Neiva.tfrecord


IndexError: list index out of range

## Mexico validation-only data

Here, we'll analyze some data from Mexico that will be used to test the generalization of the ML model. The datasets are relatively small and not intended to be used for training.

### Building Parts

In [6]:
stats_dict_parts_mexico = []

# Define building part files
tfrecord_fnames = ['33_Door - garage - window - disaster mitigation (Left images) - Salina Cruz - Mexico.tfrecord',
                   '39_Door - garage - window - disaster mitigation (Left images) - Juchitan - Mexico.tfrecord',
                   '41_Door - garage - window - disaster mitigation (Right images) - Juchitan - Mexico.tfrecord']

# Loop over each TFRecord filename
for tfrecord_fname in tfrecord_fnames:
    print(f'Analyzing {tfrecord_fname}')

    tf_record_fpath = op.join(TTP['tfrecord_dir'], tfrecord_fname)

    # Compute and store stats
    stats = tfdv.generate_statistics_from_tfrecord(data_location=tf_record_fpath, stats_options=stats_options)
    stats_dict_parts_mexico.append(stats)
    
print('Mexico Properties Analysis\n')

# Visualize train and val data with TFDV
for ind in range(len(tfrecord_fnames)):
    print(f'Test fname: {tfrecord_fnames[ind]}')
    temp = tfdv.visualize_statistics(stats_dict_parts_mexico[ind], lhs_name='Validate')

Analyzing 33_Door - garage - window - disaster mitigation (Left images) - Salina Cruz - Mexico.tfrecord
Analyzing 39_Door - garage - window - disaster mitigation (Left images) - Juchitan - Mexico.tfrecord
Analyzing 41_Door - garage - window - disaster mitigation (Right images) - Juchitan - Mexico.tfrecord
Mexico Properties Analysis

Test fname: 33_Door - garage - window - disaster mitigation (Left images) - Salina Cruz - Mexico.tfrecord


Test fname: 39_Door - garage - window - disaster mitigation (Left images) - Juchitan - Mexico.tfrecord


Test fname: 41_Door - garage - window - disaster mitigation (Right images) - Juchitan - Mexico.tfrecord


### Building properties

In [7]:
stats_dict_props_mexico = []

# Define building property files
tfrecord_fnames = ['35_Building classification (Left images) - Salina Cruz - Mexico.tfrecord',
                   '38_Building classification (Left images) - Juchitan - Mexico.tfrecord',
                   '40_Building classification (Right images) - Juchitan - Mexico.tfrecord']

# Loop over each TFRecord filename
for tfrecord_fname in tfrecord_fnames:
    print(f'Analyzing {tfrecord_fname}')

    tf_record_fpath = op.join(TTP['tfrecord_dir'], tfrecord_fname)

    # Compute and store stats
    stats = tfdv.generate_statistics_from_tfrecord(data_location=tf_record_fpath, stats_options=stats_options)
    stats_dict_props_mexico.append(stats)
    
print('Mexico Properties Analysis\n')

# Visualize train and val data with TFDV
for ind in range(len(tfrecord_fnames)):
    print(f'Test fname: {tfrecord_fnames[ind]}')
    temp = tfdv.visualize_statistics(stats_dict_props_mexico[ind], lhs_name='Validate')

Analyzing 35_Building classification (Left images) - Salina Cruz - Mexico.tfrecord
Analyzing 38_Building classification (Left images) - Juchitan - Mexico.tfrecord
Analyzing 40_Building classification (Right images) - Juchitan - Mexico.tfrecord
Mexico Properties Analysis

Test fname: 35_Building classification (Left images) - Salina Cruz - Mexico.tfrecord


Test fname: 38_Building classification (Left images) - Juchitan - Mexico.tfrecord


Test fname: 40_Building classification (Right images) - Juchitan - Mexico.tfrecord
