In [2]:
# import and display options
%matplotlib notebook
from pathlib import Path

import cv2
import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display, HTML
from ipywidgets import interact, IntProgress, Label, VBox, HBox
from matplotlib import pyplot as plt

display(HTML("<style>.container {width:85% !important;}</style>"))

In [3]:
# hardcoded data directory and options for both serial titles and metadata fields
data_directory_path = Path('data/')
serial_titles_dict = {'Select a title': 'Select a title',
                      'Tennessee farm news': 'agrtfn',
                      'Tennessee farm and home science': 'agrtfhs',
                      'UT Special extension circulars': 'agrutesc'
                     }
metadata_fields_list = ['Select a metadata field', 'date', 'title']

In [4]:
# create interactive csv_path widget
identifier_widget = widgets.RadioButtons(layout={'width': 'initial'},
                                         style={
                                             'description_width': 'initial'},
                                         options=serial_titles_dict,
                                         description='Serial Title:',
                                         disabled=False
                                        )
metadata_field_widget = widgets.RadioButtons(layout={'width': 'initial'},
                                            style={
                                                'description_width': 'initial'},
                                            options=metadata_fields_list,
                                            description='Metadata field:',
                                            disabled=False,
                                           )

csv_path_display_widget = widgets.Text(layout={'width': 'initial'},
                                       description='CSV Path:')

csv_path_exists_validity_widget = widgets.Valid(description='Does CSV exist?',
                                               style={
                                                'description_width': 'initial'})
load_csv_button_widget = widgets.Button(description='Load CSV',
                                        style={'description_width': 'initial'},
                                       )

load_csv_button_output_widget = widgets.Text(layout={'width': 'initial'})


def on_button_clicked(b):
    global metadata
    # csv_path = Path(csv_path_display_widget.value)
    try:
        dataframe = pd.read_csv(csv_path_display_widget.value)
    except FileNotFoundError:
        load_csv_button_output_widget.value = 'No dataframe; CSV path invalid'
        return
    metadata = MetadataField(csv_path_display_widget.value)
    number_of_rows, number_of_columns = dataframe.shape
    load_csv_button_output_widget.value = f'{metadata.csv_path.name} loaded as dataframe with {metadata.number_of_rows} rows and {metadata.number_of_columns} columns'
    return metadata

load_csv_button_widget.on_click(on_button_clicked)

def select_csv(identifier, metadata_field):
    csv_name = f'{identifier}_{metadata_field}.csv'
    csv_path = data_directory_path.joinpath(csv_name)
    csv_path_display_widget.value = str(csv_path.resolve())
    csv_path_exists_validity_widget.value = csv_path.is_file()
    # print(f'Path to CSV: {csv_path}')
    return csv_path


# csv_path_interactive_widget = widgets.interactive(select_csv, identifier=identifier_widget, metadata_field=metadata_field_widget)
# output_csv_path_widget = interactive(select_csv, 'identifier'=identifier_widget, 'metadata_field'=metadata_field_widget)
csv_path_interactive_widget = widgets.interactive_output(select_csv, {'identifier': identifier_widget, 'metadata_field': metadata_field_widget})

In [5]:
class MetadataField():
    def __init__(self, csv_path):
        self.csv_path = Path(csv_path)
        self.dataframe = pd.read_csv(self.csv_path)
        self.number_of_rows, self.number_of_columns = self.dataframe.shape      

In [6]:
# Select a title and metadata field to process
row_1_widgets = HBox([identifier_widget, metadata_field_widget])

row_2_widgets = csv_path_display_widget  # HBox([csv_path_display_widget])

row_3_widgets = HBox([load_csv_button_widget, csv_path_exists_validity_widget])

row_4_widgets = load_csv_button_output_widget

my_widget = VBox([row_1_widgets, row_2_widgets, row_3_widgets, row_4_widgets])

my_widget

VBox(children=(HBox(children=(RadioButtons(description='Serial Title:', layout=Layout(width='initial'), option…

In [10]:
metadata.dataframe.columns

Index(['date_crop_box', 'date_guess', 'image_id', 'image_name'], dtype='object')

In [11]:
len(metadata.dataframe['image_name'].unique())

3062

In [12]:
image_dir_data_path = Path('/Volumes/jmoor167/data/agrtfn')
image_paths_list = sorted(image_dir_data_path.glob('*.tif'))
len(image_paths_list)

3451

In [15]:
len(image_paths_list) - len(metadata.dataframe['image_name'].unique())

389

In [17]:
metadata.dataframe['image_name']

0       0012_004266_000001_0001.tif
1       0012_004266_000001_0001.tif
2       0012_004266_000002_0001.tif
3       0012_004266_000002_0001.tif
4       0012_004266_000003_0001.tif
5       0012_004266_000003_0001.tif
6       0012_004266_000004_0001.tif
7       0012_004266_000004_0001.tif
8       0012_004266_000005_0001.tif
9       0012_004266_000005_0001.tif
10      0012_004266_000005_0001.tif
11      0012_004266_000005_0001.tif
12      0012_004266_000005_0001.tif
13      0012_004266_000005_0001.tif
14      0012_004266_000005_0001.tif
15      0012_004266_000005_0001.tif
16      0012_004266_000006_0001.tif
17      0012_004266_000006_0001.tif
18      0012_004266_000008_0001.tif
19      0012_004266_000008_0001.tif
20      0012_004266_000009_0001.tif
21      0012_004266_000009_0001.tif
22      0012_004266_000009_0001.tif
23      0012_004266_000009_0001.tif
24      0012_004266_000010_0001.tif
25      0012_004266_000010_0001.tif
26      0012_004266_000010_0001.tif
27      0012_004266_000010_0

In [19]:
images_not_processed_list

['0012_004266_000001_0001.tif',
 '0012_004266_000001_0001.tif',
 '0012_004266_000002_0001.tif',
 '0012_004266_000002_0001.tif',
 '0012_004266_000003_0001.tif',
 '0012_004266_000003_0001.tif',
 '0012_004266_000004_0001.tif',
 '0012_004266_000004_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000006_0001.tif',
 '0012_004266_000006_0001.tif',
 '0012_004266_000008_0001.tif',
 '0012_004266_000008_0001.tif',
 '0012_004266_000009_0001.tif',
 '0012_004266_000009_0001.tif',
 '0012_004266_000009_0001.tif',
 '0012_004266_000009_0001.tif',
 '0012_004266_000010_0001.tif',
 '0012_004266_000010_0001.tif',
 '0012_004266_000010_0001.tif',
 '0012_004266_000010_0001.tif',
 '0012_004266_000011_0001.tif',
 '0012_004266_000011_0001.tif',
 '0012_004266_000012_0001.tif',
 '0012_0

In [22]:
images_in_csv

<bound method IndexOpsMixin.tolist of 0       0012_004266_000001_0001.tif
1       0012_004266_000001_0001.tif
2       0012_004266_000002_0001.tif
3       0012_004266_000002_0001.tif
4       0012_004266_000003_0001.tif
5       0012_004266_000003_0001.tif
6       0012_004266_000004_0001.tif
7       0012_004266_000004_0001.tif
8       0012_004266_000005_0001.tif
9       0012_004266_000005_0001.tif
10      0012_004266_000005_0001.tif
11      0012_004266_000005_0001.tif
12      0012_004266_000005_0001.tif
13      0012_004266_000005_0001.tif
14      0012_004266_000005_0001.tif
15      0012_004266_000005_0001.tif
16      0012_004266_000006_0001.tif
17      0012_004266_000006_0001.tif
18      0012_004266_000008_0001.tif
19      0012_004266_000008_0001.tif
20      0012_004266_000009_0001.tif
21      0012_004266_000009_0001.tif
22      0012_004266_000009_0001.tif
23      0012_004266_000009_0001.tif
24      0012_004266_000010_0001.tif
25      0012_004266_000010_0001.tif
26      0012_004266_000010

In [25]:
images_in_csv

['0012_004266_000001_0001.tif',
 '0012_004266_000002_0001.tif',
 '0012_004266_000003_0001.tif',
 '0012_004266_000004_0001.tif',
 '0012_004266_000005_0001.tif',
 '0012_004266_000006_0001.tif',
 '0012_004266_000008_0001.tif',
 '0012_004266_000009_0001.tif',
 '0012_004266_000010_0001.tif',
 '0012_004266_000011_0001.tif',
 '0012_004266_000012_0001.tif',
 '0012_004266_000013_0001.tif',
 '0012_004266_000014_0001.tif',
 '0012_004266_000016_0001.tif',
 '0012_004266_000017_0001.tif',
 '0012_004266_000018_0001.tif',
 '0012_004266_000019_0001.tif',
 '0012_004266_000020_0001.tif',
 '0012_004266_000021_0001.tif',
 '0012_004266_000022_0001.tif',
 '0012_004266_000023_0001.tif',
 '0012_004266_000024_0001.tif',
 '0012_004266_000025_0001.tif',
 '0012_004266_000026_0001.tif',
 '0012_004266_000027_0001.tif',
 '0012_004266_000028_0001.tif',
 '0012_004266_000031_0001.tif',
 '0012_004266_000032_0001.tif',
 '0012_004266_000033_0001.tif',
 '0012_004266_000034_0001.tif',
 '0012_004266_000035_0001.tif',
 '0012_0

In [None]:
df[df['A'].isin([3, 6])]

In [27]:
len(images_in_csv)

3062

In [29]:
for image in images_in_csv:
    print(image)

0012_004266_000001_0001.tif
0012_004266_000002_0001.tif
0012_004266_000003_0001.tif
0012_004266_000004_0001.tif
0012_004266_000005_0001.tif
0012_004266_000006_0001.tif
0012_004266_000008_0001.tif
0012_004266_000009_0001.tif
0012_004266_000010_0001.tif
0012_004266_000011_0001.tif
0012_004266_000012_0001.tif
0012_004266_000013_0001.tif
0012_004266_000014_0001.tif
0012_004266_000016_0001.tif
0012_004266_000017_0001.tif
0012_004266_000018_0001.tif
0012_004266_000019_0001.tif
0012_004266_000020_0001.tif
0012_004266_000021_0001.tif
0012_004266_000022_0001.tif
0012_004266_000023_0001.tif
0012_004266_000024_0001.tif
0012_004266_000025_0001.tif
0012_004266_000026_0001.tif
0012_004266_000027_0001.tif
0012_004266_000028_0001.tif
0012_004266_000031_0001.tif
0012_004266_000032_0001.tif
0012_004266_000033_0001.tif
0012_004266_000034_0001.tif
0012_004266_000035_0001.tif
0012_004266_000036_0001.tif
0012_004266_000038_0001.tif
0012_004266_000039_0001.tif
0012_004266_000040_0001.tif
0012_004266_000041_0

In [32]:
image_names_list = [x.name for x in image_paths_list]
len(image_names_list)

3451

In [35]:
images_not_processed_list = []
images_in_csv = metadata.dataframe.image_name.unique().tolist()
for image_name in image_names_list:
    if image_name in images_in_csv:
        continue
    else:
        images_not_processed_list.append(image_name)

len(images_not_processed_list)

389

In [36]:
images_not_processed_list

['0012_004266_000007_0001.tif',
 '0012_004266_000015_0001.tif',
 '0012_004266_000029_0001.tif',
 '0012_004266_000030_0001.tif',
 '0012_004266_000037_0001.tif',
 '0012_004266_000042_0001.tif',
 '0012_004266_000044_0001.tif',
 '0012_004266_000069_0001.tif',
 '0012_004266_000081_0001.tif',
 '0012_004266_000082_0001.tif',
 '0012_004266_000103_0001.tif',
 '0012_004266_000133_0001.tif',
 '0012_004266_000144_0001.tif',
 '0012_004266_000154_0001.tif',
 '0012_004266_000164_0001.tif',
 '0012_004266_000181_0001.tif',
 '0012_004266_000189_0001.tif',
 '0012_004266_000208_0001.tif',
 '0012_004266_000211_0001.tif',
 '0012_004266_000214_0001.tif',
 '0012_004266_000215_0001.tif',
 '0012_004266_000263_0001.tif',
 '0012_004266_000268_0001.tif',
 '0012_004266_000274_0001.tif',
 '0012_004266_000276_0001.tif',
 '0012_004266_000277_0001.tif',
 '0012_004266_000282_0001.tif',
 '0012_004266_000287_0001.tif',
 '0012_004266_000292_0001.tif',
 '0012_004266_000300_0001.tif',
 '0012_004266_000303_0001.tif',
 '0012_0

In [37]:
# write images_not_processed_list to text file SLOWLY with BASH
output_path = Path('/Users/jeremy/Documents/GitHub/utk_ProjectCeres/data/agrtfn_date_to_process.txt')
!touch {str(output_path)}
for image_name in images_not_processed_list:
    !echo {image_name} >> {str(output_path)}
    
!open {str(output_path)}

In [38]:
# write images_not_processed_list to text file QUICKLY with Python!
output_path = Path('/Users/jeremy/Documents/GitHub/utk_ProjectCeres/data/agrtfn_date_to_process.txt')
with open (output_path, 'w') as text_file:
    text_file.write('\n'.join(images_not_processed_list))