In [5]:
# import and display options
from collections import OrderedDict
from pathlib import Path

import cv2
import ipywidgets as widgets
import numpy as np
import pandas as pd
from dateutil.parser import parse
from IPython.display import clear_output, display, HTML
from ipywidgets import IntProgress, Label, VBox, HBox
from PIL import Image

from datetime import datetime
import img_qc.img_qc as img_qc

# Inject CSS that sets the notebook's `.container` element to 90% width.
display(HTML("<style>.container {width:90% !important;}</style>"))

In [6]:
# hardcoded data directory and options for both serial titles and metadata fields
# Directory (relative to the notebook's working directory) that holds the CSV files.
data_directory_path = Path('data/')
# Maps the label shown in the title radio buttons to the identifier used in CSV file names.
# The first entry is a placeholder shown before the user picks a title.
serial_titles_dict = {'Select a title': 'Select a title',
                      'Tennessee farm news': 'agrtfn',
                      'Tennessee farm and home science': 'agrtfhs',
                      'UT Special extension circulars': 'agrutesc'
                     }
# Options for the metadata-field radio buttons; the first entry is a placeholder.
metadata_fields_list = ['Select a metadata field', 'date', 'title']

In [7]:
class MetadataField:
    """A metadata CSV loaded into a pandas DataFrame, with its dimensions cached.

    Attributes:
        csv_path: ``Path`` of the CSV file that was read.
        dataframe: the CSV contents as returned by ``pd.read_csv``.
        number_of_rows: first element of ``dataframe.shape``.
        number_of_columns: second element of ``dataframe.shape``.
    """

    def __init__(self, csv_path):
        source_path = Path(csv_path)
        frame = pd.read_csv(source_path)
        row_count, column_count = frame.shape

        self.csv_path = source_path
        self.dataframe = frame
        self.number_of_rows = row_count
        self.number_of_columns = column_count

In [8]:
# create interactive csv_path widget
# Radio buttons for picking the serial title; the selected value is the identifier
# used to build the CSV file name.
identifier_widget = widgets.RadioButtons(layout={'width': 'initial'},
                                         style={
                                             'description_width': 'initial'},
                                         options=serial_titles_dict,
                                         description='Serial Title:',
                                         disabled=False
                                        )
# Radio buttons for picking which metadata field's CSV to work on.
metadata_field_widget = widgets.RadioButtons(layout={'width': 'initial'},
                                            style={
                                                'description_width': 'initial'},
                                            options=metadata_fields_list,
                                            description='Metadata field:',
                                            disabled=False,
                                           )

# Text box showing the resolved CSV path (filled in by select_csv).
csv_path_display_widget = widgets.Text(layout={'width': 'initial'},
                                       description='CSV Path:')

# Indicator for whether the CSV exists on disk.
# (Previously this widget was constructed twice; the first instance was discarded
# immediately, so only the described one is created now.)
csv_path_exists_validity_widget = widgets.Valid(description='CSV exist?',
                                               style={
                                                'description_width': 'initial'})

# Buttons start in the 'danger' style and switch to 'success' once loading worked.
load_csv_button_widget = widgets.Button(description='Load CSV',
                                        style={'description_width': 'initial'},
                                        button_style='danger'
                                       )
# button_style='', # 'success', 'info', 'warning', 'danger' or ''

load_image_names_list_button_widget = widgets.Button(description='Load Image Names',
                                        style={'description_width': 'initial'},
                                        button_style='danger'
                                       )
# Status line written by the "Load CSV" callback.
load_csv_button_output_widget = widgets.Text(layout={'width': 'initial'})


def load_csv_button_clicked(b):
    """Load the CSV shown in ``csv_path_display_widget`` into the global ``metadata``.

    The file is read once, through ``MetadataField``. On success the status text
    and the button label/style are updated and the new ``MetadataField`` is
    returned. If the path does not exist, an error message is written to
    ``load_csv_button_output_widget``, ``metadata`` is left untouched and
    ``None`` is returned.

    Args:
        b: the button instance passed by ``on_click`` (unused).
    """
    global metadata

    try:
        loaded_metadata = MetadataField(csv_path_display_widget.value)
    except FileNotFoundError:
        load_csv_button_output_widget.value = 'No dataframe; CSV path invalid'
        return
    metadata = loaded_metadata
    load_csv_button_output_widget.value = f'{metadata.csv_path.name} loaded as dataframe with {metadata.number_of_rows} rows and {metadata.number_of_columns} columns'
    load_csv_button_widget.description = 'CSV Loaded'
    load_csv_button_widget.button_style = 'success'
    return metadata

load_csv_button_widget.on_click(load_csv_button_clicked)

def select_csv(identifier, metadata_field):
    """Resolve the CSV for a title/field pair and refresh the path widgets.

    The file name is ``<identifier>_<metadata_field>.csv`` inside
    ``data_directory_path``. The absolute path is shown in
    ``csv_path_display_widget``, its existence in
    ``csv_path_exists_validity_widget``, and both load buttons are put back
    into their initial label and 'danger' style.

    Returns:
        The (unresolved) ``Path`` to the CSV.
    """
    selected_csv = data_directory_path / f'{identifier}_{metadata_field}.csv'

    csv_path_display_widget.value = str(selected_csv.resolve())
    csv_path_exists_validity_widget.value = selected_csv.is_file()

    # reset buttons if a new csv is selected
    initial_labels = ((load_csv_button_widget, 'Load CSV'),
                      (load_image_names_list_button_widget, 'Load Image Names'))
    for button, label in initial_labels:
        button.description = label
        button.button_style = 'danger'

    return selected_csv

def load_image_names_list_button_clicked(b):
    """Populate the global ``image_names_list`` from the loaded metadata.

    The list holds the unique values of the ``image_name`` column of
    ``metadata.dataframe``. If no CSV has been loaded yet, or the loaded CSV
    has no ``image_name`` column, a message is written to
    ``load_csv_button_output_widget`` and nothing else changes (previously the
    callback raised inside the widget event handler).

    Args:
        b: the button instance passed by ``on_click`` (unused).
    """
    global image_names_list

    try:
        dataframe = metadata.dataframe
    except NameError:  # "Load CSV" has not been clicked successfully yet
        load_csv_button_output_widget.value = 'No dataframe loaded; click "Load CSV" first'
        return
    if 'image_name' not in dataframe.columns:
        load_csv_button_output_widget.value = 'Loaded CSV has no image_name column'
        return

    # create image_names_list
    image_names_list = dataframe['image_name'].unique().tolist()
    number_of_image_names = len(image_names_list)
    load_image_names_list_button_widget.description = f'{number_of_image_names} unique names'
    load_image_names_list_button_widget.style = {'description_width': 'initial'}
    load_image_names_list_button_widget.button_style = 'success'

load_image_names_list_button_widget.on_click(load_image_names_list_button_clicked)

# Re-run select_csv with the current radio-button values whenever either selection changes.
csv_path_interactive_widget = widgets.interactive_output(select_csv, {'identifier': identifier_widget, 'metadata_field': metadata_field_widget})

In [9]:
# Select a title and metadata field to process
# Row 1: the two radio-button groups side by side.
csv_widget_row_1 = HBox([identifier_widget, metadata_field_widget])

# Row 2: the resolved CSV path.
csv_widget_row_2 = csv_path_display_widget

# Row 3: existence indicator plus the two load buttons.
csv_widget_row_3 = HBox([csv_path_exists_validity_widget, load_csv_button_widget, load_image_names_list_button_widget])

# Row 4: status text written by the "Load CSV" callback.
csv_widget_row_4 = load_csv_button_output_widget

csv_widget = VBox([csv_widget_row_1, csv_widget_row_2, csv_widget_row_3, csv_widget_row_4])

# Last expression of the cell: renders the assembled widget.
csv_widget

VBox(children=(HBox(children=(RadioButtons(description='Serial Title:', layout=Layout(width='initial'), option…

In [17]:
# load output CSV
# Previously saved dates for the selected title, if any, are read into input_df.
output_csv_path = data_directory_path.joinpath(f'{identifier_widget.value}_output_dates.csv')
if output_csv_path.is_file():
    input_df = pd.read_csv(output_csv_path)
else:
    # No saved output yet: start from an empty frame with the two columns the
    # following cell reads, so input_df is always defined.
    input_df = pd.DataFrame(columns=['image_name', 'date'])
print(output_csv_path)

data/agrtfn_output_dates.csv


In [18]:
# create dictionary from 2 lists example
# Pair the two lists position by position: keys from list_a, values from list_1.
list_a = ['a', 'b', 'c']
list_1 = [1, 2, 3]
test_dict = {key: value for key, value in zip(list_a, list_1)}
test_dict

{'a': 1, 'b': 2, 'c': 3}

In [19]:
# create names_and_dates_input dictionary
# Maps each image_name in input_df to its date.
image_names = input_df['image_name'].tolist()
dates = input_df['date'].tolist()
names_and_dates_input = dict(zip(image_names, dates))
# create names_and_dates output dictionary from input dictionary
# Copy the input (plain assignment would alias it), so updates made to the
# output dictionary do not silently rewrite the input.
names_and_dates_output = dict(names_and_dates_input)
len(names_and_dates_input), len(names_and_dates_output)

(1477, 1477)

In [20]:
# utility functions

def sanitize_crop_box(crop_box_dictionary):
    """Convert crop-box strings into lists of integers.

    Each value is expected to be a string such as ``"(x1, y1, x2, y2)"``.
    Coordinates may be separated by commas, whitespace, or both, so
    ``"(1,2,3,4)"`` and ``"(1,  2, 3, 4)"`` parse the same as ``"(1, 2, 3, 4)"``.

    Args:
        crop_box_dictionary: mapping of key -> crop-box string. Non-string
            values (e.g. the float NaN pandas uses for empty cells) are allowed.

    Returns:
        ``OrderedDict`` with the same keys in the same order. Values are lists
        of ints, or ``None`` when the input value was not a string or held no
        coordinates.

    Raises:
        ValueError: if a coordinate cannot be converted with ``int``.
    """
    sanitized_dict = OrderedDict()

    for data, crop_box in crop_box_dictionary.items():
        try:
            # drop surrounding whitespace, then the parentheses
            stripped = crop_box.strip().strip('()')
        except AttributeError:  # not a string (NaN float): keep the key, store None
            sanitized_dict[data] = None
            continue

        # Turn commas into spaces and split on any run of whitespace, so the
        # result does not depend on a space following every comma.
        points = [int(point) for point in stripped.replace(',', ' ').split()]
        sanitized_dict[data] = points if points else None

    return sanitized_dict
  
def resize_crop_box_dictionary(crop_box_dictionary, resize_ratio):
    """Scale every crop box in a dictionary by ``resize_ratio``.

    Args:
        crop_box_dictionary: mapping of key -> iterable of numeric points, or
            ``None`` for a missing box (the value ``sanitize_crop_box`` stores
            for non-string input).
        resize_ratio: factor each point is multiplied by.

    Returns:
        ``OrderedDict`` with the same keys in the same order. Each box becomes
        a list of ``int(point * resize_ratio)``; ``None`` boxes stay ``None``
        instead of raising ``TypeError``.
    """
    resized_crop_box_dictionary = OrderedDict()

    for data, crop_box in crop_box_dictionary.items():
        if crop_box is None:
            resized_crop_box_dictionary[data] = None
            continue
        resized_crop_box_dictionary[data] = [int(point * resize_ratio) for point in crop_box]

    return resized_crop_box_dictionary
    
def crop_image_for_processing(image, percentage=0.4, top_and_sides_padding=10):
    """Return the top portion of ``image`` with a margin trimmed from three sides.

    The crop keeps rows from ``top_and_sides_padding`` down to
    ``int(height * percentage) + top_and_sides_padding`` and columns from
    ``top_and_sides_padding`` to ``width - top_and_sides_padding``. The bottom
    edge is pushed down by the padding so the rows removed at the top are
    compensated for.

    Args:
        image: array whose first two dimensions are (height, width).
        percentage: fraction of the height to keep before padding is applied.
        top_and_sides_padding: margin, in pixels, removed from top/left/right.

    Returns:
        The sliced region of ``image``.
    """
    image_height, image_width = image.shape[:2]
    padding = top_and_sides_padding

    left, top = padding, padding
    right = image_width - padding
    bottom = int(image_height * percentage) + padding

    return image[top:bottom, left:right]

def get_np_crop_points(crop_box):
    """Return the bounding extremes ``(x1, y1, x2, y2)`` of a set of points.

    Each element of ``crop_box`` is indexed as ``element[0][0]`` (x) and
    ``element[0][1]`` (y). Negative coordinates are replaced by their absolute
    value before the extremes are taken.

    Coordinates equal to 0 are now included; the previous truthiness check
    dropped them, so a box touching the left or top edge reported the wrong
    minimum.

    Args:
        crop_box: sequence of points, each shaped like ``[[x, y]]``.

    Returns:
        Tuple ``(min_x, min_y, max_x, max_y)``.

    Raises:
        ValueError: if ``crop_box`` is empty (``min``/``max`` of an empty list).
    """
    x_points = [abs(point[0][0]) for point in crop_box]
    y_points = [abs(point[0][1]) for point in crop_box]

    # find extremes in crop box
    x1, x2 = min(x_points), max(x_points)
    y1, y2 = min(y_points), max(y_points)

    return x1, y1, x2, y2

def if_rgb_convert_to_gray(np_image):
    """Return ``np_image`` as grayscale, treating multi-channel input as RGB.

    Arrays with at most two dimensions are returned unchanged; anything else
    is converted with ``cv2.COLOR_RGB2GRAY``.
    """
    if len(np_image.shape) <= 2:
        return np_image

    return cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)

def if_bgr_convert_to_gray(np_image):
    """Return ``np_image`` as grayscale, treating multi-channel input as BGR.

    Arrays with at most two dimensions are returned unchanged; anything else
    is converted with ``cv2.COLOR_BGR2GRAY``.
    """
    if len(np_image.shape) <= 2:
        return np_image

    return cv2.cvtColor(np_image, cv2.COLOR_BGR2GRAY)

def resize_image(image, width=None, height=None, ratio=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: array whose first two dimensions are (height, width).
        width: target width in pixels; takes precedence over ``height``.
        height: target height in pixels, used only when ``width`` is None.
        ratio: accepted but not used as input; it is overwritten with the
            computed scale factor.
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        ``(resized_image, ratio)`` when ``width`` or ``height`` is given.
        NOTE(review): when both are None the original ``image`` is returned on
        its own, not as a tuple, so callers that unpack two values must always
        pass a dimension.
    """
    # grab the image size first, as the original did, so a bad input fails here
    source_height, source_width = image.shape[:2]

    # nothing requested: hand the image back untouched
    if width is None and height is None:
        return image

    if width is not None:
        # scale from the requested width
        ratio = width / float(source_width)
        target_size = (width, int(source_height * ratio))
    else:
        # scale from the requested height
        ratio = height / float(source_height)
        target_size = (int(source_width * ratio), height)

    return cv2.resize(image, target_size, interpolation=inter), ratio

def resize_ratio(image, ratio, inter=cv2.INTER_AREA):
    """Scale ``image`` by ``ratio`` in both dimensions.

    Args:
        image: array whose first two dimensions are (height, width).
        ratio: multiplier applied to width and height (each truncated to int).
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image.
    """
    source_height, source_width = image.shape[:2]
    target_size = (int(source_width * ratio), int(source_height * ratio))

    return cv2.resize(image, target_size, interpolation=inter)

def search(values, searchFor):
    """Return the first key whose values contain ``searchFor``.

    Args:
        values: mapping of key -> iterable of containers (e.g. strings).
        searchFor: item tested with ``in`` against each contained element.

    Returns:
        The first key, in iteration order, with an element ``v`` for which
        ``searchFor in v`` holds; ``None`` when there is no match.
    """
    matching_keys = (
        key
        for key in values
        for element in values[key]
        if searchFor in element
    )
    return next(matching_keys, None)

In [21]:
# using plot.ly and display_image() function to display images
import plotly.graph_objects as go
def plotly_display(image_bgr, output_height=600):
    """Build a plotly ``FigureWidget`` that shows a BGR image at a fixed height.

    Args:
        image_bgr: image array in OpenCV's BGR channel order.
        output_height: height of the figure in pixels; the width is derived
            from the image's aspect ratio.

    Returns:
        The configured ``go.FigureWidget`` (not shown automatically).
    """
    figure = go.FigureWidget()

    # convert BGR -> RGB, then wrap as a PIL image for plotly
    pil_image = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))

    # figure dimensions derived from the source size
    original_width, original_height = pil_image.size
    display_height = output_height
    scale = display_height / original_height
    display_width = int(original_width * scale)

    # Invisible scatter trace spanning the figure; it is there to help the
    # autoresize logic work.
    corner_markers = go.Scatter(
        x=[0, display_width],
        y=[0, display_height],
        mode="markers",
        marker_opacity=0,
    )
    figure.add_trace(corner_markers)

    # hide both axes and pin their ranges to the figure size
    figure.update_xaxes(visible=False, range=[0, display_width])
    figure.update_yaxes(
        visible=False,
        range=[0, display_height],
        # the scaleanchor attribute ensures that the aspect ratio stays constant
        scaleanchor="x",
    )

    # the image itself, stretched over the plotting area beneath the trace
    background_image = go.layout.Image(
        x=0,
        sizex=display_width,
        y=display_height,
        sizey=display_height,
        xref="x",
        yref="y",
        opacity=1.0,
        layer="below",
        sizing="stretch",
        source=pil_image,
    )
    figure.update_layout(images=[background_image])

    # overall size, with no margins around the image
    figure.update_layout(
        width=display_width,
        height=display_height,
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
    )

    return figure

In [22]:
# using matplotlib and cv2jupyter() function to display images
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Turn on matplotlib's interactive mode.
plt.ion()

import matplotlib as mpl
from io import BytesIO
# Imported under an alias: the name `Image` is already bound to PIL's Image in the first cell.
from IPython.display import Image as ipyImage

def cv2jupyter(image):
    """Display a 2- or 3-d numpy array as an image.

    2-d arrays are treated as grayscale and encoded as PNG with a gray
    colormap. 3-d arrays are treated as BGR, converted to RGB and encoded as
    JPEG. The dimensionality is checked before any colour conversion, so
    grayscale input is no longer passed to ``cv2.cvtColor`` with a BGR
    conversion code.

    Args:
        image: numpy array with 2 or 3 dimensions.

    Returns:
        An ``IPython.display.Image`` holding the encoded bytes.

    Raises:
        ValueError: if ``image`` is neither 2- nor 3-dimensional.
    """
    if image.ndim == 2:
        image_format, cmap = 'png', mpl.cm.gray
    elif image.ndim == 3:
        # convert to rgb
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_format, cmap = 'jpg', None
    else:
        raise ValueError("Only 2- or 3-d arrays can be displayed as images.")
    # Don't let matplotlib autoscale the color range so we can control overall luminosity
    vmax = 255 if image.dtype == 'uint8' else 1.0
    with BytesIO() as buffer:
        mpl.image.imsave(buffer, image, format=image_format, cmap=cmap, vmin=0, vmax=vmax)
        out = buffer.getvalue()
    return ipyImage(out)

In [23]:
class PageOne:
    """A page image together with the date guesses recorded for it.

    The class relies on names defined elsewhere in the notebook: the global
    ``metadata`` (its ``dataframe`` is read for the ``image_name``,
    ``date_guess_id``, ``date_guess`` and ``date_crop_box`` columns), the
    helpers ``sanitize_crop_box``, ``resize_crop_box_dictionary``,
    ``resize_image`` and ``print_text``, and the date widgets written by
    ``set_date``.
    """

    def __init__(self, image_path, config=False):
        """Look up the crop boxes for ``image_path`` and store the configuration.

        Args:
            image_path: path to the page image; its file name is matched
                against the ``image_name`` column of ``metadata.dataframe``.
            config: optional dict of processing options; a falsy value selects
                the defaults below.
        """
        self.image_path = Path(image_path)
        self.crop_box_dictionary = self.get_date_crop_dictionary()

        # set config dictionary
        if config:
            self.config = config
        else:
            self.config = {'crop_percentage': 0.4,
                           'distance_ratio': 0.6,
                           'minimum_matches': 5,
                           'top_and_sides_padding': 10,
                           'resize_width': 1200}

    def _read_image(self):
        """Read ``self.image_path`` with OpenCV, failing loudly if it cannot be loaded."""
        image = cv2.imread(str(self.image_path))
        if image is None:  # the result is unusable downstream, so stop here with a clear message
            raise FileNotFoundError(f'Could not read image: {self.image_path}')
        return image

    def get_date_crop_dictionary(self):
        """Build ``{date_guess: crop_box}`` for this image from ``metadata.dataframe``.

        Returns:
            The sanitized ``OrderedDict`` (also stored on
            ``self.crop_box_dictionary``); values are lists of ints or ``None``.
        """
        # rows of the metadata that belong to this image
        dataframe = metadata.dataframe
        image_rows = dataframe[dataframe['image_name'] == self.image_path.name]

        # create dictionary of date guesses and crop boxes
        crop_box_dict = OrderedDict()
        for date_guess_id in image_rows['date_guess_id']:
            # filter once per id and take the first matching row for both columns
            guess_rows = image_rows.loc[image_rows['date_guess_id'] == date_guess_id]
            crop_box = guess_rows['date_crop_box'].tolist()[0]
            date_guess = guess_rows['date_guess'].tolist()[0]
            crop_box_dict[date_guess] = crop_box

        self.crop_box_dictionary = sanitize_crop_box(crop_box_dict)

        return self.crop_box_dictionary

    def get_guesses(self):
        """Return the date guesses as a list, with non-string keys replaced by ``None``."""
        guesses_list = [date_guess if isinstance(date_guess, str) else None  # non-str is probably None or NaN
                        for date_guess in self.crop_box_dictionary]
        print_text(f'get_guesses(): {len(guesses_list)}')
        self.guesses = guesses_list
        return self.guesses

    def resize(self, image=None):
        """Resize ``image`` (or the page image read from disk) to ``config['resize_width']``.

        Returns:
            ``(resized_image, resize_ratio)``; both are also stored on ``self``.
        """
        if image is None:
            image = self._read_image()
        self.resized, self.resize_ratio = resize_image(image, width=self.config['resize_width'])
        return self.resized, self.resize_ratio

    def crop_off_bottom(self, crop_off_percent=0.5, image=None):
        """Remove the bottom ``crop_off_percent`` of ``image`` (or of the page image).

        Returns:
            The remaining top part, also stored on ``self.cropped``.
        """
        if image is None:
            image = self._read_image()
        height, width = image.shape[:2]
        self.cropped = image[0:int(height*(1-crop_off_percent)), 0:width]
        return self.cropped

    def resize_and_crop_bottom(self):
        """Resize the page image, then crop the bottom off the resized image."""
        self.resize()
        self.crop_off_bottom(image=self.resized)

    def draw_rectangle(self, guess, crop_box_dictionary=None, image=None, crop=False):
        """Draw a red rectangle around the crop box recorded for ``guess``.

        Args:
            guess: key into ``crop_box_dictionary``.
            crop_box_dictionary: mapping of guess -> ``[x1, y1, x2, y2]``;
                defaults to a fresh ``get_date_crop_dictionary()``.
            image: BGR image to draw on (a copy is used); defaults to the page
                image read from disk.
            crop: when true, also store a padded cut-out around the rectangle
                on ``self.drawing_cropped``.

        Returns:
            The full drawing, also stored on ``self.drawing``.
        """
        if image is None:
            image = self._read_image()

        if crop_box_dictionary is None:
            crop_box_dictionary = self.get_date_crop_dictionary()

        x1, y1, x2, y2 = crop_box_dictionary[guess]

        # draw a red box around the crop area
        drawing = cv2.rectangle(image.copy(), (x1, y1), (x2, y2), (0, 0, 255), 4)

        if crop:
            height, width = drawing.shape[:2]
            # 25 px margin on every side, plus 45 extra px above the box,
            # clamped to the image bounds
            x1_crop = max(x1 - 25, 0)
            y1_crop = max(y1 - 45 - 25, 0)
            x2_crop = min(x2 + 25, width)
            y2_crop = min(y2 + 25, height)

            self.drawing_cropped = drawing[y1_crop:y2_crop, x1_crop:x2_crop]
        self.drawing = drawing
        return self.drawing

    def get_date_crop(self, guess, crop=True):
        """Produce the image region for ``guess`` and store it on ``self.date_crop``.

        The page is resized and its bottom cropped off, the crop boxes are
        scaled by the resize ratio, and the box for ``guess`` is outlined.

        Args:
            guess: key into ``self.crop_box_dictionary``.
            crop: when true the padded cut-out around the box is used,
                otherwise the full drawing.

        Returns:
            The selected image, or ``None`` when ``guess`` is unknown or has no
            crop box. (The value is now returned as well as stored.)
        """
        if guess not in self.crop_box_dictionary:
            print_text('path1path1--line102')
            date_crop = None
        elif self.crop_box_dictionary[guess] is None:
            # a guess without coordinates has nothing to outline
            date_crop = None
        else:
            self.resize_and_crop_bottom()

            # only boxes that have coordinates can be scaled
            usable_boxes = OrderedDict((key, box)
                                       for key, box in self.crop_box_dictionary.items()
                                       if box is not None)
            self.date_crop_box_dictionary = resize_crop_box_dictionary(usable_boxes, self.resize_ratio)

            self.drawing = self.draw_rectangle(guess, crop_box_dictionary=self.date_crop_box_dictionary, image=self.cropped, crop=crop)

            # drawing_cropped is only produced when crop is requested
            date_crop = self.drawing_cropped if crop else self.drawing

        self.date_crop = date_crop
        return self.date_crop

    def parse_date(self, guess):
        """Parse ``guess`` into ``"Month DD, YYYY"`` and store its parts.

        Args:
            guess: text to parse; non-string values (e.g. ``None``) are
                treated as "no date".

        Returns:
            The formatted date string, or ``None`` when nothing could be
            parsed. ``self.parsed_date``, ``self.month``, ``self.day`` and
            ``self.year`` are always set; all are ``None`` on failure.
        """
        parsed_date = None
        if isinstance(guess, str):
            try:
                parsed_date = parse(guess)
            except (ValueError, OverflowError):  # no date found
                parsed_date = None

        date_parts = None
        if parsed_date is not None:
            try:
                date_parts = parsed_date.strftime("%B %d %Y").split(' ')
            except ValueError:  # date cannot be formatted
                date_parts = None

        if date_parts is None:
            # clear any parts left over from an earlier successful parse
            self.month = self.day = self.year = None
            self.parsed_date = None
        else:
            self.month, self.day, self.year = date_parts
            self.parsed_date = f'{self.month} {self.day}, {self.year}'

        return self.parsed_date

    def set_date(self, parsed_date=None):
        """Push the most recently parsed date into the date-selection widgets.

        Args:
            parsed_date: text for the "parsed" text box; defaults to
                ``self.parsed_date``.

        The month/day/year widgets are only updated when a date has been
        parsed; otherwise only the text box is written.
        """
        if parsed_date is None:
            parsed_date = getattr(self, 'parsed_date', None)

        parsed_date_text_widget.value = str(parsed_date)

        if getattr(self, 'month', None) is None:
            # nothing parsed: leave the selection widgets as they are
            return

        month_widget.value = self.month

        # set day_tens value to 1 so we get full option for day_ones
        day_tens_widget.value = 1

        # set day_ones
        day_ones = int(str(self.day)[1])
        day_ones_widget.value = day_ones

        # set day_tens
        day_tens = int(str(self.day)[0])
        day_tens_widget.value = day_tens

        # year year_tens and _ones
        year_tens = int(str(self.year)[2])
        year_tens_widget.value = year_tens
        year_ones = int(str(self.year)[3])
        year_ones_widget.value = year_ones

In [24]:
# images currently on external hard drive
# NOTE(review): absolute, machine-specific path; the cell only works where this volume is mounted.
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')

# image_name widget
# Dropdown over image_names_list, which is created by the "Load Image Names" button
# callback; this cell raises NameError if that button has not been clicked first.
image_name_widget = widgets.Dropdown(layout={'width': 'initial'},
                                    style={'description_width': 'initial'},
                                    value = image_names_list[0],
                                    options=image_names_list,
                                    description='Image name:'
                                    )




In [None]:
# image_names
# Same hardcoded image directory as the previous cell (re-assigned here).
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')

# Dropdown for choosing which image to work on; rendered as the cell's output.
select_input_data_widget = widgets.Dropdown(options=image_names_list)
select_input_data_widget

In [None]:
# update input image name and instantiate PageOne(image_path)
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')

# Re-creates the widgets used by the callbacks defined below in this cell.
select_input_data_widget = widgets.Dropdown(options=image_names_list)
# Output areas: one for printed debug text, one for the displayed image.
text_display_widget = widgets.Output()
image_display_widget = widgets.Output()
# Radio buttons for the date guesses; a placeholder option is shown until an image is loaded.
radio_buttons = widgets.RadioButtons(style={'description_width': 'initial'},
                                           options=['Select an image'])
def load_image(image_name):
    """Return a ``PageOne`` for ``image_name`` located in ``image_data_directory_path``."""
    return PageOne(image_data_directory_path.joinpath(image_name))
def radio_buttons_from_list(description_list):
    """Set the options of the shared ``radio_buttons`` widget and return it.

    A non-empty ``description_list`` becomes the options and its length is
    printed into ``text_display_widget``; an empty list yields a single
    'No data found' option whose value is ``None``.
    """
    if len(description_list) != 0:
        radio_buttons.options = description_list
        with text_display_widget:
            print(f'radio_buttons_from_list()')
            print(f'# of buttons: {len(description_list)}')
    else:
        radio_buttons.options = {'No data found': None}

    return radio_buttons
def create_guess_buttons(change):
    """Load the selected image into the global ``page_1`` and show its guess buttons.

    Args:
        change: change notification passed by ``observe`` (unused).

    Returns:
        The radio-button widget holding the guesses.
    """
    global page_1

    page_1 = load_image(select_input_data_widget.value)

    # get_guesses() stores the list on page_1.guesses and returns it
    guess_buttons = radio_buttons_from_list(page_1.get_guesses())

    # NOTE(review): button_display_area is not defined in the visible part of the notebook
    with button_display_area:
        button_display_area.clear_output(wait=True)
        display(guess_buttons)

    return guess_buttons
select_input_data_widget.observe(create_guess_buttons, 'value')

# based on select_input_data_widget.observe, we load an image
def select_date_guess(change):
    """Show the date crop for the selected guess, or the cropped page if there is none.

    Reads the current guess from ``radio_buttons`` and asks the global
    ``page_1`` for its crop. The result is taken from ``page_1.date_crop``,
    which ``get_date_crop`` always sets, so this does not rely on a return
    value. When no crop is available (no guess selected, or the guess is
    unknown) the resized page with its bottom cropped off is shown instead.

    Args:
        change: change notification passed by ``observe`` (unused).
    """
    guess = radio_buttons.value

    page_1.get_date_crop(guess)
    display_image = page_1.date_crop

    if display_image is None:
        page_1.resize_and_crop_bottom()
        display_image = page_1.cropped

    with image_display_widget:
        image_display_widget.clear_output(wait=True)

        # get display_image for Jupyter Notebook display
        display(cv2jupyter(display_image))


select_input_data_widget.observe(select_date_guess, 'value')
radio_buttons.observe(select_date_guess, 'value')

In [None]:
# user interactive to link radio buttons and image selection?

# update input image name and instantiate PageOne(image_path)
# NOTE(review): this cell re-creates widgets and re-defines load_image and
# radio_buttons_from_list from the previous cell; whichever cell runs last wins.
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')
select_input_data_widget = widgets.Dropdown(options=image_names_list)
text_display_widget = widgets.Output()
image_display_widget = widgets.Output()
radio_buttons = widgets.RadioButtons(style={'description_width': 'initial'},
                                           options=['Select an image'])
def load_image(image_name):
    """Create the ``PageOne`` for an image file name.

    The name is joined onto ``image_data_directory_path`` to form the path.
    """
    page = PageOne(image_data_directory_path.joinpath(image_name))
    return page
def radio_buttons_from_list(description_list):
    """Replace the options of the shared ``radio_buttons`` widget and return it.

    The function name is printed into ``text_display_widget`` on every call.
    An empty ``description_list`` produces a single 'No data found' option
    whose value is ``None``.
    """
    with text_display_widget:
        print(f'radio_buttons_from_list()')

    no_data_options = {'No data found': None}
    radio_buttons.options = no_data_options if len(description_list) == 0 else description_list

    return radio_buttons
def select_date_guess_test(image, guess):
    """Load ``image``, rebuild the guess buttons and display the resulting crop.

    Intended to be driven by ``widgets.interactive_output`` with the image
    dropdown and the guess radio buttons as inputs.

    Args:
        image: image file name, passed to ``load_image``.
        guess: the radio-button value supplied by the caller. It is overwritten
            below with ``radio_buttons.value`` before it is used.
    """
    global page_1
    page_1 = load_image(image)
    page_1.get_guesses()
    # NOTE(review): the options are replaced on every call, including calls
    # triggered by a change of the radio buttons themselves; how that interacts
    # with the current selection depends on ipywidgets - confirm.
    radio_buttons = radio_buttons_from_list(page_1.guesses)
        
    guess = radio_buttons.value
        
    # get_date_crop stores its result on page_1.date_crop
    page_1.get_date_crop(guess)
    display_image = page_1.date_crop
    # print(f'{page_1.crop_box_dictionary}')
    # image = page_1.date_crop
    # print(image.shape)
    
    # if guess is None or date_crop is None:
    #     print('is_none')
    #     page_1.resize_and_crop_bottom()
    #     display_image = page_1.cropped
    # else:
    #     display_image = page_1.date_crop


    with image_display_widget:
        image_display_widget.clear_output(wait=True)
        
        # Fall back to the resized, bottom-cropped page when there is no date
        # crop, or when converting the crop for display raises AttributeError.
        if display_image is None:
            page_1.resize_and_crop_bottom()
            display_image = cv2jupyter(page_1.cropped)
        else:
            try:
                display_image = cv2jupyter(display_image)
            except AttributeError:
                page_1.resize_and_crop_bottom()
                display_image = cv2jupyter(page_1.cropped)
        display(display_image)

        
# my_result = widgets.interactive(select_date_guess_test, image=select_input_data_widget, guess=radio_buttons)
# Call select_date_guess_test with the current dropdown and radio-button values.
my_result = widgets.interactive_output(select_date_guess_test, {'image': select_input_data_widget, 'guess': radio_buttons})
# Top row: the two input widgets; bottom row: text output next to the image output.
ui = HBox([select_input_data_widget, radio_buttons])
display_widget = HBox([text_display_widget, image_display_widget])
my_widget = VBox([ui, display_widget])
# Last expression of the cell: renders the assembled widget.
my_widget

In [27]:
# widget stuff here

# update input image name and instantiate PageOne(image_path)
# NOTE(review): re-creates select_input_data_widget, text_display_widget and
# radio_buttons, replacing the instances made in earlier cells; observers
# registered on the old instances are not carried over to these new objects.
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')
select_input_data_widget = widgets.Dropdown(options=image_names_list)
text_display_widget = widgets.Output()
# Output areas for the full page image and for the date crop.
full_image_display = widgets.Output()
crop_display_widget = widgets.Output()
radio_buttons = widgets.RadioButtons(style={'description_width': 'initial'},
                                           options=['Select an image'])
date_selected_widget = widgets.Output()


# Month names in calendar order.
months_list = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
# Month name -> month number (1-12).
months_dict = {month_name: month_number
               for month_number, month_name in enumerate(months_list, start=1)}
# Month name -> number of days; February is listed with 28.
months_number_of_days_dict = dict(zip(
    months_list,
    [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
))
# Digits 0-9, used as options for the day/year digit selectors.
numbers_list = list(range(10))

# Text boxes for the raw guess, the parsed date and the date that will be saved.
raw_date_text_widget = widgets.Text(description='Raw text:')
parsed_date_text_widget = widgets.Text(description='Parsed text:')
output_date_text_widget = widgets.Text(description='Output text:')
# Month selector.
month_widget = widgets.RadioButtons(options=months_list,
                                   description='Month:')
# The day is entered as two digits; the tens digit is limited to 0-3.
day_tens_widget = widgets.RadioButtons(options=numbers_list[0:4],
                                      description='Day 10s:')
day_ones_widget = widgets.RadioButtons(options=numbers_list,
                                      description='Day 1s:')
# Only the last two digits of the year are entered.
year_tens_widget = widgets.RadioButtons(options=numbers_list,
                                       description='Year 10s:')
year_ones_widget = widgets.RadioButtons(options=numbers_list,
                                      description='Year 1s:')
# Navigation buttons; their click handlers are registered further down in this cell.
button_previous_image = widgets.Button(style={'description_width': 'initial'},
                                   description='Discard & Go Back',
                                  )
button_next_image = widgets.Button(style={'description_width': 'initial'},
                                   description='Save & Load Next',
                                  )

def create_crop_image(image=None, text=None, color=None):
    """Create a black 520x140 canvas and put an image or text on it.

    Args:
        image: optional array to place on the canvas. If it is taller or wider
            than the canvas it is returned unchanged. A 2-d (single channel)
            image is copied into all three channels. When both ``image`` and
            ``text`` are given, the image is used.
        text: optional string drawn on the canvas when no image is given.
        color: text colour as a BGR tuple, the string ``'red'``, or ``None``
            for white.

    Returns:
        The canvas with the content added, the oversized ``image`` itself, or
        the blank canvas when neither ``image`` nor text was supplied (an
        error is reported through ``print_text`` in that case).
    """
    if color is None:
        color = (255, 255, 255)
    elif isinstance(color, str) and color == 'red':  # compare by value, not identity
        color = (0, 0, 255)
    base_height = 140
    base_width = 520
    base_image = np.zeros(shape=[base_height, base_width, 3], dtype=np.uint8)

    if text is None and image is None:
        print_text('ERROR: input image or text to create crop image')
        # nothing to draw: hand back the blank canvas instead of failing on return
        return base_image

    if image is None:  # we must have text
        return cv2.putText(base_image, text, (10, int(base_height*.5)), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 2, cv2.LINE_AA)

    # we must have image
    height, width = image.shape[:2]
    if height > base_height or width > base_width:
        # too large for the canvas in at least one dimension: use it as it is
        return image

    if image.ndim == 2:
        # give grayscale input a channel axis so it broadcasts over the 3 channels
        image = image[:, :, np.newaxis]

    # put image on output crop, aligned to the left and bottom of the canvas
    y1 = base_height - height  # starting height point for image paste
    x1 = 0  # starting width
    base_image[y1:(y1+height), x1:(x1+width)] = image
    return base_image

# Output widget; in the visible part of the notebook it is only referenced from commented-out code.
output_test_widget = widgets.Output(layout={'width': 'initial'})

def on_button_previous_image_clicked(b):
    """Select the image that precedes the current one in ``image_names_list``.

    Nothing is saved. From the first image the index becomes -1, which selects
    the last entry of the list.

    Args:
        b: the button instance passed by ``on_click`` (unused).

    Returns:
        The newly selected image name.
    """
    position = image_names_list.index(select_input_data_widget.value)
    select_input_data_widget.value = image_names_list[position - 1]

    return select_input_data_widget.value

button_previous_image.on_click(on_button_previous_image_clicked)

# make sure the safety log exists
Path('./_temp_text.temp').touch()
def on_button_next_image_clicked(b):
    """Save the chosen date for the current image and select the next image.

    The date from ``output_date_text_widget`` is stored in
    ``names_and_dates_output`` under the current image name and, for safety,
    appended as ``<image name> <date>`` to ``./_temp_text.temp``. The line is
    written with plain file I/O, not a shell ``echo``, so characters in the
    widget text are never interpreted by a shell. After the last image the
    selection wraps around to the first.

    Args:
        b: the button instance passed by ``on_click`` (unused).

    Returns:
        The newly selected image name.
    """
    image_name = page_1.image_path.name
    output_date = output_date_text_widget.value

    names_and_dates_output.update({image_name: output_date})
    # for safety, append this info to a temp text file
    with open('./_temp_text.temp', 'a', encoding='utf-8') as temp_file:
        temp_file.write(f'{image_name} {output_date}\n')

    current_image_index = image_names_list.index(select_input_data_widget.value)

    # the modulo wraps the index back to 0 after the last image
    next_image_index = (current_image_index + 1) % len(image_names_list)

    select_input_data_widget.value = image_names_list[next_image_index]

    return select_input_data_widget.value

button_next_image.on_click(on_button_next_image_clicked)




In [28]:
# select date widget stuff
# Module-level default; select_date assigns its own local max_days.
max_days=0
# Output area for the rendered date image, with a fixed size.
# NOTE(review): continuous_update is passed to widgets.Output here; confirm that Output accepts this argument.
date_image_display_widget = widgets.Output(continuous_update=False)
date_image_display_widget.layout.height = '150px'
date_image_display_widget.layout.width = '530px'

@date_image_display_widget.capture()
def date_image_display(date_image):
    """Replace the contents of ``date_image_display_widget`` with ``date_image``.

    The previous output is cleared (waiting for the new output) and the image
    is converted with ``cv2jupyter`` before being displayed.
    """
    date_image_display_widget.clear_output(wait=True)
    display(cv2jupyter(date_image))

def _clamp_to_options(value, options):
    """Return value if it is one of options, otherwise the nearest end of options.

    options is expected to be an ascending list of digits. An empty list
    leaves value untouched.
    """
    if not options or value in options:
        return value
    return options[0] if value < options[0] else options[-1]


def select_date(month, day_tens, day_ones, year_tens, year_ones):
    """Build a date string from the date widgets and keep the day digits valid.

    Parameters
    ----------
    month : key of ``months_number_of_days_dict``; compared against 'February'.
    day_tens, day_ones : the two digits of the day of the month.
    year_tens, year_ones : the last two digits of the year; '19' is prefixed.

    Side effects
    ------------
    * Sets the options of ``day_tens_widget`` and ``day_ones_widget`` to the
      digits that are valid for the month and year, then sets their values.
      A digit that is no longer valid (for example a tens digit of 3 after
      switching to February) is moved to the nearest valid digit first,
      because it would not be among the widget's options.
    * Writes the date string to ``output_date_text_widget.value`` and renders
      it through ``create_crop_image`` and ``date_image_display``.

    Returns
    -------
    The date formatted as ``'<month> <day>, <year>'``.
    """
    year = int(f'19{year_tens}{year_ones}')
    max_days = months_number_of_days_dict[month]
    # Full Gregorian rule. A bare `year % 4 == 0` test accepts 1900, which is
    # not a leap year, and would offer February 29, 1900.
    is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

    # set radio option buttons based on max days in the month
    # (assumes numbers_list holds the digits 0-9 in ascending order -- TODO confirm)
    day_tens_options = numbers_list[:int(str(max_days)[0]) + 1]
    day_tens_widget.options = day_tens_options
    day_tens = _clamp_to_options(day_tens, day_tens_options)
    day_tens_widget.value = day_tens

    # there is no day 0, so with a tens digit of 0 the ones digit starts at 1
    day_ones_start = 1 if day_tens_widget.value == 0 else 0

    if day_tens_widget.value == 3:
        # second digit of max_days: 30 -> only 0, 31 -> 0 and 1
        day_ones_options = numbers_list[day_ones_start:int(str(max_days)[1]) + 1]
    elif month == 'February' and day_tens_widget.value == 2 and not is_leap_year:
        # drop the last digit so the 29th is not offered outside leap years
        day_ones_options = numbers_list[day_ones_start:-1]
    else:
        day_ones_options = numbers_list[day_ones_start:]
    day_ones_widget.options = day_ones_options
    # Clamp only after the new options are in place, so the value assigned
    # below is always one the widget currently offers.
    day_ones = _clamp_to_options(day_ones, day_ones_options)
    day_ones_widget.value = day_ones
    day_ones = day_ones_widget.value

    day = (day_tens * 10) + day_ones
    date = f'{month} {day}, {year}'

    output_date_text_widget.value = date

    # render the chosen date as an image
    text_image = create_crop_image(text=output_date_text_widget.value)
    date_image_display(text_image)

    return date

# Call select_date again whenever the value of any of the five date widgets
# changes; the keyword names match select_date's parameters.
select_date_interactive_widget = widgets.interactive_output(
    select_date,
    dict(
        month=month_widget,
        day_tens=day_tens_widget,
        day_ones=day_ones_widget,
        year_tens=year_tens_widget,
        year_ones=year_ones_widget,
    ),
)

# Show the widget returned by interactive_output as this cell's result.
select_date_interactive_widget


Output()

In [29]:
# date row widget design: three horizontal rows stacked into one column

# row 1 -- the three date text widgets
date_row_1_widgets = HBox([
    raw_date_text_widget,
    parsed_date_text_widget,
    output_date_text_widget,
])

# row 2 -- previous / next image buttons
date_row_2_widgets = HBox([button_previous_image, button_next_image])

# row 3 -- the date selection widgets, in a row fixed at 1000px wide
date_row_3_widgets = HBox(
    [
        month_widget,
        day_tens_widget,
        day_ones_widget,
        year_tens_widget,
        year_ones_widget,
    ],
    layout=widgets.Layout(width='1000px'),
)

date_widget = VBox([date_row_1_widgets, date_row_2_widgets, date_row_3_widgets])

In [30]:
# add date selection widget and current date selected
# also -- load date into date selection widget from guess

# user interactive to link radio buttons and image selection?

# update input image name and instantiate PageOne(image_path)

# test creating image_names_list from directory instead

# Dropdown listing every image name that can be loaded.
select_input_data_widget = widgets.Dropdown(options=image_names_list)

# Output areas: printed text, the full image (400px high; width left unset,
# '800px' was tried earlier) and the crop (530px by 150px).
text_display_widget = widgets.Output()
full_image_display_widget = widgets.Output(layout={'height': '400px'})
crop_image_display_widget = widgets.Output(
    layout={'height': '150px', 'width': '530px'}
)

# Radio buttons start with a single placeholder option.
radio_buttons = widgets.RadioButtons(
    options=['Select an image'],
    style={'description_width': 'initial'},
)

def load_image(image_name):
    """Return a PageOne for image_name inside image_data_directory_path."""
    return PageOne(image_data_directory_path.joinpath(image_name))

def radio_buttons_from_list(description_list):
    """Load description_list into the shared radio_buttons widget.

    An empty list yields the single option 'No data found', which is also
    selected. Otherwise the options are the non-None descriptions in sorted
    order, followed by one None entry if the list contained any None.

    Returns the radio_buttons widget itself.
    """
    if len(description_list) == 0:
        radio_buttons.options = ['No data found']
        radio_buttons.value = 'No data found'
        return radio_buttons

    choices = sorted(item for item in description_list if item is not None)
    # None cannot be sorted alongside the descriptions, so it goes last
    if any(item is None for item in description_list):
        choices.append(None)
    radio_buttons.options = choices
    return radio_buttons

@full_image_display_widget.capture()
def full_image_display(full_image):
    """Show full_image in full_image_display_widget, replacing what was there.

    The image is passed through cv2jupyter before being displayed. The
    capture() decorator routes the display call into the widget.
    Returns None.
    """
    # wait=True defers the clear until the new output is ready
    full_image_display_widget.clear_output(wait=True)
    display(cv2jupyter(full_image))

@crop_image_display_widget.capture()
def crop_image_display(crop_image):
    """Show crop_image in crop_image_display_widget, replacing what was there.

    If cv2jupyter raises AttributeError for crop_image, the bottom crop of
    the global page_1 is regenerated and shown instead. Returns None.
    """
    crop_image_display_widget.clear_output(wait=True)
    try:
        rendered = cv2jupyter(crop_image)
    except AttributeError:
        # presumably crop_image was not a usable image -- fall back to the page crop
        page_1.resize_and_crop_bottom()
        rendered = cv2jupyter(page_1.cropped)
    display(rendered)

@text_display_widget.capture()
def print_text(text):
    """Print text; the capture() decorator routes it into text_display_widget."""
    print(text)

def select_date_guess_test(image, guess):
    """Load an image, refresh the guess radio buttons and display the result.

    Parameters
    ----------
    image : image name handed to load_image.
    guess : overwritten below with radio_buttons.value before it is read.
        NOTE(review): it presumably exists so that a change of the radio
        selection re-runs this function -- confirm.

    Side effects: rebinds the global page_1, updates radio_buttons,
    raw_date_text_widget and the image/text display widgets, and calls
    page_1.parse_date / page_1.set_date. Returns None.
    """
    global page_1
    
    # load the page and let it compute its crop and its guesses
    page_1 = load_image(image)
    page_1.resize_and_crop_bottom()
    page_1.get_guesses()
    
    # radio_buttons_from_list returns the shared radio_buttons widget; this
    # name is a local alias for it inside the function
    radio_buttons = radio_buttons_from_list(page_1.guesses)
    
    # use whatever is selected after the options were refreshed
    guess = radio_buttons.value
    
    if guess is None or guess == 'No data found':
        raw_date_text_widget.value = 'None'
        # load full-sized image from crop -- no drawing because no date data
        full_image, _ = resize_image(page_1.cropped, height=400)
        crop_image = create_crop_image(text='Data -> None', color='red')    
    else:
        raw_date_text_widget.value = guess
        # get the date crop
        page_1.get_date_crop(guess)
        # print_text(page_1.drawing)
        # load full-sized image from drawing so we get the version with red box around found data
        # (resize_ratio is not used afterwards)
        full_image, resize_ratio = resize_image(page_1.drawing, height=400)
        try:  # setting crop image from date_crop
            crop_image = page_1.date_crop
            crop_image = create_crop_image(image=crop_image)
        except AttributeError:  # no date_crop
            # NOTE(review): an AttributeError raised inside create_crop_image lands here too
            crop_image = create_crop_image(text='Data -> None', color='red')
        
    # display images
    full_image_display(full_image)
    crop_image_display(crop_image)
    
    # print text
    text_display_widget.clear_output(wait=True)
    print_text(f'Raw text: {raw_date_text_widget.value}')

    # process date information
    # NOTE(review): this also runs when guess is None or 'No data found' --
    # confirm parse_date handles those values.
    page_1.parse_date(guess)
    page_1.set_date()

# Re-run select_date_guess_test whenever the image dropdown or the guess radio
# buttons change value.
my_result = widgets.interactive_output(
    select_date_guess_test,
    dict(image=select_input_data_widget, guess=radio_buttons),
)

# top strip (75px high): image dropdown, guess radio buttons, printed text
ui = HBox(
    [select_input_data_widget, radio_buttons, text_display_widget],
    layout=widgets.Layout(height='75px'),
)

# full image on the left; crop image stacked above the date image on the right
crop_image_stack_widget = VBox([crop_image_display_widget, date_image_display_widget])
display_widget = HBox([full_image_display_widget, crop_image_stack_widget])

# whole tool, 1600px wide (height left unset)
my_widget = VBox(
    [ui, display_widget, date_widget],
    layout=widgets.Layout(width='1600px'),
)
my_widget

VBox(children=(HBox(children=(Dropdown(options=('0012_004266_000001_0001.tif', '0012_004266_000002_0001.tif', …

In [None]:
# WARNING: reset output_dictionary
# NOTE(review): on_button_next_image_clicked stores its results in
# names_and_dates_output, not output_dictionary -- confirm this reset targets
# the dictionary you intend to clear.
output_dictionary = {}

In [20]:
# load a different image_names_list -- by default it is loaded from the CSV file on disk, which is a mistake

In [13]:
# Build image_names_list from the file names of every *.tif directly inside
# image_data_directory_path, in sorted order.
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')
image_names_list = sorted(
    tif_path.name for tif_path in image_data_directory_path.glob('*.tif')
)
len(image_names_list)

3451

In [177]:
# Keep only the image names that have no entry in names_and_dates_output yet,
# i.e. everything that still has to be processed, and point the dropdown at
# the shortened list.
not_in_output = [
    image_name
    for image_name in image_names_list
    if image_name not in names_and_dates_output
]
image_names_list = not_in_output
select_input_data_widget.options = image_names_list
print(len(image_names_list), image_names_list[:5])

1974 ['0012_004266_001478_0001.tif', '0012_004266_001479_0001.tif', '0012_004266_001480_0001.tif', '0012_004266_001481_0001.tif', '0012_004266_001482_0001.tif']


In [None]:
# Build the expected file names for item numbers 1 through 405 and collect the
# ones that have no entry in names_and_dates_output yet.
not_in_output = []
for i in range(1, 406):
    name_to_search_for = f'0012_004266_{str(i).zfill(6)}_0001.tif'
    if name_to_search_for not in names_and_dates_output:
        # append the name that was just checked; `test_name` is not defined in
        # this cell, so using it would raise NameError or add a stale value
        not_in_output.append(name_to_search_for)
image_names_list = not_in_output
print(len(not_in_output))

In [31]:
# Turn the {image name: date} dictionary into a two-column DataFrame and show
# its first and last three rows.
dates_df = pd.DataFrame({
    'image_name': list(names_and_dates_output.keys()),
    'date': list(names_and_dates_output.values()),
})
print(dates_df.head(3), dates_df.tail(3), sep='\n\n')

                    image_name             date
0  0012_004266_000001_0001.tif    July 13, 1925
1  0012_004266_000002_0001.tif  January 9, 1921
2  0012_004266_000003_0001.tif     July 4, 1921

                       image_name              date
1474  0012_004266_001475_0001.tif   January 9, 1950
1475  0012_004266_001476_0001.tif  January 16, 1950
1476  0012_004266_001477_0001.tif  January 23, 1950


In [32]:
# Preview the first five rows as CSV text. With no path argument, to_csv()
# returns the CSV as a string rather than writing a file; the row index is
# included here as the unnamed first column.
print(dates_df.head(5).to_csv())

,image_name,date
0,0012_004266_000001_0001.tif,"July 13, 1925"
1,0012_004266_000002_0001.tif,"January 9, 1921"
2,0012_004266_000003_0001.tif,"July 4, 1921"
3,0012_004266_000004_0001.tif,"July 11, 1921"
4,0012_004266_000005_0001.tif,"July 18, 1922"



In [33]:
# output to csv
# index=False leaves the row index out of the file, so the columns are just
# image_name and date.
dates_df.to_csv(output_csv_path, index=False)
# Report that a file is present at the output path. This does not verify its
# contents.
if output_csv_path.is_file():
    print(f'{output_csv_path} exists')

data/agrtfn_output_dates.csv exists
