In [1]:
# import and display options
%matplotlib notebook
from pathlib import Path

import cv2
import ipywidgets as widgets
import numpy as np
import pandas as pd
from dateutil.parser import parse
from IPython.display import display, HTML
from ipywidgets import interact, IntProgress, Label, VBox, HBox
from matplotlib import pyplot as plt

display(HTML("<style>.container {width:85% !important;}</style>"))

In [2]:
# Notebook configuration: the directory searched for CSVs and the choices
# offered by the serial-title / metadata-field radio buttons.
data_directory_path = Path('data/')

# Display label -> identifier used as the CSV filename prefix.
# The first pair is the placeholder shown before a real title is picked.
serial_titles_dict = {
    'Select a title': 'Select a title',
    'Tennessee farm news': 'agrtfn',
    'Tennessee farm and home science': 'agrtfhs',
    'UT Special extension circulars': 'agrutesc',
}

# First entry is the placeholder; the rest are the selectable field names.
metadata_fields_list = [
    'Select a metadata field',
    'date',
    'title',
]

In [3]:
# Widgets for the CSV picker: two radio groups (serial title, metadata field),
# a text box showing the resulting CSV path, an "exists?" indicator, a load
# button, and a text box for the load status message.
identifier_widget = widgets.RadioButtons(
    options=serial_titles_dict,
    description='Serial Title:',
    disabled=False,
    layout={'width': 'initial'},
    style={'description_width': 'initial'},
)
metadata_field_widget = widgets.RadioButtons(
    options=metadata_fields_list,
    description='Metadata field:',
    disabled=False,
    layout={'width': 'initial'},
    style={'description_width': 'initial'},
)

csv_path_display_widget = widgets.Text(
    description='CSV Path:',
    layout={'width': 'initial'},
)

csv_path_exists_validity_widget = widgets.Valid(
    description='Does CSV exist?',
    style={'description_width': 'initial'},
)

load_csv_button_widget = widgets.Button(
    description='Load CSV',
    style={'description_width': 'initial'},
)

load_csv_button_output_widget = widgets.Text(layout={'width': 'initial'})


def on_button_clicked(b):
    """Load the CSV shown in ``csv_path_display_widget`` into the global ``metadata``.

    Click handler for ``load_csv_button_widget``. On success the global
    ``metadata`` is rebound to a new ``MetadataField`` and a summary is written
    to ``load_csv_button_output_widget``. If the file does not exist, an error
    message is written instead and ``metadata`` is left untouched.

    Parameters
    ----------
    b
        Argument supplied by the button's ``on_click`` dispatch (unused).

    Returns
    -------
    MetadataField or None
        The loaded metadata, or ``None`` when the CSV path is invalid.
    """
    global metadata
    csv_path = csv_path_display_widget.value
    try:
        # MetadataField reads the CSV itself, so construct it directly rather
        # than parsing the file once as a probe and then a second time.
        loaded_metadata = MetadataField(csv_path)
    except FileNotFoundError:
        load_csv_button_output_widget.value = 'No dataframe; CSV path invalid'
        return None
    metadata = loaded_metadata
    load_csv_button_output_widget.value = (
        f'{metadata.csv_path.name} loaded as dataframe with '
        f'{metadata.number_of_rows} rows and '
        f'{metadata.number_of_columns} columns'
    )
    return metadata


load_csv_button_widget.on_click(on_button_clicked)

def select_csv(identifier, metadata_field):
    """Build the CSV path for a title/field pair and reflect it in the widgets.

    The filename is ``<identifier>_<metadata_field>.csv`` inside
    ``data_directory_path``. The resolved (absolute) path is written to
    ``csv_path_display_widget`` and ``csv_path_exists_validity_widget`` is set
    to whether that file exists.

    Returns
    -------
    pathlib.Path
        The unresolved path under ``data_directory_path``.
    """
    csv_path = data_directory_path / f'{identifier}_{metadata_field}.csv'
    absolute_path_text = str(csv_path.resolve())
    csv_path_display_widget.value = absolute_path_text
    csv_path_exists_validity_widget.value = csv_path.is_file()
    return csv_path


# csv_path_interactive_widget = widgets.interactive(select_csv, identifier=identifier_widget, metadata_field=metadata_field_widget)
# output_csv_path_widget = interactive(select_csv, 'identifier'=identifier_widget, 'metadata_field'=metadata_field_widget)
# Bind select_csv to the two radio groups: their values are passed as the
# `identifier` and `metadata_field` arguments.
csv_path_interactive_widget = widgets.interactive_output(
    select_csv,
    {
        'identifier': identifier_widget,
        'metadata_field': metadata_field_widget,
    },
)

In [4]:
class MetadataField:
    """A metadata CSV loaded into a dataframe, with its path and dimensions.

    Attributes
    ----------
    csv_path : pathlib.Path
        Location of the CSV, as given to the constructor.
    dataframe : pandas.DataFrame
        Contents of the CSV as read by ``pd.read_csv``.
    number_of_rows, number_of_columns : int
        Shape of ``dataframe`` at load time.
    """

    def __init__(self, csv_path):
        self.csv_path = Path(csv_path)
        self.dataframe = pd.read_csv(self.csv_path)
        dataframe_shape = self.dataframe.shape
        self.number_of_rows = dataframe_shape[0]
        self.number_of_columns = dataframe_shape[1]

In [110]:
# functions
def resize(image, width=None, height=None, ratio=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Parameters
    ----------
    image
        Array whose first two ``shape`` entries are (height, width).
    width : int, optional
        Target width in pixels. Takes precedence when ``height`` is also given.
    height : int, optional
        Target height in pixels; used only when ``width`` is None.
    ratio
        Ignored. Kept in the signature for backward compatibility; the scale
        factor is always derived from ``width`` or ``height``.
    inter
        Interpolation flag forwarded to ``cv2.resize``.

    Returns
    -------
    tuple
        ``(resized_image, ratio)``. When neither ``width`` nor ``height`` is
        given the image is returned unchanged with a ratio of ``1.0``, so the
        result can always be unpacked the same way.

    Raises
    ------
    ValueError
        If ``image`` is None (e.g. a failed ``cv2.imread``).
    """
    if image is None:
        raise ValueError('resize() received None instead of an image; '
                         'check that the image file was read successfully')

    (h, w) = image.shape[:2]

    # Nothing requested: hand the image back, but still as a (image, ratio)
    # pair so callers that unpack two values do not break on this path.
    if width is None and height is None:
        return image, 1.0

    if width is None:
        # scale so the height matches, width follows the aspect ratio
        ratio = height / float(h)
        dim = (max(1, int(w * ratio)), height)
    else:
        # scale so the width matches, height follows the aspect ratio
        ratio = width / float(w)
        dim = (width, max(1, int(h * ratio)))

    # max(1, ...) above keeps a very thin image from truncating to a
    # zero-sized dimension, which cv2.resize rejects.
    resized = cv2.resize(image, dim, interpolation=inter)

    return resized, ratio


def resize_ratio(image, ratio, inter=cv2.INTER_AREA):
    """Scale ``image`` by ``ratio`` along both axes and return the result.

    The target size is the original width and height each multiplied by
    ``ratio`` and truncated to an int; ``inter`` is forwarded to
    ``cv2.resize`` as the interpolation flag.
    """
    source_height, source_width = image.shape[:2]
    target_size = (int(source_width * ratio), int(source_height * ratio))
    return cv2.resize(image, target_size, interpolation=inter)

In [5]:
# CSV picker layout, top to bottom: selection radios, resolved CSV path,
# load button + existence indicator, load status message.
row_1_widgets = HBox([identifier_widget, metadata_field_widget])
row_2_widgets = csv_path_display_widget
row_3_widgets = HBox([load_csv_button_widget, csv_path_exists_validity_widget])
row_4_widgets = load_csv_button_output_widget

my_widget = VBox([
    row_1_widgets,
    row_2_widgets,
    row_3_widgets,
    row_4_widgets,
])

my_widget

VBox(children=(HBox(children=(RadioButtons(description='Serial Title:', layout=Layout(width='initial'), option…

In [None]:
# Write images_not_processed_list to a text file, one entry per line.
# NOTE(review): absolute, user-specific path; images_not_processed_list must
# already exist in the kernel (it is not defined in the cells shown here).
output_path = Path('/Users/jeremy/Documents/GitHub/utk_ProjectCeres/data/agrtfn_date_to_process.txt')
with output_path.open(mode='w') as text_file:
    text_file.write('\n'.join(images_not_processed_list))

In [None]:
# Distinct image names in the loaded metadata; the cell displays the count.
image_names_list = metadata.dataframe['image_name'].unique().tolist()
len(image_names_list)

In [106]:
# create interactive date selection widget
# Month names in calendar order (also the option order of the month radios).
months_list = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# Month name -> month number (1-12).
months_dict = {
    month_name: month_number
    for month_number, month_name in enumerate(months_list, start=1)
}

# Month name -> number of days; February is listed with its non-leap length.
months_number_of_days_dict = dict(
    zip(months_list, (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31))
)

# Digits 0-9, sliced to build the day/year digit radio options.
numbers_list = list(range(10))

# One radio group per date component: month name, the two digits of the day,
# and the last two digits of the year.
month_widget = widgets.RadioButtons(
    description='Month:',
    options=months_list,
)

day_tens_widget = widgets.RadioButtons(
    description='Day 10s:',
    options=numbers_list[:4],
)
day_ones_widget = widgets.RadioButtons(
    description='Day 1s:',
    options=numbers_list,
)
year_tens_widget = widgets.RadioButtons(
    description='Year 10s:',
    options=numbers_list,
)
year_ones_widget = widgets.RadioButtons(
    description='Year 1s:',
    options=numbers_list,
)

# Text box that shows the assembled date.
date_display_widget = widgets.Text(description='Date:')
max_days = 0
def select_date(month, day_tens, day_ones, year_tens, year_ones):
    """Assemble a 19xx date from the radio selections and keep day choices valid.

    Besides building the date string, this narrows the options of
    ``day_tens_widget`` and ``day_ones_widget`` to digits that form a real day
    of the chosen month/year, and clamps a selection that is no longer valid
    (e.g. the 31st after switching to a 30-day month) to the nearest valid
    digit instead of assigning a value that is missing from the options.

    Parameters
    ----------
    month : str
        Month name; must be a key of ``months_number_of_days_dict``.
    day_tens, day_ones : int
        Tens and ones digit of the day of month.
    year_tens, year_ones : int
        Last two digits of the year; the century is always 19.

    Returns
    -------
    str
        The date formatted as ``'<month> <day>, <year>'``; the same text is
        written to ``date_display_widget``.
    """
    year = int(f'19{year_tens}{year_ones}')

    # Full Gregorian rule: 1900 is divisible by 4 but is not a leap year.
    is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    max_days = months_number_of_days_dict[month]
    if month == 'February' and is_leap_year:
        max_days = 29
    max_tens, max_ones = divmod(max_days, 10)

    # Offer only tens digits that exist in this month and pull a stale
    # selection (e.g. 3 after switching to February) back into range.
    day_tens = min(day_tens, max_tens)
    day_tens_widget.options = numbers_list[:max_tens + 1]
    # Re-assert the value after replacing the options, as the original did.
    day_tens_widget.value = day_tens

    # Day 0 does not exist, so the ones digit starts at 1 when tens is 0.
    # Only the month's last tens row is capped by the month length; every
    # other row offers digits up to 9 (so Feb 9 and Feb 19 are selectable).
    day_ones_start = 1 if day_tens == 0 else 0
    day_ones_end = max_ones if day_tens == max_tens else 9
    day_ones = min(max(day_ones, day_ones_start), day_ones_end)
    day_ones_widget.options = numbers_list[day_ones_start:day_ones_end + 1]
    day_ones_widget.value = day_ones

    day = (day_tens * 10) + day_ones
    date = f'{month} {day}, {year}'
    date_display_widget.value = date
    return date
    

    
# Bind select_date to the five date radio groups; each widget's value is
# passed as the keyword argument named by its dictionary key.
select_date_interactive_widget = widgets.interactive_output(
    select_date,
    {
        'month': month_widget,
        'day_tens': day_tens_widget,
        'day_ones': day_ones_widget,
        'year_tens': year_tens_widget,
        'year_ones': year_ones_widget,
    },
)

# select_date_interactive_widget

# load_csv_button_widget = widgets.Button(description='Load CSV',
#                                         style={'description_width': 'initial'},
#                                        )

# load_csv_button_output_widget = widgets.Text(layout={'width': 'initial'})
    
# def on_button_clicked(b):
#     global metadata
#     # csv_path = Path(csv_path_display_widget.value)
#     try:
#         dataframe = pd.read_csv(csv_path_display_widget.value)
#     except FileNotFoundError:
#         load_csv_button_output_widget.value = 'No dataframe; CSV path invalid'
#         return
#     metadata = MetadataField(csv_path_display_widget.value)
#     number_of_rows, number_of_columns = dataframe.shape
#     load_csv_button_output_widget.value = f'{metadata.csv_path.name} loaded as dataframe with {metadata.number_of_rows} rows and {metadata.number_of_columns} columns'
#     return metadata

# load_csv_button_widget.on_click(on_button_clicked)

# def select_csv(identifier, metadata_field):
#     csv_name = f'{identifier}_{metadata_field}.csv'
#     csv_path = data_directory_path.joinpath(csv_name)
#     csv_path_display_widget.value = str(csv_path.resolve())
#     csv_path_exists_validity_widget.value = csv_path.is_file()
#     # print(f'Path to CSV: {csv_path}')
#     return csv_path


# csv_path_interactive_widget = widgets.interactive(select_csv, identifier=identifier_widget, metadata_field=metadata_field_widget)
# output_csv_path_widget = interactive(select_csv, 'identifier'=identifier_widget, 'metadata_field'=metadata_field_widget)
# csv_path_interactive_widget = widgets.interactive_output(select_csv, {'identifier': identifier_widget, 'metadata_field': metadata_field_widget})

In [108]:
# Date picker layout: a row with the month/day/year radio groups, and below it
# the text box showing the assembled date.
date_row_1_widgets = HBox([
    month_widget,
    day_tens_widget,
    day_ones_widget,
    year_tens_widget,
    year_ones_widget,
])
date_row_2_widgets = date_display_widget

date_widget = VBox([date_row_1_widgets, date_row_2_widgets])
date_widget.layout.height = '300px'

In [None]:
# First date_crop_box among the rows whose date_guess_id matches.
crop_box = image_name_dataframe.loc[
    image_name_dataframe['date_guess_id'] == date_guess_id, 'date_crop_box'
].tolist()[0]

In [132]:
# Every date_guess value from the loaded metadata, in row order (displayed below).
date_guesses = metadata.dataframe.loc[:, 'date_guess'].tolist()
date_guesses

['13-Jul-25',
 nan,
 'July 4,',
 '11-Jul-21',
 '18-Jul-22',
 '16-Jul-22',
 'June 29 to July',
 'July 8 were red-',
 '25-Jul-21',
 nan,
 '8-Aug-21',
 '15-Aug-21',
 '15-Aug-21',
 'August 9,-10, 11 a',
 '22-Aug-21',
 '29-Aug-21',
 '5-Sep-21',
 '12-Sep-21',
 'September 19, 19.',
 nan,
 '3-Oct-21',
 '10-Oct-21',
 '10-Oct-21',
 '17-Oct-21',
 '17-Oct-21',
 'October 17, 1192',
 '25-Oct-21',
 '28-Oct-21',
 '31-Oct-21',
 '7-Nov-21',
 '14-Nov-21',
 'November 2 1, 1921',
 'Novemb er 2B, We',
 nan,
 '26-Dec-21',
 '28-Dec-21',
 nan,
 'December 2, “4921',
 '19-Dec-21',
 '26-Dec-21',
 nan,
 nan,
 '31-Jan-22',
 '6-Feb-22',
 '13-Feb-22',
 '13-Feb-22',
 '13-Feb-19',
 '20-Feb-22',
 'February 14-16 was a',
 nan,
 '27-Feb-22',
 '6-Mar-22',
 nan,
 '20-Mar-22',
 '27-Mar-22',
 '3-Apr-22',
 nan,
 '10-Apr-22',
 nan,
 '24-Apr-22',
 nan,
 '6-May-22',
 '15-May-22',
 'Mey 16-18',
 nan,
 '22-May-22',
 '29-May-22',
 '5-Jun-22',
 'June 12 » 2922',
 '19-Jun-22',
 '26-Jun-22',
 '3-Jul-22',
 '11-Jul-22',
 '17-Jul-22',
 '2

In [142]:
# fix date_guess column messed up by Excel
month_abbrev_dict = {'Jan': 'January',
                     'Feb': 'February',
                     'Mar': 'March',
                     'Apr': 'April',
                     'May': 'May',
                     'Jun': 'June',
                     'Jul': 'July',
                     'Aug': 'August',
                     'Sep': 'September',
                     'Oct': 'October',
                     'Nov': 'November',
                     'Dec': 'December'}


def _fix_excel_date_guess(date_guess):
    """Turn one 'D-Mon-YY' value back into 'Month D, 19YY'.

    Missing values (NaN floats, None, or anything that is not a string) become
    ``None``. Strings that are not exactly a 1-2 digit day, a known three-letter
    month abbreviation and a two-digit year joined by hyphens are returned
    unchanged, so partial guesses such as 'Mey 16-18' pass through untouched
    and an unknown abbreviation cannot raise a KeyError.
    """
    if not isinstance(date_guess, str):  # NaN / empty cell
        return None
    parts = date_guess.split('-')
    if len(parts) != 3:
        return date_guess
    day, month, year = parts
    looks_like_excel_date = (
        day.isdigit() and len(day) <= 2
        and month in month_abbrev_dict
        and year.isdigit() and len(year) == 2
    )
    if not looks_like_excel_date:
        return date_guess
    # every value is assumed to fall in the 1900s, as in the original conversion
    return f'{month_abbrev_dict[month]} {day}, 19{year}'


fixed_date_guesses = [_fix_excel_date_guess(date_guess) for date_guess in date_guesses]
fixed_date_guesses

['July 13, 1925',
 None,
 'July 4,',
 'July 11, 1921',
 'July 18, 1922',
 'July 16, 1922',
 'June 29 to July',
 'July 8 were red-',
 'July 25, 1921',
 None,
 'August 8, 1921',
 'August 15, 1921',
 'August 15, 1921',
 'August 9,-10, 11 a',
 'August 22, 1921',
 'August 29, 1921',
 'September 5, 1921',
 'September 12, 1921',
 'September 19, 19.',
 None,
 'October 3, 1921',
 'October 10, 1921',
 'October 10, 1921',
 'October 17, 1921',
 'October 17, 1921',
 'October 17, 1192',
 'October 25, 1921',
 'October 28, 1921',
 'October 31, 1921',
 'November 7, 1921',
 'November 14, 1921',
 'November 2 1, 1921',
 'Novemb er 2B, We',
 None,
 'December 26, 1921',
 'December 28, 1921',
 None,
 'December 2, “4921',
 'December 19, 1921',
 'December 26, 1921',
 None,
 None,
 'January 31, 1922',
 'February 6, 1922',
 'February 13, 1922',
 'February 13, 1922',
 'February 13, 1919',
 'February 20, 1922',
 'February 14-16 was a',
 None,
 'February 27, 1922',
 'March 6, 1922',
 None,
 'March 20, 1922',
 'Marc

In [147]:
# Replace the date_guess column of the loaded dataframe with the repaired
# values. This overwrites the column in place, so the values as read from the
# CSV are no longer available from metadata.dataframe afterwards.
metadata.dataframe['date_guess'] = fixed_date_guesses

In [127]:
# process dates/images

# images currently on external hard drive
image_data_directory_path = Path('/Volumes/jmoor167/data/agrtfn/')

# get unique list of image names to process
image_names_list = metadata.dataframe['image_name'].unique().tolist()

for image_name in image_names_list:

    # all metadata rows that belong to this image
    image_name_dataframe = metadata.dataframe[metadata.dataframe['image_name'] == image_name]

    # set image_path and load image
    image_path = image_data_directory_path.joinpath(image_name)
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail here with the path instead of an AttributeError inside resize().
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # Bound to image_resize_ratio (not resize_ratio) so the resize_ratio()
    # function defined earlier in the notebook is not overwritten by a float.
    image_resized, image_resize_ratio = resize(image, width=800)

    # position within this image -> [date_guess, crop_box], taking the first
    # row that matches each date_guess_id
    crop_box_dict = {}
    for index, date_guess_id in enumerate(image_name_dataframe['date_guess_id']):
        matching_rows = image_name_dataframe.loc[image_name_dataframe['date_guess_id'] == date_guess_id]
        crop_box = matching_rows['date_crop_box'].tolist()[0]
        date_guess = matching_rows['date_guess'].tolist()[0]
        crop_box_dict[index] = [date_guess, crop_box]

    for date_guess, crop_box in crop_box_dict.values():
        print(date_guess)

13-Jul-25
nan
July 4,
11-Jul-21
18-Jul-22
16-Jul-22
June 29 to July
July 8 were red-
25-Jul-21
nan
8-Aug-21
15-Aug-21
15-Aug-21
August 9,-10, 11 a
22-Aug-21
29-Aug-21
5-Sep-21
12-Sep-21
September 19, 19.
nan
3-Oct-21
10-Oct-21
10-Oct-21
17-Oct-21
17-Oct-21
October 17, 1192
25-Oct-21
25-Oct-21
31-Oct-21
7-Nov-21
14-Nov-21
November 2 1, 1921
Novemb er 2B, We
nan
26-Dec-21
26-Dec-21
nan
December 2, “4921
19-Dec-21
26-Dec-21
nan
nan
31-Jan-22
6-Feb-22
13-Feb-22
13-Feb-22
13-Feb-19
20-Feb-22
February 14-16 was a
nan
27-Feb-22
6-Mar-22
nan
20-Mar-22
27-Mar-22
3-Apr-22
nan
10-Apr-22
nan
24-Apr-22
nan
6-May-22
15-May-22
Mey 16-18
nan
22-May-22
29-May-22
5-Jun-22
June 12 » 2922
19-Jun-22
26-Jun-22
3-Jul-22
11-Jul-22
17-Jul-22
28-Jun-75
28-Jun-75
24-Jul-22
31-Jul-22
July 25-27 was p
7-Aug-22
nan
14-Aug-22
14-Aug-22
21-Aug-22
28-Aug-22
4-Sep-22
nan
11-Sep-19
11-Sep-22
nan
11-Sep-22
nan
18-Sep-19
nan
nan
18-Sep-22
25-Sep-22
2-Oct-22
nan
9-Oct-22
October 9 and the U:
nan
November 3,
16-Oct-22
nan
3

KeyboardInterrupt: 

In [None]:
# Display the date picker (the VBox bound to date_widget in an earlier cell).
date_widget