In [1]:
from datetime import datetime, timedelta
import re

In [2]:
def get_date_from_line(s):
    """
    Return the first date contained in input string 's'
    as a datetime.date object.

    The date is located by searching 's' for the first substring shaped
    like YYYY-MM-DD (four digits, two digits, two digits, separated by
    hyphens); that substring is then parsed with the '%Y-%m-%d' format.

    Raises:
        ValueError - if 's' contains no YYYY-MM-DD shaped substring, or if
                     the first such substring is not a valid calendar date
                     (e.g. month 13).
    """
    match = re.search(r'\d{4}-\d{2}-\d{2}', s)
    if match is None:
        # re.search returns None when nothing matches; without this guard
        # the failure would surface as an AttributeError on None instead of
        # an error that names the offending line.
        raise ValueError("no YYYY-MM-DD date found in {!r}".format(s))
    return datetime.strptime(match.group(), '%Y-%m-%d').date()

def validate_date(date_text):
    """
    Parse 'date_text' with the '%Y-%m-%d' format and return the result
    as a datetime.date object.

    Raises:
        ValueError - if 'date_text' cannot be parsed with that format.
    """
    try:
        parsed = datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        # Replace strptime's message with one that states the expected format.
        raise ValueError("Incorrect data format, should be YYYY-MM-DD")
    else:
        return parsed.date()

In [3]:
# Sample lines used by the demo cells: each entry is the marker "x ",
# followed by a YYYY-MM-DD date, listed oldest first.
testdates = ["x " + day for day in (
    "2016-04-28",
    "2016-05-01",
    "2016-05-03",
    "2016-05-31",
    "2016-06-03",
    "2016-06-05",
    "2016-06-29",
    "2016-07-05",
    "2016-07-15",
    "2016-07-16",
    "2016-07-17",
)]

def print_testdates():
    """Print each entry of the module-level `testdates` list, preceded by its index."""
    for position, entry in enumerate(testdates):
        print(position, entry)

In [4]:
def get_start_and_end_dates_indices(lines, startdate, enddate):
    """
    Return the indices of the first and the last line in `lines` whose
    date lies within the inclusive range [startdate, enddate].

    Arguments:
        lines - array of strings. Somewhere in each string is a date
                with format YYYY-MM-DD (extracted with get_date_from_line).
                Assume `lines` is sorted in ASCENDING date order (i.e.,
                oldest date first and newest date last).
        startdate, enddate - datetime.date objects (DATES only, not date
                and time); they are compared directly against the
                datetime.date values extracted from each line.

    Returns:
        (index_start_date, index_end_date) - both are real, non-negative
        positions in `lines`, with index_start_date <= index_end_date.
        When the range extends to the final line, the end index is the
        actual last position rather than -1, so slicing with
        lines[start:end + 1] and computing end - start + 1 both work.

    Raises:
        RuntimeError - if startdate is after enddate.
        ValueError - if no line has a date inside the range (this includes
                     an empty `lines`).
    """
    if startdate > enddate:
        raise RuntimeError("start date is after end date")

    index_start_date = None
    index_end_date = None
    for index, line in enumerate(lines):
        line_date = get_date_from_line(line)
        if line_date > enddate:
            # Input is sorted ascending, so no later line can be in range.
            break
        if line_date >= startdate:
            if index_start_date is None:
                index_start_date = index
            # Keep advancing the end marker while lines stay inside the range.
            index_end_date = index

    if index_start_date is None:
        # Covers: empty input, every date before startdate, and the range
        # falling entirely into a gap between two consecutive lines.
        raise ValueError(
            "no line has a date between {} and {}".format(startdate, enddate)
        )

    return index_start_date, index_end_date

In [5]:
def test_function(start, end):
    """
    Demo helper: look up the date range [start, end] in `testdates` and
    print the outcome.

    Arguments:
        start, end - date strings; each is parsed with validate_date.

    Prints the full indexed `testdates` list, a header naming the parsed
    range, then the index and text of the entries located at the start and
    end indices returned by get_start_and_end_dates_indices.
    """
    range_start = validate_date(start)
    range_end = validate_date(end)

    first_index, last_index = get_start_and_end_dates_indices(
        testdates, range_start, range_end
    )

    print_testdates()
    print("----- Date Range: {} to {} -----".format(range_start, range_end))
    for found_index in (first_index, last_index):
        print(found_index, testdates[found_index])

In [6]:
# Reversed range: the start date is later than the end date, so the lookup
# is expected to reject it with a RuntimeError. The error is caught and
# printed so that this demonstration does not abort "Restart & Run All".
try:
    test_function("2016-06-01", "2016-05-31")
except RuntimeError as err:
    print("{}: {}".format(type(err).__name__, err))

RuntimeError: start date is after end date

In [7]:
# Both boundaries coincide with dates that are present in `testdates`.
test_function(start="2016-05-01", end="2016-05-31")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-05-01 to 2016-05-31 -----
1 x 2016-05-01
3 x 2016-05-31


In [8]:
# Start precedes the oldest entry; end falls between two entries.
test_function(start="2016-04-20", end="2016-05-05")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-04-20 to 2016-05-05 -----
0 x 2016-04-28
2 x 2016-05-03


In [9]:
# Neither boundary is present in `testdates`; both fall between entries.
test_function(start="2016-06-01", end="2016-06-30")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-06-01 to 2016-06-30 -----
4 x 2016-06-03
6 x 2016-06-29


In [10]:
# Start falls between two entries; end matches the second-to-last entry.
test_function(start="2016-07-06", end="2016-07-16")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-07-06 to 2016-07-16 -----
8 x 2016-07-15
9 x 2016-07-16


In [11]:
# Only one entry (2016-07-15) lies inside this range.
test_function(start="2016-07-06", end="2016-07-15")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-07-06 to 2016-07-15 -----
8 x 2016-07-15
8 x 2016-07-15


In [12]:
# Start falls between two entries; end matches an entry exactly.
test_function(start="2016-06-20", end="2016-07-15")

0 x 2016-04-28
1 x 2016-05-01
2 x 2016-05-03
3 x 2016-05-31
4 x 2016-06-03
5 x 2016-06-05
6 x 2016-06-29
7 x 2016-07-05
8 x 2016-07-15
9 x 2016-07-16
10 x 2016-07-17
----- Date Range: 2016-06-20 to 2016-07-15 -----
6 x 2016-06-29
8 x 2016-07-15
