In [1]:
import numpy as np
import pandas as pd

# Filter Holes for One Detector Station

## What is a "hole"?

A hole is defined as a discontinuity in time between two adjacent records.  For instance, if record $n$ is at time $t$, and the next record $n+1$ is not at time $t\ + \ 5\ min$, then a hole occurs after record $n$, or time $t$.

Up to now we've found two kinds of holes:
1. Holes between two days (whole days are absent in the dataset);
2. Holes occurring in one single day.

The first kind of holes does not, in general, affect the integrity of time series inference in one single day, because if one day is absent in the whole dataset, we can use other days to train the model.

The second kind of holes is a little bit tricky, because in this way it's difficult to use the day's data to train the dataset.  One possible solution is to interpolate intermediate points.

In [2]:
concat = pd.read_csv("data/I88N-processed/concat_1.csv")

In [5]:
stations = concat["Station ID"].unique()

In [6]:
stations

array([408907, 400951, 400057, 400147, 400343, 401560, 400045, 400122,
       401541, 402281, 402283, 402285, 402286, 400088, 402288, 413026,
       401464, 401489, 401538, 402290, 402292, 401643, 402800, 402828,
       407219, 402789, 408755, 402802, 408756, 400189, 400309, 400417,
       400249, 401639, 400662])

In [16]:
concat.head(2)

Unnamed: 0.1,Unnamed: 0,Station ID,Date,Time,Occupancy,Flow,Speed
0,0,408907,2017-01-01,00:00:00,2.1,95.0,68.8
1,1,408907,2017-01-01,00:05:00,2.0,91.0,68.4


In [17]:
for station in stations:
    print(str(station) + ": " + str(len(concat.loc[concat["Station ID"] == station]["Station ID"].values)))

408907: 101642
400951: 101643
400057: 101643
400147: 101645
400343: 101645
401560: 101648
400045: 101650
400122: 101649
401541: 101647
402281: 101647
402283: 101649
402285: 101647
402286: 101649
400088: 101650
402288: 101645
413026: 101645
401464: 101646
401489: 101647
401538: 101648
402290: 101648
402292: 101645
401643: 101646
402800: 101650
402828: 101645
407219: 101643
402789: 101645
408755: 101642
402802: 101648
408756: 101644
400189: 101648
400309: 101649
400417: 101649
400249: 101648
401639: 101647
400662: 101646


In [138]:
concat["datetime"] = pd.to_datetime(concat["Date"] + ' ' + concat["Time"])

In [144]:
concat_1.head(3)

Unnamed: 0,Station ID,Date,Time,Occupancy,Flow,Speed,datetime,idx
0,408907,2017-01-01,00:00:00,2.1,95.0,68.8,2017-01-01 00:00:00,0
1,408907,2017-01-01,00:05:00,2.0,91.0,68.4,2017-01-01 00:05:00,1
2,408907,2017-01-01,00:10:00,2.0,91.0,68.4,2017-01-01 00:10:00,2


In [42]:
df_1 = concat.loc[concat["Station ID"] == 408907]

In [46]:
# important
df_1["datetime"] = pd.to_datetime(df_1["Date"] + ' ' + df_1["Time"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [50]:
datetime_0 = df_1["datetime"].values[0]
datetime_1 = df_1["datetime"].values[1]

In [51]:
datetime_0 + np.timedelta64(5, 'm') == datetime_1

True

In [55]:
datetimes = df_1["datetime"].values

In [57]:
datetimes.dtype

dtype('<M8[ns]')

In [59]:
prev_datetime = None
for datetime in df_1["datetime"].values:
    if prev_datetime is not None:
        if datetime != prev_datetime + np.timedelta64(5, 'm'):
            prev_date = pd.Timestamp(prev_datetime).date()
            current_date = pd.Timestamp(datetime).date()
            if prev_date != current_date:
                print(prev_date, current_date)
            else:
                print(prev_datetime, datetime)
    prev_datetime = datetime

2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-04-20T01:35:00.000000000 2017-04-20T01:45:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-07-30T01:05:00.000000000 2017-07-30T01:15:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26


In [61]:
sampleStations = [408907]

In [129]:
stations_1 = concat["Station ID"].unique()
output_str = ""
for station in stations_1:
    print(station)
    output_str += str(station) + "\n"
    substr_date = ""
    substr_min = ""
    df = concat.loc[concat["Station ID"] == station]
    prevRow = None
    prev_datetime = None
    for datetime in df["datetime"].values:
        if prev_datetime is not None:
            if datetime != prev_datetime + np.timedelta64(5, 'm'):
                prev_date = pd.Timestamp(prev_datetime).date()
                current_date = pd.Timestamp(datetime).date()
                if prev_date != current_date:
                    substr_date += str(prev_date) + " " + str(current_date) + "\n"
                    print(prev_date, current_date)
                else:
                    substr_min += str(prev_datetime) + " " + str(datetime) + "\n"
                    print(prev_datetime, datetime)
        prev_datetime = datetime
    output_str += "date:\n" + substr_date + "\nmin:\n" + substr_min + "\n"
with open("data/I88N-processed/holes_1.txt", "w") as text_file:
    print(output_str, file=text_file)

408907


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-04-20T01:35:00.000000000 2017-04-20T01:45:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-07-30T01:05:00.000000000 2017-07-30T01:15:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
400951
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:

2017-10-08 2017-10-10
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
2017-12-26T22:40:00.000000000 2017-12-26T22:50:00.000000000
402288
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
413026
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-05T1

2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-24 2017-12-26
2017-12-26T22:40:00.000000000 2017-12-26T22:50:00.000000000
402802
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-24 2017-12-26
408756
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2

## Concatenate the Other Two Sets

### Concatenate Set 2

In [89]:
flow_speed_2 = pd.read_csv("data/I88N/flow_speed2.csv", error_bad_lines=False)

b'Skipping line 2192864: expected 6 fields, saw 7\n'


In [88]:
occ_2 = pd.read_csv("data/I88N/occ2.csv")

In [98]:
flow_speed_2.iloc[2192863]

Station ID                        408133
5 Minutes               07/27/2017 08:05
Flow (Veh/5 Minutes)                 722
Speed (mph)                         68.1
# Lane Points                          5
% Observed                            80
Name: 2192863, dtype: object

In [97]:
occ_2.iloc[2192865]

Station ID                 408133
5 Minutes        07/27/2017 08:10
Occupancy (%)                 9.4
# Lane Points                   5
% Observed                     80
Name: 2192865, dtype: object

In [99]:
size_2 = len(flow_speed_2["Station ID"].values)

In [100]:
size_2

3491445

In [101]:
len(occ_2["Station ID"].values)

3491446

In [None]:
# do not run this twice
occ_2.drop([2192862], axis=0, inplace=True)

In [110]:
datetime_sf = flow_speed_2["5 Minutes"].values
datetime_occ = occ_2["5 Minutes"].values

In [111]:
for i, dt in enumerate(datetime_sf):
    if dt != datetime_occ[i]:
        print(i, dt, datetime_occ[i])

In [113]:
concat_2 = pd.DataFrame({
    'Station ID': flow_speed_2["Station ID"].values,
    'datetime': flow_speed_2["5 Minutes"].values,
    'Occupancy': occ_2["Occupancy (%)"].values,
    'Flow': flow_speed_2["Flow (Veh/5 Minutes)"].values,
    'Speed': flow_speed_2["Speed (mph)"].values,
})

In [114]:
concat_2.head(3)

Unnamed: 0,Station ID,datetime,Occupancy,Flow,Speed
0,400141,01/01/2017 00:00,2.4,161.0,68.0
1,400141,01/01/2017 00:05,2.4,153.0,68.1
2,400141,01/01/2017 00:10,2.4,161.0,68.1


In [115]:
concat_2["Date"] = concat_2["datetime"].apply(lambda x: x.split(' ')[0])
concat_2["Time"] = concat_2["datetime"].apply(lambda x: x.split(' ')[1])

In [116]:
concat_2["idx"] = np.arange(size_2)

In [117]:
concat_2.head(3)

Unnamed: 0,Station ID,datetime,Occupancy,Flow,Speed,Date,Time,idx
0,400141,01/01/2017 00:00,2.4,161.0,68.0,01/01/2017,00:00,0
1,400141,01/01/2017 00:05,2.4,153.0,68.1,01/01/2017,00:05,1
2,400141,01/01/2017 00:10,2.4,161.0,68.1,01/01/2017,00:10,2


In [122]:
concat_2["datetime"] = pd.to_datetime(concat_2["datetime"])

In [118]:
concat_2.to_csv("data/I88N-processed/concat_2.csv")

### Concatenate Set 3

In [71]:
flow_speed_3 = pd.read_csv("data/I88N/flow_speed3.csv")
occ_3 = pd.read_csv("data/I88N/occ3.csv")

In [72]:
flow_speed_3.head(3)

Unnamed: 0,Station ID,5 Minutes,Flow (Veh/5 Minutes),Speed (mph),# Lane Points,% Observed
0,400515,01/01/2017 00:00,283.0,69.8,6,0.0
1,400515,01/01/2017 00:05,283.0,69.8,6,0.0
2,400515,01/01/2017 00:10,271.0,69.7,6,0.0


In [73]:
occ_3.head(3)

Unnamed: 0,Station ID,5 Minutes,Occupancy (%),# Lane Points,% Observed
0,400515,01/01/2017 00:00,3.3,6,0.0
1,400515,01/01/2017 00:05,3.3,6,0.0
2,400515,01/01/2017 00:10,3.1,6,0.0


In [74]:
concat_3 = pd.DataFrame({
    'Station ID': flow_speed_3["Station ID"].values,
    'datetime': flow_speed_3["5 Minutes"].values,
    'Occupancy': occ_3["Occupancy (%)"].values,
    'Flow': flow_speed_3["Flow (Veh/5 Minutes)"].values,
    'Speed': flow_speed_3["Speed (mph)"].values,
})

In [77]:
concat_3.head(3)

Unnamed: 0,Station ID,datetime,Occupancy,Flow,Speed
0,400515,01/01/2017 00:00,3.3,283.0,69.8
1,400515,01/01/2017 00:05,3.3,283.0,69.8
2,400515,01/01/2017 00:10,3.1,271.0,69.7


In [80]:
concat_3["Date"] = concat_3["datetime"].apply(lambda x: x.split(' ')[0])
concat_3["Time"] = concat_3["datetime"].apply(lambda x: x.split(' ')[1])

In [82]:
size_3 = len(concat_3["Station ID"].values)

In [84]:
concat_3["idx"] = np.arange(size_3)

In [121]:
concat_3["datetime"] = pd.to_datetime(concat_3["datetime"])

In [86]:
concat_3.to_csv("data/I88N-processed/concat_3.csv")

## Output: Holes in the Two Concatenated Files

In [134]:
output_str = ""
stations_2 = concat_2["Station ID"].unique()
for station in stations_2:
    print(station)
    output_str += str(station) + "\n"
    substr_date = ""
    substr_min = ""
    df = concat_2.loc[concat_2["Station ID"] == station]
    prevRow = None
    prev_datetime = None
    for datetime in df["datetime"].values:
        if prev_datetime is not None:
            if datetime != prev_datetime + np.timedelta64(5, 'm'):
                prev_date = pd.Timestamp(prev_datetime).date()
                current_date = pd.Timestamp(datetime).date()
                if prev_date != current_date:
                    substr_date += str(prev_date) + " " + str(current_date) + "\n"
                    print(prev_date, current_date)
                else:
                    substr_min += str(prev_datetime) + " " + str(datetime) + "\n"
                    print(prev_datetime, datetime)
        prev_datetime = datetime
    output_str += "date:\n" + substr_date + "\nmin:\n" + substr_min + "\n"
with open("data/I88N-processed/holes_2.txt", "w") as text_file:
    print(output_str, file=text_file)

400141
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
400761
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2

2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
401561
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
400611
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-0

2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
400275
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
400939
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-1

In [132]:
stations_3 = concat_3["Station ID"].unique()
output_str = ""
for station in stations_3:
    print(station)
    output_str += str(station) + "\n"
    substr_date = ""
    substr_min = ""
    df = concat_3.loc[concat_3["Station ID"] == station]
    prevRow = None
    prev_datetime = None
    for datetime in df["datetime"].values:
        if prev_datetime is not None:
            if datetime != prev_datetime + np.timedelta64(5, 'm'):
                prev_date = pd.Timestamp(prev_datetime).date()
                current_date = pd.Timestamp(datetime).date()
                if prev_date != current_date:
                    substr_date += str(prev_date) + " " + str(current_date) + "\n"
                    print(prev_date, current_date)
                else:
                    substr_min += str(prev_datetime) + " " + str(datetime) + "\n"
                    print(prev_datetime, datetime)
        prev_datetime = datetime
    output_str += "date:\n" + substr_date + "\nmin:\n" + substr_min + "\n"
with open("data/I88N-processed/holes_3.txt", "w") as text_file:
    print(output_str, file=text_file)

400515
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
400252
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-04T12:20:00.00000000

2017-12-24 2017-12-26
400949
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-30T01:05:00.000000000 2017-07-30T01:15:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-24 2017-12-26
400678
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017

2017-05-28 2017-05-30
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
401143
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-04-20T01:35:00.000000000 2017-04-20T01:45:00.000000000
2017-05-28 2017-05-30
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
2017-12-26T22:40:00.000000000 2017-12-26T22:50:00.000000000
401471
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-0

In [135]:
import util as u

In [145]:
u.filter_holes(concat_1, "data/I88N-processed/holes_1.txt")

408907
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-04-20T01:35:00.000000000 2017-04-20T01:45:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-07-30T01:05:00.000000000 2017-07-30T01:15:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
400951
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12

2017-07-03 2017-07-05
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-11-09 2017-11-11
2017-11-22 2017-11-25
2017-12-24 2017-12-26
2017-12-26T22:40:00.000000000 2017-12-26T22:50:00.000000000
402288
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-05T19:50:00.000000000 2017-05-05T20:00:00.000000000
2017-05-28 2017-05-30
2017-06-15T00:55:00.000000000 2017-06-15T01:05:00.000000000
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-04T12:20:00.000000000 2017-12-04T12:30:00.000000000
2017-12-24 2017-12-26
413026
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.00000000

2017-07-30T01:05:00.000000000 2017-07-30T01:15:00.000000000
2017-08-22T00:40:00.000000000 2017-08-22T00:50:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-17T02:40:00.000000000 2017-10-17T02:50:00.000000000
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-24 2017-12-26
2017-12-26T22:40:00.000000000 2017-12-26T22:50:00.000000000
402802
2017-01-01 2017-01-03
2017-01-15 2017-01-17
2017-02-19 2017-02-21
2017-03-12T01:55:00.000000000 2017-03-12T03:00:00.000000000
2017-05-28 2017-05-30
2017-07-01T02:55:00.000000000 2017-07-01T03:05:00.000000000
2017-07-03 2017-07-05
2017-07-09T03:25:00.000000000 2017-07-09T03:35:00.000000000
2017-09-03 2017-09-05
2017-10-08 2017-10-10
2017-10-18T18:40:00.000000000 2017-10-18T18:50:00.000000000
2017-11-09 2017-11-11
2017-11-12T06:00:00.000000000 2017-11-12T06:10:00.000000000
2017-11-22 2017-11-25
2017-12-24 2017-12-26
408