In [11]:
from io import StringIO
from pandas import read_csv, to_datetime

# Maximum gap, in minutes, at which two session start times are still
# considered to belong to the same session.
threshold = 5

# Source columns that together form the session timestamp (date + start_time).
# Kept in the nested-list shape originally handed to read_csv(parse_dates=...).
dtc = [['date', 'start_time']]

# Name of the combined timestamp column built from the columns above.
ixc = 'date_start_time'

# Layout of "<date> <start_time>" in the raw data. Month-first, which is how
# pandas reads an ambiguous value such as 01/01/2016 by default.
dt_format = '%m/%d/%Y %H:%M:%S'


def _load_sessions(csv_text):
    """Parse a sessions CSV and merge its date/time columns into one timestamp.

    Combining columns through ``parse_dates=[[...]]`` is deprecated in recent
    pandas releases, so the timestamp is assembled explicitly instead. The
    resulting frame has the same layout the nested ``parse_dates`` produced:
    the combined column (named by ``ixc``) comes first and the two source
    columns are dropped.

    Parameters
    ----------
    csv_text : str
        CSV content with at least the ``date`` and ``start_time`` columns.

    Returns
    -------
    pandas.DataFrame
        Frame with a datetime column named ``ixc`` followed by the remaining
        columns of the CSV.
    """
    date_col, time_col = dtc[0]
    # Force both parts to text so they can be concatenated safely.
    raw = read_csv(StringIO(csv_text), dtype={date_col: str, time_col: str})
    stamps = to_datetime(raw[date_col] + ' ' + raw[time_col], format=dt_format)
    sessions = raw.drop(columns=[date_col, time_col])
    sessions.insert(0, ixc, stamps)
    return sessions


df1 = _load_sessions(u'''
date,start_time,employee_id,session_id
01/01/2016,02:03:00,7261824,871631182
01/01/2016,06:03:00,7261824,871631183
01/01/2016,11:01:00,7261824,871631184
01/01/2016,14:01:00,7261824,871631185
''')

df2 = _load_sessions(u'''
date,start_time,employee_id,session_id
01/01/2016,02:03:00,7261824,871631182
01/01/2016,06:05:00,7261824,871631183
01/01/2016,11:04:00,7261824,871631184
01/01/2016,14:10:00,7261824,871631185
''')

In [12]:
# Show the first parsed frame: date and start_time appear merged into date_start_time.
df1

Unnamed: 0,date_start_time,employee_id,session_id
0,2016-01-01 02:03:00,7261824,871631182
1,2016-01-01 06:03:00,7261824,871631183
2,2016-01-01 11:01:00,7261824,871631184
3,2016-01-01 14:01:00,7261824,871631185


In [13]:
# Show the second parsed frame: same sessions as df1, some with shifted start times.
df2

Unnamed: 0,date_start_time,employee_id,session_id
0,2016-01-01 02:03:00,7261824,871631182
1,2016-01-01 06:05:00,7261824,871631183
2,2016-01-01 11:04:00,7261824,871631184
3,2016-01-01 14:10:00,7261824,871631185


In [15]:
import numpy as np

# Full threshold expressed in nanoseconds, i.e. the width of one rounding bin
# (not half of it). The rounding below no longer needs this value; it is kept
# in case another cell references it.
threshold_ns = threshold * 60 * 1e9

# The same bin width as a pandas frequency string, e.g. '5min'.
interval_freq = '{}min'.format(threshold)


def _to_interval(stamps):
    """Round each timestamp to the nearest multiple of ``threshold`` minutes.

    Uses ``Series.dt.round`` so the rounding happens on the datetime values
    directly, instead of routing epoch nanoseconds through float arithmetic
    (those magnitudes exceed what float64 can represent exactly).

    Parameters
    ----------
    stamps : pandas.Series
        Datetime-typed series of session start times.

    Returns
    -------
    pandas.Series
        Datetime series of the bin each start time falls into.
    """
    return stamps.dt.round(interval_freq)


# compute the "interval" (rounding bin) to which each session belongs
df1['interval'] = _to_interval(df1.date_start_time)
df2['interval'] = _to_interval(df2.date_start_time)

# NOTE(review): equal bins do not mean "within threshold minutes". Two start
# times closer than the threshold can still round to different bins (11:01 ->
# 11:00 versus 11:04 -> 11:05) and then show up as unmatched rows in the outer
# join. A tolerance-based join such as pandas.merge_asof would avoid this.

# outer join on the bin plus the identifying columns
cols = ['interval', 'employee_id', 'session_id']
print(df1.merge(df2, on=cols, how='outer')[cols])

             interval  employee_id  session_id
0 2016-01-01 02:05:00      7261824   871631182
1 2016-01-01 06:05:00      7261824   871631183
2 2016-01-01 11:00:00      7261824   871631184
3 2016-01-01 14:00:00      7261824   871631185
4 2016-01-01 11:05:00      7261824   871631184
5 2016-01-01 14:10:00      7261824   871631185


In [16]:
# Re-display df1, which now carries the interval column assigned in the bucketing cell.
df1

Unnamed: 0,date_start_time,employee_id,session_id,interval
0,2016-01-01 02:03:00,7261824,871631182,2016-01-01 02:05:00
1,2016-01-01 06:03:00,7261824,871631183,2016-01-01 06:05:00
2,2016-01-01 11:01:00,7261824,871631184,2016-01-01 11:00:00
3,2016-01-01 14:01:00,7261824,871631185,2016-01-01 14:00:00


In [17]:
# Re-display df2, which now carries the interval column assigned in the bucketing cell.
df2

Unnamed: 0,date_start_time,employee_id,session_id,interval
0,2016-01-01 02:03:00,7261824,871631182,2016-01-01 02:05:00
1,2016-01-01 06:05:00,7261824,871631183,2016-01-01 06:05:00
2,2016-01-01 11:04:00,7261824,871631184,2016-01-01 11:05:00
3,2016-01-01 14:10:00,7261824,871631185,2016-01-01 14:10:00


In [18]:
# Check column dtypes; interval should be a datetime column like date_start_time.
df2.dtypes

date_start_time    datetime64[ns]
employee_id                 int64
session_id                  int64
interval           datetime64[ns]
dtype: object