In [2]:
import pandas as pd
import numpy as np
import time
from datetime import datetime
import altair as alt
import pandas as pd
import json
import itertools as it

In [3]:
# Read the preprocessed sub-activity log, parse both timestamp columns and
# order the rows chronologically by start time with a fresh 0..N-1 index.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
ds = (
    pd.read_csv('S1SubActivities_preprocessed.csv', index_col=None)
    .assign(
        start=lambda frame: pd.to_datetime(frame['start'], format=TIMESTAMP_FORMAT),
        end=lambda frame: pd.to_datetime(frame['end'], format=TIMESTAMP_FORMAT),
    )
    .sort_values('start')
    .reset_index(drop=True)
)

In [4]:
# Work with the first 100 rows only, then preview the first five of them.
ds = ds.iloc[:100]
ds.head()

Unnamed: 0,subActNum,subAct,start,end
0,67,Cabinet,2003-03-27 06:43:40,2003-03-27 06:43:43
1,100,Toilet Flush,2003-03-27 06:44:06,2003-03-27 07:12:41
2,101,Light switch,2003-03-27 06:44:20,2003-03-27 07:46:34
3,57,Medicine cabinet,2003-03-27 06:44:35,2003-03-27 06:44:48
4,58,Medicine cabinet,2003-03-27 06:44:36,2003-03-27 06:44:48


# Problem Statement

**If I start EVENTA between X and Y on a weekday/weekend (WD/WE), the probability (PR) that I will start EVENTB within Z minutes is Q.**

>_If I switch on the bathroom light switch between 6am and 7am on a weekday, the probability that I will use my razor within Z minutes is Q._

## Relationships

### Intersection
<pre>
|----EventA----|  <br>
            |---EventB---|   
</pre>

### Separate
<pre>
|----EventA----|  <br>
                    |---EventB---|   
</pre>

### Enclosed
<pre>
|----------EventA----------|  <br>
           |---EventB---|   
</pre>

### Equal
<pre>
       |---EventA---|  <br>
       |---EventB---|   
</pre>

### More?

---
## Pr(EventA delta eventB)

### Delta Negative
<pre>
|----EventA----|  <br>
                -Gap- <br>
                     |---EventB---|   
</pre>

### Delta Positive
<pre>
|------EventA------|  <br>
            -Union-   <br>
            |---EventB---|   
</pre>

### Delta Zero
<pre>
|--EventA--|  <br>

           |------EventB------|   
</pre>

### Equal Start
<pre>
|--EventA--|  <br>

|------EventB------|   
</pre>

**Input A:** S1SubActivities_preprocessed.csv

| Event    | EventStart | EventEnd   |        
|:--------:|:--------:  | :--------: |        
| 100      | dateTime   | dateTime   |        
| 101      | dateTime   | dateTime   |        
| 104      | dateTime   | dateTime   |  
| 105      | dateTime   | dateTime   |  

**Input B:** dsCombin2n

| EventA   | EventB  | 
|:--------:|:-------:| 
| 100      | 101     | 
| 100      | 104     | 
| 100      | 105     | 
| 100      | 106     | 

**Function:** def id_delta(InputA, InputB)  <br>
**Output:**

| EventA   | EventB   |  Ev.A Start | Ev.A End | Ev.B Start | Ev.B End | Delta    | Descriptor |
|:--------:|:--------:| :--------:  | :------: |:--------:  | :------: | :------: | :------:   |
| 100      | 101      |  dateTime   | dateTime | dateTime   | dateTime | -4       | Gap        |
| 100      | 104      |  dateTime   | dateTime | dateTime   | dateTime | 92       | Union      |
| 100      | 105      |  dateTime   | dateTime | dateTime   | dateTime | 0        | Zero       | 
| 100      | 106      |  dateTime   | dateTime | dateTime   | dateTime | ???      | EqualStart | 

**Function:** def add_temporalFeatures(): <br>
**Output:**

| EventA| EventB| Ev.A Start| Ev.A End| Ev.B Start|Ev.B End|Delta   | Descriptor | WendWday|Hour  |TimeofDay|
|:-----:|:-----:|:--------: | :------:|:--------: |:------:|:------:| :------:   | :------:|:----:|:------: |
| 100   | 101   | dateTime  | dateTime| dateTime  |dateTime|-4      | Gap        | weekday | 6:00 |6Till8   |
| 100   | 104   | dateTime  | dateTime| dateTime  |dateTime|92      | Union      | weekend |12:00 |12Till14 |
| 100   | 105   | dateTime  | dateTime| dateTime  |dateTime|0       | Zero       | weekday |15:00 |15Till17 |
| 100   | 106   | dateTime  | dateTime| dateTime  |dateTime|???     | EqualStart | weekday |15:00 |15Till17 |

### def id_delta(InputA, InputB)

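* WHERE event A is followed by event B (the first event B that overlaps A, touches A, or starts after a gap)
* delta = event A end - event B start
* IF (event A end < event B start) there is a gap and delta is negative; IF the events overlap, delta is positive
* Add delta attribute to df
* RETURN df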

---

1. Equal Start
2. Delta positive (the highest)
3. Delta Zero (the first)
4. Delta Negative (closest to zero)


And nearest 5 

In [6]:
import datetime as dt

def id_delta(events, n=1, delta_threshold=dt.timedelta(-99)):
    """Pair every event with the nearest ``n`` events that start at or after it.

    Each row of ``events`` is treated in turn as "event A". Its candidate
    "event B" rows are all *other* rows whose ``start`` is at or after A's
    start and closer to it than the window given by ``delta_threshold``.
    The ``n`` candidates with the earliest start are kept.

    Parameters
    ----------
    events : pandas.DataFrame
        Must provide the columns ``subActNum``, ``start`` and ``end``;
        ``start`` and ``end`` must hold datetime values.
    n : int, default 1
        Maximum number of following events paired with each event A.
    delta_threshold : datetime.timedelta, default ``timedelta(days=-99)``
        Negative look-ahead window: event B is kept only while
        ``A.start - B.start > delta_threshold``. For example
        ``timedelta(seconds=-25)`` keeps events that start less than 25 s
        after A starts. A non-negative value matches nothing.

    Returns
    -------
    pandas.DataFrame
        One row per (A, B) pair with the columns ``EventA``, ``EventB``,
        ``EvA_Start``, ``EvB_Start``, ``EvA_End``, ``EvB_End`` and ``Delta``.
        The index holds the row label of event B in ``events`` (labels can
        repeat). ``Delta`` is ``EvA_End - EvB_Start`` in seconds: positive
        when B starts before A ends (overlap), negative when there is a gap,
        and NaN when A and B start at the same instant. An empty frame with
        these columns is returned when no pair qualifies.
    """
    pair_columns = ['EventA', 'EventB', 'EvA_Start', 'EvB_Start', 'EvA_End', 'EvB_End']
    pairs = []

    for row in events.itertuples():
        lag = row.start - events['start']  # <= 0 for events that start after A
        candidates = events[(events['start'] >= row.start)
                            & (events.index != row.Index)
                            & (lag > delta_threshold)]
        # Stable sort: input that is already ordered by start keeps its row
        # order, while unsorted input still yields the truly nearest events.
        nearest = candidates.sort_values('start', kind='mergesort').head(n)
        if nearest.empty:
            continue

        # Scalars describing event A are broadcast along event B's index.
        pairs.append(pd.DataFrame({
            'EventA': row.subActNum,
            'EventB': nearest['subActNum'],
            'EvA_Start': row.start,
            'EvB_Start': nearest['start'],
            'EvA_End': row.end,
            'EvB_End': nearest['end'],
        }))

    if not pairs:
        # pd.concat([]) raises ValueError; hand back a well-formed empty result.
        return pd.DataFrame(columns=pair_columns + ['Delta'])

    result = pd.concat(pairs)

    delta_seconds = (result['EvA_End'] - result['EvB_Start']).dt.total_seconds()
    # The mask is passed as a plain array because the result index can hold
    # duplicate labels, which label-based alignment may reject.
    differing_start = (result['EvA_Start'] != result['EvB_Start']).to_numpy()
    result['Delta'] = delta_seconds.where(differing_start)
    return result

In [7]:
# Demo: pair each event with the single next event that starts within 25 seconds.
id_delta(ds, n=1, delta_threshold=dt.timedelta(seconds=-25))

Unnamed: 0,EventA,EventB,EvA_Start,EvB_Start,EvA_End,EvB_End,Delta
2,100,101,2003-03-27 06:44:06,2003-03-27 06:44:20,2003-03-27 07:12:41,2003-03-27 07:46:34,1701.0
3,101,57,2003-03-27 06:44:20,2003-03-27 06:44:35,2003-03-27 07:46:34,2003-03-27 06:44:48,3719.0
4,57,58,2003-03-27 06:44:35,2003-03-27 06:44:36,2003-03-27 06:44:48,2003-03-27 06:44:48,12.0
5,58,67,2003-03-27 06:44:36,2003-03-27 06:44:49,2003-03-27 06:44:48,2003-03-27 06:44:56,-1.0
10,143,55,2003-03-27 06:54:09,2003-03-27 06:54:16,2003-03-27 13:07:43,2003-03-27 06:54:19,22407.0
14,93,72,2003-03-27 07:05:22,2003-03-27 07:05:39,2003-03-27 07:05:24,2003-03-27 07:05:57,-15.0
19,82,75,2003-03-27 07:35:27,2003-03-27 07:35:50,2003-03-27 07:35:34,2003-03-27 07:35:56,-16.0
23,84,84,2003-03-27 07:38:48,2003-03-27 07:38:48,2003-03-27 07:38:51,2003-03-27 07:38:51,
22,84,84,2003-03-27 07:38:48,2003-03-27 07:38:48,2003-03-27 07:38:51,2003-03-27 07:38:51,
25,72,73,2003-03-27 07:39:07,2003-03-27 07:39:14,2003-03-27 07:40:27,2003-03-27 07:39:17,73.0


In [None]:
# Up to 12 following events per event, each starting within 900 seconds (15 minutes).
ds_12n_900s = id_delta(ds, n=12, delta_threshold=dt.timedelta(seconds=-900))

In [None]:
# Up to 5 following events per event, each starting within 200 seconds.
# NOTE(review): the variable name suggests 1 neighbour / 10 s, but the call
# uses n=5 and a 200 s window. Confirm which is intended.
ds_1n_10s = id_delta(ds, n=5, delta_threshold=dt.timedelta(seconds=-200))

In [12]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.sankey as sankey

In [1]:
import plotly.graph_objects as go

ModuleNotFoundError: No module named 'plotly.graph_objects'

ModuleNotFoundError: No module named 'pysankey'

* 24till2
* 3till5
* 6till8
* 9till11
* 12till14
* 15till17
* 18till20
* 21till23