# ndn-compute Demo

### Step 1: Initialize a client 
(assuming the cluster is up)

In [1]:
from ndn_compute_client import NdnComputeClient
# URL handed to the client; this notebook assumes the cluster is already up.
CLUSTER_URL = 'http://localhost:5214'
client = NdnComputeClient(CLUSTER_URL)

### Step 2: Obtain the dataset

#### 1 million log lines from an e-commerce site

In [2]:
# Path of the event log passed to the client to create the dataset.
events_log_path = "appB/events.log.jsonl"
dataset = client.create_dataset(events_log_path)

### Step 3: Transform the dataset
#### Show only purchases made on an iPad

In [3]:
# "Made on an iPad" is approximated as: a purchase event whose device is a
# tablet and whose browser is Safari. All three conditions must hold.
pred = lambda row: (
    row['event_type'] == 'purchase'
    and row['device'] == 'tablet'
    and row['browser'] == 'safari'
)
ipad_purchases = dataset.filter(pred)

In [4]:
# Collect the filtered dataset and preview its first rows.
(
    ipad_purchases
    .collect()
    .head()
)

Unnamed: 0,id,timestamp,user_id,event_type,device,browser,location,session_duration,metadata
0,SJ4pOPr6MJIjPbVW,2024-04-30 16:38:42.701,8IgVthq7,purchase,tablet,safari,"{'country': 'JP', 'city': 'Tokyo', 'latitude':...",3297,"{'platform_version': '8.7.4', 'user_agent': 'J..."
1,tryk1tLK1L0krcSG,2024-12-19 16:38:42.703,iTl4HFp0,purchase,tablet,safari,"{'country': 'JP', 'city': 'New York', 'latitud...",326,"{'platform_version': '1.8.2', 'user_agent': 'b..."
2,WWwDFPubXDrB8gQ7,2024-10-17 16:38:42.703,thgUyh9g,purchase,tablet,safari,"{'country': 'JP', 'city': 'Paris', 'latitude':...",2179,"{'platform_version': '10.4.4', 'user_agent': '..."
3,gBJ3FgDRMtdCwLRR,2024-07-24 16:38:42.706,DcjXeWME,purchase,tablet,safari,"{'country': 'FR', 'city': 'Sydney', 'latitude'...",361,"{'platform_version': '4.2.4', 'user_agent': 'l..."
4,DnQxtLPtzf9t4aPq,2024-03-08 16:38:42.706,bEdrEKEC,purchase,tablet,safari,"{'country': 'FR', 'city': 'London', 'latitude'...",3019,"{'platform_version': '5.9.6', 'user_agent': 'l..."


### Step 4: Add subsequent transformations to the lineage
#### Use ipad_purchases as a fork-point in the lineage

In [5]:
# Both datasets below are derived independently from ipad_purchases, which
# acts as the fork-point. Each keeps only the session_duration column and
# rescales it.
# NOTE(review): the variable names imply session_duration is in seconds
# (/60 -> minutes, /3600 -> hours) — confirm against the data source.
duration_minutes = ipad_purchases.transform(
    lambda df: df[['session_duration']].transform(lambda secs: secs / 60.0)
)
duration_hours = ipad_purchases.transform(
    lambda df: df[['session_duration']].transform(lambda secs: secs / 3600.0)
)

In [6]:
# Collect the minutes branch and preview its first rows.
(
    duration_minutes
    .collect()
    .head()
)

Unnamed: 0,session_duration
0,54.95
1,5.433333
2,36.316667
3,6.016667
4,50.316667


In [7]:
# Collect the hours branch and preview its first rows.
(
    duration_hours
    .collect()
    .head()
)

Unnamed: 0,session_duration
0,0.915833
1,0.090556
2,0.605278
3,0.100278
4,0.838611
