In [1]:
from dotenv import load_dotenv
import os
import sys
from pycelonis import get_celonis

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make the parent directory importable (presumably where the local CelonisML
# package imported later in this notebook lives — confirm).
# Guarded so that re-running this cell does not append the same path again.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

## 1. Connect To Celonis

The first step is to connect to your Celonis instance. Supply your own credentials by setting `KEY`, `BASE_URL` and `KEY_TYPE` as environment variables (for example in a `.env` file); they are loaded in the cell below. If you are having trouble connecting to Celonis, see: https://celonis.github.io/pycelonis/2.12.0/tutorials/executed/01_quickstart/02_celonis_basics/

In [3]:
# Read the Celonis credentials from the environment. load_dotenv() is called
# first so that values defined in a local .env file are available as well.
# Any variable that is not set ends up as None.
load_dotenv()

KEY, BASE_URL, KEY_TYPE = (
    os.environ.get(name) for name in ('KEY', 'BASE_URL', 'KEY_TYPE')
)


In [4]:
# Create the authenticated Celonis client from the credentials read from the
# environment.
celonis = get_celonis(
    base_url=BASE_URL,
    api_token=KEY,
    key_type=KEY_TYPE,
)

## 2. Access the Relevant Data Model

Next, use the authenticated `celonis` object to retrieve the data model containing the data that will be used to build the ML model.

In [5]:
# List the data pools available through the authenticated client, so the
# pool of interest can be picked by its ID.
data_pools = celonis.data_integration.get_data_pools()
data_pools

[
	DataPool(id='9844f304-d81f-42ba-8839-6949ee45bd21', name='control-tower-kpi-logs-f3d5c328-bd7f-4244-b4cd-525fd82bd5e5'),
	DataPool(id='93d7b41b-1fa7-4f43-90fb-270c185dedd9', name='Get Data into Celonis - P2P'),
	DataPool(id='b4a070bb-bae9-4139-a41e-b596312dab4c', name='Get Data into Celonis - P2P (1)'),
	DataPool(id='b8a705e4-8140-4ed1-9137-95ed14885542', name='Caines Datapool'),
	DataPool(id='a226e76d-3d6d-46ca-834c-56e9b82a6255', name='OCPM Data Pool'),
	DataPool(id='93fa4117-5d41-4c68-a548-ee6d25f66830', name='Task Mining Data Pool - Caines Task Mining'),
	DataPool(id='12906c9d-0bff-4778-88ae-2f7d49d2c67b', name='AP Data Pool'),
	DataPool(id='dc5abaf7-281d-4acd-8f83-98a141253198', name='PyCelonis')
]

In [6]:
# ID of the 'Get Data into Celonis - P2P (1)' pool, taken from the data pool
# listing shown in this notebook.
P2P_DATA_POOL_ID = 'b4a070bb-bae9-4139-a41e-b596312dab4c'

p2p_datapool = celonis.data_integration.get_data_pool(id_=P2P_DATA_POOL_ID)
p2p_datapool

DataPool(id='b4a070bb-bae9-4139-a41e-b596312dab4c', name='Get Data into Celonis - P2P (1)')

In [7]:
# List the data models contained in the selected data pool.
p2p_datapool.get_data_models()

[
	DataModel(id='3b0d16bf-d3ca-4b78-89ef-509716b0c1df', name='P2P Data Model', pool_id='b4a070bb-bae9-4139-a41e-b596312dab4c')
]

In [8]:
# Select the data model by its ID (the 'P2P Data Model' entry in the listing
# shown in this notebook) rather than by list position, so the choice does not
# depend on the order of the returned models or on the pool holding exactly
# one model.
P2P_DATA_MODEL_ID = '3b0d16bf-d3ca-4b78-89ef-509716b0c1df'

p2p_datamodel = p2p_datapool.get_data_model(id_=P2P_DATA_MODEL_ID)
p2p_datamodel

DataModel(id='3b0d16bf-d3ca-4b78-89ef-509716b0c1df', name='P2P Data Model', pool_id='b4a070bb-bae9-4139-a41e-b596312dab4c')

## 3. Import CelonisML and initialise your machine learning job

You are now ready to import the `CelonisML` class to build your ML job. You also need to import the `PQLColumn` class from pycelonis; `PQLColumn` is used to add columns from your data model to the job.


In [9]:
from CelonisML.core import CelonisML
from pycelonis.pql import PQLColumn

In [24]:
# Create the ML job on top of the P2P data model.
# NOTE(review): the meaning of the second positional argument (None here) is
# not visible in this notebook — confirm against the CelonisML constructor.
ml_job = CelonisML(p2p_datamodel, None)

## 4. Start building your model

Add predictor variables, add a target variable (for supervised learning), set the ML model, and extract your data.

In [25]:
# Register a single predictor column.
netpr_column = PQLColumn(name='Netpr', query='"EKPO"."Netpr"')
ml_job.add_predictor(netpr_column)

In [26]:
# Show the predictor columns registered on the job so far.
print(ml_job.data_extractor.predictors)

[PQLColumn(name='Netpr', query='"EKPO"."Netpr"')]


In [27]:
# Register several predictor columns with a single add_predictor call.
extra_predictors = [
    PQLColumn(name='Menge', query='"EKPO"."Menge"'),
    PQLColumn(name='Netwr', query='"EKPO"."Netwr"'),
]
ml_job.add_predictor(*extra_predictors)

In [28]:
# Show the predictor list again; it now also contains the columns added in
# the multi-column call.
print(ml_job.data_extractor.predictors)

[PQLColumn(name='Netpr', query='"EKPO"."Netpr"'), PQLColumn(name='Menge', query='"EKPO"."Menge"'), PQLColumn(name='Netwr', query='"EKPO"."Netwr"')]


In [30]:
# Load the data from the Celonis environment; the result is inspected through
# ml_job.data afterwards.
ml_job.load_data()

In [33]:
# Inspect the loaded data (one column per registered predictor).
ml_job.data

Unnamed: 0_level_0,Netpr,Menge,Netwr
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3,334,10187
1,10,40,400
2,45,300,13500
3,0,1095,5803
4,0,1339,7096
...,...,...,...
27932,286,500,143150
27933,286,500,143150
27934,50,100,50
27935,50,100,50


In [34]:
# Set the target column for supervised learning.
# NOTE(review): the target queries "EKPO"."NetWr", which looks like the same
# column as the 'Netwr' predictor registered earlier (only the name and casing
# differ) — confirm this is intended, since a target that is also a predictor
# makes the fit trivial. The name 'NetWrr' may be deliberate to avoid a
# column-name clash with that predictor; verify before renaming.
ml_job.add_target(PQLColumn(name='NetWrr', query='"EKPO"."NetWr"'))

In [35]:
from sklearn.linear_model import LinearRegression

In [36]:
# Use scikit-learn's ordinary least-squares linear regression, with default
# settings, as the model for this job.
ml_job.add_model(LinearRegression())

In [None]:
# Train the configured model.
# NOTE(review): presumably fits on the loaded predictors and target — confirm
# in CelonisML, and whether load_data() must be re-run after add_target().
ml_job.train_model()