# How to Extract Datadog Log Data Into the IBC

## Set up Required Packages and Settings

In [None]:
from time import sleep

import pandas as pd
import requests
from pycelonis import get_celonis

try:
    # pandas >= 1.0 exposes json_normalize at the top level; the
    # pandas.io.json path has been deprecated since that release.
    from pandas import json_normalize
except ImportError:
    # Fallback for older pandas versions.
    from pandas.io.json import json_normalize

# Widen the displayed column width to 300 characters.
pd.options.display.max_colwidth = 300

## Define API Request Parameters

In [None]:
# Endpoint that all log queries in this notebook are POSTed to.
baseUrlLogs = 'https://api.datadoghq.com/api/v1/logs-queries/list'

# Headers sent with every request. Replace the placeholders with your own
# keys and avoid committing real credentials together with the notebook.
headers = {
    'content-type': 'application/json',
    'DD-API-KEY': 'XXXXX',
    'DD-APPLICATION-KEY': 'XXXXX',
}

# Time window of the extraction.
# startDate: ISO-8601 string, unix timestamp or relative time
#            (such as "now-1h" or "now-1d")
# endDate:   ISO-8601 string, unix timestamp or relative time (such as "now")
startDate = "XXXXX"
endDate = "XXXXX"

# Request body used to look up a single log ID (limit 1, ascending sort).
# "query" is a Datadog log explorer query,
# e.g.: "@errorType:(INTERNAL OR EXTERNAL)"
bodyLogXXXXX = dict(
    query="XXXXX",
    sort="asc",
    time={"from": startDate, "to": endDate},
    limit=1,
)

## Define Required Helper Functions

#### Get newest log ID

In [None]:
def get_newestLogId_logXXXXX():
    """Return the ID of the single log returned for ``bodyLogXXXXX``.

    POSTs ``bodyLogXXXXX`` (limit 1, ascending sort) to ``baseUrlLogs`` and
    reads the ``id`` field of the first entry in the response's ``logs``
    list. Note that, despite the function name, the request body asks for
    ascending order.

    The ID is taken straight from the JSON payload. The previous
    implementation rendered a pandas Series with ``to_string`` and dropped
    the first character, which only works while pandas pads the output with
    a leading space and otherwise silently truncates the ID.

    Returns:
        The log ID, or ``None`` when the response contains no logs.

    Raises:
        requests.HTTPError: if Datadog answers with an error status code.
    """
    print('Getting newest log ID')
    response = requests.post(baseUrlLogs, headers=headers, json=bodyLogXXXXX)
    # Fail loudly on e.g. invalid keys instead of on a confusing KeyError.
    response.raise_for_status()
    logs = response.json().get('logs') or []
    if not logs:
        print('No logs found for the given query and time range')
        return None
    newestLogId = logs[0]['id']
    print('Newest log ID retrieved')
    return newestLogId

## Fetch Data

#### Get the starting log ID for the paginated API requests

In [None]:
# Initialise the pagination cursor: the fetch loop sends this value as
# "startAt" in its first request.
nextLogId_logXXXXX = get_newestLogId_logXXXXX()

#### Get log data and convert it to dataframe

In [None]:
print('Getting log XXXXX')
# Pages are collected in a list and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole
# accumulated frame on every iteration.
pages_logXXXXX = []
while nextLogId_logXXXXX is not None:
    response = requests.post(baseUrlLogs, headers=headers, json={
        "limit": 1000,
        "query": "XXXXX",
        "startAt": nextLogId_logXXXXX,
        "sort": "asc",
        "time": {
            "from": startDate,
            "to": endDate,
        },
    })
    # Stop on HTTP errors instead of failing later on a missing 'logs' key.
    response.raise_for_status()
    # Parse the response body once and reuse it for the data and the cursor.
    payload = response.json()
    data = json_normalize(payload['logs'])[[
        'XXXXX',
        'XXXXX',
        ...
    ]]

    #additional functionalities to isolate information from message string, set data types, etc. as required

    pages_logXXXXX.append(data)
    # The loop ends once the response carries no further cursor.
    nextLogId_logXXXXX = payload.get('nextLogId')
    sleep(0.1)  # short pause between consecutive requests

# pd.concat raises on an empty list, so fall back to an empty frame when the
# loop never ran (no starting log ID).
if pages_logXXXXX:
    logXXXXX = pd.concat(pages_logXXXXX, ignore_index=True, sort=False)
else:
    logXXXXX = pd.DataFrame()
print('Log XXXXX retrieved')

## Push Data to the IBC

#### Connect to IBC team and identify data pool (here: manually)

In [None]:
# Connection settings for the target IBC team; replace the placeholders.
login = dict(celonis_url='XXXXX', api_token='XXXXX')

# Open the connection, passing the settings as keyword arguments.
celonis_manual = get_celonis(
    celonis_url=login['celonis_url'],
    api_token=login['api_token'],
)

# Look up the data pool that will receive the log table and display it as
# the cell output.
data_pool = celonis_manual.pools.find('XXXXX')
data_pool

#### Push dataframes into IBC team/data pool

In [None]:
print('Starting to push data to IBC')
# Upsert into the target table keyed on 'id'.
# NOTE(review): presumably 'id' has to be among the columns selected when
# building logXXXXX - confirm before running.
upsert_options = {'if_exists': 'upsert', 'primary_keys': ['id']}
data_pool.push_table(logXXXXX, 'DD_LOG_XXXXX', **upsert_options)
print('Data push successful')