# Log Format Conversion

Format OpenTelemetry logs into a ClickHouse-compatible format.

This example uses the following Python libraries:

- `glassflow>=2.0.8`
- `Faker==30.3.0`
- `pandas`

In [None]:
%pip install "glassflow>=2.0.8" pandas Faker

In [None]:
import glassflow

In [None]:
# Personal access token for GlassFlow; replace the placeholder with your own
# token from https://app.glassflow.dev/profile
personal_access_token = "< your-personal-access-token >"

# Connection settings for the ClickHouse sink; replace every placeholder
# with the values of your own ClickHouse instance.
clickhouse_config = {
    "username": "< clickhouse-username >",
    "password": "< clickhouse-password >",
    "addr": "< clickhouse host address >",
    "database": "< clickhouse database >",
    "table": "< clickhouse table >",
}

## Create Pipeline

In [None]:
# Client authenticated with the personal access token defined above
client = glassflow.GlassFlowClient(personal_access_token=personal_access_token)

Get the space named "examples" (or create one if no space is found)

In [None]:
space_name = "examples"
list_spaces = client.list_spaces()

# First listed space whose name matches, or None when there is no such space
matching_space = next(
    (entry for entry in list_spaces.spaces if entry["name"] == space_name),
    None,
)

if matching_space is None:
    # Nothing with that name yet: create it
    space = client.create_space(name=space_name)
else:
    # Reuse the existing space, authenticated with the client's token
    space = glassflow.Space(
        personal_access_token=client.personal_access_token,
        id=matching_space["id"],
        name=matching_space["name"],
    )

print(f'Space "{space.name}" with ID: {space.id}')

Display transformation function

In [None]:
%pycat transform.py

Create a pipeline with ClickHouse as a sink

In [None]:
# Read the dependency list that is passed to the pipeline as `requirements`.
# The encoding is pinned so the file decodes the same way on every platform
# instead of depending on the locale's default encoding.
with open("requirements.txt", encoding="utf-8") as requirements_file:
    requirements_txt = requirements_file.read()

# Create the pipeline in the selected space, with transform.py as the
# transformation and the ClickHouse settings defined earlier as the sink.
pipeline = client.create_pipeline(
    name="log-format-conversion",
    space_id=space.id,
    sink_kind="clickhouse",
    sink_config=clickhouse_config,
    transformation_file="transform.py",
    requirements=requirements_txt,
)
print("Pipeline ID:", pipeline.id)

## Produce data and send it to your pipeline

Create an OpenTelemetry fake data generator:

In [None]:
from faker import Faker


# Candidate values, defined once instead of being rebuilt on every call.
_SEVERITY_LEVELS = ("INFO", "DEBUG", "WARN", "ERROR")
_CLOUD_REGIONS = (
    "us-east-2", "us-east-1", "us-west-1", "us-west-2", "af-south-1",
    "ap-east-1", "ap-south-2", "ap-southeast-3", "ap-southeast-5",
    "ap-southeast-4", "ap-south-1", "ap-northeast-3", "ap-northeast-2",
    "ap-southeast-1", "ap-southeast-2", "ap-northeast-1", "ca-central-1",
    "ca-west-1", "eu-central-1", "eu-west-1", "eu-west-2", "eu-south-1",
    "eu-west-3", "eu-south-2", "eu-north-1", "eu-central-2", "il-central-1",
    "me-south-1", "me-central-1", "sa-east-1",
)

# Faker shared by every call that does not supply its own generator;
# created lazily on first use.
_default_faker = None


def _get_default_faker():
    """Return the shared Faker instance, creating it on first use."""
    global _default_faker
    if _default_faker is None:
        _default_faker = Faker()
    return _default_faker


def opentelemetry_datagen(fake=None):
    """Generate one fake OpenTelemetry-style log record.

    Args:
        fake: Optional Faker-compatible generator used to produce the
            values. Pass a seeded instance to get reproducible events.
            When omitted, a single shared ``Faker`` is reused across calls
            so a new generator is not constructed for every event.

    Returns:
        A dict with the keys ``Timestamp``, ``SeverityText``, ``Name``,
        ``Body``, ``Resource``, ``Attributes``, ``TraceId``, ``SpanId`` and
        ``TraceFlags``. ``Resource`` and ``Attributes`` are nested dicts.
    """
    if fake is None:
        fake = _get_default_faker()

    severity = fake.random_element(elements=_SEVERITY_LEVELS)
    cloud_region = fake.random_element(elements=_CLOUD_REGIONS)

    return {
        "Timestamp": fake.iso8601(),
        "SeverityText": severity,
        "Name": fake.word() + "_" + fake.word(),
        "Body": fake.sentence(nb_words=5),
        "Resource": {
            "service.name": fake.word() + "-service",
            "cloud.region": cloud_region,
        },
        "Attributes": {
            "http.method": fake.http_method(),
            "http.status_code": fake.http_status_code(),
            "user.id": fake.random_int(min=1, max=99999),
            "user.ip": fake.ipv4(),
            "auth.method": "oauth",
            "auth.success": fake.pybool(),
        },
        "TraceId": fake.uuid4(),
        "SpanId": fake.uuid4(),
        "TraceFlags": "01",
    }

Create a GlassFlow source client and publish the generated data.

The `n_events` variable controls the number of events that will be sent to the pipeline.

In [None]:
# Number of events to generate and send to the pipeline
n_events = 10

# Local copy of every event sent, kept for later comparison
input_events = []

data_source = pipeline.get_source()
for _ in range(n_events):
    event = opentelemetry_datagen()
    input_events.append(event)
    data_source.publish(event)

In [None]:
import pandas as pd

# Show the locally stored events as a table
events_frame = pd.DataFrame(input_events)
display(events_frame)

## Check your ClickHouse table

Check your ClickHouse table and confirm that the events have reached your database in the correct format.