In [None]:
import polars as pl
import math
import numpy as np
# Sample data: two pollution readings for each of two towers.
df = pl.DataFrame(
    dict(
        tower_id=["T1", "T1", "T2", "T2"],
        pollution_index=[20, 50, 35, 80],
    )
)
print(df)


In [None]:
def log_transform(value):
    """Return the natural logarithm of ``value``.

    This is a thin wrapper around ``math.log`` so it can be passed as a
    per-element UDF. ``math.log`` raises ``ValueError`` for zero or
    negative numbers; that exception is not caught here.
    """
    natural_log = math.log(value)
    return natural_log

# Run the Python UDF once per element; return_dtype declares the output
# column as Float64.
df_log = df.select(
    pl.col("pollution_index").map_elements(
        log_transform,
        return_dtype=pl.Float64,
    )
)
print("Log-transformed pollution index:", df_log, sep="\n")


In [None]:
# Using map_batches() – whole-series UDF
def subtract_mean(series):
    """Center a Series on its own mean.

    Intended as a whole-series UDF for ``map_batches``: it receives the
    full column and returns a column of the same length.

    Args:
        series: Numeric Series (anything exposing ``.mean()`` and scalar
            subtraction, e.g. ``pl.Series``).

    Returns:
        ``series - mean(series)``, computed with vectorized Series
        arithmetic instead of a Python-level loop, so the input's name is
        carried over. If the mean is undefined (``.mean()`` returns
        ``None``, as Polars does for an empty or all-null Series) the input
        is returned cast to Float64 rather than raising.
    """
    mean_val = series.mean()
    if mean_val is None:
        # Nothing to center; keep the length and give back a float column.
        return series.cast(pl.Float64)
    return series - mean_val

# map_batches hands the whole column to subtract_mean in a single call.
df_centered = df.select(pl.col("pollution_index").map_batches(subtract_mean))
print("Pollution index centered by mean:", df_centered, sep="\n")

In [None]:
import polars as pl
import numpy as np
from numba import guvectorize, float64

# Z-score normalization compiled by Numba as a generalized ufunc
@guvectorize([(float64[:], float64[:])], "(n)->(n)", nopython=True)
def zscore(values, result):
    """Fill ``result`` with the z-scores of ``values``.

    The mean and the population standard deviation (divide by ``n``) are
    accumulated in two explicit passes. When the standard deviation is not
    strictly positive, every output element is set to 0.0 instead of
    dividing.
    """
    count = len(values)

    # Pass 1: arithmetic mean.
    total = 0.0
    for idx in range(count):
        total += values[idx]
    center = total / count

    # Pass 2: population standard deviation.
    sq_dev = 0.0
    for idx in range(count):
        sq_dev += (values[idx] - center) ** 2
    spread = (sq_dev / count) ** 0.5

    # Pass 3: write the normalized values (or zeros for a constant input).
    if spread > 0:
        for idx in range(count):
            result[idx] = (values[idx] - center) / spread
    else:
        for idx in range(count):
            result[idx] = 0.0

In [None]:
# Example signal readings: two per tower for three towers.
df = pl.DataFrame(
    dict(
        tower_id=["T1", "T1", "T2", "T2", "T3", "T3"],
        signal_dbm=[-85, -80, -95, -90, -70, -75],
    )
)


# Global standardization: the zscore gufunc is applied to the entire column.
df_std = df.select(pl.col("signal_dbm").map_batches(zscore))
print("Z-score normalized signal (global):", df_std, sep="\n")

In [None]:
import polars as pl
from numba import guvectorize, int64, float64

# Numba gufunc: element-wise sum of call volume and data usage
@guvectorize([(int64[:], int64[:], float64[:])], "(n),(n)->(n)")
def combined_usage(call_volume, data_usage, result):
    """Store ``call_volume[i] + data_usage[i]`` in ``result[i]`` for each index."""
    for pos in range(call_volume.shape[0]):
        total = call_volume[pos] + data_usage[pos]
        result[pos] = total

# Example telecom cluster data (one row per tower)
df = pl.DataFrame(
    dict(
        tower_id=["T1", "T2", "T3"],
        call_volume=[120, 300, 250],
        data_usage=[1024, 2048, 1536],  # in MBs
    )
)

# Pack both columns into one struct so map_batches receives them together,
# then unpack the two fields and hand them to the Numba gufunc.
result = df.select(
    pl.struct(["call_volume", "data_usage"])
    .map_batches(
        lambda pair: combined_usage(
            *(pair.struct.field(col) for col in ("call_volume", "data_usage"))
        )
    )
    .alias("total_usage_score")
)

print(result)