### Casting
We use `cast`, which is strict by default (`strict=True`): a value that cannot be converted to the target type raises an error.

In [9]:
import polars as pl

# Demo frame for the casting examples: two integer columns (one holding
# large values) and one float column.
sample_columns = {
    "integers": [1, 2, 3],
    "big_integers": [10000002, 2, 30000003],
    "floats": [4.0, 5.8, -6.3],
}
df = pl.DataFrame(sample_columns)
print(df)

shape: (3, 3)
┌──────────┬──────────────┬────────┐
│ integers ┆ big_integers ┆ floats │
│ ---      ┆ ---          ┆ ---    │
│ i64      ┆ i64          ┆ f64    │
╞══════════╪══════════════╪════════╡
│ 1        ┆ 10000002     ┆ 4.0    │
│ 2        ┆ 2            ┆ 5.8    │
│ 3        ┆ 30000003     ┆ -6.3   │
└──────────┴──────────────┴────────┘


In [10]:
# Integer -> float keeps the values as they are; float -> integer drops the
# fractional part (5.8 becomes 5 and -6.3 becomes -6).
ints_as_floats = pl.col("integers").cast(pl.Float32).alias("Integers As Floats")
floats_as_ints = pl.col("floats").cast(pl.Int128).alias("Floats as Intgers")

result = df.select(ints_as_floats, floats_as_ints)
print(result)

shape: (3, 2)
┌────────────────────┬───────────────────┐
│ Integers As Floats ┆ Floats as Intgers │
│ ---                ┆ ---               │
│ f32                ┆ i128              │
╞════════════════════╪═══════════════════╡
│ 1.0                ┆ 4                 │
│ 2.0                ┆ 5                 │
│ 3.0                ┆ -6                │
└────────────────────┴───────────────────┘


### Downcasting numerical data types
Example: `Int64` → `Int16`

In [11]:
from polars.exceptions import InvalidOperationError

# Report the estimated in-memory size before and after narrowing two columns.
print(f"Before downcasting: {df.estimated_size()} bytes")
narrow_integers = pl.col("integers").cast(pl.Int16)
narrow_floats = pl.col("floats").cast(pl.Float32)
result = df.with_columns(narrow_integers, narrow_floats)
print(f"After downcasting: {result.estimated_size()} bytes")

# No `strict` argument is given here, so values that do not fit into the
# target type make the cast raise instead of being converted.
too_narrow = pl.col("big_integers").cast(pl.Int8)
try:
    result = df.select(too_narrow)
    print(result)
except InvalidOperationError as overflow_error:
    print(overflow_error)

Before downcasting: 72 bytes
After downcasting: 42 bytes
conversion from `i64` to `i8` failed in column 'big_integers' for 2 out of 3 values: [10000002, 30000003]


In [12]:
# With strict=False, values that overflow or underflow the target type
# become null instead of raising an error.
lenient_cast = pl.col("big_integers").cast(pl.Int8, strict=False)
result = df.select(lenient_cast)
print(result)

shape: (3, 1)
┌──────────────┐
│ big_integers │
│ ---          │
│ i8           │
╞══════════════╡
│ null         │
│ 2            │
│ null         │
└──────────────┘


### Strings to numeric types

If a string cannot be parsed as a number (for example, it contains non-numeric characters), Polars raises a conversion error.

In [21]:
import polars as pl

# Roll numbers are stored as numeric strings; percentages are floats.
student_columns = {
    "rollnos": ["12", "23", "90"],
    "percent": [89.32, 12.34, 67.76],
}
df = pl.DataFrame(student_columns)

# Parse the strings into integers and render the floats as strings.
rollnos_as_int = pl.col("rollnos").cast(pl.Int128)
percent_as_str = pl.col("percent").cast(pl.String)
strings = df.select(rollnos_as_int, percent_as_str)

print(strings)

shape: (3, 2)
┌─────────┬─────────┐
│ rollnos ┆ percent │
│ ---     ┆ ---     │
│ i128    ┆ str     │
╞═════════╪═════════╡
│ 12      ┆ 89.32   │
│ 23      ┆ 12.34   │
│ 90      ┆ 67.76   │
└─────────┴─────────┘


### Parsing / formatting temporal data types
Values of the data type `Date` are stored as the number of days since the epoch. For the data type `Datetime` the time unit is the microsecond (us), and for `Time` the time unit is the nanosecond (ns).

`dt.to_string()` and `str.to_datetime()` are used to format temporal values as strings and to parse strings into datetimes.

In [31]:
from datetime import date, datetime, time

# Each column pairs a reference value with one offset from it, so the
# integers produced by the casts below show the unit each type is stored in.
temporal_columns = {
    "date": [date(1970, 1, 1), date(1970, 1, 10)],  # epoch, 9 days later
    "datetime": [
        datetime(1970, 1, 1, 0, 0, 0),  # epoch
        datetime(1970, 1, 1, 0, 1, 0),  # 1 minute later
    ],
    "time": [time(0, 0, 0), time(0, 0, 1)],  # reference time, 1 second later
}
df = pl.DataFrame(temporal_columns)

days = pl.col("date").cast(pl.Int64).alias("days_since_epoch")
microseconds = pl.col("datetime").cast(pl.Int64).alias("us_since_epoch")
nanoseconds = pl.col("time").cast(pl.Int64).alias("ns_since_midnight")

result = df.select(days, microseconds, nanoseconds)
print(result)

shape: (2, 3)
┌──────────────────┬────────────────┬───────────────────┐
│ days_since_epoch ┆ us_since_epoch ┆ ns_since_midnight │
│ ---              ┆ ---            ┆ ---               │
│ i64              ┆ i64            ┆ i64               │
╞══════════════════╪════════════════╪═══════════════════╡
│ 0                ┆ 0              ┆ 0                 │
│ 9                ┆ 60000000       ┆ 1000000000        │
└──────────────────┴────────────────┴───────────────────┘


In [37]:
# The same two calendar days, once as Date values and once as text.
date_columns = {
    "date": [date(2022, 1, 1), date(2022, 1, 2)],
    "string": ["2022-01-01", "2022-01-02"],
}
df = pl.DataFrame(date_columns)

# One format string drives both directions: Date -> str and str -> Datetime.
date_format = "%Y-%m-%d"
formatted = pl.col("date").dt.to_string(date_format)
parsed = pl.col("string").str.to_datetime(date_format)

result = df.select(formatted, parsed)
print(result)

shape: (2, 2)
┌────────────┬─────────────────────┐
│ date       ┆ string              │
│ ---        ┆ ---                 │
│ str        ┆ datetime[μs]        │
╞════════════╪═════════════════════╡
│ 2022-01-01 ┆ 2022-01-01 00:00:00 │
│ 2022-01-02 ┆ 2022-01-02 00:00:00 │
└────────────┴─────────────────────┘
