In [6]:
# Series: a one-dimensional, named column of values sharing a single dtype.
import polars as pl

s = pl.Series(name="ints", values=[1, 2, 3, 45, 5])
# Print the Series itself, then the Python class it is an instance of.
print(s, type(s), sep="\n")

shape: (5,)
Series: 'ints' [i64]
[
	1
	2
	3
	45
	5
]
<class 'polars.series.series.Series'>


In [15]:
# DataFrame: a two-dimensional table made of named columns.
import polars as pl
import datetime as dt

# Build the frame column by column; each Series supplies one column name and its values.
df = pl.DataFrame(
    [
        pl.Series(
            "name",
            ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        ),
        pl.Series(
            "birthdate",
            [
                dt.date(1997, 1, 10),
                dt.date(1985, 2, 15),
                dt.date(1983, 3, 22),
                dt.date(1981, 4, 30),
            ],
        ),
        pl.Series("weight", [57.9, 72.5, 53.6, 83.1]),  # kilograms
        pl.Series("height", [1.56, 1.77, 1.65, 1.75]),  # metres
    ]
)

# Whole frame.
print(df)
# head(n): the first n rows (5 when n is omitted); here only the first 2.
print(df.head(2))
# glimpse: a preview of the first values laid out with one line per column;
# return_as_string=True returns the text so it can be printed explicitly.
print(df.glimpse(return_as_string=True))
# tail(n): the last n rows (5 when n is omitted); here only the last 1.
print(df.tail(1))

shape: (4, 4)
┌────────────────┬────────────┬────────┬────────┐
│ name           ┆ birthdate  ┆ weight ┆ height │
│ ---            ┆ ---        ┆ ---    ┆ ---    │
│ str            ┆ date       ┆ f64    ┆ f64    │
╞════════════════╪════════════╪════════╪════════╡
│ Alice Archer   ┆ 1997-01-10 ┆ 57.9   ┆ 1.56   │
│ Ben Brown      ┆ 1985-02-15 ┆ 72.5   ┆ 1.77   │
│ Chloe Cooper   ┆ 1983-03-22 ┆ 53.6   ┆ 1.65   │
│ Daniel Donovan ┆ 1981-04-30 ┆ 83.1   ┆ 1.75   │
└────────────────┴────────────┴────────┴────────┘
shape: (2, 4)
┌──────────────┬────────────┬────────┬────────┐
│ name         ┆ birthdate  ┆ weight ┆ height │
│ ---          ┆ ---        ┆ ---    ┆ ---    │
│ str          ┆ date       ┆ f64    ┆ f64    │
╞══════════════╪════════════╪════════╪════════╡
│ Alice Archer ┆ 1997-01-10 ┆ 57.9   ┆ 1.56   │
│ Ben Brown    ┆ 1985-02-15 ┆ 72.5   ┆ 1.77   │
└──────────────┴────────────┴────────┴────────┘
Rows: 4
Columns: 4
$ name       <str> 'Alice Archer', 'Ben Brown', 'Chloe Cooper', 'Dani

In [16]:
# sample: return n randomly selected rows, in no particular order.
import random

# Seeds Python's own RNG only; kept so code relying on that state is unaffected.
random.seed(40)
# Reproducibility of the Polars sample comes from the seed passed to sample() itself:
# seeding the `random` module is not a dependable way to fix which rows Polars picks.
print(df.sample(2, seed=40))


shape: (2, 4)
┌────────────────┬────────────┬────────┬────────┐
│ name           ┆ birthdate  ┆ weight ┆ height │
│ ---            ┆ ---        ┆ ---    ┆ ---    │
│ str            ┆ date       ┆ f64    ┆ f64    │
╞════════════════╪════════════╪════════╪════════╡
│ Daniel Donovan ┆ 1981-04-30 ┆ 83.1   ┆ 1.75   │
│ Chloe Cooper   ┆ 1983-03-22 ┆ 53.6   ┆ 1.65   │
└────────────────┴────────────┴────────┴────────┘


In [17]:
# describe: compute summary statistics (count, null_count, mean, std, min,
# percentiles, ...) for every column of the dataframe.
summary_stats = df.describe()
print(summary_stats)

shape: (9, 5)
┌────────────┬────────────────┬─────────────────────┬───────────┬──────────┐
│ statistic  ┆ name           ┆ birthdate           ┆ weight    ┆ height   │
│ ---        ┆ ---            ┆ ---                 ┆ ---       ┆ ---      │
│ str        ┆ str            ┆ str                 ┆ f64       ┆ f64      │
╞════════════╪════════════════╪═════════════════════╪═══════════╪══════════╡
│ count      ┆ 4              ┆ 4                   ┆ 4.0       ┆ 4.0      │
│ null_count ┆ 0              ┆ 0                   ┆ 0.0       ┆ 0.0      │
│ mean       ┆ null           ┆ 1986-09-04 00:00:00 ┆ 66.775    ┆ 1.6825   │
│ std        ┆ null           ┆ null                ┆ 13.560082 ┆ 0.097082 │
│ min        ┆ Alice Archer   ┆ 1981-04-30          ┆ 53.6      ┆ 1.56     │
│ 25%        ┆ null           ┆ 1983-03-22          ┆ 57.9      ┆ 1.65     │
│ 50%        ┆ null           ┆ 1985-02-15          ┆ 72.5      ┆ 1.75     │
│ 75%        ┆ null           ┆ 1985-02-15          ┆ 72.5    

In [19]:
# Schema: a mapping from each column (or Series) name to that column's data type.
# As with a Series, Polars infers the schema when the dataframe is created,
# and the inference can be overridden when needed.
inferred_schema = df.schema
print(inferred_schema)



Schema({'name': String, 'birthdate': Date, 'weight': Float64, 'height': Float64})


In [28]:
# An explicit schema can be given as a dictionary mapping column names to data types.
# Here "names" is mapped to None, so its type is still inferred, while "age" is
# forced to an unsigned 8-bit integer.
df = pl.DataFrame(
    data={
        "names": ["aj", "sj", "pp", "sw"],
        "age": [23, 20, 14, 10],
    },
    schema={"names": None, "age": pl.UInt8},
)
# Show the frame, then its resulting schema.
print(df, df.schema, sep="\n")

# To override the types of only specific columns, prefer schema_overrides over schema.

shape: (4, 2)
┌───────┬─────┐
│ names ┆ age │
│ ---   ┆ --- │
│ str   ┆ u8  │
╞═══════╪═════╡
│ aj    ┆ 23  │
│ sj    ┆ 20  │
│ pp    ┆ 14  │
│ sw    ┆ 10  │
└───────┴─────┘
Schema({'names': String, 'age': UInt8})
