In [2]:
:dep serde = { version = "1.0", features = ["derive"] }
:dep rand = "0.8"
:dep polars = { version = "0.46.0", features = ["lazy", "describe"] }
:dep plotters = { version = "0.3.6", default-features = false, features = ["evcxr", "all_series", "all_elements"] }

In [28]:
// An empty frame (no columns, no rows), kept around for later experiments.
let empty = DataFrame::default();

// Build the demo frame column by column; row `i` is the triple
// (fruit[i], year[i], price[i]).
//
// Note on alignment: the fruit names cycle with period 3 while the years
// cycle with period 5, so over 15 rows every (fruit, year) combination
// appears exactly once -- but rows of the same fruit are NOT contiguous.
// `fruits` is declared `mut` because the CSV writer takes it by `&mut`.
let mut fruits = df!(
    "fruit" => [
        "apple",  "banana", "orange", "apple",  "banana",
        "orange", "apple",  "banana", "orange", "apple",
        "banana", "orange", "apple",  "banana", "orange",
    ],
    "year" => [
        2018, 2019, 2020, 2021, 2022,
        2018, 2019, 2020, 2021, 2022,
        2018, 2019, 2020, 2021, 2022,
    ],
    "price" => [
        1.5, 1.7, 1.2, 1.0, 1.6,
        0.8, 0.7, 0.9, 0.6, 0.8,
        2.0, 2.3, 1.9, 2.5, 2.3,
    ]
)?;

// Columns can be indexed by position ...
println!("{:?}", fruits[0]);

// ... or by name. Both forms panic if the column does not exist.
println!("{:?}", fruits["year"]);

Series(SeriesColumn { inner: shape: (15,)
Series: 'fruit' [str]
[
	"apple"
	"banana"
	"orange"
	"apple"
	"banana"
	…
	"banana"
	"orange"
	"apple"
	"banana"
	"orange"
], materialized_at: None })
Series(SeriesColumn { inner: shape: (15,)
Series: 'year' [i32]
[
	2018
	2019
	2020
	2021
	2022
	…
	2018
	2019
	2020
	2021
	2022
], materialized_at: None })


In [10]:
// Round-trip the `fruits` frame through a CSV file in the working directory.
//
// File creation is propagated with `?` like every other fallible call in this
// cell instead of panicking via `expect`: an I/O failure (read-only directory,
// missing permissions, ...) is an ordinary runtime error, and a panic inside an
// evcxr session can discard the variables defined so far.
let mut file = File::create("fruits.csv")?;

// Write with a header row and an explicit comma separator.
CsvWriter::new(&mut file)
    .include_header(true)
    .with_separator(b',')
    .finish(&mut fruits)?;

// Read the same file back; the header row supplies the column names and
// date parsing is attempted on string columns.
let df_csv = CsvReadOptions::default()
    .with_has_header(true)
    .with_parse_options(CsvParseOptions::default().with_try_parse_dates(true))
    .try_into_reader_with_file_path(Some("fruits.csv".into()))?
    .finish()?;

// (rows, columns) of the frame that was read back.
df_csv.shape()


(15, 3)

In [22]:

// Peek at the first and last two rows of the frame read back from CSV.
println!("Head: {}", df_csv.head(Some(2)));
println!("Tail: {}", df_csv.tail(Some(2)));

// `slice(offset, length)`: three rows starting at row index 2.
let mut sl: DataFrame = df_csv.slice(2,3);
println!("Slice:2:3 {}", sl);

// Add 100 to the column at position 2 (`price`) in place.
// `apply_at_idx` is fallible (e.g. index out of range); the result used to be
// dropped silently, which would let the next line print unmodified data as if
// the update had succeeded. Propagate the error instead.
sl.apply_at_idx(2, |s| s + 100)?;
println!("Apply at index +100 to price: {}", sl);

Head: shape: (2, 3)
┌────────┬──────┬───────┐
│ fruit  ┆ year ┆ price │
│ ---    ┆ ---  ┆ ---   │
│ str    ┆ i64  ┆ f64   │
╞════════╪══════╪═══════╡
│ apple  ┆ 2018 ┆ 1.5   │
│ banana ┆ 2019 ┆ 1.7   │
└────────┴──────┴───────┘
Tail: shape: (2, 3)
┌────────┬──────┬───────┐
│ fruit  ┆ year ┆ price │
│ ---    ┆ ---  ┆ ---   │
│ str    ┆ i64  ┆ f64   │
╞════════╪══════╪═══════╡
│ banana ┆ 2021 ┆ 2.5   │
│ orange ┆ 2022 ┆ 2.3   │
└────────┴──────┴───────┘
Slice:2:3 shape: (3, 3)
┌────────┬──────┬───────┐
│ fruit  ┆ year ┆ price │
│ ---    ┆ ---  ┆ ---   │
│ str    ┆ i64  ┆ f64   │
╞════════╪══════╪═══════╡
│ orange ┆ 2020 ┆ 1.2   │
│ apple  ┆ 2021 ┆ 1.0   │
│ banana ┆ 2022 ┆ 1.6   │
└────────┴──────┴───────┘
Apply at index +100 to price: shape: (3, 3)
┌────────┬──────┬───────┐
│ fruit  ┆ year ┆ price │
│ ---    ┆ ---  ┆ ---   │
│ str    ┆ i64  ┆ f64   │
╞════════╪══════╪═══════╡
│ orange ┆ 2020 ┆ 101.2 │
│ apple  ┆ 2021 ┆ 101.0 │
│ banana ┆ 2022 ┆ 101.6 │
└────────┴──────┴───────┘


In [6]:
// One-row summary of the `price` column, built with lazy expressions:
// count, null count, mean, standard deviation, min, quartiles and max.
//
// `lazy()` consumes the frame, so a clone of `df_csv` is used to keep the
// original available to later cells.
// Errors from `collect()` are propagated with `?` (as in the other cells)
// rather than panicking through `unwrap()`.
let summary = df_csv
    .clone()
    .lazy()
    .select([
        col("price").count().alias("count"),
        col("price").null_count().alias("null_count"),
        col("price").mean().alias("mean"),
        // Argument is the delta degrees of freedom (1 => sample std) -- confirm
        // against the polars `Expr::std` docs if the exact estimator matters.
        col("price").std(1).alias("std"),
        col("price").min().alias("min"),
        col("price").quantile(lit(0.25), QuantileInterpolOptions::Linear).alias("25%"),
        col("price").quantile(lit(0.50), QuantileInterpolOptions::Linear).alias("50%"),
        col("price").quantile(lit(0.75), QuantileInterpolOptions::Linear).alias("75%"),
        col("price").max().alias("max"),
    ])
    .collect()?;

// Trailing expression: rendered as the cell output.
summary

shape: (1, 9)
┌───────┬────────────┬──────────┬──────────┬───┬──────┬─────┬──────┬─────┐
│ count ┆ null_count ┆ mean     ┆ std      ┆ … ┆ 25%  ┆ 50% ┆ 75%  ┆ max │
│ ---   ┆ ---        ┆ ---      ┆ ---      ┆   ┆ ---  ┆ --- ┆ ---  ┆ --- │
│ u32   ┆ u32        ┆ f64      ┆ f64      ┆   ┆ f64  ┆ f64 ┆ f64  ┆ f64 │
╞═══════╪════════════╪══════════╪══════════╪═══╪══════╪═════╪══════╪═════╡
│ 15    ┆ 0          ┆ 1.453333 ┆ 0.645718 ┆ … ┆ 0.85 ┆ 1.5 ┆ 1.95 ┆ 2.5 │
└───────┴────────────┴──────────┴──────────┴───┴──────┴─────┴──────┴─────┘

In [7]:
// Average price per fruit, sorted by fruit name.
let fruits_means = fruits
    .clone()
    .lazy()
    .group_by([col("fruit")])
    .agg([col("price").mean().alias("avg_price")])
    .sort(["fruit"], Default::default())
    .collect()?;

// Pull the two result columns out as plain vectors for plotting.
// `into_no_null_iter` assumes the columns contain no nulls.
let fruits_names: Vec<String> = fruits_means
    .column("fruit")?
    .str()?
    .into_no_null_iter()
    .map(|s| s.to_string())
    .collect();

let mean_prices: Vec<f64> = fruits_means
    .column("avg_price")?
    .f64()?
    .into_no_null_iter()
    .collect();

// Bar chart: one bar per fruit, centred on x = 0, 1, 2, ...
let chart = evcxr_figure((640, 480), move |root| {
    root.fill(&WHITE)?;

    // Upper y bound: 10% headroom above the tallest bar, rounded up to the
    // next multiple of 0.25.
    let max_price = mean_prices.iter().copied().fold(0.0, f64::max);
    let padded = ((max_price * 1.1) * 4.0).ceil() / 4.0;
    // Keep both axis ranges non-empty even when there is no data or every
    // mean is zero, so the chart still has a valid coordinate system.
    let y_max = if padded > 0.0 { padded } else { 1.0 };
    let n = fruits_names.len();
    let x_max = n.max(1) as f64 - 0.5;

    let mut ctx = ChartBuilder::on(&root)
        .x_label_area_size(50)
        .y_label_area_size(60)
        .margin(20)
        .caption("Average Fruit Prices", ("sans-serif", 24))
        .build_cartesian_2d(-0.5f64..x_max, 0.0f64..y_max)?;

    ctx.configure_mesh()
        .y_desc("Price ($)")
        .x_desc("fruit")
        .disable_x_mesh()
        .x_labels(n.max(1))
        .x_label_formatter(&|x: &f64| {
            // Bars sit on integer x positions. Only ticks that land on such a
            // position get a fruit name; the previous `(x + 0.5) as usize`
            // mapping also named ticks at e.g. -0.5 or 0.5, which do not
            // correspond to any bar.
            let nearest = x.round();
            if nearest < 0.0 || (*x - nearest).abs() > 1e-6 {
                return String::new();
            }
            fruits_names.get(nearest as usize).cloned().unwrap_or_default()
        })
        .draw()?;

    // Draw bars with gaps: each bar is 0.6 wide inside a 1.0-wide slot.
    let bar_width = 0.6;
    ctx.draw_series(
        mean_prices.iter().enumerate().map(|(i, v)| {
            let center = i as f64;
            let x0 = center - bar_width / 2.0;
            let x1 = center + bar_width / 2.0;
            Rectangle::new(
                [(x0, 0.0), (x1, *v)],
                RGBColor(68, 114, 196).filled()
            )
        })
    )?;

    Ok(())
});

// Trailing expression: rendered inline as the cell output.
chart

In [8]:
// Display the per-fruit average price table (`fruit`, `avg_price`) as the cell output.
fruits_means

shape: (3, 2)
┌────────┬───────────┐
│ fruit  ┆ avg_price │
│ ---    ┆ ---       │
│ str    ┆ f64       │
╞════════╪═══════════╡
│ apple  ┆ 1.18      │
│ banana ┆ 1.74      │
│ orange ┆ 1.44      │
└────────┴───────────┘