# CI/CD Data Science with Rust

In [2]:
// WARNING: expect ~5 mins to download and compile
// Import external dependencies
:dep polars = {version="0.32.1", features=["describe", "json"]}
:dep serde_json = "1.0.105"
:dep plotters = { version = "^0.3.5", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
// Import local rusty-ds library
:dep rusty-ds = {path = "."}

### EDA Summary with Polars

In [3]:
// Summary
use rusty_ds::{load_file, df_summary};

/// Loads the file at `path` with `rusty_ds::load_file` and, on success, hands the
/// resulting dataframe to `rusty_ds::df_summary`.
///
/// `headers` is forwarded unchanged to `rusty_ds::load_file`
/// (presumably "the file has a header row" — confirm against the library).
///
/// A load failure is not propagated: the error is printed to stdout as
/// `Error: <e>` and the function returns normally.
fn summary(path: &str, headers: bool) {
    // `path` is already a `&str`, so it is passed through without re-borrowing.
    match rusty_ds::load_file(path, headers) {
        Ok(df) => rusty_ds::df_summary(df),
        Err(e) => println!("Error: {}", e),
    }
}

In [4]:
// CSV Test
// `CSV_PATH` and `HEADERS` are notebook-level bindings: the plotting cell further
// down reads them again, so their names must stay stable.
let CSV_PATH = "data/sample.csv";
let HEADERS = true;
// Load the CSV and print its summary (errors are printed, not raised).
summary(CSV_PATH, HEADERS);

Loading file...
.csv detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪═══════

│ count      ┆ 27         ┆ 27.0      ┆ 27.0     ┆ 27.0     │
│ null_count ┆ 0          ┆ 0.0       ┆ 0.0      ┆ 0.0      │
│ mean       ┆ null       ┆ 88.592593 ┆ 3.203704 ┆ 3.444444 │
│ std        ┆ null       ┆ 58.563088 ┆ 3.484193 ┆ 5.48658  │
│ min        ┆ fruit      ┆ 20.0      ┆ 0.0      ┆ 0.0      │
│ 25%        ┆ null       ┆ 30.0      ┆ 0.0      ┆ 0.0      │
│ 50%        ┆ null       ┆ 100.0     ┆ 1.5      ┆ 2.0      │
│ 75%        ┆ null       ┆ 130.0     ┆ 6.0      ┆ 3.5      │
│ max        ┆ vegetables ┆ 200.0     ┆ 10.0     ┆ 25.0     │
└────────────┴────────────┴───────────┴──────────┴──────────┘)


In [5]:
// JSON Test
let JSON_PATH = "data/sample.json";
// NOTE(review): this rebinds the notebook-level `HEADERS` to `false`, and the new
// value stays in effect for every cell executed after this one.
let HEADERS = false;
// Load the JSON file and print its summary (errors are printed, not raised).
summary(JSON_PATH, HEADERS);

Loading file...
.json detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪══════

### Visualization with Plotters

**[EvCxR Plotters Docs](https://github.com/plotters-rs/plotters#trying-with-jupyter-evcxr-kernel-interactively)**

In [9]:
use polars::prelude::*;
extern crate plotters;
use plotters::prelude::*;
use std::error::Error;

// load df
// The header flag is passed explicitly instead of reading `HEADERS`: the JSON test
// cell rebinds `HEADERS` to `false`, so a top-to-bottom run would otherwise load the
// CSV without its header row and the column lookups by name below would fail.
let df = load_file(CSV_PATH, true).expect("failed to load the CSV at CSV_PATH");

/// Builds `(x, y)` plotting pairs from two columns of `df`.
///
/// Both columns are cast to `Float64` and then paired row by row, so the
/// result has one tuple per dataframe row.
///
/// # Panics
///
/// Panics if `x_col` or `y_col` is not a column of `df`, or if the cast to
/// `Float64` fails.
///
/// NOTE(review): the values are read with `into_no_null_iter`, which assumes
/// the cast columns contain no nulls — confirm for columns that may hold
/// missing or non-numeric values.
fn get_data(df: &DataFrame, x_col: &str, y_col: &str) -> Vec<(f64, f64)> {
    // Cast each column to f64 so integer and float columns are handled alike.
    let x = df.column(x_col).unwrap().cast(&DataType::Float64).unwrap();
    let y = df.column(y_col).unwrap().cast(&DataType::Float64).unwrap();

    // Zip the two value iterators directly; no intermediate Vec is needed.
    let points: Vec<(f64, f64)> = x
        .f64()
        .unwrap()
        .into_no_null_iter()
        .zip(y.f64().unwrap().into_no_null_iter())
        .collect();
    points
}

// Pair the `calories` column (x) with the `fats_g` column (y) of the loaded dataframe.
let data = get_data(&df, "calories", "fats_g");

Loading file...
.csv detected...


In [None]:
extern crate plotters;
use plotters::prelude::*;

// Draw y = x^2 on a (640, 480) evcxr figure. Every fallible drawing call inside
// the closure propagates its error with `?`.
let figure = evcxr_figure((640, 480), |root| {
    // Paint the background white before anything else is drawn.
    root.fill(&WHITE)?;
    let mut chart = ChartBuilder::on(&root)
        .caption("y=x^2", ("Arial", 50).into_font())
        .margin(5)
        .x_label_area_size(30)
        .y_label_area_size(30)
        .build_cartesian_2d(-1f32..1f32, -0.1f32..1f32)?;

    chart.configure_mesh().draw()?;

    // 101 evenly spaced samples of x in [-1, 1]. A drawing failure is returned
    // to the caller via `?` instead of panicking inside the closure.
    chart.draw_series(LineSeries::new(
        (-50..=50).map(|x| x as f32 / 50.0).map(|x| (x, x * x)),
        &RED,
    ))?
        .label("y = x^2")
        .legend(|(x, y)| PathElement::new(vec![(x, y), (x + 20, y)], &RED));

    // Legend box: semi-transparent white fill with a black border.
    chart.configure_series_labels()
        .background_style(&WHITE.mix(0.8))
        .border_style(&BLACK)
        .draw()?;
    Ok(())
});
// Trailing expression: the figure is the cell's value.
figure