# CI/CD Data Science with Rust

In [2]:
// WARNING: expect ~5 mins to download and compile
// Import external dependencies
:dep polars = {version="0.32.1", features=["describe", "json"]}
:dep serde_json = "1.0.105"
:dep plotters = { version = "^0.3.5", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
// Import local rusty-ds library
:dep rusty-ds = {path = "."}

### EDA Summary with Polars

In [3]:
// Summary
use rusty_ds::{load_file, df_summary};

/// Load the file at `path` and print a summary of the resulting dataframe.
///
/// `headers` is forwarded unchanged to `rusty_ds::load_file`. When loading
/// fails, the error is printed to stdout as `Error: <cause>` and the function
/// returns normally instead of propagating it.
fn summary(path: &str, headers: bool) {
    match rusty_ds::load_file(path, headers) {
        Err(cause) => println!("Error: {}", cause),
        Ok(frame) => rusty_ds::df_summary(frame),
    }
}

In [4]:
// CSV test: print a summary of the sample CSV file.
// NOTE: CSV_PATH and HEADERS are also read by the plotting cell further down,
// so these bindings are shared notebook state, not just locals of this cell.
let CSV_PATH = "data/sample.csv";
let HEADERS = true;
summary(CSV_PATH, HEADERS);

Loading file...
.csv detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪═══════

│ std        ┆ null       ┆ 58.563088 ┆ 3.484193 ┆ 5.48658  │
│ min        ┆ fruit      ┆ 20.0      ┆ 0.0      ┆ 0.0      │
│ 25%        ┆ null       ┆ 30.0      ┆ 0.0      ┆ 0.0      │
│ 50%        ┆ null       ┆ 100.0     ┆ 1.5      ┆ 2.0      │
│ 75%        ┆ null       ┆ 130.0     ┆ 6.0      ┆ 3.5      │
│ max        ┆ vegetables ┆ 200.0     ┆ 10.0     ┆ 25.0     │
└────────────┴────────────┴───────────┴──────────┴──────────┘)


In [5]:
// JSON test: print a summary of the sample JSON file.
let JSON_PATH = "data/sample.json";
// NOTE(review): this rebinds HEADERS to false. The plotting cell further down
// passes HEADERS when loading CSV_PATH, so it sees this value if the cells are
// run in order — confirm that is intended.
let HEADERS = false;
summary(JSON_PATH, HEADERS);

Loading file...
.json detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪══════

### Visualization with Plotters

**[EvCxR Plotters Docs](https://github.com/plotters-rs/plotters#trying-with-jupyter-evcxr-kernel-interactively)**

In [16]:
use polars::prelude::*;
extern crate plotters;
use plotters::prelude::*;
use std::error::Error;
use plotters::evcxr::SVGWrapper;

/// Render a scatter plot of `data` as an SVG figure for the EvCxR Jupyter kernel.
///
/// * `data`  - `(x, y)` points; each one is drawn as a red circle of radius 5.
/// * `xlims` - `(lower, upper)` bounds of the x values.
/// * `ylims` - `(lower, upper)` bounds of the y values.
///
/// Both axes are widened by one unit on each side of the supplied limits so
/// that points lying exactly on a limit are not clipped by the plot border.
///
/// Returns the `SVGWrapper` produced by `evcxr_figure`. Drawing errors are
/// returned from the figure closure with `?`, leaving it to `evcxr_figure` to
/// report them.
fn evcxr_plot(data: Vec<(f64, f64)>, xlims: (f64, f64), ylims: (f64, f64)) -> SVGWrapper {
    // Margin added around the data range on every side of the chart.
    const PADDING: f64 = 1.0;

    // Lower bounds move down and upper bounds move up; adding the padding to
    // the lower bound would start the axis above the smallest data values.
    let (xmin, xmax) = (xlims.0 - PADDING, xlims.1 + PADDING);
    let (ymin, ymax) = (ylims.0 - PADDING, ylims.1 + PADDING);

    evcxr_figure((640, 480), |root| {
        root.fill(&WHITE)?;
        let mut ctx = ChartBuilder::on(&root)
            .set_label_area_size(LabelAreaPosition::Left, 40)
            .set_label_area_size(LabelAreaPosition::Bottom, 40)
            .caption("EvCxR Plot Demo", ("sans-serif", 40))
            .build_cartesian_2d(xmin..xmax, ymin..ymax)?;

        ctx.configure_mesh().draw()?;

        ctx.draw_series(data.iter().map(|point| Circle::new(*point, 5, &RED)))?;
        Ok(())
    })
}

// Load the dataframe from CSV_PATH, which is bound in the CSV test cell.
// NOTE(review): HEADERS is whatever an earlier cell bound last; the JSON test
// cell rebinds it to false — confirm that is the value wanted for a CSV load.
// unwrap() panics here if loading fails.
let df = rusty_ds::load_file(CSV_PATH, HEADERS).unwrap();
// Pair the "calories" and "fats_g" columns into the Vec<(f64, f64)> that
// evcxr_plot takes as its points (presumably calories as x and fats_g as y).
let data = rusty_ds::zip_data(&df, "calories", "fats_g");
// Get the x and y limits from the same two columns
// (presumably the (min, max) of each — verify against rusty_ds::get_lims).
let xlims = rusty_ds::get_lims(&df, "calories");
let ylims = rusty_ds::get_lims(&df, "fats_g");
// Build the figure from the points and limits.
let figure = evcxr_plot(data, xlims, ylims);
// Trailing expression: the figure is the value of this cell.
figure

Loading file...
.csv detected...
