# CI/CD Data Science with Rust

In [2]:
// WARNING: expect ~5 mins to download and compile
// Import external dependencies
:dep polars = {version="0.32.1", features=["describe", "json"]}
:dep serde_json = "1.0.105"
:dep plotters = { version = "^0.3.5", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
// Import local rusty-ds library
:dep rusty-ds = {path = "."}

### EDA Summary with Polars

In [3]:
// Summary
use rusty_ds::{load_file, df_summary};

/// Loads the file at `path` and prints a summary of the resulting dataframe.
///
/// `headers` is forwarded unchanged to `rusty_ds::load_file`. A load failure is
/// not propagated: it is reported on stdout as `Error: <cause>` and the function
/// returns without printing a summary.
fn summary(path: &str, headers: bool) {
    let frame = match rusty_ds::load_file(path, headers) {
        Ok(frame) => frame,
        Err(cause) => {
            println!("Error: {}", cause);
            return;
        }
    };
    rusty_ds::df_summary(frame);
}

In [4]:
// CSV Test
// Runs `summary` on the sample CSV, passing `true` for its `headers` argument.
// `CSV_PATH` is read again by the plotting cell further down (`load_file(CSV_PATH, ...)`).
let CSV_PATH = "data/sample.csv";
let HEADERS = true;
summary(CSV_PATH, HEADERS);

Loading file...
.csv detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪═══════

In [5]:
// JSON Test
// Runs `summary` on the sample JSON file, passing `false` for its `headers` argument.
// NOTE(review): this rebinds `HEADERS` to `false`; in a top-to-bottom run, later cells
// that read `HEADERS` will presumably see `false` unless it is set again — confirm.
let JSON_PATH = "data/sample.json";
let HEADERS = false;
summary(JSON_PATH, HEADERS);

Loading file...
.json detected...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪══════

### Visualization with Plotters

**[EvCxR Plotters Docs](https://github.com/plotters-rs/plotters#trying-with-jupyter-evcxr-kernel-interactively)**

In [59]:
use polars::prelude::*;
extern crate plotters;
use plotters::prelude::*;
use std::error::Error;
use plotters::evcxr::SVGWrapper;

// create (x,y) data given df cols, x_col, y_col
fn plot_data(df: &DataFrame, x_col: &str, y_col: &str) -> SVGWrapper{
    // get x and y columns --> transform to f64 Vec
    let x = df.column(x_col).unwrap().cast(&DataType::Float64).unwrap();
    let x_vec: Vec<f64> = x.f64().unwrap().into_no_null_iter().collect();
    let y = df.column(y_col).unwrap().cast(&DataType::Float64).unwrap();
    let y_vec: Vec<f64> = y.f64().unwrap().into_no_null_iter().collect();
    // Get min and max values as f64 values
    // let x_min = x_vec.iter().min_by(|x, y| x.partial_cmp(y).unwrap()).unwrap();
    // let x_max = x_vec.iter().max_by(|x, y| x.partial_cmp(y).unwrap()).unwrap();
    // let y_min = y_vec.iter().min_by(|x, y| x.partial_cmp(y).unwrap()).unwrap();
    // let y_max = y_vec.iter().max_by(|x, y| x.partial_cmp(y).unwrap()).unwrap();
    // Create (x,y) pairs
    let data: Vec<(f64, f64)> = x_vec.iter().zip(y_vec).map(|(x, y)| (*x, y)).collect();
    let figure = evcxr_figure((640, 480), |root| {
        root.fill(&WHITE)?;
        let mut ctx = ChartBuilder::on(&root)
            .set_label_area_size(LabelAreaPosition::Left, 40)
            .set_label_area_size(LabelAreaPosition::Bottom, 40)
            .caption("EvCxR Plot Demo", ("sans-serif", 40))
            .build_cartesian_2d(0f64..250f64, 0f64..50f64)
            .unwrap();
    
        ctx.configure_mesh().draw()?;
    
        ctx.draw_series(data.iter().map(|point| Circle::new(*point, 5, &RED)))
            .unwrap();
        Ok(())
    });
    figure
}

// load df
// Pass `true` explicitly instead of the session-wide `HEADERS`: the sample CSV has a
// header row (see the CSV test output), while `HEADERS` was last rebound to `false` by
// the JSON test cell, which would presumably leave no "calories"/"fats_g" columns.
let df = load_file(CSV_PATH, true).expect("failed to load the sample CSV");
// plot data
let figure = plot_data(&df, "calories", "fats_g");
// A trailing expression without `;` is what the evcxr kernel displays.
figure

Loading file...
.csv detected...
