In [29]:
// Print a banner line (presumably a quick check that the kernel evaluates Rust).
println!("EvCxR Jupyter Notebook");

EvCxR Jupyter Notebook


In [30]:
// Import dependencies -- WARNING: expect ~5 mins to download and compile
:dep polars = {version="0.32.1", features=["describe", "json"]}
:dep serde_json = "1.0.105"

In [31]:
// Define Utility Functions
use polars::prelude::*;
use std::fs;
use std::io::Cursor;

/// Reads the CSV file at `path` into a `DataFrame`.
///
/// `headers` tells the reader whether the first row holds column names.
///
/// # Errors
///
/// Returns the `PolarsError` produced by opening the file or by parsing its
/// contents, instead of panicking, so the caller can decide how to report it.
pub fn read_csv(path: &str, headers: bool) -> Result<DataFrame, PolarsError> {
    CsvReader::from_path(path)?.has_header(headers).finish()
}

/// Reads the JSON file at `path` into a `DataFrame`.
///
/// The whole file is loaded into memory as a string and handed to
/// `JsonReader` through an in-memory cursor.
///
/// # Errors
///
/// Returns a `PolarsError` if the file cannot be read (the I/O error is
/// converted via `From`) or if the JSON cannot be parsed into a frame.
/// Neither failure panics.
pub fn read_json(path: &str) -> Result<DataFrame, PolarsError> {
    let json_str = fs::read_to_string(path)?;
    JsonReader::new(Cursor::new(json_str)).finish()
}

/// Prints a short overview of `df` to stdout.
///
/// Emits, in order: a header line, the first five rows, the schema, and the
/// result of `describe(None)`.
pub fn df_summary(df: DataFrame) {
    println!("Dataframe Summary...");
    println!("{:?}", df.head(Some(5)));
    println!("{:?}", df.schema());
    // `describe` is fallible: print the statistics frame itself rather than
    // the Debug form of the surrounding `Result`, and label any failure.
    match df.describe(None) {
        Ok(stats) => println!("{:?}", stats),
        Err(e) => println!("Error: {:?}", e),
    }
}

/// Loads `filepath` according to `format` and prints a summary of the result.
///
/// Recognised `format` values are `"csv_with_headers"`, `"csv_no_headers"`
/// and `"json"`. Any other value prints a notice and returns without loading
/// anything. If the chosen loader returns an `Err`, the error is printed
/// instead of a summary.
fn run(filepath: &str, format: &str) {
    let loaded = match format {
        // Both CSV variants share the same message; only the header flag differs.
        "csv_with_headers" | "csv_no_headers" => {
            println!("Loading CSV to dataframe...");
            read_csv(filepath, format == "csv_with_headers")
        }
        "json" => {
            println!("Loading JSON to dataframe...");
            read_json(filepath)
        }
        _ => {
            println!("No command specified");
            return;
        }
    };

    match loaded {
        Ok(frame) => df_summary(frame),
        Err(err) => println!("Error: {:?}", err),
    }
}

In [32]:
// CSV Test: load a CSV file whose first row holds the column names and
// print its summary via `run`.
// Path is relative; presumably resolved against the kernel's working directory.
let CSV_PATH = "data/sample.csv";
// Must be one of the format strings matched inside `run`.
let CSV_FORMAT = "csv_with_headers";
run(CSV_PATH, CSV_FORMAT);

Loading CSV to dataframe...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪════════════

In [33]:
// JSON Test: load a JSON file and print its summary via `run`.
// Path is relative; presumably resolved against the kernel's working directory.
let JSON_PATH = "data/sample.json";
// Must be one of the format strings matched inside `run`.
let JSON_FORMAT = "json";
run(JSON_PATH, JSON_FORMAT);

Loading JSON to dataframe...
Dataframe Summary...
shape: (5, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category   ┆ calories ┆ fats_g ┆ sugars_g │
│ ---        ┆ ---      ┆ ---    ┆ ---      │
│ str        ┆ i64      ┆ f64    ┆ i64      │
╞════════════╪══════════╪════════╪══════════╡
│ vegetables ┆ 45       ┆ 0.5    ┆ 2        │
│ seafood    ┆ 150      ┆ 5.0    ┆ 0        │
│ meat       ┆ 100      ┆ 5.0    ┆ 0        │
│ fruit      ┆ 60       ┆ 0.0    ┆ 11       │
│ seafood    ┆ 140      ┆ 5.0    ┆ 1        │
└────────────┴──────────┴────────┴──────────┘
Schema:
name: category, data type: Utf8
name: calories, data type: Int64
name: fats_g, data type: Float64
name: sugars_g, data type: Int64

Ok(shape: (9, 5)
┌────────────┬────────────┬───────────┬──────────┬──────────┐
│ describe   ┆ category   ┆ calories  ┆ fats_g   ┆ sugars_g │
│ ---        ┆ ---        ┆ ---       ┆ ---      ┆ ---      │
│ str        ┆ str        ┆ f64       ┆ f64      ┆ f64      │
╞════════════╪═══════════