In [2]:
:dep smartcore = { version = "0.2.0", features=["nalgebra-bindings", "datasets"]}
:dep nalgebra = "0.23.0"

In [3]:
use nalgebra::{DMatrix, DVector, Scalar};

In [4]:
use std::error::Error;
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use std::str::FromStr;

In [5]:
/// Parses comma-separated numeric text into a dense matrix.
///
/// Every non-blank line of `input` becomes one row of the result. The items
/// on a line are separated by commas, surrounding whitespace is trimmed, and
/// each item is parsed with `N::from_str`. A trailing comma at the end of a
/// line is ignored. Blank (whitespace-only) lines are skipped so that a
/// trailing newline or spacer line does not corrupt the row count.
///
/// Empty input (no data rows at all) yields a 0x0 matrix.
///
/// # Errors
///
/// Returns an error when reading a line fails, when an item cannot be parsed
/// as `N`, or when a row does not have the same number of items as the first
/// data row.
fn parse_csv<N, R>(input: R) -> Result<DMatrix<N>, Box<dyn Error>>
  where N: FromStr + Scalar,
        N::Err: Error,
        R: BufRead
{
  // Parsed values, accumulated in row-major order.
  let mut data = Vec::new();

  // Number of data rows accepted so far.
  let mut rows = 0;

  // Width of the first data row; every later row has to match it.
  let mut cols: Option<usize> = None;

  for (index, line) in input.lines().enumerate() {
    let line = line?;

    // Skip blank lines instead of counting them as (empty) rows.
    if line.trim().is_empty() {
      continue;
    }

    // Remember where this row starts so its width can be measured.
    let row_start = data.len();
    for datum in line.split_terminator(',') {
      data.push(N::from_str(datum.trim())?);
    }
    let width = data.len() - row_start;

    match cols {
      None => cols = Some(width),
      Some(expected) if expected != width => {
        // A ragged row would make the row-major buffer impossible to
        // reshape, so report it rather than letting the constructor panic.
        return Err(format!(
          "line {}: expected {} columns, found {}",
          index + 1,
          expected,
          width
        ).into());
      }
      Some(_) => {}
    }

    rows += 1;
  }

  // With no data rows the column count is unknown; fall back to 0 so the
  // result is an empty matrix rather than a division by zero.
  Ok(DMatrix::from_row_slice(rows, cols.unwrap_or(0), &data))
}

In [6]:
// Open `../data/boston.csv` behind a buffered reader and parse every value as f64.
let reader = BufReader::new(File::open("../data/boston.csv")?);
let bos = parse_csv::<f64, _>(reader)?;
// Evaluate to (rows, columns) so the notebook displays the matrix dimensions.
bos.shape()

(506, 14)

In [45]:
// Show the first five rows of the parsed matrix as a quick sanity check.
println!("{}", bos.rows(0, 5));


  ┌                                                                                                                 ┐
  │ 0.00632      18    2.31       0   0.538   6.575    65.2    4.09       1     296    15.3   396.9    4.98      24 │
  │ 0.02731       0    7.07       0   0.469   6.421    78.9  4.9671       2     242    17.8   396.9    9.14    21.6 │
  │ 0.02729       0    7.07       0   0.469   7.185    61.1  4.9671       2     242    17.8  392.83    4.03    34.7 │
  │ 0.03237       0    2.18       0   0.458   6.998    45.8  6.0622       3     222    18.7  394.63    2.94    33.4 │
  │ 0.06905       0    2.18       0   0.458   7.147    54.2  6.0622       3     222    18.7   396.9    5.33    36.2 │
  └                                                                                                                 ┘




In [46]:
// Target vector: the last (index 13) column, copied into its own storage.
let y = bos.column(13).clone_owned();
// Feature matrix: the 13 columns that precede the target.
let x = bos.columns(0, 13).clone_owned();
// Display both shapes to confirm the split.
(x.shape(), y.shape())

((506, 13), (506, 1))

In [47]:
println!("{}", x.rows(0, 5));
println!("{}", y.rows(0, 5));


  ┌                                                                                                         ┐
  │ 0.00632      18    2.31       0   0.538   6.575    65.2    4.09       1     296    15.3   396.9    4.98 │
  │ 0.02731       0    7.07       0   0.469   6.421    78.9  4.9671       2     242    17.8   396.9    9.14 │
  │ 0.02729       0    7.07       0   0.469   7.185    61.1  4.9671       2     242    17.8  392.83    4.03 │
  │ 0.03237       0    2.18       0   0.458   6.998    45.8  6.0622       3     222    18.7  394.63    2.94 │
  │ 0.06905       0    2.18       0   0.458   7.147    54.2  6.0622       3     222    18.7   396.9    5.33 │
  └                                                                                                         ┘



  ┌      ┐
  │   24 │
  │ 21.6 │
  │ 34.7 │
  │ 33.4 │
  │ 36.2 │
  └      ┘




In [48]:
use smartcore::model_selection::train_test_split;

In [11]:
// The target is passed as a row vector, hence the transpose of the column `y`.
let y_row = y.transpose();
// Hold out a 0.2 fraction of the samples for testing; the final `true`
// presumably enables shuffling (see the smartcore docs to confirm).
let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y_row, 0.2, true);
// Display the shapes of all four pieces.
(x_train.shape(), y_train.shape(), x_test.shape(), y_test.shape())

((405, 13), (1, 405), (101, 13), (1, 101))

In [12]:
// Design matrix A: the training features plus a trailing column of ones, so
// the last fitted coefficient acts as the intercept. `insert_column` consumes
// its receiver, so a copy of `x_train` is made to keep the original usable;
// the column index is derived from the data instead of being hard-coded.
let a = x_train.clone().insert_column(x_train.ncols(), 1.0);
// Right-hand side b: the training targets as a column. `transpose` borrows,
// so no extra clone of `y_train` is needed.
let b = y_train.transpose();
(a.shape(), b.shape())

((405, 14), (405, 1))

In [13]:
// Show the first five rows of the design matrix `a`.
println!("{}", a.rows(0, 5));


  ┌                                                                                                                 ┐
  │ 8.15174       0    18.1       0     0.7    5.39    98.9  1.7281      24     666    20.2   396.9   20.85       1 │
  │ 2.36862       0   19.58       0   0.871   4.926    95.7  1.4608       5     403    14.7  391.71   29.53       1 │
  │  0.0351      95    2.68       0  0.4161   7.853    33.2   5.118       4     224    14.7  392.78    3.81       1 │
  │ 2.73397       0   19.58       0   0.871   5.597    94.9  1.5257       5     403    14.7  351.85   21.45       1 │
  │ 41.5292       0    18.1       0   0.693   5.531    85.4  1.6074      24     666    20.2  329.46   27.38       1 │
  └                                                                                                                 ┘


In [14]:
// Least squares via the normal equations, mirroring the numpy expression
// np.linalg.inv(A.T.dot(A)).dot(A.T).dot(b).
let a_t = a.transpose();
// Gram matrix A.T.dot(A).
let a_t_a = &a_t * &a;
// Its inverse; panics if the Gram matrix is not invertible.
let a_t_a_inv = a_t_a.try_inverse().unwrap();
// Solution vector: inverse times A.T, then times b.
let x_hat = a_t_a_inv * &a_t * &b;
// The first 13 entries are the feature coefficients ...
let coeff = x_hat.rows(0, 13).clone_owned();
// ... and entry 13 belongs to the column of ones, i.e. the intercept.
let intercept = x_hat[13];





In [51]:
// Print the coefficients and intercept obtained from the normal equations.
println!("coeff: {}, intercept: {}", coeff, intercept);

coeff: 
  ┌                       ┐
  │  -0.06162990533095337 │
  │   0.04300822718705109 │
  │   0.06536511761697435 │
  │    3.1530150444836917 │
  │     -19.5870126994823 │
  │    3.6945169534169398 │
  │  0.006685009127805426 │
  │   -1.3921812097201318 │
  │    0.3110429415073153 │
  │ -0.012486793494871928 │
  │    -0.974980194868809 │
  │  0.010996331673317889 │
  │   -0.5546406878182586 │
  └                       ┘

, intercept: 37.10300123020261


In [52]:
// Predictions for the held-out samples: x_test * coeff + intercept.
// Multiplying by reference avoids copying the whole test matrix.
let y_hat_inv = (&x_test * &coeff).add_scalar(intercept);

In [17]:
use smartcore::metrics::mean_absolute_error;

In [18]:
// Mean absolute error of the normal-equation predictions on the held-out
// targets; the predictions are transposed to match the row layout of `y_test`.
mean_absolute_error(&y_test, &y_hat_inv.transpose())

2.905824437461634

In [37]:
// Compare the first five predictions with the first five true targets.
println!("y_hat: {}, y_true: {}", y_hat_inv.transpose().columns(0, 5), y_test.columns(0, 5));

y_hat: 
  ┌                                                                                                ┐
  │  24.81581090423301 35.201621438642846  28.46519721699519 29.814288769031446 20.214666530438254 │
  └                                                                                                ┘

, y_true: 
  ┌                          ┐
  │ 24.7 33.1 23.3 30.1 24.5 │
  └                          ┘




In [38]:
// Q, R = np.linalg.qr(A)
let qr = a.clone().qr();
let (q, r) = (qr.q().transpose().to_owned(), qr.r().to_owned());
// np.linalg.inv(R).dot(Q.T).dot(b)
let r_inv = r.try_inverse().unwrap().to_owned();
let x_hat = r_inv * &q * &b;
let coeff = x_hat.rows(0, 13).into_owned();
let intercept = x_hat[(13, 0)];

In [39]:
// Print the coefficients and intercept obtained from the QR-based solve.
println!("coeff: {}, intercept: {}", coeff, intercept);

coeff: 
  ┌                       ┐
  │  -0.06162990533095337 │
  │   0.04300822718705109 │
  │   0.06536511761697435 │
  │    3.1530150444836917 │
  │     -19.5870126994823 │
  │    3.6945169534169398 │
  │  0.006685009127805426 │
  │   -1.3921812097201318 │
  │    0.3110429415073153 │
  │ -0.012486793494871928 │
  │    -0.974980194868809 │
  │  0.010996331673317889 │
  │   -0.5546406878182586 │
  └                       ┘

, intercept: 37.10300123020261


In [40]:
// Predictions from the QR-based fit: x_test * coeff + intercept.
// Multiplying by reference avoids copying the whole test matrix.
let y_hat_qr = (&x_test * &coeff).add_scalar(intercept);

In [23]:
// Mean absolute error of the QR-based predictions on the held-out targets;
// the predictions are transposed to match the row layout of `y_test`.
mean_absolute_error(&y_test, &y_hat_qr.transpose())

2.905824437460306

In [41]:
use smartcore::linear::linear_regression::LinearRegression;

In [35]:
// Fit smartcore's linear regression with default parameters. `fit` only
// borrows its inputs, so the training data is passed by reference without
// cloning, consistent with the random-forest cell.
let lr = LinearRegression::fit(&x_train, &y_train, Default::default()).unwrap();
// Predict the held-out samples with the fitted model.
let lr_y_hat = lr.predict(&x_test).unwrap();

In [36]:
// Mean absolute error of the smartcore linear-regression predictions.
mean_absolute_error(&y_test, &lr_y_hat)

2.9058244374601854

In [42]:
use smartcore::ensemble::random_forest_regressor::RandomForestRegressor;

In [43]:
// Fit a random-forest regressor with default parameters; panics if fitting fails.
let rf = RandomForestRegressor::fit(&x_train, &y_train, Default::default()).unwrap();
// Predict the held-out samples; panics if prediction fails.
let rf_y_hat = rf.predict(&x_test).unwrap();

In [44]:
// Mean absolute error of the random-forest predictions.
mean_absolute_error(&y_test, &rf_y_hat)

2.1732080472022446