In [2]:
:dep smartcore = { version = "0.1.0", features=["nalgebra-bindings", "ndarray-bindings", "datasets"]}
:dep csv = "1.1.5"
:dep ndarray = "0.13.1"
:dep ndarray-csv = "0.4.1"
:dep nalgebra = "0.22.0"

In [3]:
use nalgebra::{DMatrix, DVector, RowDVector, Scalar};

In [4]:
use std::error::Error;
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use std::str::FromStr;

In [5]:
/// Parses comma-separated numeric text into a dense `DMatrix`.
///
/// Every non-blank line of `input` becomes one matrix row. The items on a
/// line are separated by commas, and surrounding whitespace is trimmed from
/// each item before it is parsed with `N::from_str`.
///
/// Blank lines (including a trailing one) are skipped. Input that contains no
/// data rows yields a `0 x 0` matrix instead of panicking.
///
/// # Errors
///
/// Returns an error when a line cannot be read, when an item cannot be parsed
/// as `N`, or when a row does not have the same number of items as the first
/// data row.
fn parse_csv<N, R>(input: R) -> Result<DMatrix<N>, Box<dyn Error>>
  where N: FromStr + Scalar,
        N::Err: Error,
        R: BufRead
{
  // Flat, row-major buffer of every parsed value.
  let mut data = Vec::new();

  // Dimensions discovered while reading; `cols` is fixed by the first row.
  let mut rows = 0;
  let mut cols = 0;

  for (index, line) in input.lines().enumerate() {
    let line = line?;

    // A blank line carries no values; counting it as a row would corrupt
    // the shape of the resulting matrix.
    if line.trim().is_empty() {
      continue;
    }

    // Parse this row's items and measure how many were appended.
    let before = data.len();
    for datum in line.split_terminator(',') {
      data.push(N::from_str(datum.trim())?);
    }
    let width = data.len() - before;

    if rows == 0 {
      cols = width;
    } else if width != cols {
      // Report ragged input explicitly rather than letting the matrix
      // constructor panic or silently mis-shape the data.
      return Err(format!(
        "line {}: expected {} columns, found {}",
        index + 1, cols, width
      ).into());
    }
    rows += 1;
  }

  // Every row has exactly `cols` items, so the buffer length is rows * cols.
  Ok(DMatrix::from_row_slice(rows, cols, &data[..]))
}

In [6]:
// Open the Boston housing CSV (the path is relative to the notebook's working
// directory) and parse it into a dense f64 matrix with `parse_csv`.
let file = File::open("../data/boston.csv")?;
let bos: DMatrix<f64> = parse_csv(BufReader::new(file))?;
// Evaluate to the matrix shape so the notebook displays (rows, columns).
bos.shape()

(506, 14)

In [7]:
// Display the first five rows of the raw data (start at row 0, take 5 rows).
println!("{}", bos.rows(0, 5));


  ┌                                                                                                                 ┐
  │ 0.00632      18    2.31       0   0.538   6.575    65.2    4.09       1     296    15.3   396.9    4.98      24 │
  │ 0.02731       0    7.07       0   0.469   6.421    78.9  4.9671       2     242    17.8   396.9    9.14    21.6 │
  │ 0.02729       0    7.07       0   0.469   7.185    61.1  4.9671       2     242    17.8  392.83    4.03    34.7 │
  │ 0.03237       0    2.18       0   0.458   6.998    45.8  6.0622       3     222    18.7  394.63    2.94    33.4 │
  │ 0.06905       0    2.18       0   0.458   7.147    54.2  6.0622       3     222    18.7   396.9    5.33    36.2 │
  └                                                                                                                 ┘




In [8]:
// Separate features from the target: the first 13 columns become `x`, and
// column 13 (the last one) becomes `y`. `into_owned` copies each view into
// its own matrix so it no longer borrows from `bos`.
let x = bos.columns(0, 13).into_owned();
let y = bos.column(13).into_owned();
// Evaluate to both shapes so the notebook displays them.
(x.shape(), y.shape())

((506, 13), (506, 1))

In [9]:
// Display the first five rows of the features and of the target.
println!("{}", x.rows(0, 5));
println!("{}", y.rows(0, 5));


  ┌                                                                                                         ┐
  │ 0.00632      18    2.31       0   0.538   6.575    65.2    4.09       1     296    15.3   396.9    4.98 │
  │ 0.02731       0    7.07       0   0.469   6.421    78.9  4.9671       2     242    17.8   396.9    9.14 │
  │ 0.02729       0    7.07       0   0.469   7.185    61.1  4.9671       2     242    17.8  392.83    4.03 │
  │ 0.03237       0    2.18       0   0.458   6.998    45.8  6.0622       3     222    18.7  394.63    2.94 │
  │ 0.06905       0    2.18       0   0.458   7.147    54.2  6.0622       3     222    18.7   396.9    5.33 │
  └                                                                                                         ┘



  ┌      ┐
  │   24 │
  │ 21.6 │
  │ 34.7 │
  │ 33.4 │
  │ 36.2 │
  └      ┘




In [10]:
use smartcore::model_selection::train_test_split;

In [11]:
// Split the data into training and test sets. `y` is transposed into a row
// vector before being passed in; the recorded shapes show the targets coming
// back as 1 x n rows.
// NOTE(review): 0.2 is presumably the test-set fraction — confirm against the
// smartcore `train_test_split` documentation.
let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y.transpose(), 0.2);
// Evaluate to the four shapes so the notebook displays them.
(x_train.shape(), y_train.shape(), x_test.shape(), y_test.shape())

((400, 13), (1, 400), (106, 13), (1, 106))

In [13]:
// Build the design matrix `a` by appending a column of ones at index 13, so
// the least-squares solution carries the intercept as its last entry.
// `insert_column` consumes its receiver, hence the clone that keeps `x_train`
// usable afterwards; the result is already an owned matrix.
let a = x_train.clone().insert_column(13, 1.0);
// `transpose` only borrows its receiver, so `y_train` need not be cloned.
// `b` is the training target as an n x 1 column.
let b = y_train.transpose();
// Evaluate to both shapes so the notebook displays them.
(a.shape(), b.shape())

((400, 14), (400, 1))

In [14]:
// Display the first five rows of the design matrix; the last column is the
// constant 1.0 that was appended for the intercept.
println!("{}", a.rows(0, 5));


  ┌                                                                                                                 ┐
  │ 0.00632      18    2.31       0   0.538   6.575    65.2    4.09       1     296    15.3   396.9    4.98       1 │
  │ 0.02731       0    7.07       0   0.469   6.421    78.9  4.9671       2     242    17.8   396.9    9.14       1 │
  │ 0.02729       0    7.07       0   0.469   7.185    61.1  4.9671       2     242    17.8  392.83    4.03       1 │
  │ 0.03237       0    2.18       0   0.458   6.998    45.8  6.0622       3     222    18.7  394.63    2.94       1 │
  │ 0.02985       0    2.18       0   0.458    6.43    58.7  6.0622       3     222    18.7  394.12    5.21       1 │
  └                                                                                                                 ┘




In [15]:
// A.T.dot(A)
let a_t_a = a.transpose() * &a;
// np.linalg.inv(A.T.dot(A))
let a_t_a_inv = a_t_a.try_inverse().unwrap();
// np.linalg.inv(A.T.dot(A)).dot(A.T).dot(b)
let x_hat = a_t_a_inv * &a.transpose() * &b;
let coeff = x_hat.rows(0, 13).into_owned();
let intercept = x_hat[(13, 0)];

In [18]:
// Display the coefficients and intercept obtained from the normal equations.
println!("coeff: {}, intercept: {}", coeff, intercept);

coeff: 
  ┌                        ┐
  │   -0.11963654326069977 │
  │    0.03863925768072219 │
  │    0.06701609736776759 │
  │      2.629438790269493 │
  │    -20.967597318475992 │
  │      4.103904424216561 │
  │ -0.0004976572448299054 │
  │    -1.5323807191103553 │
  │    0.35048193952706325 │
  │  -0.014317291144891548 │
  │    -0.9360162870498203 │
  │   0.007758241070873607 │
  │    -0.5686368353651949 │
  └                        ┘

, intercept: 37.433089824034546


In [19]:
// Predict on the held-out rows: y_hat = X_test * coeff + intercept.
// Multiplying by reference avoids copying `x_test` just to feed the product.
let y_hat_inv = (&x_test * &coeff).add_scalar(intercept);

In [20]:
use smartcore::metrics::mean_absolute_error;

In [23]:
// Mean absolute error of the normal-equation predictions on the test set.
// The predictions are transposed so they have the same 1 x n orientation as
// `y_test`.
mean_absolute_error(&y_test, &y_hat_inv.transpose())

3.1081864959381837

In [24]:
// Compare the first five predictions with the first five true targets.
println!("y_hat: {}, y_true: {}", y_hat_inv.transpose().columns(0, 5), y_test.columns(0, 5));

y_hat: 
  ┌                                                                                                ┐
  │  28.39971210805418 18.231029547358542 20.916178691198844 10.768226700423511 13.423367602416278 │
  └                                                                                                ┘

, y_true: 
  ┌                          ┐
  │ 36.2 18.2   21 12.7 13.5 │
  └                          ┘




In [25]:
// Q, R = np.linalg.qr(A)
let qr = a.clone().qr();
let (q, r) = (qr.q().transpose().to_owned(), qr.r().to_owned());
// np.linalg.inv(R).dot(Q.T).dot(b)
let r_inv = r.try_inverse().unwrap().to_owned();
let x_hat = r_inv * &q * &b;
let coeff = x_hat.rows(0, 13).into_owned();
let intercept = x_hat[(13, 0)];

In [26]:
// Display the coefficients and intercept obtained from the QR solution.
println!("coeff: {}, intercept: {}", coeff, intercept);

coeff: 
  ┌                        ┐
  │   -0.11963654326027438 │
  │    0.03863925768082262 │
  │    0.06701609736726818 │
  │     2.6294387902232588 │
  │    -20.967597318457692 │
  │      4.103904424216056 │
  │ -0.0004976572448448192 │
  │    -1.5323807191105798 │
  │     0.3504819395269898 │
  │  -0.014317291144885353 │
  │    -0.9360162870500541 │
  │   0.007758241070872939 │
  │    -0.5686368353652307 │
  └                        ┘

, intercept: 37.433089824040124


In [27]:
// Predict on the held-out rows with the QR-derived parameters:
// y_hat = X_test * coeff + intercept.
// Multiplying by reference avoids copying `x_test` just to feed the product.
let y_hat_qr = (&x_test * &coeff).add_scalar(intercept);

In [29]:
// Mean absolute error of the QR-based predictions on the test set. The
// predictions are transposed so they have the same 1 x n orientation as
// `y_test`.
mean_absolute_error(&y_test, &y_hat_qr.transpose())

3.108186495937896

In [28]:
// Compare the first five QR-based predictions with the true targets.
println!("y_hat: {}, y_true: {}", y_hat_qr.transpose().columns(0, 5), y_test.columns(0, 5));

y_hat: 
  ┌                                                                                                ┐
  │  28.39971210805763  18.23102954736146 20.916178691201022 10.768226700425757 13.423367602418757 │
  └                                                                                                ┘

, y_true: 
  ┌                          ┐
  │ 36.2 18.2   21 12.7 13.5 │
  └                          ┘




In [30]:
use smartcore::linear::linear_regression::LinearRegression;

In [31]:
// Fit smartcore's linear regression on the training split with default
// parameters. `fit` takes its inputs by reference, so the training data is
// borrowed directly instead of being cloned first. `unwrap` panics if the
// fit reports an error.
let lr = LinearRegression::fit(&x_train, &y_train, Default::default()).unwrap();
// Pull out the fitted parameters for comparison with the manual solutions.
let lr_coeff: DMatrix<f64> = lr.coefficients();
let lr_intercept: f64 = lr.intercept();

In [32]:
// Display the coefficients and intercept fitted by smartcore.
println!("coeff: {}, intercept: {}", lr_coeff, lr_intercept);

coeff: 
  ┌                        ┐
  │   -0.11963654326027114 │
  │   0.038639257680820216 │
  │    0.06701609736725139 │
  │     2.6294387902237863 │
  │      -20.9675973184574 │
  │      4.103904424216038 │
  │ -0.0004976572448452832 │
  │     -1.532380719110571 │
  │     0.3504819395269736 │
  │  -0.014317291144884357 │
  │    -0.9360162870500446 │
  │     0.0077582410708724 │
  │    -0.5686368353652304 │
  └                        ┘

, intercept: 37.43308982404


In [34]:
// Predict the held-out targets with the fitted model; `unwrap` panics if
// prediction reports an error.
let lr_y_hat = lr.predict(&x_test).unwrap();

In [35]:
// Mean absolute error of the smartcore linear-regression predictions.
mean_absolute_error(&y_test, &lr_y_hat)

3.1081864959378716

In [36]:
// Compare the first five smartcore predictions with the true targets.
println!("y_hat: {}, y_true: {}", lr_y_hat.columns(0, 5), y_test.columns(0, 5));

y_hat: 
  ┌                                                                                                ┐
  │  28.39971210805765 18.231029547361494 20.916178691201043 10.768226700425803 13.423367602418857 │
  └                                                                                                ┘

, y_true: 
  ┌                          ┐
  │ 36.2 18.2   21 12.7 13.5 │
  └                          ┘




In [37]:
use csv::{ReaderBuilder, WriterBuilder};
use ndarray::{Array, Array2, s};
use ndarray_csv::{Array2Reader, Array2Writer};
use std::error::Error;
use std::fs::File;

// Read Boston Housing Data from a CSV file
// (the path is relative to the notebook's working directory).
let file = File::open("../data/boston.csv")?;
// `has_headers(false)`: the first record is read as data, not as a header.
let mut reader = ReaderBuilder::new().has_headers(false).from_reader(file);
// and turn data into a NxM matrix
// The shape is fixed at 506 rows x 14 columns, the same shape reported when
// the file was loaded into nalgebra earlier in this notebook.
let bos: Array2<f64> = reader.deserialize_array2((506, 14))?;

In [38]:
// Separate features from the target: all rows of columns 0..13 become the
// 2-D array `x`, and column 13 becomes the 1-D array `y`. `to_owned` copies
// each view into its own array.
let x = bos.slice(s![0.., 0..13]).to_owned();
let y = bos.column(13).to_owned();
// Evaluate to both shapes so the notebook displays them.
(x.shape(), y.shape())

([506, 13], [506])

In [41]:
// Display the first five rows of the features and the first five targets.
println!("x: {}", x.slice(s![0..5, 0..]));
println!("y: {}", y.slice(s![0..5]));

x: [[0.00632, 18, 2.31, 0, 0.538, 6.575, 65.2, 4.09, 1, 296, 15.3, 396.9, 4.98],
 [0.02731, 0, 7.07, 0, 0.469, 6.421, 78.9, 4.9671, 2, 242, 17.8, 396.9, 9.14],
 [0.02729, 0, 7.07, 0, 0.469, 7.185, 61.1, 4.9671, 2, 242, 17.8, 392.83, 4.03],
 [0.03237, 0, 2.18, 0, 0.458, 6.998, 45.8, 6.0622, 3, 222, 18.7, 394.63, 2.94],
 [0.06905, 0, 2.18, 0, 0.458, 7.147, 54.2, 6.0622, 3, 222, 18.7, 396.9, 5.33]]
y: [24, 21.6, 34.7, 33.4, 36.2]


In [43]:
// Split the ndarray data into training and test sets; here `y` is already
// 1-D, so no transpose is needed.
// NOTE(review): 0.2 is presumably the test-set fraction — confirm against the
// smartcore `train_test_split` documentation.
let (x_train, x_test, y_train, y_test) = train_test_split(&x, &y, 0.2);
// Evaluate to the four shapes so the notebook displays them.
(x_train.shape(), y_train.shape(), x_test.shape(), y_test.shape())

([388, 13], [388], [118, 13], [118])

In [44]:
// Fit a linear regression on the training split with default parameters and,
// if that succeeds, predict the test targets in the same chain. `unwrap`
// panics if either step reports an error.
let lr_y_hat = LinearRegression::fit(&x_train, &y_train, Default::default()).
                    and_then(|lr| lr.predict(&x_test)).unwrap();

In [46]:
// Mean absolute error of the linear-regression predictions on this split.
mean_absolute_error(&y_test, &lr_y_hat)

3.5736947867842512

In [45]:
// Compare the first five linear-regression predictions with the true targets.
println!("y_hat: {}\ny_true: {}", lr_y_hat.slice(s![0..5]), y_test.slice(s![0..5]));

y_hat: [29.09793996553453, 19.44323628726571, 18.846586897221364, 21.721327644055297, 16.870683706691565]
y_true: [33.4, 27.1, 15, 18.9, 17.5]


In [48]:
use smartcore::ensemble::random_forest_regressor::RandomForestRegressor;

In [49]:
// Fit a random-forest regressor on the same training split with default
// parameters and, if that succeeds, predict the test targets in the same
// chain. `unwrap` panics if either step reports an error.
let rf_y_hat = RandomForestRegressor::fit(&x_train, &y_train, Default::default()).
                    and_then(|rf| rf.predict(&x_test)).unwrap();

In [51]:
// Mean absolute error of the random-forest predictions on the test set.
mean_absolute_error(&y_test, &rf_y_hat)

2.3541988162496597

In [52]:
// Compare the first five random-forest predictions with the true targets.
println!("y_hat: {}\ny_true: {}", rf_y_hat.slice(s![0..5]), y_test.slice(s![0..5]));

y_hat: [35.85, 20.299999999999994, 19.789999999999996, 22.14, 18.759999999999945]
y_true: [33.4, 27.1, 15, 18.9, 17.5]
