# Polars
## Install polars

In [2]:
:dep polars = { version = "0.20.0", features = ["ndarray", "random"]}

In [3]:
#[macro_use]
extern crate polars;
use polars::prelude::*;

## 演算

In [4]:
// Build an unnamed Series by collecting an iterator over integer literals.
// The trailing bare `s` makes the notebook display the value.
let s: Series = [1, 2, 3].iter().collect();
s

shape: (3,)
Series: '' [i32]
[
	1
	2
	3
]

In [5]:
// Build a Series with an explicit name ("A") from a slice of values.
Series::new("A", &[1, 2, 3])

shape: (3,)
Series: 'A' [i32]
[
	1
	2
	3
]

In [6]:
// series and numbers
println!("add:\n{:?}", &s + 1);
println!("sub:\n{:?}", &s - 1);
println!("mul:\n{:?}", &s * 2);
println!("div:\n{:?}", &s / 2);

add:
shape: (3,)
Series: '' [i32]
[
	2
	3
	4
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	1
	2
]
mul:
shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
div:
shape: (3,)
Series: '' [i32]
[
	0
	1
	1
]


In [7]:
// series and series
// Arithmetic between two Series is applied element by element.
println!("add:\n{:?}", &s + &s);
println!("sub:\n{:?}", &s - &s);
println!("mul:\n{:?}", &s * &s);
println!("div:\n{:?}", &s / &s);
// `eq` here is whole-Series equality: it prints a single bool, not a
// per-element mask (use `equal` for the element-wise comparison).
println!("eq:\n{:?}", &s.eq(&s));

add:
shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	0
	0
]
mul:
shape: (3,)
Series: '' [i32]
[
	1
	4
	9
]
div:
shape: (3,)
Series: '' [i32]
[
	1
	1
	1
]
eq:
true


In [8]:
// comparison series and number
// Each comparison against a scalar yields a boolean ChunkedArray with one
// entry per element of `s`.
println!("eq:\n{:?}", &s.equal(1));
println!("ne:\n{:?}", &s.not_equal(1));
println!("gt:\n{:?}", &s.gt(1));
println!("gteq:\n{:?}", &s.gt_eq(1));
println!("lt:\n{:?}", &s.lt(1));
println!("lteq:\n{:?}", &s.lt_eq(1));

eq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]
ne:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	true
]
gt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	true
]
gteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	true
	true
]
lt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	false
	false
]
lteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]


In [9]:
// comparison series and series
// `t` holds the same values as `s` with the last two swapped, so the
// element-wise comparisons below produce mixed results.
let t: Series = [1, 3, 2].iter().collect();

// Comparing two Series pairs elements by position and yields a boolean
// ChunkedArray.
println!("eq:\n{:?}", &s.equal(&t));
println!("ne:\n{:?}", &s.not_equal(&t));
println!("gt:\n{:?}", &s.gt(&t));
println!("gteq:\n{:?}", &s.gt_eq(&t));
println!("lt:\n{:?}", &s.lt(&t));
println!("lteq:\n{:?}", &s.lt_eq(&t));

eq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]
ne:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	true
]
gt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	false
	true
]
gteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	true
]
lt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	false
]
lteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	true
	false
]


## DataFrameの作成 

In [10]:
// Build a DataFrame with the `df!` macro: each `name => values` pair becomes
// one column. The macro returns a Result, hence the `unwrap()`.
// `df` is `mut` because later cells add columns to it in place.
let mut df = df!("A" => &["a", "b", "a"],
             "B" => &[1, 3, 5],
             "C" => &[10, 11, 12],
             "D" => &[2, 4, 6]
    ).unwrap();
df

shape: (3, 4)
┌─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ a   ┆ 5   ┆ 12  ┆ 6   │
└─────┴─────┴─────┴─────┘

## 列の選択

In [14]:
// Select a single column by name; the result is a one-column DataFrame
// wrapped in a Result.
df.select(&["A"])

Ok(shape: (3, 1)
┌─────┐
│ A   │
│ --- │
│ str │
╞═════╡
│ a   │
├╌╌╌╌╌┤
│ b   │
├╌╌╌╌╌┤
│ a   │
└─────┘)

In [15]:
// Select several columns at once, in the order listed.
df.select(&["A", "B"])

Ok(shape: (3, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ str ┆ i32 │
╞═════╪═════╡
│ a   ┆ 1   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ a   ┆ 5   │
└─────┴─────┘)

In [17]:
// `column` returns the Series itself (wrapped in a Result) rather than a
// one-column DataFrame.
df.column("A")

Ok(shape: (3,)
Series: 'A' [str]
[
	"a"
	"b"
	"a"
])

## 条件に応じた列選択

In [18]:
// Select columns by a predicate on their names: collect the matching names
// into a Vec<&str> and hand that to `select`.
df.select(&df.get_column_names()
            .iter()
            .filter(|name| name.starts_with("A"))
            // `copied()` turns the `&&str` items back into `&str`.
            .copied()
            .collect::<Vec<&str>>()
        )

Ok(shape: (3, 1)
┌─────┐
│ A   │
│ --- │
│ str │
╞═════╡
│ a   │
├╌╌╌╌╌┤
│ b   │
├╌╌╌╌╌┤
│ a   │
└─────┘)

## 列の入れ替え

In [19]:
// Reorder columns by selecting them in the desired order.
df.select(&["B", "A"])

Ok(shape: (3, 2)
┌─────┬─────┐
│ B   ┆ A   │
│ --- ┆ --- │
│ i32 ┆ str │
╞═════╪═════╡
│ 1   ┆ a   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3   ┆ b   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 5   ┆ a   │
└─────┴─────┘)

## 列の追加

In [20]:
// Four ways to append a column derived from column "B" (or equal to twice
// its values). `with_column` returns a Result; the intermediate calls are
// unwrapped so a failure surfaces here instead of being silently dropped.

// 1. Map over the typed (i32) chunked array, then convert back to a Series
//    named "E".
let col_e = df.column("B").unwrap()
                .i32().unwrap()
                .apply(|x| x * 2)
                .into_series()
                .rename("E")
                .to_owned();
df.with_column(col_e).unwrap();
// 2. Build the column directly from literal values.
df.with_column(Series::new("F", &[2, 6, 10])).unwrap();
// 3. Work on a one-column DataFrame: rename, transform, then pull the
//    Series back out.
df.with_column(df.select(&["B"]).unwrap()
                 .rename("B", "G").unwrap()
                 .apply("G", |x| x * 2).unwrap()
                 .column("G").unwrap()
                 .to_owned()).unwrap();

// 4. Clone the column, rename the clone to "H", then multiply by a scalar.
//    Left as the final expression so the notebook displays the Result.
df.with_column(df.column("B").unwrap().to_owned().rename("H").to_owned() * 2)

Ok(shape: (3, 8)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   ┆ E   ┆ F   ┆ G   ┆ H   │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   ┆ 2   ┆ 2   ┆ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   ┆ 6   ┆ 6   ┆ 6   ┆ 6   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ a   ┆ 5   ┆ 12  ┆ 6   ┆ 10  ┆ 10  ┆ 10  ┆ 10  │
└─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘)

In [21]:
// `replace_or_add` adds column "I" to `df` itself (later cells show `df` with
// nine columns). The returned reference is unwrapped and cloned into
// `new_df` so it can be displayed as an independent value.
let new_df = df.replace_or_add("I", Series::new("I", &[2, 6, 10])).unwrap().clone();
new_df

shape: (3, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   ┆ ... ┆ F   ┆ G   ┆ H   ┆ I   │
│ --- ┆ --- ┆ --- ┆ --- ┆     ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 ┆     ┆ i32 ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   ┆ ... ┆ 2   ┆ 2   ┆ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   ┆ ... ┆ 6   ┆ 6   ┆ 6   ┆ 6   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ a   ┆ 5   ┆ 12  ┆ 6   ┆ ... ┆ 10  ┆ 10  ┆ 10  ┆ 10  │
└─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘

## 条件による行選択

### 単独条件

In [22]:
// Keep only the rows where column "B" is <= 4: the comparison builds a
// boolean mask and `filter` applies it.
df.filter(&df.column("B").unwrap().lt_eq(4))

Ok(shape: (2, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   ┆ ... ┆ F   ┆ G   ┆ H   ┆ I   │
│ --- ┆ --- ┆ --- ┆ --- ┆     ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 ┆     ┆ i32 ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   ┆ ... ┆ 2   ┆ 2   ┆ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   ┆ ... ┆ 6   ┆ 6   ┆ 6   ┆ 6   │
└─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘)

### 複数条件
bool型のChunkedArray同士は `|` などのビット演算子で組み合わせることができる。

In [23]:
// Combine two boolean masks with `|` (OR): keep rows where B == 1 or C != 12.
df.filter(&(
    df.column("B").unwrap().equal(1) | df.column("C").unwrap().not_equal(12)
))

Ok(shape: (2, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   ┆ ... ┆ F   ┆ G   ┆ H   ┆ I   │
│ --- ┆ --- ┆ --- ┆ --- ┆     ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 ┆     ┆ i32 ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   ┆ ... ┆ 2   ┆ 2   ┆ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   ┆ ... ┆ 6   ┆ 6   ┆ 6   ┆ 6   │
└─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘)

### 含まれているかなどの演算

In [24]:
// Membership test ("is in"): build a boolean mask marking the rows whose "B"
// value appears in `v`, then filter with it.
let v: Vec<i32> = vec![1, 3];
// Iterating the typed array yields Option<i32>. A null value is treated as
// "not contained" rather than panicking on `unwrap()`.
let mask: ChunkedArray<BooleanType> = df.column("B").unwrap().i32()
            .unwrap().into_iter().map(|x| x.map_or(false, |b| v.contains(&b))).collect();

df.filter(&mask)

Ok(shape: (2, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   ┆ ... ┆ F   ┆ G   ┆ H   ┆ I   │
│ --- ┆ --- ┆ --- ┆ --- ┆     ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i32 ┆ i32 ┆ i32 ┆     ┆ i32 ┆ i32 ┆ i32 ┆ i32 │
╞═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╡
│ a   ┆ 1   ┆ 10  ┆ 2   ┆ ... ┆ 2   ┆ 2   ┆ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ b   ┆ 3   ┆ 11  ┆ 4   ┆ ... ┆ 6   ┆ 6   ┆ 6   ┆ 6   │
└─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘)

## 重複行

In [25]:
// Small DataFrame whose second and third rows are identical, used to
// demonstrate duplicate handling.
let mut dup_df = df!("A" => &[1, 2, 2], "B" => &[1, 2, 2]).unwrap();
dup_df

shape: (3, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞═════╪═════╡
│ 1   ┆ 1   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 2   │
└─────┴─────┘

### 重複行の抽出

In [26]:
// `is_duplicated` yields a boolean mask marking every row that has a
// duplicate; filtering by it keeps all copies of the duplicated rows.
dup_df.filter(&dup_df.is_duplicated().unwrap())

Ok(shape: (2, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞═════╪═════╡
│ 2   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 2   │
└─────┴─────┘)

### 重複行の削除

In [27]:
// Remove duplicate rows, keeping one copy of each.
// NOTE(review): presumably `true` asks to keep the original row order and
// `None` means "compare all columns" -- confirm against the polars docs.
dup_df.drop_duplicates(true, None)

Ok(shape: (2, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞═════╪═════╡
│ 1   ┆ 1   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 2   │
└─────┴─────┘)

## ndarrayへの変換
変換先の要素の型を型パラメータで指定する必要がある (例: `Float64Type`)。

In [28]:
// Convert a DataFrame to a 2-D ndarray. The type parameter selects the
// element type of the resulting array (here the i32 columns come out as f64).
let arr = df!("A" => &[1, 2, 3], "B" => &[2, 3, 4]).unwrap();
arr.to_ndarray::<Float64Type>()

Ok([[1.0, 2.0],
 [2.0, 3.0],
 [3.0, 4.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2)

In [29]:
// Fetch one row by index as a vector of typed values; the result is an
// Option (`Some` for row 1 here).
df.get(1)

Some([Utf8("b"), Int32(3), Int32(11), Int32(4), Int32(6), Int32(6), Int32(6), Int32(6), Int32(6)])

## Group By

In [30]:
// Sample data adapted from the polars documentation: one row per
// observation, with a date, a temperature and a rainfall amount.

// Raw date strings and the pattern used to parse them.
let dates = &[
    "2020-08-21",
    "2020-08-21",
    "2020-08-22",
    "2020-08-23",
    "2020-08-22",
];
let fmt = "%Y-%m-%d";

// One Series per column: parsed dates, temperatures, rainfall.
let s0 = DateChunked::parse_from_str_slice("date", dates, fmt).into_series();
let s1 = Series::new("temp", &[20, 10, 7, 9, 1]);
let s2 = Series::new("rain", &[0.2, 0.1, 0.3, 0.1, 0.01]);

// Assemble the columns into a DataFrame and print it.
let columns = vec![s0, s1, s2];
let df = DataFrame::new(columns).unwrap();
println!("{:?}", df);

shape: (5, 3)
┌────────────┬──────┬──────┐
│ date       ┆ temp ┆ rain │
│ ---        ┆ ---  ┆ ---  │
│ date       ┆ i32  ┆ f64  │
╞════════════╪══════╪══════╡
│ 2020-08-21 ┆ 20   ┆ 0.2  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-21 ┆ 10   ┆ 0.1  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 7    ┆ 0.3  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-23 ┆ 9    ┆ 0.1  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 1    ┆ 0.01 │
└────────────┴──────┴──────┘


### Built-inの演算

- count
- first
- last
- sum
- min
- max
- mean
- median
- var
- std
- quantile
- n_unique

ができる。使い方は

1. 特定の列でGroupby
2. 演算したい列を指定 (指定なしなら全部)
3. 演算

In [33]:
// Group rows by the "date" column. The returned GroupBy (inside a Result)
// only records which rows belong to which group; nothing is aggregated yet.
df.groupby(&["date"])

Ok(GroupBy { df: shape: (5, 3)
┌────────────┬──────┬──────┐
│ date       ┆ temp ┆ rain │
│ ---        ┆ ---  ┆ ---  │
│ date       ┆ i32  ┆ f64  │
╞════════════╪══════╪══════╡
│ 2020-08-21 ┆ 20   ┆ 0.2  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-21 ┆ 10   ┆ 0.1  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 7    ┆ 0.3  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-23 ┆ 9    ┆ 0.1  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 1    ┆ 0.01 │
└────────────┴──────┴──────┘, selected_keys: [shape: (5,)
Series: 'date' [date]
[
	2020-08-21
	2020-08-21
	2020-08-22
	2020-08-23
	2020-08-22
]], groups: Idx(GroupsIdx { sorted: false, first: [3, 2, 0], all: [[3], [2, 4], [0, 1]] }), selected_agg: None })

In [34]:
// Variance of every non-key column within each date group. A group with a
// single row yields NaN.
df.groupby(&["date"]).unwrap().var()

Ok(shape: (3, 3)
┌────────────┬──────────────┬──────────────┐
│ date       ┆ temp_agg_var ┆ rain_agg_var │
│ ---        ┆ ---          ┆ ---          │
│ date       ┆ f64          ┆ f64          │
╞════════════╪══════════════╪══════════════╡
│ 2020-08-23 ┆ NaN          ┆ NaN          │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 18.0         ┆ 0.04205      │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-21 ┆ 50.0         ┆ 0.005        │
└────────────┴──────────────┴──────────────┘)

In [36]:
// Restrict the aggregation to the "temp" column, then sum it per group.
df.groupby(&["date"]).unwrap().select(&["temp"]).sum()

Ok(shape: (3, 2)
┌────────────┬──────────┐
│ date       ┆ temp_sum │
│ ---        ┆ ---      │
│ date       ┆ i32      │
╞════════════╪══════════╡
│ 2020-08-21 ┆ 30       │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-23 ┆ 9        │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 8        │
└────────────┴──────────┘)

In [37]:
// To run several aggregations at once, pass (column, [aggregation names])
// pairs to `agg`; each pair produces one output column per aggregation.
df.groupby(&["date"]).unwrap()
    .agg(&[("temp", &["sum", "min"]), ("rain", &["count", "first"])])

Ok(shape: (3, 5)
┌────────────┬──────────┬──────────┬────────────┬────────────┐
│ date       ┆ temp_sum ┆ temp_min ┆ rain_count ┆ rain_first │
│ ---        ┆ ---      ┆ ---      ┆ ---        ┆ ---        │
│ date       ┆ i32      ┆ i32      ┆ u32        ┆ f64        │
╞════════════╪══════════╪══════════╪════════════╪════════════╡
│ 2020-08-21 ┆ 30       ┆ 10       ┆ 2          ┆ 0.2        │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 8        ┆ 1        ┆ 2          ┆ 0.3        │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2020-08-23 ┆ 9        ┆ 9        ┆ 1          ┆ 0.1        │
└────────────┴──────────┴──────────┴────────────┴────────────┘)

### 任意の演算

Groupごとに任意の関数を適用する方法 (`apply`) しか分からなかった。

In [38]:
// `apply` calls the closure once per group, passing that group's rows as a
// DataFrame. Here the closure just prints each group and returns it
// unchanged. The trailing `;` discards the combined result.
df.groupby(&["date"]).unwrap()
    .apply(|x| { println!("{:?}", x); Ok(x)});

shape: (1, 3)
┌────────────┬──────┬──────┐
│ date       ┆ temp ┆ rain │
│ ---        ┆ ---  ┆ ---  │
│ date       ┆ i32  ┆ f64  │
╞════════════╪══════╪══════╡
│ 2020-08-23 ┆ 9    ┆ 0.1  │
└────────────┴──────┴──────┘
shape: (2, 3)
┌────────────┬──────┬──────┐
│ date       ┆ temp ┆ rain │
│ ---        ┆ ---  ┆ ---  │
│ date       ┆ i32  ┆ f64  │
╞════════════╪══════╪══════╡
│ 2020-08-21 ┆ 20   ┆ 0.2  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-21 ┆ 10   ┆ 0.1  │
└────────────┴──────┴──────┘
shape: (2, 3)
┌────────────┬──────┬──────┐
│ date       ┆ temp ┆ rain │
│ ---        ┆ ---  ┆ ---  │
│ date       ┆ i32  ┆ f64  │
╞════════════╪══════╪══════╡
│ 2020-08-22 ┆ 7    ┆ 0.3  │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2020-08-22 ┆ 1    ┆ 0.01 │
└────────────┴──────┴──────┘


## vstack, hstack

In [39]:
// Fixtures for the stacking examples below.

// Base frame: two integer columns.
let df1 = df!(
    "A" => &[1, 2, 3],
    "B" => &[2, 3, 4]
).unwrap();

// Same column names and dtypes as `df1`, so it can be stacked underneath it.
let df1_t = df!(
    "A" => &[4, 5, 6],
    "B" => &[5, 6, 7]
).unwrap();


// Different column names and dtypes (str, f64) but the same row count as `df1`.
let df2 = df!(
    "C" => &["a", "b", "c"],
    "D" => &[0.1, 0.2, 0.3]
).unwrap();

// Stand-alone Series with the same length as `df1`'s columns.
let s1 = Series::new("S1", [10, 11, 12]);
let s2 = Series::new("S2", ["ABC", "NMK", "XYZ"]);


In [40]:
// hstack (horizontal stack) appends the given Series as new columns.
// `s1` and `s2` are moved into the array here.
df1.hstack(&[s1, s2])

Ok(shape: (3, 4)
┌─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ S1  ┆ S2  │
│ --- ┆ --- ┆ --- ┆ --- │
│ i32 ┆ i32 ┆ i32 ┆ str │
╞═════╪═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 10  ┆ ABC │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 3   ┆ 11  ┆ NMK │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 3   ┆ 4   ┆ 12  ┆ XYZ │
└─────┴─────┴─────┴─────┘)

In [41]:
// To hstack another DataFrame, first collect its columns into a Vec<Series>,
// then append them as new columns of `df1`.
let s_vec: Vec<Series> = df2.iter().cloned().collect();
df1.hstack(&s_vec)

Ok(shape: (3, 4)
┌─────┬─────┬─────┬─────┐
│ A   ┆ B   ┆ C   ┆ D   │
│ --- ┆ --- ┆ --- ┆ --- │
│ i32 ┆ i32 ┆ str ┆ f64 │
╞═════╪═════╪═════╪═════╡
│ 1   ┆ 2   ┆ a   ┆ 0.1 │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 3   ┆ b   ┆ 0.2 │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 3   ┆ 4   ┆ c   ┆ 0.3 │
└─────┴─────┴─────┴─────┘)

In [42]:
// vstack requires matching column dtypes. `df2`'s columns (str, f64) differ
// from `df1`'s (i32, i32), so this returns an Err instead of a DataFrame.
df1.vstack(&df2)

Err(SchemaMisMatch("cannot vstack: because column datatypes (dtypes) in the two DataFrames do not match for left.name='A' with left.dtype=i32 != right.dtype=str with right.name='C'"))

In [43]:
// vstack (vertical stack) appends the rows of a DataFrame that has the same
// columns and dtypes.
df1.vstack(&df1_t)

Ok(shape: (6, 2)
┌─────┬─────┐
│ A   ┆ B   │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞═════╪═════╡
│ 1   ┆ 2   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3   ┆ 4   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 4   ┆ 5   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 5   ┆ 6   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 6   ┆ 7   │
└─────┴─────┘)

## Join

In [44]:
// Fixtures for the join examples (these shadow the earlier `df1` / `df2`).
// The fruit key is called "Fruit" on the left and "Name" on the right, and
// the "Origin" of Apple differs between the two frames.
let df1: DataFrame = df!("Fruit" => &["Apple", "Banana", "Pear"],
                         "Origin" => &["America", "Hawai", "Italy"],
                         "Phosphorus (mg/100g)" => &[11, 22, 12]).unwrap();
let df2: DataFrame = df!("Name" => &["Apple", "Banana", "Pear"],
                         "Origin" => &["France", "Hawai", "Italy"],
                         "Potassium (mg/100g)" => &[107, 358, 115]).unwrap();

In [None]:
// Inner join matching df1's "Fruit" column against df2's "Name" column.
// NOTE(review): presumably equivalent to the shorthand
// `df1.inner_join(&df2, "Fruit", "Name")` -- this cell was not executed, confirm.
df1.join(&df2, "Fruit", "Name", JoinType::Inner, None)

In [71]:
// Outer join on two key pairs: ("Fruit", "Name") and ("Origin", "Origin").
// Rows that match on only one side are kept, with nulls in the other side's
// columns.
// NOTE(review): presumably equivalent to the shorthand
// `df1.outer_join(&df2, &["Fruit", "Origin"], &["Name", "Origin"])` -- confirm.
df1.join(&df2, &["Fruit", "Origin"], &["Name", "Origin"], JoinType::Outer, None)

Ok(shape: (4, 4)
+----------------------+--------+---------+---------------------+
| Phosphorus (mg/100g) | Fruit  | Origin  | Potassium (mg/100g) |
| ---                  | ---    | ---     | ---                 |
| i32                  | str    | str     | i32                 |
| null                 | Apple  | France  | 107                 |
+----------------------+--------+---------+---------------------+
| 22                   | Banana | Hawai   | 358                 |
+----------------------+--------+---------+---------------------+
| 12                   | Pear   | Italy   | 115                 |
+----------------------+--------+---------+---------------------+
| 11                   | Apple  | America | null                |
+----------------------+--------+---------+---------------------+
)

In [72]:
// Left join on "Origin": every row of `df1` is kept, and the right-hand
// columns are null where `df2` has no matching origin.
df1.left_join(&df2, "Origin", "Origin")

Ok(shape: (3, 5)
+--------+---------+----------------------+--------+---------------------+
| Fruit  | Origin  | Phosphorus (mg/100g) | Name   | Potassium (mg/100g) |
| ---    | ---     | ---                  | ---    | ---                 |
| str    | str     | i32                  | str    | i32                 |
| Apple  | America | 11                   | null   | null                |
+--------+---------+----------------------+--------+---------------------+
| Banana | Hawai   | 22                   | Banana | 358                 |
+--------+---------+----------------------+--------+---------------------+
| Pear   | Italy   | 12                   | Pear   | 115                 |
+--------+---------+----------------------+--------+---------------------+
)