# Polars
## Install polars

In [5]:
:dep polars = { version = "0.14.7", features = ["ndarray", "random"]}

In [6]:
#[macro_use]
extern crate polars;
use polars::prelude::*;

## 演算

In [7]:
// Collect an iterator over integer literals into an unnamed Series
// (the output below shows the element type as i32).
let s = [1, 2, 3].iter().collect::<Series>();
s

shape: (3,)
Series: '' [i32]
[
	1
	2
	3
]

In [8]:
// Series (op) scalar: the scalar is combined with every element of the Series.
// `s` is borrowed with `&` so it is not moved and stays usable on the next line.
// The result keeps the i32 element type, so `/` is integer division
// (the recorded `div` output is 0, 1, 1).
println!("add:\n{:?}", &s + 1);
println!("sub:\n{:?}", &s - 1);
println!("mul:\n{:?}", &s * 2);
println!("div:\n{:?}", &s / 2);

add:
shape: (3,)
Series: '' [i32]
[
	2
	3
	4
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	1
	2
]
mul:


In [9]:
// Series (op) Series: the operator is applied element by element to the two operands.
println!("add:\n{:?}", &s + &s);
println!("sub:\n{:?}", &s - &s);
println!("mul:\n{:?}", &s * &s);
println!("div:\n{:?}", &s / &s);
// `eq` also works element by element, but the result is printed as a boolean
// ChunkedArray rather than as a Series.
println!("eq:\n{:?}", &s.eq(&s));

shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
div:
shape: (3,)
Series: '' [i32]
[
	0
	1
	1
]
add:
shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	0
	0
]
mul:
shape: (3,)
Series: '' [i32]
[
	1
	4
	9
]


In [10]:
// Comparison of a Series with a scalar: every element is tested against the
// value, giving a boolean ChunkedArray with one entry per element.
println!("eq:\n{:?}", &s.eq(1));
println!("ne:\n{:?}", &s.neq(1));
println!("gt:\n{:?}", &s.gt(1));
println!("gteq:\n{:?}", &s.gt_eq(1));
println!("lt:\n{:?}", &s.lt(1));
println!("lteq:\n{:?}", &s.lt_eq(1));

div:
shape: (3,)
Series: '' [i32]
[
	1
	1
	1
]
eq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	true
	true
]
eq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]
ne:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	true
]
gt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false


In [11]:
// Comparison of a Series with another Series: elements are compared pairwise
// by position, again giving a boolean ChunkedArray.
// `t` holds the same values as `s` with the last two swapped, so the results differ per row.
let t: Series = [1, 3, 2].iter().collect();

println!("eq:\n{:?}", &s.eq(&t));
println!("ne:\n{:?}", &s.neq(&t));
println!("gt:\n{:?}", &s.gt(&t));
println!("gteq:\n{:?}", &s.gt_eq(&t));
println!("lt:\n{:?}", &s.lt(&t));
println!("lteq:\n{:?}", &s.lt_eq(&t));

	true
	true
]
gteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	true
	true
]
lt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	false
	false
]
lteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]
eq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	false
]
ne:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	true
]
gt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	false
	true
]
gteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	false
	true
]
lt:
shape: (3,)
ChunkedArray: '' [bool]
[
	false
	true
	false
]
lteq:
shape: (3,)
ChunkedArray: '' [bool]
[
	true
	true
	false
]


## DataFrameの作成 

In [12]:
// Build a DataFrame from `column name => values` pairs with the `df!` macro.
// The binding is `mut` because later cells add columns to it in place.
let mut df = df!(
    "A" => &["a", "b", "a"],
    "B" => &[1, 3, 5],
    "C" => &[10, 11, 12],
    "D" => &[2, 4, 6]
)
.unwrap();
df

shape: (3, 4)
+-----+-----+-----+-----+
| A   | B   | C   | D   |
| --- | --- | --- | --- |
| str | i32 | i32 | i32 |
+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   |
+-----+-----+-----+-----+
| "b" | 3   | 11  | 4   |
+-----+-----+-----+-----+
| "a" | 5   | 12  | 6   |
+-----+-----+-----+-----+


## 列の選択

In [13]:
// Select one column by name. The call returns a Result (printed as `Ok(...)`),
// and the value inside is still a DataFrame, here with a single column.
df.select("A")

Ok(shape: (3, 1)
+-----+
| A   |
| --- |
| str |
+=====+
| "a" |
+-----+
| "b" |
+-----+
| "a" |
+-----+
)

In [14]:
// Select several columns at once by passing a tuple of column names.
// or df.select(vec!["A", "B"])
df.select(("A", "B")) 

Ok(shape: (3, 2)
+-----+-----+
| A   | B   |
| --- | --- |
| str | i32 |
+=====+=====+
| "a" | 1   |
+-----+-----+
| "b" | 3   |
+-----+-----+
| "a" | 5   |
+-----+-----+
)

In [15]:
// `column` returns the named column as a Series (inside a Result),
// not as a one-column DataFrame like `select` does.
df.column("A")

Ok(shape: (3,)
Series: 'A' [str]
[
	"a"
	"b"
	"a"
])

## 条件に応じた列選択

In [16]:
// Select the columns whose names satisfy a predicate: filter the column names,
// gather the matches into a Vec<&str>, and hand that to `select`.
df.select(&df.get_column_names()
            .iter()
            .filter(|x| x.starts_with("A"))
            // Turn each `&&str` back into `&str`; `copied()` replaces the
            // hand-written `.map(|&x| x)` closure.
            .copied()
            .collect::<Vec<&str>>()
        )

Ok(shape: (3, 1)
+-----+
| A   |
| --- |
| str |
+=====+
| "a" |
+-----+
| "b" |
+-----+
| "a" |
+-----+
)

## 列の入れ替え

In [76]:
// The resulting columns follow the order of the names passed to `select`,
// so listing "B" before "A" swaps the two columns.
df.select(("B", "A"))

Ok(shape: (3, 2)
+-----+-----+
| B   | A   |
| --- | --- |
| i32 | str |
+=====+=====+
| 1   | "a" |
+-----+-----+
| 3   | "b" |
+-----+-----+
| 5   | "a" |
+-----+-----+
)

## 列の追加

In [81]:
// NOTE: this cell does not compile; the error it produces is shown right below it.
// It tries to derive column "E" (B * 2) and add it in a single expression whose
// result is bound to `new_df`. The following cell builds the column first and
// calls `with_column` without binding the result, which works.
let new_df = df.with_column(df.column("B").unwrap().i32().unwrap()
    .apply(|x| x * 2 )
    .into_series()
    .rename("E").to_owned()).unwrap();

Error: cannot infer an appropriate lifetime for autoref due to conflicting requirements

In [30]:
// Four ways to append a column holding "B * 2" (named E, F, G and H).
// `with_column` returns a Result, so the intermediate calls are unwrapped
// instead of discarding the Result: a failed insert now panics at the call
// site rather than passing silently.

// E: apply a closure to the typed (i32) values of B, then rename the result.
let col_e = df.column("B").unwrap()
                .i32().unwrap()
                .apply(|x| x * 2)
                .into_series()
                .rename("E")
                .to_owned();
df.with_column(col_e).unwrap();
// F: build the Series directly from literal values.
df.with_column(Series::new("F", &[2, 6, 10])).unwrap();
// G: work on a one-column copy of the frame, rename B to G, double it there,
// and take the resulting column out as an owned Series.
df.with_column(df.select("B").unwrap()
                 .rename("B", "G").unwrap()
                 .apply("G", |x| x * 2).unwrap()
                 .column("G").unwrap()
                 .to_owned()).unwrap();

// H: clone B, rename the clone, and multiply the whole Series by a scalar.
// Left as the final expression so the notebook displays the resulting frame.
df.with_column(df.column("B").unwrap().to_owned().rename("H").to_owned() * 2)

Ok(shape: (3, 8)
+-----+-----+-----+-----+-----+-----+-----+-----+
| A   | B   | C   | D   | E   | F   | G   | H   |
| --- | --- | --- | --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 | i32 | i32 | i32 |
+=====+=====+=====+=====+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   | 2   | 2   | 2   | 2   |
+-----+-----+-----+-----+-----+-----+-----+-----+
| "b" | 3   | 11  | 4   | 6   | 6   | 6   | 6   |
+-----+-----+-----+-----+-----+-----+-----+-----+
| "a" | 5   | 12  | 6   | 10  | 10  | 10  | 10  |
+-----+-----+-----+-----+-----+-----+-----+-----+
)

## 条件による行選択

### 単独条件

In [33]:
// Build a boolean mask (B <= 4) from column B and keep only the rows where it is true.
df.filter(&df.column("B").unwrap().lt_eq(4))

Ok(shape: (2, 8)
+-----+-----+-----+-----+-----+-----+-----+-----+
| A   | B   | C   | D   | E   | F   | G   | H   |
| --- | --- | --- | --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 | i32 | i32 | i32 |
+=====+=====+=====+=====+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   | 2   | 2   | 2   | 2   |
+-----+-----+-----+-----+-----+-----+-----+-----+
| "b" | 3   | 11  | 4   | 6   | 6   | 6   | 6   |
+-----+-----+-----+-----+-----+-----+-----+-----+
)

### 複数条件
bool型のChunkedArray同士はビット演算子(`|` など)で結合できるため、複数の条件を1つのマスクにまとめて `filter` に渡せる。

In [36]:
// Combine two boolean masks with `|` (OR): keep the rows where B == 1 or C == 12.
df.filter(&(
    df.column("B").unwrap().eq(1) | df.column("C").unwrap().eq(12)
))

Ok(shape: (2, 8)
+-----+-----+-----+-----+-----+-----+-----+-----+
| A   | B   | C   | D   | E   | F   | G   | H   |
| --- | --- | --- | --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 | i32 | i32 | i32 |
+=====+=====+=====+=====+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   | 2   | 2   | 2   | 2   |
+-----+-----+-----+-----+-----+-----+-----+-----+
| "a" | 5   | 12  | 6   | 10  | 10  | 10  | 10  |
+-----+-----+-----+-----+-----+-----+-----+-----+
)

### 含まれているかなどの演算

In [38]:
// Keep the rows whose B value is contained in `v` (an "is in" test).
let v: Vec<i32> = vec![1, 3];
df.filter(&(
    df.column("B").unwrap()
        // View the column as its typed i32 array so the closure receives plain integers.
        .i32().unwrap()
        // One bool per row: is this value one of the wanted ones?
        .map(|x| v.contains(&x)).unwrap()
        // Gather the bools into the mask that `filter` takes.
        .collect()
))

Ok(shape: (2, 8)
+-----+-----+-----+-----+-----+-----+-----+-----+
| A   | B   | C   | D   | E   | F   | G   | H   |
| --- | --- | --- | --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 | i32 | i32 | i32 |
+=====+=====+=====+=====+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   | 2   | 2   | 2   | 2   |
+-----+-----+-----+-----+-----+-----+-----+-----+
| "b" | 3   | 11  | 4   | 6   | 6   | 6   | 6   |
+-----+-----+-----+-----+-----+-----+-----+-----+
)

## 重複行

In [56]:
// Small frame whose last two rows are identical; the duplicate-row examples below use it.
let mut dup_df = df!("A" => &[1, 2, 2], "B" => &[1, 2, 2]).unwrap();
dup_df

shape: (3, 2)
+-----+-----+
| A   | B   |
| --- | --- |
| i32 | i32 |
+=====+=====+
| 1   | 1   |
+-----+-----+
| 2   | 2   |
+-----+-----+
| 2   | 2   |
+-----+-----+


### 重複行の抽出

In [59]:
// `is_duplicated` gives a per-row mask; filtering by it keeps every row that
// has a duplicate (both copies of the (2, 2) row appear in the output).
dup_df.filter(&dup_df.is_duplicated().unwrap())

Ok(shape: (2, 2)
+-----+-----+
| A   | B   |
| --- | --- |
| i32 | i32 |
+=====+=====+
| 2   | 2   |
+-----+-----+
| 2   | 2   |
+-----+-----+
)

### 重複行の削除

In [60]:
// Remove repeated rows, keeping a single copy of each (see the output).
// NOTE(review): the arguments are presumably `maintain_order` and an optional
// subset of columns to compare — confirm against the polars 0.14 docs.
dup_df.drop_duplicates(true, None)

Ok(shape: (2, 2)
+-----+-----+
| A   | B   |
| --- | --- |
| i32 | i32 |
+=====+=====+
| 1   | 1   |
+-----+-----+
| 2   | 2   |
+-----+-----+
)

## ndarrayへの変換
`to_ndarray` には、変換先の要素型(例: `Float64Type`)を型パラメータとして指定する必要がある。

In [46]:
// Convert a DataFrame into a 2-D ndarray. The target element type is given as a
// type parameter; with `Float64Type` the integer columns come out as f64
// (1.0, 2.0, ... in the output).
let arr = df!("A" => &[1, 2, 3], "B" => &[2, 3, 4]).unwrap();
arr.to_ndarray::<Float64Type>()

Ok([[1.0, 2.0],
 [2.0, 3.0],
 [3.0, 4.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2)

In [52]:
// List every column as a Field, i.e. its name together with its data type.
df.fields()

[Field { name: "A", data_type: Utf8 }, Field { name: "B", data_type: Int32 }, Field { name: "C", data_type: Int32 }, Field { name: "D", data_type: Int32 }, Field { name: "E", data_type: Int32 }, Field { name: "F", data_type: Int32 }, Field { name: "G", data_type: Int32 }, Field { name: "H", data_type: Int32 }]

In [31]:
// Raw date strings and the format used to parse them.
let dates = &[
    "2020-08-21",
    "2020-08-21",
    "2020-08-22",
    "2020-08-23",
    "2020-08-22",
];
let fmt = "%Y-%m-%d";

// Assemble the frame column by column: parsed dates, temperatures, rainfall.
// Note that this `df` shadows the frame used in the earlier cells.
let df = DataFrame::new(vec![
    Date32Chunked::parse_from_str_slice("date", dates, fmt).into_series(),
    Series::new("temp", [20, 10, 7, 9, 1].as_ref()),
    Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref()),
])
.unwrap();
println!("{:?}", df);

shape: (5, 3)
+--------------+------+------+
| date         | temp | rain |
| ---          | ---  | ---  |
| date32(days) | i32  | f64  |
+==============+======+======+
| 2020-08-21   | 20   | 0.2  |
+--------------+------+------+
| 2020-08-21   | 10   | 0.1  |
+--------------+------+------+
| 2020-08-22   | 7    | 0.3  |
+--------------+------+------+
| 2020-08-23   | 9    | 0.1  |
+--------------+------+------+
| 2020-08-22   | 1    | 0.01 |
+--------------+------+------+



In [34]:
// NOTE: this cell fails — the compiler reports that `GroupBy` has no `pivot`
// method (see the error below).
// NOTE(review): presumably `pivot` sits behind a cargo feature that the `:dep`
// line at the top does not enable, and/or has a different signature in this
// polars version — confirm against the polars 0.14 docs.
df.groupby("date").unwrap().pivot(&["temp", "rain"]).mean()

Error: no method named `pivot` found for struct `polars::frame::groupby::GroupBy` in the current scope