# Iris Dataset

This example shows the intended usage of `algove`: you build up the workflow
incrementally, one cached step at a time, while you're still figuring out what you need to do.

In [1]:
from algove import init_logger
# Create a logger with algove's helper and emit a test message to confirm
# that INFO-level output is visible in the notebook.
logger = init_logger()
logger.info("Hello, algove!")

INFO:root:Hello, algove!


## Setup

In [2]:
from algove import LocalFS, Cache, download, truncated_print

# Storage object handed to the cache.
fs = LocalFS()
# `cache("<name>")` is applied as a decorator to every step below.
# NOTE(review): the step outputs shown in this notebook are cut to 10 lines,
# presumably what truncated_print(10) controls — confirm.
cache = Cache(fs, display=truncated_print(10))

## Download Data

In [3]:
@cache("iris_description")
def get_names():
    """Download the Iris dataset description file (``iris.names``) from the UCI archive."""
    names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.names"
    return download(names_url)

# In the recorded run this is the first call for "iris_description":
# the log below shows COMPUTING followed by SAVING.
description = get_names()

INFO:algove.cache:Status.COMPUTING: iris_description
INFO:algove.repository:Status.SAVING iris_description -> /app/docs/algove_data/iris_description.txt


###### iris_description ######

1. Title: Iris Plants Database
	Updated Sept 21 by C.Blake - Added discrepency information

2. Sources:
     (a) Creator: R.A. Fisher
     (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
     (c) Date: July, 1988

3. Past Usage:
   - Publications: too many to mention!!!  Here are a few.

#### end iris_description ####



In [4]:
@cache("iris_data")
def get_data():
    """Download the raw Iris measurements file (``iris.data``) from the UCI archive."""
    data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    return download(data_url)

# Fetch the raw comma-separated text; it is passed to parse_data() further down.
data = get_data()

INFO:algove.cache:Status.COMPUTING: iris_data
INFO:algove.repository:Status.SAVING iris_data -> /app/docs/algove_data/iris_data.txt


###### iris_data ######

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa

#### end iris_data ####



In [5]:
from io import StringIO
import polars as pl

@cache("parsed_data")
def parse_data(data: str) -> pl.DataFrame:
    """Parse the header-less Iris CSV text into a DataFrame with named columns.

    Args:
        data: The complete CSV contents as a single string (no header row).

    Returns:
        The parsed frame with its columns renamed to the four measurement
        names plus ``class``.
    """
    column_names = [
        "sepal_len_cm",
        "sepal_width_cm",
        "petal_length_cm",
        "petal_width_cm",
        "class",
    ]
    # TODO: it shouldn't be necessary here to read string into memory
    frame = pl.read_csv(StringIO(data), has_header=False)
    # Assigning to .columns renames every column in one step.
    frame.columns = column_names
    return frame

# Parse the downloaded text; per the log below the resulting frame is saved as parquet.
parsed = parse_data(data)

INFO:algove.cache:Status.COMPUTING: parsed_data
INFO:algove.repository:Status.SAVING parsed_data -> /app/docs/algove_data/parsed_data.parquet


###### parsed_data ######

shape: (150, 5)
┌──────────────┬────────────────┬─────────────────┬────────────────┬────────────────┐
│ sepal_len_cm ┆ sepal_width_cm ┆ petal_length_cm ┆ petal_width_cm ┆ class          │
│ ---          ┆ ---            ┆ ---             ┆ ---            ┆ ---            │
│ f64          ┆ f64            ┆ f64             ┆ f64            ┆ str            │
╞══════════════╪════════════════╪═════════════════╪════════════════╪════════════════╡
│ 5.1          ┆ 3.5            ┆ 1.4             ┆ 0.2            ┆ Iris-setosa    │
│ 4.9          ┆ 3.0            ┆ 1.4             ┆ 0.2            ┆ Iris-setosa    │
│ 4.7          ┆ 3.2            ┆ 1.3             ┆ 0.2            ┆ Iris-setosa    │
│ 4.6          ┆ 3.1            ┆ 1.5             ┆ 0.2            ┆ Iris-setosa    │

#### end parsed_data ####



## Prepare training set



In [6]:
from algove.transformers import FitLabelEncoder

@cache("train_set")
def make_train(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with the ``class`` column encoded into ``class_encoded``.

    The label encoder is fitted on ``df`` and then applied to that same frame.
    """
    label_encoder = FitLabelEncoder(column="class", output_col="class_encoded")
    fitted = label_encoder.fit(df)
    return fitted.transform(df)

# Build the training frame from the parsed data (adds the `class_encoded` column).
train_set = make_train(parsed)

INFO:algove.cache:Status.COMPUTING: train_set
INFO:algove.repository:Status.SAVING train_set -> /app/docs/algove_data/train_set.parquet


###### train_set ######

shape: (150, 6)
┌──────────────┬────────────────┬────────────────┬────────────────┬────────────────┬───────────────┐
│ sepal_len_cm ┆ sepal_width_cm ┆ petal_length_c ┆ petal_width_cm ┆ class          ┆ class_encoded │
│ ---          ┆ ---            ┆ m              ┆ ---            ┆ ---            ┆ ---           │
│ f64          ┆ f64            ┆ ---            ┆ f64            ┆ str            ┆ i64           │
│              ┆                ┆ f64            ┆                ┆                ┆               │
╞══════════════╪════════════════╪════════════════╪════════════════╪════════════════╪═══════════════╡
│ 5.1          ┆ 3.5            ┆ 1.4            ┆ 0.2            ┆ Iris-setosa    ┆ 2             │
│ 4.9          ┆ 3.0            ┆ 1.4            ┆ 0.2            ┆ Iris-setosa    ┆ 2             │
│ 4.7          ┆ 3.2            ┆ 1.3            ┆ 0.2            ┆ Iris-setosa    ┆ 2             │

#### end train_set ####



## Fit and Run model

In [7]:
from algove import Pipeline
from algove.transformers import NumpyForcer
from sklearn.linear_model import LogisticRegression


@cache("predictions")
def make_predictions(df: pl.DataFrame) -> pl.DataFrame:
    """Fit a logistic regression on ``df`` and append its predictions for the same rows.

    Args:
        df: Frame containing the four measurement columns and ``class_encoded``.

    Returns:
        ``df`` with an additional ``predictions`` column.
    """
    feature_cols = ["sepal_len_cm", "sepal_width_cm", "petal_length_cm", "petal_width_cm"]
    arrays = NumpyForcer(feature_cols, y_col="class_encoded").transform(df)

    classifier = LogisticRegression()
    classifier.fit(arrays.X, arrays.y)
    # Predictions are made on the very rows used for fitting; there is no held-out split.
    predicted = classifier.predict(arrays.X)

    return df.with_columns(pl.Series("predictions", predicted))


# Fit and predict on the full training frame; the result goes through the "predictions" cache entry.
preds = make_predictions(train_set)

INFO:algove.cache:Status.COMPUTING: predictions
INFO:algove.repository:Status.SAVING predictions -> /app/docs/algove_data/predictions.parquet


###### predictions ######

shape: (150, 7)
┌────────────┬────────────┬────────────┬───────────────┬──────────────┬──────────────┬─────────────┐
│ sepal_len_ ┆ sepal_widt ┆ petal_leng ┆ petal_width_c ┆ class        ┆ class_encode ┆ predictions │
│ cm         ┆ h_cm       ┆ th_cm      ┆ m             ┆ ---          ┆ d            ┆ ---         │
│ ---        ┆ ---        ┆ ---        ┆ ---           ┆ str          ┆ ---          ┆ f64         │
│ f64        ┆ f64        ┆ f64        ┆ f64           ┆              ┆ i64          ┆             │
╞════════════╪════════════╪════════════╪═══════════════╪══════════════╪══════════════╪═════════════╡
│ 5.1        ┆ 3.5        ┆ 1.4        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │
│ 4.9        ┆ 3.0        ┆ 1.4        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │
│ 4.7        ┆ 3.2        ┆ 1.3        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │

#### end predictions ####



## Assembling the pipeline

In [8]:
# Chain the cached step functions into a single named pipeline using `|`.
pipeline = Pipeline("iris-demo") | get_names | get_data | parse_data | make_train | make_predictions
# Run the pipeline. In the recorded run every step is a CACHE_HIT and is loaded
# from disk rather than recomputed (see the log below).
pipeline_preds = pipeline()

INFO:algove.cache:Status.CACHE_HIT: iris_description
INFO:algove.repository:Status.LOADING: iris_description <-- /app/docs/algove_data/iris_description.txt
INFO:algove.cache:Status.CACHE_HIT: iris_data
INFO:algove.repository:Status.LOADING: iris_data <-- /app/docs/algove_data/iris_data.txt
INFO:algove.cache:Status.CACHE_HIT: parsed_data
INFO:algove.repository:Status.LOADING: parsed_data <-- /app/docs/algove_data/parsed_data.parquet
INFO:algove.cache:Status.CACHE_HIT: train_set
INFO:algove.repository:Status.LOADING: train_set <-- /app/docs/algove_data/train_set.parquet
INFO:algove.cache:Status.CACHE_HIT: predictions
INFO:algove.repository:Status.LOADING: predictions <-- /app/docs/algove_data/predictions.parquet


###### iris_description ######

1. Title: Iris Plants Database
	Updated Sept 21 by C.Blake - Added discrepency information

2. Sources:
     (a) Creator: R.A. Fisher
     (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
     (c) Date: July, 1988

3. Past Usage:
   - Publications: too many to mention!!!  Here are a few.

#### end iris_description ####

###### iris_data ######

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa

#### end iris_data ####

###### parsed_data ######

shape: (150, 5)
┌──────────────┬────────────────┬─────────────────┬────────────────┬────────────────┐
│ sepal_len_cm ┆ sepal_width_cm ┆ petal_length_cm ┆ petal_width_cm ┆ class          │
│ ---          ┆ ---            ┆ ---             ┆ ---            ┆ ---            │
│ f6

In [9]:
# NOTE(review): this prints `preds` from the "Fit and Run model" cell, not the
# `pipeline_preds` returned by the pipeline call — confirm which was intended.
print(preds)

shape: (150, 7)
┌────────────┬────────────┬────────────┬───────────────┬──────────────┬──────────────┬─────────────┐
│ sepal_len_ ┆ sepal_widt ┆ petal_leng ┆ petal_width_c ┆ class        ┆ class_encode ┆ predictions │
│ cm         ┆ h_cm       ┆ th_cm      ┆ m             ┆ ---          ┆ d            ┆ ---         │
│ ---        ┆ ---        ┆ ---        ┆ ---           ┆ str          ┆ ---          ┆ f64         │
│ f64        ┆ f64        ┆ f64        ┆ f64           ┆              ┆ i64          ┆             │
╞════════════╪════════════╪════════════╪═══════════════╪══════════════╪══════════════╪═════════════╡
│ 5.1        ┆ 3.5        ┆ 1.4        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │
│ 4.9        ┆ 3.0        ┆ 1.4        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │
│ 4.7        ┆ 3.2        ┆ 1.3        ┆ 0.2           ┆ Iris-setosa  ┆ 2            ┆ 2.0         │
│ 4.6        ┆ 3.1        ┆ 1.5        ┆ 0.2           ┆ Iris-setosa  ┆ 2  