In [6]:
import polars as pl
from sklearn.datasets import load_iris

# Load the iris dataset
iris_raw = load_iris(as_frame=True)

# Convert to Polars DataFrame
df = pl.read_csv("iris.csv")
print(df.head(10))

shape: (10, 5)
┌──────────────┬─────────────┬──────────────┬─────────────┬─────────┐
│ sepal_length ┆ sepal_width ┆ petal_length ┆ petal_width ┆ species │
│ ---          ┆ ---         ┆ ---          ┆ ---         ┆ ---     │
│ f64          ┆ f64         ┆ f64          ┆ f64         ┆ str     │
╞══════════════╪═════════════╪══════════════╪═════════════╪═════════╡
│ 5.1          ┆ 3.5         ┆ 1.4          ┆ 0.2         ┆ setosa  │
│ 4.9          ┆ 3.0         ┆ 1.4          ┆ 0.2         ┆ setosa  │
│ 4.7          ┆ 3.2         ┆ 1.3          ┆ 0.2         ┆ setosa  │
│ 4.6          ┆ 3.1         ┆ 1.5          ┆ 0.2         ┆ setosa  │
│ 5.0          ┆ 3.6         ┆ 1.4          ┆ 0.2         ┆ setosa  │
│ 5.4          ┆ 3.9         ┆ 1.7          ┆ 0.4         ┆ setosa  │
│ 4.6          ┆ 3.4         ┆ 1.4          ┆ 0.3         ┆ setosa  │
│ 5.0          ┆ 3.4         ┆ 1.5          ┆ 0.2         ┆ setosa  │
│ 4.4          ┆ 2.9         ┆ 1.4          ┆ 0.2         ┆ setosa  │
│ 4.9

In [13]:
species_map = {"setosa" : 0, "versicolor" :1 , "virginica":3}
df = df.with_columns([
    df["species"].replace(species_map).alias("target")
])
print(df.head(10))

TypeError: cannot parse input of type 'str' into Polars data type (given: 'int8')

In [12]:
import altair as alt
import pandas as pd



# Create Altair chart
chart = alt.Chart(df).mark_point().encode(
    x='petal_length',
    y='petal_width',
    color='species',
    tooltip=['species', 'petal_length', 'petal_width']
).properties(
    width=600,
    height=400,
    title='Iris_Flower_Petal_Measurements'
).interactive()

chart.show()

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression

# Select only petal measurements and cast target to integer
X = df.select(["petal_length", "petal_width"])
y = df["target"].cast(int)

X_train, X_test, y_train, y_test = train_test_split(X, y.to_numpy(), test_size=0.5, random_state=42)

# Create preprocessing and model pipeline
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("classifier", LogisticRegression(max_iter=200))
])

# Fit the pipeline
pipeline.fit(X_train, y_train)



0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,200


In [15]:
from sklearn.metrics import classification_report, confusion_matrix

# Predictions
train_pred = pipeline.predict(X_train)
test_pred = pipeline.predict(X_test)

print("Training Performance:\n")
print(classification_report(y_train, train_pred))
print(confusion_matrix(y_train, train_pred))

print("\nTesting Performance:\n")
print(classification_report(y_test, test_pred))
print(confusion_matrix(y_test, test_pred))

Training Performance:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        21
           1       0.89      0.89      0.89        27
           3       0.89      0.89      0.89        27

    accuracy                           0.92        75
   macro avg       0.93      0.93      0.93        75
weighted avg       0.92      0.92      0.92        75

[[21  0  0]
 [ 0 24  3]
 [ 0  3 24]]

Testing Performance:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       0.96      1.00      0.98        23
           3       1.00      0.96      0.98        23

    accuracy                           0.99        75
   macro avg       0.99      0.99      0.99        75
weighted avg       0.99      0.99      0.99        75

[[29  0  0]
 [ 0 23  0]
 [ 0  1 22]]
