# <span style="color:yellow;">Polars</span>

In [1]:
import polars as pl
from polars import col as c

In [18]:
# Read the Pokémon CSV with explicit dtypes, drop the "#" column, tidy the
# column names, and re-type Generation as an Enum.
pl_pokemon = (
    pl.read_csv(
        source="data/pokemon.csv",
        schema_overrides={
            "Type 1": pl.Categorical,
            "Type 2": pl.Categorical,
            "Generation": pl.Categorical,
            "Legendary": pl.Boolean,
        },
    )
    .drop("#")
    # Trim each header, turn spaces into underscores and remove dots,
    # e.g. "Type 1" -> "Type_1".
    .rename(
        lambda header: header.strip().replace(" ", "_").replace(".", "")
    )
    # Categorical -> string -> Enum, with the six generations listed in
    # ascending order so that comparisons such as Generation > "4" follow
    # generation order.
    .with_columns(
        Generation=(
            c("Generation")
            .cast(pl.Utf8)
            .cast(pl.Enum(["1", "2", "3", "4", "5", "6"]))
        )
    )
)

In [16]:
# Preview the first five rows of the cleaned frame.
pl_pokemon.head(n=5)

Name,Type_1,Type_2,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,Legendary
str,cat,cat,i64,i64,i64,i64,i64,i64,i64,enum,bool
"""Bulbasaur""","""Grass""","""Poison""",318,45,49,49,65,65,45,"""1""",False
"""Ivysaur""","""Grass""","""Poison""",405,60,62,63,80,80,60,"""1""",False
"""Venusaur""","""Grass""","""Poison""",525,80,82,83,100,100,80,"""1""",False
"""VenusaurMega Venusaur""","""Grass""","""Poison""",625,80,100,123,122,120,80,"""1""",False
"""Charmander""","""Fire""",,309,39,52,43,60,50,65,"""1""",False


In [24]:
# Keep rows where Type_1 is 'Fire' AND Generation is '1',
# then show only the identifying columns.
(
    pl_pokemon
    .filter(
        (c("Type_1") == "Fire")
        & (c("Generation") == "1")
    )
    .select("Name", "Type_1", "Type_2", "Generation")
)

Name,Type_1,Type_2,Generation
str,cat,cat,enum
"""Charmander""","""Fire""",,"""1"""
"""Charmeleon""","""Fire""",,"""1"""
"""Charizard""","""Fire""","""Flying""","""1"""
"""CharizardMega Charizard X""","""Fire""","""Dragon""","""1"""
"""CharizardMega Charizard Y""","""Fire""","""Flying""","""1"""
…,…,…,…
"""Ponyta""","""Fire""",,"""1"""
"""Rapidash""","""Fire""",,"""1"""
"""Magmar""","""Fire""",,"""1"""
"""Flareon""","""Fire""",,"""1"""


In [25]:
# Keep rows where HP is below 50 OR above 100
# (i.e. everything outside the closed range 50..100).
(
    pl_pokemon
    .filter(
        (c("HP") < 50)
        | (c("HP") > 100)
    )
    .select("Name", "Type_1", "Type_2", "HP", "Generation", "Legendary")
)

Name,Type_1,Type_2,HP,Generation,Legendary
str,cat,cat,i64,enum,bool
"""Bulbasaur""","""Grass""","""Poison""",45,"""1""",false
"""Charmander""","""Fire""",,39,"""1""",false
"""Squirtle""","""Water""",,44,"""1""",false
"""Caterpie""","""Bug""",,45,"""1""",false
"""Weedle""","""Bug""","""Poison""",40,"""1""",false
…,…,…,…,…,…
"""PumpkabooSmall Size""","""Ghost""","""Grass""",44,"""6""",false
"""Noibat""","""Flying""","""Dragon""",40,"""6""",false
"""Xerneas""","""Fairy""",,126,"""6""",true
"""Yveltal""","""Dark""","""Flying""",126,"""6""",true


In [31]:
# (Type_1 equal to 'Fire' OR Type_1 equal to 'Water') AND Generation greater than '4'
(
    pl_pokemon
    .filter(
        # The OR group needs its own parentheses: & binds more tightly than |,
        # so without them the Generation test would attach only to the
        # 'Water' test and every 'Fire' row would pass regardless of generation.
        (
            (c("Type_1") == "Fire")
            | (c("Type_1") == "Water")
        )
        & (c("Generation") > "4")
    )
)

Name,Type_1,Type_2,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,Legendary
str,cat,cat,i64,i64,i64,i64,i64,i64,i64,enum,bool
"""Tepig""","""Fire""",,308,65,63,45,45,45,45,"""5""",false
"""Pignite""","""Fire""","""Fighting""",418,90,93,55,70,55,55,"""5""",false
"""Emboar""","""Fire""","""Fighting""",528,110,123,65,100,65,65,"""5""",false
"""Oshawott""","""Water""",,308,55,55,45,63,45,45,"""5""",false
"""Dewott""","""Water""",,413,75,75,60,83,60,60,"""5""",false
…,…,…,…,…,…,…,…,…,…,…,…
"""Litleo""","""Fire""","""Normal""",369,62,50,58,73,54,72,"""6""",false
"""Pyroar""","""Fire""","""Normal""",507,86,68,72,109,66,106,"""6""",false
"""Clauncher""","""Water""",,330,50,53,62,58,63,44,"""6""",false
"""Clawitzer""","""Water""",,500,71,73,88,120,89,59,"""6""",false


# <span style="color:yellow;">TidyPolars4sci</span>

In [1]:
import tidypolars4sci as tp
from tidypolars4sci import col as c

In [3]:
# Load the comma-separated baseball roster into a tidypolars4sci table.
tp_baseball = tp.read_data(
    fn="data/baseball.csv",
    sep=",",
)

# Preview the first five rows.
tp_baseball.head(5)

Loading data 'baseball.csv'... done!


Name,Team,Position,Height,Weight,Age,PosCategory
str,str,str,i64,i64,f64,str
"""Adam_Donachie""","""BAL""","""Catcher""",74,180,22.99,"""Catcher"""
"""Paul_Bako""","""BAL""","""Catcher""",74,215,34.69,"""Catcher"""
"""Ramon_Hernandez""","""BAL""","""Catcher""",72,210,30.78,"""Catcher"""
"""Kevin_Millar""","""BAL""","""First_Baseman""",72,210,35.43,"""Infielder"""
"""Chris_Gomez""","""BAL""","""First_Baseman""",73,188,35.71,"""Infielder"""


In [6]:
# Convert Height and Weight to metric units and derive BMI (kg / m^2).
# The keyword order matters: the displayed result (e.g. 81.0 / 1.8796**2 ≈ 22.93)
# shows that BMI is computed from the already-converted Height and Weight.
(
    tp_baseball
    .mutate(
        # inches -> metres
        Height=c("Height") * 0.0254,
        # pounds -> kilograms
        # NOTE(review): 0.45 is a rounded factor (1 lb = 0.45359237 kg exactly);
        # switch to the exact value if ~0.8% error in Weight/BMI matters.
        Weight=c("Weight") * 0.45,
        BMI=c("Weight") / c("Height") ** 2,
    )
    .head(6)
)

Name,Team,Position,Height,Weight,Age,PosCategory,BMI
str,str,str,f64,f64,f64,str,f64
"""Adam_Donachie""","""BAL""","""Catcher""",1.8796,81.0,22.99,"""Catcher""",22.927365
"""Paul_Bako""","""BAL""","""Catcher""",1.8796,96.75,34.69,"""Catcher""",27.385464
"""Ramon_Hernandez""","""BAL""","""Catcher""",1.8288,94.5,30.78,"""Catcher""",28.255265
"""Kevin_Millar""","""BAL""","""First_Baseman""",1.8288,94.5,35.43,"""Infielder""",28.255265
"""Chris_Gomez""","""BAL""","""First_Baseman""",1.8542,84.6,35.71,"""Infielder""",24.606917
"""Brian_Roberts""","""BAL""","""Second_Baseman""",1.7526,79.2,29.39,"""Infielder""",25.784551


In [8]:
# Per-team roster size and average Height, Weight and Age
# (Height and Weight are still in the original inches / pounds here).
(
    tp_baseball
    .group_by("Team")
    .summarize(
        count=tp.n(),
        avg_Height=c("Height").mean(),
        avg_Weight=c("Weight").mean(),
        avg_Age=c("Age").mean(),
    )
)

Team,count,avg_Height,avg_Weight,avg_Age
str,u32,f64,f64,f64
"""BAL""",34,73.529412,196.323529,29.034706
"""CWS""",31,74.580645,209.935484,28.077419
"""ANA""",35,73.342857,201.085714,28.808857
"""BOS""",35,74.171429,204.114286,29.741143
"""CLE""",34,74.058824,200.529412,28.319706
…,…,…,…,…
"""SD""",32,73.5,203.875,29.841563
"""WAS""",36,74.138889,199.75,26.939444
"""PIT""",35,73.6,204.371429,27.194857
"""SF""",34,73.558824,202.794118,29.929706


In [9]:
# Same summary as the per-team one, but broken down further by Position:
# one row per (Team, Position) pair.
(
    tp_baseball
    .group_by(["Team", "Position"])
    .summarize(
        count=tp.n(),
        avg_Height=c("Height").mean(),
        avg_Weight=c("Weight").mean(),
        avg_Age=c("Age").mean(),
    )
)

Team,Position,count,avg_Height,avg_Weight,avg_Age
str,str,u32,f64,f64,f64
"""BAL""","""Catcher""",3,73.333333,201.666667,29.486667
"""BAL""","""First_Baseman""",2,72.5,199.0,35.57
"""BAL""","""Second_Baseman""",1,69.0,176.0,29.39
"""BAL""","""Shortstop""",1,69.0,209.0,30.77
"""BAL""","""Third_Baseman""",2,73.5,215.5,32.63
…,…,…,…,…,…
"""STL""","""Shortstop""",1,67.0,165.0,32.11
"""STL""","""Third_Baseman""",2,75.0,230.0,33.175
"""STL""","""Outfielder""",6,72.833333,196.666667,31.22
"""STL""","""Starting_Pitcher""",6,76.5,206.333333,29.355
