<a href="https://colab.research.google.com/github/anajikadam/Polars-alternative_Pandas/blob/main/Polars_alternative_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# The Super Fast Dataframe Library for Python: Polars

https://github.com/pola-rs/polars

In [8]:
# !pip install polar
!pip install polars

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting polars
  Downloading polars-0.16.9-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: polars
Successfully installed polars-0.16.9


Here are some reasons why you should choose polars.

- It uses all available cores on your computer.
- It optimizes queries to reduce unneeded work/memory allocations.
- It handles datasets larger than your available RAM.
- It has a strict schema (data types should be known before running the query).

In [1]:
import pandas as pd
import polars as pl

In [3]:
df = pl.read_csv("StudentsPerformance.csv")

[Docs](https://pola-rs.github.io/polars-book/user-guide/introduction.html)

In [4]:
type(df)

polars.internals.dataframe.frame.DataFrame

In [5]:
pandas_df = df.to_pandas()
type(pandas_df)

pandas.core.frame.DataFrame

In [12]:
df.head()

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
i64,str,str,str,str,str,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75


In [13]:
# list of columns
df.columns

['id',
 'gender',
 'race/ethnicity',
 'parental level of education',
 'lunch',
 'test preparation course',
 'math score',
 'reading score',
 'writing score']

In [14]:
# Select 1 column
df.select(pl.col('gender'))

gender
str
"""female"""
"""female"""
"""female"""
"""male"""
"""male"""
"""female"""
"""female"""
"""male"""
"""male"""
"""female"""


In [15]:
# Select 2+ columns
df.select(pl.col(['gender', 'math score']))

gender,math score
str,i64
"""female""",72
"""female""",69
"""female""",90
"""male""",47
"""male""",76
"""female""",71
"""female""",88
"""male""",40
"""male""",64
"""female""",38


In [16]:
# Select all columns
df.select(pl.col('*'))

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
i64,str,str,str,str,str,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92
8,"""male""","""group B""","""some college""","""free/reduced""","""none""",40,43,39
9,"""male""","""group D""","""high school""","""free/reduced""","""completed""",64,64,67
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50


In [17]:
# polars: create "sum" column
df.with_columns(
    (pl.col('math score') + pl.col('reading score')).alias("sum")
)

# pandas: df['sum'] = df['math score'] + df['reading score']

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,sum
i64,str,str,str,str,str,i64,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74,144
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88,159
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93,185
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44,104
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75,154
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78,154
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92,183
8,"""male""","""group B""","""some college""","""free/reduced""","""none""",40,43,39,83
9,"""male""","""group D""","""high school""","""free/reduced""","""completed""",64,64,67,128
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50,98


In [18]:
# polars: create "average" column
df.with_columns(
    pl.col(['math score', 'reading score', 'writing score']).mean().alias('average')
)

# pandas: df['average'] = df[['math score', 'reading score', 'writing score']].mean(axis=1)

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,average
i64,str,str,str,str,str,i64,i64,i64,f64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74,68.054
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88,68.054
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93,68.054
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44,68.054
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75,68.054
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78,68.054
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92,68.054
8,"""male""","""group B""","""some college""","""free/reduced""","""none""",40,43,39,68.054
9,"""male""","""group D""","""high school""","""free/reduced""","""completed""",64,64,67,68.054
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50,68.054


In [19]:
# polars: simple filtering
df.filter(pl.col('gender')=='female')

# pandas: df[df['gender'] == 'female']

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
i64,str,str,str,str,str,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50
13,"""female""","""group B""","""high school""","""standard""","""none""",65,81,73
15,"""female""","""group A""","""master's degre...","""standard""","""none""",50,53,58
16,"""female""","""group C""","""some high scho...","""standard""","""none""",69,75,78
18,"""female""","""group B""","""some high scho...","""free/reduced""","""none""",18,32,28


In [20]:
# Multiple filtering 
df.filter(
    (pl.col('gender')=='female') &
    (pl.col('race/ethnicity')=='group B')
)

# pandas: df[(df['gender'] == 'female') & (df['race/ethnicity'] == 'group B')]

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
i64,str,str,str,str,str,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50
13,"""female""","""group B""","""high school""","""standard""","""none""",65,81,73
18,"""female""","""group B""","""some high scho...","""free/reduced""","""none""",18,32,28
22,"""female""","""group B""","""some college""","""free/reduced""","""completed""",65,75,70
32,"""female""","""group B""","""some college""","""standard""","""none""",63,65,61
43,"""female""","""group B""","""associate's de...","""standard""","""none""",53,58,65


In [21]:
# Group by
df.groupby("race/ethnicity").count()

race/ethnicity,count
str,u32
"""group A""",89
"""group B""",190
"""group C""",319
"""group D""",262
"""group E""",140


In [22]:
df2 = pl.read_csv("LanguageScore.csv")

In [23]:
df.head()

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
i64,str,str,str,str,str,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75


In [24]:
df2.head()

id,language score
i64,i64
1,74
2,67
3,34
4,33
5,75


In [25]:
# Join dataframes
df.join(df2, on='id')

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,language score
i64,str,str,str,str,str,i64,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88,67
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93,34
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44,33
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75,75
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78,51
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92,95
8,"""male""","""group B""","""some college""","""free/reduced""","""none""",40,43,39,92
9,"""male""","""group D""","""high school""","""free/reduced""","""completed""",64,64,67,56
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50,60


In [None]:
# Inner, left and outer join
df.join(df2, on='id', how='inner')
df.join(df2, on='id', how='left')
df.join(df2, on='id', how='outer')

In [26]:
# Concatenate dataframes
pl.concat([df, df2], how="horizontal")

DuplicateError: ignored

In [27]:
# drop column "id" in df2
df2 = df2.drop("id")

# Concatenate dataframes
pl.concat([df, df2], how="horizontal")

id,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,language score
i64,str,str,str,str,str,i64,i64,i64,i64
1,"""female""","""group B""","""bachelor's deg...","""standard""","""none""",72,72,74,74
2,"""female""","""group C""","""some college""","""standard""","""completed""",69,90,88,67
3,"""female""","""group B""","""master's degre...","""standard""","""none""",90,95,93,34
4,"""male""","""group A""","""associate's de...","""free/reduced""","""none""",47,57,44,33
5,"""male""","""group C""","""some college""","""standard""","""none""",76,78,75,75
6,"""female""","""group B""","""associate's de...","""standard""","""none""",71,83,78,51
7,"""female""","""group B""","""some college""","""standard""","""completed""",88,95,92,95
8,"""male""","""group B""","""some college""","""free/reduced""","""none""",40,43,39,92
9,"""male""","""group D""","""high school""","""free/reduced""","""completed""",64,64,67,56
10,"""female""","""group B""","""high school""","""free/reduced""","""none""",38,60,50,60
