# Polars Cheat Sheet

#### Overview:

This notebook contains detailed Polars examples to help you get started with Polars and become proficient in it.

In [73]:
# Import polars
import polars as pl

# Import polars functions
from polars import (
    col, # Allows us to call on columns within a dataframe
    lit, # Allows us to assign static values to columns
    coalesce,
    when
    )


# Import pandas (used for example)
import pandas as pd

### Read in Data
- Show how to convert a Polars DataFrame to pandas and back again

In [None]:
# Load the CSV into a polars DataFrame (fake GOT data generated by OpenAI)
df = pl.read_csv(source='GOT_clean.csv')

# polars -> pandas: a useful bridge, since many libraries only accept pandas objects
df_pandas = df.to_pandas()

# pandas -> polars: turn the pandas frame back into a polars DataFrame
df_polars = pl.from_pandas(data=df_pandas)

### Exploring Data

In [75]:
# Each argument below is rendered as its own output, in the order listed
display(
    # The dataframe itself
    df,
    # (number of rows, number of columns)
    df.shape,
    # First n rows
    df.head(2),
    # Last n rows
    df.tail(2),
    # Column names
    df.columns,
    # Quantitative overview (count, nulls, mean, quartiles, ...)
    df.describe(),
    # Distinct values of a single column
    df.select('BirthPlace').unique(),
    # Distinct values of a single column together with how often each occurs
    df.get_column('BirthPlace').value_counts(),
)

FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,i64,i64
"""Eddard""","""Stark""","""07/25/263""","""Winterfell""","""Winterfell""",5000,800
"""Catelyn""","""Tully""","""08/19/264""","""Riverrun""","""Riverrun""",4500,600
"""Robb""","""Stark""","""04/10/283""","""Winterfell""","""Winterfell""",3500,500
"""Sansa""","""Stark""","""10/12/286""","""Winterfell""","""Winterfell""",3000,400
"""Arya""","""Stark""","""04/26/286""","""Winterfell""","""Winterfell""",2500,0
…,…,…,…,…,…,…
"""Edmure""","""Tully""","""03/24/267""","""Riverrun""","""Riverrun""",4000,600
"""Brynden""","""Tully""","""11/07/250""","""Riverrun""","""Riverrun""",4500,400
"""Lancel""","""Lannister""","""01/23/271""","""King'S Landing""","""Casterly Rock""",3000,500
"""Kevan""","""Lannister""","""05/14/245""","""Casterly Rock""","""Casterly Rock""",10000,1200


(60, 7)

FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,i64,i64
"""Eddard""","""Stark""","""07/25/263""","""Winterfell""","""Winterfell""",5000,800
"""Catelyn""","""Tully""","""08/19/264""","""Riverrun""","""Riverrun""",4500,600


FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,i64,i64
"""Kevan""","""Lannister""","""05/14/245""","""Casterly Rock""","""Casterly Rock""",10000,1200
"""Tom""","""Of Sevens""","""05/27/260""","""The Riverlands""","""Unknown""",1500,100


['FirstName',
 'LastName',
 'DateOfBirth',
 'Residence',
 'BirthPlace',
 'NetWorth',
 'TotalDebts']

statistic,FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,str,f64,f64
"""count""","""60""","""50""","""60""","""60""","""60""",60.0,60.0
"""null_count""","""0""","""10""","""0""","""0""","""0""",0.0,0.0
"""mean""",,,,,,4621.666667,720.0
"""std""",,,,,,3835.078576,869.599557
"""min""","""Alliser""","""Aemon""","""01/13/284""","""Braavos""","""Asshai""",500.0,0.0
"""25%""",,,,,,2000.0,100.0
"""50%""",,,,,,4000.0,500.0
"""75%""",,,,,,6000.0,1000.0
"""max""","""Ygritte""","""Zo Loraq""","""12/18/250""","""Winterfell""","""Winterfell""",20000.0,5000.0


BirthPlace
str
"""Lorath"""
"""Unknown"""
"""Asshai"""
"""Naath"""
"""Dragonstone"""
…
"""Flea Bottom"""
"""Tyrosh"""
"""Westerlands"""
"""Dreadfort"""


BirthPlace,count
str,u32
"""Bear Island""",1
"""Asshai""",1
"""Greywater Watch""",1
"""King'S Landing""",8
"""Flea Bottom""",1
…,…
"""Tyrosh""",1
"""Blackhaven""",1
"""Beyond The Wall""",2
"""Dragonstone""",3


### Sorting

In [76]:
# Single sort key
df_sort1 = df.sort(by='NetWorth')

# Two sort keys: rows that tie on NetWorth are then ordered by FirstName
df_sort2 = df.sort(['NetWorth', 'FirstName'])

# Two sort keys with one direction flag per key:
# NetWorth descending, FirstName ascending
df_sort3 = df.sort('NetWorth', 'FirstName', descending=[True, False])
display(df_sort3.head(2))

FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,i64,i64
"""Tywin""","""Lannister""","""04/01/243""","""Casterly Rock""","""Casterly Rock""",20000,1000
"""Cersei""","""Lannister""","""11/06/266""","""King'S Landing""","""King'S Landing""",15000,2000


### Filtering

In [None]:
# Each example below builds a new dataframe; `df` itself is never modified.

# Keep rows where a column equals a specific value
df_filter1 = df.filter(col('BirthPlace') == 'Casterly Rock')

# Keep rows where a column does NOT equal a specific value
df_filter2 = df.filter(col('BirthPlace') != 'Casterly Rock')

# Keep rows where a column is greater than a specific value
df_filter3 = df.filter(col('NetWorth') > 4000)

# Keep rows where a column is less than a specific value
df_filter4 = df.filter(col('TotalDebts') < 1000)

# Keep rows where a column is less than or equal to a specific value
df_filter5 = df.filter(col('TotalDebts') <= 200)

# Combine multiple conditions with `&`.
# Each comparison must be wrapped in parentheses because `&` binds more
# tightly than `==` / `>=` in Python.
df_filter6 = df.filter((col('BirthPlace') == 'Winterfell') & (col('NetWorth') >= 3000))

# Keep rows where LastName is null
df_filter7 = df.filter(col('LastName').is_null())

# Keep rows where LastName is not null
df_filter8 = df.filter(col('LastName').is_not_null())

# Show the first rows of the multi-condition result so this cell produces output
display(df_filter6.head(2))

FirstName,LastName,DateOfBirth,Residence,BirthPlace,NetWorth,TotalDebts
str,str,str,str,str,i64,i64
"""Eddard""","""Stark""","""07/25/263""","""Winterfell""","""Winterfell""",5000,800
"""Robb""","""Stark""","""04/10/283""","""Winterfell""","""Winterfell""",3500,500


### Joining

In [None]:
# Second dataframe to join against; it is matched to df on the "Residence" column
df_sigils = pl.read_csv(source='GOT_sigils.csv')

# Left join: every row of the left frame (df) is kept, and the df_sigils columns
# are attached wherever "Residence" matches
df_combined_left = df.join(other=df_sigils, how='left', on='Residence')

# Inner join: only rows whose "Residence" has a match in df_sigils are kept
# (rows that would have nulls in the new columns are dropped)
df_combined_inner = df.join(other=df_sigils, how='inner', on='Residence')

### Appending

In [79]:
# Split the rows into two dataframes around a NetWorth threshold of 2000
df_part1 = df.filter(col('NetWorth').ge(2000))
df_part2 = df.filter(col('NetWorth').lt(2000))

# Stack the two dataframes on top of each other (row-wise append)
df_parts_combined = pl.concat([df_part1, df_part2], how='vertical')

### Working with Columns

In [83]:
# Rename columns via a mapping of {old name: new name}
df_col_example = df.rename({'NetWorth': 'AccountBalance', 'TotalDebts': 'Debt'})

# Remove columns
df_col_example = df_col_example.drop('DateOfBirth', 'BirthPlace')

# Add a column computed from existing columns
df_col_example = df_col_example.with_columns(
    NetWorth_New=col('AccountBalance') - col('Debt')
)

# Add a column holding the same static value in every row
df_col_example = df_col_example.with_columns(Fandom=lit('Game of Thrones'))

# Keep only a subset of columns; the order given here becomes the new column order
df_col_example = df_col_example.select(['NetWorth_New', 'FirstName', 'LastName', 'Fandom'])

# Keep a subset of columns and order the rows by one of them in a single chained call
df_col_example = (
    df_col_example
    .select(['NetWorth_New', 'FirstName', 'LastName', 'Fandom'])
    .sort('NetWorth_New')
)

display(df_col_example.head(2))

NetWorth_New,FirstName,LastName,Fandom
i64,str,str,str
500,"""Ygritte""",,"""Game of Thrones"""
500,"""Gilly""",,"""Game of Thrones"""


### Handling Nulls

In [None]:
# Remove every row that has a null in any column
df_dropped = df.drop_nulls()

# Remove only the rows that have a null in one of the listed columns
df_dropped2 = df.drop_nulls(['FirstName', 'LastName'])

# Replace nulls in a column with a fixed value
df_fill_null = df.with_columns(col('LastName').fill_null(value='Unknown'))

### Group Bys

In [None]:
# Total net worth per Residence
df_grouped1 = df.group_by('Residence').agg(
    [col('NetWorth').sum().alias('Total Wealth')]
)

display(df_grouped1)

# Total debt and number of people per Residence
df_grouped2 = df.group_by('Residence').agg(
    [
        col('TotalDebts').sum().alias('Total Debt'),
        col('FirstName').count().alias('Total People'),
    ]
)

display(df_grouped2)

Residence,Total Wealth
str,i64
"""Pyke""",9500
"""Meereen""",23700
"""Winterfell""",21100
"""Greywater Watch""",1200
"""The Riverlands""",6000
…,…
"""Storm'S End""",7000
"""Dreadfort""",6000
"""King'S Landing""",100500
"""Vaes Dothrak""",5000


Residence,Total Debt,Total People
str,i64,u32
"""Riverrun""",1600,3
"""Winterfell""",2800,8
"""Storm'S End""",1500,1
"""Highgarden""",1500,1
"""King'S Landing""",18600,16
…,…,…
"""Vaes Dothrak""",1000,1
"""Braavos""",0,1
"""The Eyrie""",400,1
"""Pyke""",3400,2


### Conditional Formatting

### Chaining

### Other Functions

### Using SQL