# 📌 Introduction to Polars
Polars is a **blazing-fast DataFrame library** written in Rust and designed for **performance, parallelism**, and **memory efficiency**. It provides both eager and lazy APIs, supports Arrow data formats, and is especially suitable for large datasets.

We'll now explore Polars step by step with practical use cases.


In [86]:
# 📦 Install Polars
!pip install polars[all] --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.4/28.4 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-polars-cu12 25.2.2 requires polars<1.22,>=1.20, but you have polars 0.20.0 which is incompatible.[0m[31m
[0m

In [87]:
import polars as pl
print(pl.__version__)

1.21.0


In [2]:
# ✅ Importing the Library
import polars as pl

In [60]:
# ▶️ Create DataFrame
df = pl.DataFrame({
    "col1": [1, 2, 3],
    "col2": ["x", "y", "z"]
})
df

col1,col2
i64,str
1,"""x"""
2,"""y"""
3,"""z"""


📥 Reading Data

In [10]:
from sklearn.datasets import make_regression
import pandas as pd
# Create synthetic finance-related data
# Let's imagine features like 'stock_price_yesterday', 'interest_rate', 'inflation_rate',
# 'volume', 'market_sentiment', and a target 'stock_price_today'.
X, y = make_regression(n_samples=300, n_features=10, random_state=42, n_informative=8, noise=10)

# Convert to pandas DataFrame
column_names = [f'feature_{i+1}' for i in range(10)]
df_finance = pd.DataFrame(X, columns=column_names)
df_finance['target_stock_price'] = y

# Add some columns that might be more finance-specific (even if synthetic)
df_finance['stock_price_yesterday'] = df_finance['feature_1'] * 100 + 500 # Example calculation
df_finance['interest_rate'] = df_finance['feature_2'] * 0.1 + 2.0 # Example calculation
df_finance['inflation_rate'] = df_finance['feature_3'] * 0.05 + 1.0 # Example calculation

# Drop some of the original 'feature' columns to keep it around 10
df_finance = df_finance[['stock_price_yesterday', 'interest_rate', 'inflation_rate',
                         'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8',
                         'feature_9', 'target_stock_price']]

# Save to CSV
csv_file_path = 'finance_data.csv'
df_finance.to_csv(csv_file_path, index=False)
# Save to CSV
csv_file_path = 'finance_data.xlsx'
df_finance.to_excel(csv_file_path, index=False)
#Save to DB
import sqlite3
conn = sqlite3.connect('finance_data.db')
# Save the pandas DataFrame to a SQLite table
df_finance.to_sql('finance_data', conn, if_exists='replace', index=False)
conn.close()
print("DataFrame 'df_finance' successfully saved to 'finance_data.db' as table 'finance_data'.")

print(f"Synthetic finance data saved to {csv_file_path}")
print(df_finance.head())

DataFrame 'df_finance' successfully saved to 'finance_data.db' as table 'finance_data'.
Synthetic finance data saved to finance_data.xlsx
   stock_price_yesterday  interest_rate  inflation_rate  feature_4  feature_5  \
0             526.032184       2.177531        0.987498  -0.670620   1.000582   
1             403.165555       2.139200        0.988768   0.940771  -0.989628   
2             518.142662       1.935023        0.935194  -0.351921  -0.487203   
3             585.243333       2.021646        1.009323   0.633919   2.143944   
4             456.451363       1.987962        1.009525  -0.090533  -0.535328   

   feature_6  feature_7  feature_8  feature_9  target_stock_price  
0  -1.193637   1.392465  -0.646227   0.919154          214.313595  
1   0.918317  -0.982487   0.179894  -1.570501            2.242062  
2  -1.203201  -0.769996   0.874517  -1.042044         -177.174358  
3   0.045572  -2.025143  -0.730367  -0.651600            0.874379  
4  -2.172670   0.331980   1.107081 

In [20]:
#/content/finance_data.csv
# 📁 Read CSV
df_csv = pl.read_csv("/content/finance_data.csv")
df_csv.head()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037


In [21]:
# 📁 Read Excel (requires optional dependency)
df_excel = pl.read_excel("/content/finance_data.xlsx")
df_excel.head()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037


In [17]:
# 🛢️ Read from Database (simulate with SQLite for example)
import sqlite3
conn = sqlite3.connect("/content/finance_data.db")
df_db = pl.read_database("SELECT * FROM finance_data", connection=conn)
df_db.head()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037


💾 Writing Data

In [19]:
# 💾 Write to CSV
df_csv.write_csv("/content/finance_data.csv")

# 💾 Write to Excel
df_excel.write_excel("/content/finance_data.xlsx")

# 💾 Write to Database
# df_db.write_database(table_name="finance_data", connection=conn)

<xlsxwriter.workbook.Workbook at 0x78747dfbe050>

👀 View & Inspect Data

In [23]:
# 🔍 View Top/Bottom Rows
df_csv.head(5)

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037


In [24]:
df_csv.tail(5)

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546
585.277368,1.856172,1.064051,1.080048,1.873298,0.919229,-0.447322,0.11327,-0.668144,-61.832127


In [25]:
# 🧬 Schema Information
df_csv.schema

Schema([('stock_price_yesterday', Float64),
        ('interest_rate', Float64),
        ('inflation_rate', Float64),
        ('feature_4', Float64),
        ('feature_5', Float64),
        ('feature_6', Float64),
        ('feature_7', Float64),
        ('feature_8', Float64),
        ('feature_9', Float64),
        ('target_stock_price', Float64)])

In [26]:
# 📊 Statistical Summary
df_csv.describe()

statistic,stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",514.272218,2.006597,1.000073,0.030754,0.066574,0.001545,-0.060293,0.041553,-0.014469,11.696161
"""std""",97.701424,0.107692,0.047692,1.046048,1.031675,0.959008,1.001786,0.924322,0.961896,168.523969
"""min""",212.773779,1.705961,0.850443,-2.591042,-3.019512,-3.241267,-2.696887,-2.703232,-2.296181,-480.200674
"""25%""",450.809755,1.940061,0.965405,-0.69291,-0.612437,-0.638962,-0.763286,-0.5552,-0.73093,-88.363419
"""50%""",517.041622,2.003526,0.999535,0.040592,-0.003603,-0.006071,-0.033613,0.105376,-0.012089,17.8737
"""75%""",576.041466,2.07846,1.031541,0.693106,0.742095,0.632782,0.577072,0.642723,0.647196,122.218993
"""max""",885.273149,2.298526,1.156887,3.926238,2.644343,2.560085,2.949094,2.526932,2.601683,510.422988


In [27]:
# 📐 Dimensions
df_csv.height, df_csv.width

(300, 10)

In [28]:
# 📋 Column Names
df_csv.columns

['stock_price_yesterday',
 'interest_rate',
 'inflation_rate',
 'feature_4',
 'feature_5',
 'feature_6',
 'feature_7',
 'feature_8',
 'feature_9',
 'target_stock_price']

🧹 Handling Nulls / Missing Data

In [29]:
# ❓ Count Nulls
df_csv.null_count()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0


In [30]:
# ❌ Drop Nulls
df_csv.drop_nulls()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037
…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546


In [31]:
# 🔄 Fill Nulls
df_csv.fill_null(value=0)

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037
…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546


🎯 Selecting Data

In [33]:
# 📌 Select Columns
df_csv["stock_price_yesterday"]
df_csv.select(["feature_4", "feature_5"])

feature_4,feature_5
f64,f64
-0.67062,1.000582
0.940771,-0.989628
-0.351921,-0.487203
0.633919,2.143944
-0.090533,-0.535328
…,…
0.430042,0.381935
-0.513214,0.740824
2.75966,1.06021
0.089581,0.555604


In [34]:
# 🎯 Select Rows
df_csv.slice(1, 2)  # start=1, length=2

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358


In [45]:
#Supportive Code
import numpy as np
from datetime import datetime, timedelta

# Add 'forecast_date' column (random dates within a range)
start_date = datetime.now() - timedelta(days=365) # Start one year ago
end_date = datetime.now() + timedelta(days=365)   # End one year from now

random_dates = [start_date + timedelta(days=np.random.randint(0, (end_date - start_date).days)) for _ in range(df_csv.height)]
df_csv = df_csv.with_columns(
    pl.Series("forecast_date", [date.strftime("%Y-%m-%d") for date in random_dates])
)

# Add 'type' column (randomly 'High' or 'Low')
random_types = np.random.choice(['High', 'Low'], size=df_csv.height)
df_csv = df_csv.with_columns(
    pl.Series("type", random_types.tolist())
)

print(df_csv.head())

shape: (5, 12)
┌────────────┬────────────┬────────────┬───────────┬───┬───────────┬────────────┬───────────┬──────┐
│ stock_pric ┆ interest_r ┆ inflation_ ┆ feature_4 ┆ … ┆ feature_9 ┆ target_sto ┆ forecast_ ┆ type │
│ e_yesterda ┆ ate        ┆ rate       ┆ ---       ┆   ┆ ---       ┆ ck_price   ┆ date      ┆ ---  │
│ y          ┆ ---        ┆ ---        ┆ f64       ┆   ┆ f64       ┆ ---        ┆ ---       ┆ str  │
│ ---        ┆ f64        ┆ f64        ┆           ┆   ┆           ┆ f64        ┆ str       ┆      │
│ f64        ┆            ┆            ┆           ┆   ┆           ┆            ┆           ┆      │
╞════════════╪════════════╪════════════╪═══════════╪═══╪═══════════╪════════════╪═══════════╪══════╡
│ 526.032184 ┆ 2.177531   ┆ 0.987498   ┆ -0.67062  ┆ … ┆ 0.919154  ┆ 214.313595 ┆ 2025-05-1 ┆ Low  │
│            ┆            ┆            ┆           ┆   ┆           ┆            ┆ 7         ┆      │
│ 403.165555 ┆ 2.1392     ┆ 0.988768   ┆ 0.940771  ┆ … ┆ -1.570501 ┆ 2.24206

In [46]:
# 🔍 Filtering
df_csv.filter(pl.col("feature_5") > 0)
df_csv.filter(pl.col("type").str.contains("Low"))

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
472.31867,2.107251,0.94484,-1.966357,-1.044809,-0.364953,2.056207,0.538296,-0.83921,-45.832004,"""2025-04-06""","""Low"""
367.754204,2.096818,1.062234,-0.825411,-1.085151,-0.331308,2.949094,0.570613,-0.612237,26.295497,"""2024-10-25""","""Low"""
551.443883,1.940062,0.992881,0.62285,-0.450065,0.005244,-1.06762,-1.594428,0.046981,-194.557927,"""2025-08-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
466.592277,2.116959,0.99689,0.514255,-1.531108,-0.229391,0.572057,1.047318,-0.043477,143.281369,"""2025-06-26""","""Low"""
450.809755,1.95427,1.010564,-2.246889,0.848431,-0.042823,-0.6067,1.237438,0.058023,17.8737,"""2024-07-12""","""Low"""
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""


🧼 Data Cleaning / Transformation

In [49]:
# 📆 Convert to Datetime
df_date = df_csv.with_columns(pl.col("forecast_date").str.to_date())
# Display the head of the new DataFrame to confirm the change
df_date.head()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,date,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,2025-05-17,"""Low"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,2024-10-12,"""High"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,2025-09-03,"""High"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,2026-05-02,"""High"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,2025-03-30,"""Low"""


In [50]:
# 🔀 Type Conversion Int
df_csv.with_columns(pl.col("stock_price_yesterday").cast(pl.Int16))

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
i16,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
403,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
480,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
480,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""
613,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Low"""


In [51]:
# 🔀 Type Conversion Float
df_csv.with_columns(pl.col("stock_price_yesterday").cast(pl.Float32))

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f32,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032166,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
403.165558,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518.142639,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585.243347,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456.451355,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365021,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
480.897217,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323.092407,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""
613.377014,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Low"""


In [52]:
# 🔠 String Lowercase
df_csv.with_columns(pl.col("type").str.to_lowercase())

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""low"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""high"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""high"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""high"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""low"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""low"""
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""high"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""low"""
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""low"""


In [53]:
# 🚿 Strip Characters
df_csv.with_columns(pl.col("type").str.strip_chars())

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Low"""


In [55]:
# 🔁 Replace Strings
df_csv.with_columns(pl.col("type").str.replace("w", "ww"))

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Loww"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Loww"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Loww"""
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Loww"""
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Loww"""


🆎 Rename & Create Columns

In [57]:
# 🏷️ Rename Columns
df_csv.rename({"type": "TYPE"})

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,TYPE
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Low"""


In [61]:
# ➕ New Column
df.with_columns((pl.col("col1") * 2).alias("col1_double"))

col1,col2,col1_double
i64,str,i64
1,"""x""",2
2,"""y""",4
3,"""z""",6


In [63]:
# 🧠 Apply Function
df.with_columns(
    pl.col("col1").map_elements(lambda x: x + 10).alias("plus_10")
)

Expr.map_elements is significantly slower than the native expressions API.
Only use if you absolutely CANNOT implement your logic otherwise.
Replace this expression...
  - pl.col("col1").map_elements(lambda x: ...)
with this one instead:
  + pl.col("col1") + 10

  pl.col("col1").map_elements(lambda x: x + 10).alias("plus_10")
  df.with_columns(


col1,col2,plus_10
i64,str,i64
1,"""x""",11
2,"""y""",12
3,"""z""",13


🧮 GroupBy & Aggregation

In [64]:
# 📊 Group and Aggregate
df_group = pl.DataFrame({
    "col": ["a", "a", "b", "b"],
    "value": [10, 20, 30, 40]
})
df_group.group_by("col").agg(pl.col("value").mean().alias("mean_val"))

col,mean_val
str,f64
"""a""",15.0
"""b""",35.0


🔗 Join and Concatenate

In [65]:
# 🔗 Join
df1 = pl.DataFrame({"id": [1, 2], "val": ["x", "y"]})
df2 = pl.DataFrame({"id": [1, 2], "score": [100, 200]})
df1.join(df2, on="id", how="inner")

id,val,score
i64,str,i64
1,"""x""",100
2,"""y""",200


In [66]:
# 📎 Concatenate
pl.concat([df1, df1], how="vertical")

id,val
i64,str
1,"""x"""
2,"""y"""
1,"""x"""
2,"""y"""


📊 Pivot and Unpivot

In [71]:
# 🔄 Pivot
df_pivot = pl.DataFrame({
    "city": ["NY", "NY", "LA", "LA"],
    "department": ["HR", "Tech", "HR", "Tech"],
    "salary": [1000, 1500, 1100, 1600]
})
df_pivot.pivot(index="city", columns="department", values="salary", aggregate_function="mean")

  df_pivot.pivot(index="city", columns="department", values="salary", aggregate_function="mean")


city,HR,Tech
str,f64,f64
"""NY""",1000.0,1500.0
"""LA""",1100.0,1600.0


In [73]:
# 🔁 Unpivot
# Changed id_vars to on and value_vars to index to match Polars unpivot syntax
df_pivot.unpivot(on=["city", "department"], index=["salary"])

salary,variable,value
i64,str,str
1000,"""city""","""NY"""
1500,"""city""","""NY"""
1100,"""city""","""LA"""
1600,"""city""","""LA"""
1000,"""department""","""HR"""
1500,"""department""","""Tech"""
1100,"""department""","""HR"""
1600,"""department""","""Tech"""


⏱️ Time-Based Operations

In [88]:
# 📆 Datetime Extraction
# Step 1: Generate the date range correctly using `start` and `end`
# date_range_df = pl.select(
#     pl.date_range(start="2024-01-01", end="2024-06-01", interval="1mo", name="date")
# )

# # Step 2: Extract the year from the date
# df_dt = date_range_df.with_columns(
#     pl.col("date").dt.year().alias("year")
# )

# df_dt

In [92]:
# # 🕒 Dynamic GroupBy
# df_time = pl.DataFrame({
#     "date": pl.date_range("2024-01-01", "2024-03-01", interval="1d"),
#     "value": range(60)
# })
# df_time.group_by_dynamic("date", every="1mo").agg(pl.col("value").sum())

In [93]:
# # 📉 Rolling Window
# df_time.rolling(index_column="date", period="7d").agg(pl.col("value").mean())

🔢 Counts and Frequency

In [95]:
# 📊 Value Counts
df_csv.group_by("type").len()

type,len
str,u32
"""High""",154
"""Low""",146


In [96]:
df_csv["type"].value_counts()

type,count
str,u32
"""Low""",146
"""High""",154


📦 Batch / Lazy Evaluation

In [99]:
# 🚚 Read CSV in Chunks
for df_chunk in pl.read_csv("/content/finance_data.csv", batch_size=10000):
    print(df_chunk.shape)

(300,)
(300,)
(300,)
(300,)
(300,)
(300,)
(300,)
(300,)
(300,)
(300,)


In [102]:
# 💤 Lazy Evaluation
pl.scan_csv("/content/finance_data.csv").filter(pl.col("feature_9") > 0).collect()

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037
623.781631,2.049192,1.067694,-0.469176,1.17944,-1.320233,-1.713135,-0.898415,1.831459,152.789649
551.443883,1.940062,0.992881,0.62285,-0.450065,0.005244,-1.06762,-1.594428,0.046981,-194.557927
544.426331,1.887095,0.981259,0.712998,-1.222128,-0.52452,-0.240325,0.721672,0.489375,-61.933427
…,…,…,…,…,…,…,…,…,…
450.809755,1.95427,1.010564,-2.246889,0.848431,-0.042823,-0.6067,1.237438,0.058023,17.8737
422.169527,1.828983,1.017379,-0.183983,0.170865,-1.348185,0.018434,0.307802,0.743264,-110.366916
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321


🔄 Category Encoding and Vector Ops

In [103]:
# 🏷️ Category
df_csv.with_columns(pl.col("type").cast(pl.Categorical))

stock_price_yesterday,interest_rate,inflation_rate,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,target_stock_price,forecast_date,type
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,cat
526.032184,2.177531,0.987498,-0.67062,1.000582,-1.193637,1.392465,-0.646227,0.919154,214.313595,"""2025-05-17""","""Low"""
403.165555,2.1392,0.988768,0.940771,-0.989628,0.918317,-0.982487,0.179894,-1.570501,2.242062,"""2024-10-12""","""High"""
518.142662,1.935023,0.935194,-0.351921,-0.487203,-1.203201,-0.769996,0.874517,-1.042044,-177.174358,"""2025-09-03""","""High"""
585.243333,2.021646,1.009323,0.633919,2.143944,0.045572,-2.025143,-0.730367,-0.6516,0.874379,"""2026-05-02""","""High"""
456.451363,1.987962,1.009525,-0.090533,-0.535328,-2.17267,0.33198,1.107081,0.847422,88.765037,"""2025-03-30""","""Low"""
…,…,…,…,…,…,…,…,…,…,…,…
480.365015,1.965074,1.011939,0.430042,0.381935,-0.321635,1.030283,-0.348652,2.076748,156.382513,"""2026-02-08""","""Low"""
480.897224,1.983721,0.950283,-0.513214,0.740824,0.040919,-0.2286,-0.544114,-1.002187,-133.736108,"""2025-10-31""","""High"""
323.09241,1.902645,0.974552,2.75966,1.06021,0.476358,0.392416,0.807123,0.50547,165.857321,"""2026-04-13""","""Low"""
613.37701,2.028555,0.992436,0.089581,0.555604,0.521122,-0.197338,-0.054894,0.645216,121.249546,"""2024-07-17""","""Low"""


In [105]:
# ⚡ Vectorized Operations
df.with_columns((pl.col("col1") * 3).alias("tripled"))

col1,col2,tripled
i64,str,i64
1,"""x""",3
2,"""y""",6
3,"""z""",9


# ✅ Summary of Polars Strengths
- **Speed:** Built in Rust, parallel execution by default.
- **Memory-efficient:** Lazy API uses minimal RAM.
- **Built-in Lazy Queries** for pipeline-style processing.
- **Excellent String & Date support**.
- **Arrow and Parquet integration** for efficient file operations.

📌 Use Polars when dealing with:
- Huge CSVs or structured data.
- Memory-sensitive environments.
- Data transformations with high performance requirements.

---
Now you’re equipped to use Polars as a modern, high-performance DataFrame engine. Start replacing slow pandas pipelines with Polars for 10x performance boosts!


In [None]:
"""
Act as a Python Expert who have worked on various advance concepts of Polars Libraries in Python with various Real life like use cases. now I am learning dataframe management and already learn pandas and in that I have seen polars lybraries alternative of of pandas with same use case time to time, so I though I should Learn polars for myself. Now as someone who never know anything about Polars. help me understand and learn best practices of polars library used on regular bases, and also help me learn how its useful while using in different use case like: -
DataFrame Creation: pl.DataFrame({"col1": [1, 2, 3], "col2": ["x", "y", "z"]})
Read CSV: pl.read_csv("file.csv")
Read Excel: pl.read_excel("file.xlsx")
Read Database: pl.read_database(query="SELECT * FROM table", connection=conn)
Write CSV: df.write_csv("output.csv")
Write Excel: df.write_excel("output.xlsx")
Write Database: df.write_database(table_name="table", connection=conn)
View First Rows: df.head(n=5)
View Last Rows: df.tail(n=5)
Schema Information: df.schema
Statistical Summary: df.describe()
Dimensions: df.height, df.width
List Columns: df.columns
Count Missing Values: df.null_count()
Drop Missing Values: df.drop_nulls()
Fill Missing Values: df.fill_null(value=0)
Select Single Column: df["column_name"]
Select Multiple Columns: df.select(["col1", "col2"])
Select Rows by Position: df.slice(start, length)
Filter Rows: df.filter(pl.col("col") > value)
String Contains Filter: df.filter(pl.col("col").str.contains("pattern"))
Drop Duplicates: df.unique() or df.unique(subset=["col1"])
Convert to Datetime: df.with_columns(pl.col("date").str.to_datetime())
Convert Data Type: df.with_columns(pl.col("col").cast(pl.Int32))
String Lowercase: df.with_columns(pl.col("col").str.to_lowercase())
String Strip: df.with_columns(pl.col("col").str.strip_chars())
String Replace: df.with_columns(pl.col("col").str.replace("old", "new"))
Rename Columns: df.rename({"old_name": "new_name"})
Create New Column: df.with_columns(pl.col("col") * 2.0.alias("new_col"))
Apply Function: df.with_columns(pl.col("col").map_elements(lambda x: func(x)))
GroupBy and Aggregate: df.group_by("col").agg(pl.col("value").mean().alias("mean_value"))
Join DataFrames: df.join(df2, on="key_column", how="inner")
Concatenate DataFrames: pl.concat([df1, df2], how="vertical")
Pivot Table: df.pivot(index="city", columns="department", values="salary", aggregate_function="mean")
Unpivot (Melt): df.unpivot(index="city", on=["salary", "bonus"])
Datetime Extraction: df.with_columns(pl.col("date").dt.year().alias("year"))
Dynamic GroupBy (Resampling): df.group_by_dynamic("date", every="1mo").agg(pl.col("value").sum())
Rolling Window: df.rolling(index_column="date", period="3d").agg(pl.col("value").mean())
Value Counts: df.group_by("col").len() or df["col"].value_counts()
Read in Chunks: pl.read_csv("file.csv", batch_size=10000)
Lazy Evaluation: pl.scan_csv("file.csv").filter(pl.col("age") > 25).collect()
Category Data Type: df.with_columns(pl.col("col").cast(pl.Categorical))
Vectorized Operations: pl.col("col") * 2 (used within with_columns or select), All I want to say is keep the examples diverse for different component seperate for all the components I wrote with polars, with polas's strength with its most used concept or practices of pydantic. Provide Cell by cell code and markdowns(appropriate to colab) I will copy paste it myself. Can Search on web for details.
"""