#### Installing the dependencies 

In [1]:
# Install the two DataFrame libraries compared below; %pip targets the running kernel's environment.
%pip install pandas polars





In [8]:
import pandas as pd
import polars as pl
import time


In [9]:
# Read the same CSV once per library so each query below runs on its own native frame.
file_path = 'movies_dataset.csv'  # Replace with your actual CSV file path

df_polars = pl.read_csv(file_path)  # Polars DataFrame
df_pandas = pd.read_csv(file_path)  # pandas DataFrame


In [29]:
# Time the pandas query: keep rows with Rating >= 4, then keep only Genre and Rating.
# Relies on df_pandas having been loaded by an earlier cell.
#
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring short intervals, whereas
# time.time() follows the system clock and can jump if that clock is adjusted.
start_pandas = time.perf_counter()
result_pandas = df_pandas[df_pandas["Rating"] >= 4][["Genre", "Rating"]]  # Filter and select specific columns
end_pandas = time.perf_counter()

In [24]:

# Time the equivalent Polars query: keep rows with Rating >= 4, then keep only Genre and Rating.
# Relies on df_polars having been loaded by an earlier cell.
#
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring short intervals, whereas
# time.time() follows the system clock and can jump if that clock is adjusted.
start_polars = time.perf_counter()
result_polars = (
    df_polars.lazy()  # Start with LazyFrame
    .filter(pl.col("Rating") >= 4)  # Filter rows where Rating >= 4
    .select(["Genre", "Rating"])  # Select only Genre and Rating columns
    .collect()  # Collect into a DataFrame (convert LazyFrame to DataFrame)
)
end_polars = time.perf_counter()

In [31]:
# Show a heading followed by the text rendering of the filtered pandas frame.
print("Pandas Query Result:", result_pandas, sep="\n")


Pandas Query Result:
                                Genre  Rating
0         Comedy|Crime|Drama|Thriller     5.0
2                               Drama     5.0
3                    Comedy|Drama|War     5.0
5               Drama|Musical|Romance     4.0
8                               Drama     5.0
...                               ...     ...
25000088   Adventure|Children|Fantasy     4.0
25000089    Animation|Children|Comedy     4.5
25000090     Animation|Children|Drama     4.5
25000093      Action|Crime|Drama|IMAX     4.0
25000094                        Drama     5.0

[12452811 rows x 2 columns]


In [32]:
# Show a heading followed by the text rendering of the filtered Polars frame.
print("Polars Query Result:", result_polars, sep="\n")

Polars Query Result:
shape: (12_452_811, 2)
┌─────────────────────────────┬────────┐
│ Genre                       ┆ Rating │
│ ---                         ┆ ---    │
│ str                         ┆ f64    │
╞═════════════════════════════╪════════╡
│ Comedy|Crime|Drama|Thriller ┆ 5.0    │
│ Drama                       ┆ 5.0    │
│ Comedy|Drama|War            ┆ 5.0    │
│ Drama|Musical|Romance       ┆ 4.0    │
│ Drama                       ┆ 5.0    │
│ …                           ┆ …      │
│ Adventure|Children|Fantasy  ┆ 4.0    │
│ Animation|Children|Comedy   ┆ 4.5    │
│ Animation|Children|Drama    ┆ 4.5    │
│ Action|Crime|Drama|IMAX     ┆ 4.0    │
│ Drama                       ┆ 5.0    │
└─────────────────────────────┴────────┘


In [33]:
# Report the elapsed seconds measured around each library's query, one line per library.
print(
    f"Pandas Query Time: {end_pandas - start_pandas} seconds",
    f"Polars Query Time: {end_polars - start_polars} seconds",
    sep="\n",
)

Pandas Query Time: 1.0594635009765625 seconds
Polars Query Time: 0.2368612289428711 seconds


In [37]:
# Relative reduction in elapsed time: (pandas time - polars time) / pandas time,
# expressed as a percentage of the pandas time.
percentage_faster = (
    ((end_pandas - start_pandas) - (end_polars - start_polars))
    / (end_pandas - start_pandas)
    * 100
)

print(f"Polars was {percentage_faster:.2f}% faster than Pandas.")

Polars was 77.64% faster than Pandas.
