# Column filtering (e.g. columns=)

To improve performance and readability when working with large catalogs, users can specify the names of the columns they want to load (via the `columns` argument) instead of loading the entire dataset. This approach reduces memory usage and speeds up data processing by avoiding unnecessary data retrieval.

In [None]:
# Import LSDB and start a Dask client.
# `lsdb` provides the `open_catalog` call used in the cells below; the Dask
# `client` created here is closed again in the last code cell of the notebook.

import lsdb

from dask.distributed import Client

# Request 4 workers; memory_limit="auto" leaves the per-worker memory limit
# to Dask's automatic setting rather than fixing a value here.
client = Client(n_workers=4, memory_limit="auto")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 23155 instead


In [3]:
# Specify the path to the LSDB catalog you want to use.
# The base URL is written without a trailing slash because catalog paths are
# built by joining with "/" below; a trailing slash here would produce a
# double slash ("hats//ztf_dr14/...") in the final URL.

surveys_path = "https://data.lsdb.io/hats"
ztf_object_path = f"{surveys_path}/ztf_dr14/ztf_object"

## Load the catalog with all columns

In [None]:
# Open the catalog without passing `columns=`, i.e. with no explicit column
# selection requested.
# The bare expression on the last line makes the notebook display the catalog
# summary (column names and dtypes per partition) shown in the output below.
ztf_object = lsdb.open_catalog(ztf_object_path)
ztf_object

Unnamed: 0_level_0,ps1_objid,ra,dec,ps1_gMeanPSFMag,ps1_rMeanPSFMag,ps1_iMeanPSFMag,nobs_g,nobs_r,nobs_i,mean_mag_g,mean_mag_r,mean_mag_i
npartitions=2352,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
"Order: 3, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],int32[pyarrow],int32[pyarrow],int32[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 3, Pixel: 1",...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 3070",...,...,...,...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 3071",...,...,...,...,...,...,...,...,...,...,...,...


## Load the catalog with a subset of columns

In [None]:
# Open the same catalog path again, this time naming the four columns to load
# through the `columns=` argument; the displayed summary then lists only those.
ztf_object = lsdb.open_catalog(
    ztf_object_path,
    columns=["ps1_objid", "ra", "dec", "mean_mag_r"],
)
ztf_object

Unnamed: 0_level_0,ps1_objid,ra,dec,mean_mag_r
npartitions=2352,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Order: 3, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 3, Pixel: 1",...,...,...,...
...,...,...,...,...
"Order: 4, Pixel: 3070",...,...,...,...
"Order: 4, Pixel: 3071",...,...,...,...


In [None]:
# Close the Dask client.
# This closes the `client` object created in the first code cell, now that no
# further catalog operations follow.

client.close()

## About

**Authors**: Olivia Lynn

**Last updated on**: May 19, 2025

If you use `lsdb` for published research, please cite it following these [instructions](https://docs.lsdb.io/en/stable/citation.html).