#### How to find the number of rows where all columns are NULL?

| Name	   | department | Customer_ID | Change_Date | Load_Date  | Status	|   description   |
|----------|------------|-------------|-------------|------------|--------|-----------------|
| Jagadish |    HR      |  101        | 2024-01-05  | 2025-03-06 | Active | Employee record |
| null	   | null	      |  null       |	   null	    |    null    |  null  |      null       |
| Rakesh   |    HR      |  102        | 2024-01-05  | 2024-09-06 | Active | Sales record    |
| Swetha   |    HR      |  103        | 2024-01-05  | 2024-07-06 | Active | Deployed        |
| null	   | null	      |   null      |	   null	    |   null     |  null  |      null       |
| Sandhya  |    HR      |  104        | 2024-01-05  | 2024-10-26 | Active | Distribution    |
| null	   | null	      |   null      |	null	      |  null      |  null  |      null       |
| Naresh   |    HR      |  105        | 2024-03-05  | 2020-06-06 | Active | Make up         |
| Kamalesh |    HR      |  106        | 2024-04-05  | 2021-05-06 | Active | Hardware issue  |
| null	   |  null	    |  null       |	null	      |  null      |  null  |   null          |
| Rajini   |    HR      |  107        | 2024-09-05  | 2022-01-06 | Active | Supply Channel  |
| Jayesh   |    HR      |  108        | 2024-07-15  | 2023-01-06 | Active | Monitoring face |
| null	   |  null	    |   null      |	null	      |  null      |  null  |   null          |
| Gajanan  |    HR      |  109        | 2025-01-05  | 2025-01-06 | Active | Successfully    |

In [0]:
%sql
-- Recreate the demo table from scratch so the cell can be re-run safely.
-- Every column is nullable on purpose: the exercise needs rows that are
-- entirely NULL as well as rows where only some columns are NULL.
DROP TABLE IF EXISTS tblNullsBlankRows;
CREATE TABLE tblNullsBlankRows (
    Name VARCHAR(50),
    department VARCHAR(50),
    Customer_ID INT,
    Change_Date DATE,
    Load_Date DATE,
    Status VARCHAR(20),
    description VARCHAR(100)
);

-- Insert sample data.
-- The target columns are listed explicitly so the values stay bound to the
-- right columns even if the table definition is later reordered or extended.
-- The data mixes fully populated rows, rows where only Name is NULL, and
-- rows where every column is NULL.
INSERT INTO tblNullsBlankRows
    (Name, department, Customer_ID, Change_Date, Load_Date, Status, description)
VALUES
('Jagadish', 'HR', 101, '2024-01-05', '2024-01-06', 'Active', 'Employee record'),
('Ashwin', 'Admin', 102, '2024-04-05', '2024-03-06', 'Default', 'Employee Obsent'),
('Swarna', 'Finance', 103, '2024-05-05', '2024-04-06', 'Deactivate', 'Products'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
(NULL, 'Finance', 103, '2024-05-05', '2024-04-06', 'Deactivate', 'Products'),
('Swapna', 'IT', 104, '2024-08-10', '2024-05-12', 'Inactive', 'Resigned'),
('Praveen', 'Accounts', 105, '2024-09-05', '2024-03-06', 'Active', 'Description'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
('Bibin', 'Finance', 105, '2024-10-01', '2024-09-02', 'Active', 'Salary details'),
('Bharath', 'HR', 106, '2024-12-05', '2024-09-06', 'Default', 'Sales Details'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
(NULL, 'Finance', 103, '2024-05-05', '2024-04-06', 'Deactivate', 'Products'),
('Carl', 'Admin', 104, '2024-04-11', '2024-04-15', 'Active', 'Shift change'),
('Joseph', 'Developer', 101, '2024-01-05', '2024-01-06', 'Active', 'Employee record'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
('Damu', 'Sales', 105, '2024-05-18', '2024-05-20', 'Inactive', 'Transferred'),
('Josna', 'Transport', 101, '2024-01-05', '2024-01-06', 'Active', 'Unlock offers'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
('Kiran', 'Domestic', 101, '2024-12-05', '2025-05-06', 'Inactive', 'Savings');

-- Show the full table so the NULL rows can be inspected.
SELECT * FROM tblNullsBlankRows;

Name,department,Customer_ID,Change_Date,Load_Date,Status,description
Jagadish,HR,101.0,2024-01-05,2024-01-06,Active,Employee record
Ashwin,Admin,102.0,2024-04-05,2024-03-06,Default,Employee Obsent
Swarna,Finance,103.0,2024-05-05,2024-04-06,Deactivate,Products
,,,,,,
,Finance,103.0,2024-05-05,2024-04-06,Deactivate,Products
Swapna,IT,104.0,2024-08-10,2024-05-12,Inactive,Resigned
Praveen,Accounts,105.0,2024-09-05,2024-03-06,Active,Description
,,,,,,
Bibin,Finance,105.0,2024-10-01,2024-09-02,Active,Salary details
Bharath,HR,106.0,2024-12-05,2024-09-06,Default,Sales Details


##### 1) Using WHERE with AND conditions

In [0]:
%sql
-- Return only the rows in which every column is NULL.
SELECT *
FROM tblNullsBlankRows
WHERE Name IS NULL
  AND department IS NULL
  AND Customer_ID IS NULL
  AND Change_Date IS NULL
  AND Load_Date IS NULL
  AND Status IS NULL
  AND description IS NULL;

Name,department,Customer_ID,Change_Date,Load_Date,Status,description
,,,,,,
,,,,,,
,,,,,,
,,,,,,
,,,,,,


In [0]:
%sql
-- Count the rows in which every column is NULL.
SELECT
    COUNT(*) AS all_null_rows
FROM tblNullsBlankRows
WHERE Name IS NULL
  AND department IS NULL
  AND Customer_ID IS NULL
  AND Change_Date IS NULL
  AND Load_Date IS NULL
  AND Status IS NULL
  AND description IS NULL;

all_null_rows
5


##### 2) Using CASE inside SUM()
- 1 = every column in the row is NULL
- 0 = at least one column in the row is NOT NULL

In [0]:
%sql
-- Emit one flag per row: 1 when every column is NULL, otherwise 0.
SELECT
    CASE
        WHEN Name IS NULL AND department IS NULL
         AND Customer_ID IS NULL
         AND Change_Date IS NULL AND Load_Date IS NULL
         AND Status IS NULL AND description IS NULL
            THEN 1
        ELSE 0
    END AS all_null_rows
FROM tblNullsBlankRows;

all_null_rows
0
0
0
1
0
0
0
1
0
0


In [0]:
%sql
-- Count the all-NULL rows by summing a 1/0 flag computed per row.
-- SUM over zero input rows yields NULL rather than 0, so the result is
-- wrapped in COALESCE to report 0 when the table is empty.
SELECT
    COALESCE(
        SUM(
            CASE
                WHEN Name IS NULL
                 AND department IS NULL
                 AND Customer_ID IS NULL
                 AND Change_Date IS NULL
                 AND Load_Date IS NULL
                 AND Status IS NULL
                 AND description IS NULL
                THEN 1
                ELSE 0
            END
        ),
        0
    ) AS all_null_rows
FROM tblNullsBlankRows;

all_null_rows
5


##### 3) Using COALESCE

In [0]:
%sql
-- COALESCE yields its first non-NULL argument, so the result is NULL only
-- when every listed column is NULL.
-- The INT and DATE columns are cast to STRING so that all arguments passed
-- to COALESCE share a single data type.
SELECT *
FROM tblNullsBlankRows
WHERE COALESCE(
          Name,
          department,
          CAST(Customer_ID AS STRING),
          CAST(Change_Date AS STRING),
          CAST(Load_Date AS STRING),
          Status,
          description
      ) IS NULL;

Name,department,Customer_ID,Change_Date,Load_Date,Status,description
,,,,,,
,,,,,,
,,,,,,
,,,,,,
,,,,,,


In [0]:
%sql
-- Count the rows for which COALESCE over all columns is NULL, i.e. the rows
-- where every column is NULL.
-- The INT and DATE columns are cast to STRING so that all arguments passed
-- to COALESCE share a single data type.
SELECT
    COUNT(*) AS all_null_rows
FROM tblNullsBlankRows
WHERE COALESCE(
          Name,
          department,
          CAST(Customer_ID AS STRING),
          CAST(Change_Date AS STRING),
          CAST(Load_Date AS STRING),
          Status,
          description
      ) IS NULL;

all_null_rows
5


- **COALESCE(col1, col2, …)** returns the **first non-null value**.
- If **all columns** are **NULL**, the result is **NULL**.
- **WHERE COALESCE(...) IS NULL** ensures we **only count rows** where **all columns are NULL**.

##### 4) Using a derived column (CASE) to check row emptiness

In [0]:
%sql
-- Tag each row with is_all_null (1 = every column is NULL, 0 = otherwise),
-- then keep only the tagged rows.
WITH t AS (
    SELECT
        *,
        CASE
            WHEN Name IS NULL AND department IS NULL
             AND Customer_ID IS NULL
             AND Change_Date IS NULL AND Load_Date IS NULL
             AND Status IS NULL AND description IS NULL
                THEN 1
            ELSE 0
        END AS is_all_null
    FROM tblNullsBlankRows
)
SELECT *
FROM t
WHERE is_all_null = 1;

Name,department,Customer_ID,Change_Date,Load_Date,Status,description,is_all_null
,,,,,,,1
,,,,,,,1
,,,,,,,1
,,,,,,,1
,,,,,,,1


In [0]:
%sql
-- Tag each row with is_all_null (1 = every column is NULL, 0 = otherwise),
-- then count the tagged rows.
WITH t AS (
    SELECT
        *,
        CASE
            WHEN Name IS NULL AND department IS NULL
             AND Customer_ID IS NULL
             AND Change_Date IS NULL AND Load_Date IS NULL
             AND Status IS NULL AND description IS NULL
                THEN 1
            ELSE 0
        END AS is_all_null
    FROM tblNullsBlankRows
)
SELECT
    COUNT(*) AS all_null_rows
FROM t
WHERE is_all_null = 1;

all_null_rows
5


In [0]:
%sql
  -- Return every row together with an is_all_null flag:
  -- 1 when all columns of the row are NULL, 0 otherwise.
  SELECT
      *,
      CASE
          WHEN Name IS NULL AND department IS NULL
           AND Customer_ID IS NULL
           AND Change_Date IS NULL AND Load_Date IS NULL
           AND Status IS NULL AND description IS NULL
              THEN 1
          ELSE 0
      END AS is_all_null
  FROM tblNullsBlankRows

Name,department,Customer_ID,Change_Date,Load_Date,Status,description,is_all_null
Jagadish,HR,101.0,2024-01-05,2024-01-06,Active,Employee record,0
Ashwin,Admin,102.0,2024-04-05,2024-03-06,Default,Employee Obsent,0
Swarna,Finance,103.0,2024-05-05,2024-04-06,Deactivate,Products,0
,,,,,,,1
Swapna,IT,104.0,2024-08-10,2024-05-12,Inactive,Resigned,0
Praveen,Accounts,105.0,2024-09-05,2024-03-06,Active,Description,0
,,,,,,,1
Bibin,Finance,105.0,2024-10-01,2024-09-02,Active,Salary details,0
Bharath,HR,106.0,2024-12-05,2024-09-06,Default,Sales Details,0
,,,,,,,1


##### 5) Dynamic method (build the NULL check from the table's column list)

In [0]:
from pyspark.sql.functions import col

# Table to scan; defined once so the DataFrame lookup and the SQL text agree.
table_name = "tblNullsBlankRows"

# Load the table into a DataFrame (only its column list is used below)
df = spark.table(table_name)

# Generate "`c1` IS NULL AND `c2` IS NULL AND ..." over every column.
# Each identifier is backtick-quoted (embedded backticks doubled) so column
# names containing spaces, dots or reserved words still parse.
# The loop variable is called column_name so it does not shadow the
# imported `col` function.
condition = " AND ".join(
    "`{}` IS NULL".format(column_name.replace("`", "``"))
    for column_name in df.columns
)

# Create the SQL query
sql_query = f"SELECT * FROM {table_name} WHERE {condition}"
# Alternative that returns only the count of all-NULL rows:
# sql_query = f"SELECT COUNT(*) AS all_null_rows FROM {table_name} WHERE {condition}"

# Execute the query
result_df = spark.sql(sql_query)

# Display the result
display(result_df)

Name,department,Customer_ID,Change_Date,Load_Date,Status,description
,,,,,,
,,,,,,
,,,,,,
,,,,,,
,,,,,,
