In [22]:
import openpyxl
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, TimestampType
from pyspark.sql.functions import lit

file_path = "/mnt/apps/Files/Excel/Rebate Fee.xlsx"

# Use openpyxl only to discover the worksheet names; the data itself is read by Spark.
# A workbook opened with read_only=True keeps the underlying file open until close()
# is called, so the close is guarded with try/finally to release the handle even if
# reading the sheet names fails.
workbook = openpyxl.load_workbook(file_path, read_only=True)
try:
    sheet_names = workbook.sheetnames
finally:
    workbook.close()

# Obtain the Spark session, pointing spark.jars at the spark-excel connector JAR.
# getOrCreate() hands back an already-running session when one exists.
# NOTE(review): in that case the spark.jars setting below may not be applied —
# confirm the connector is available on the live session.
spark = (
    SparkSession.builder
    .appName("ReadAllSheets")
    .config("spark.jars", "/opt/bitnami/spark/jars/spark-excel_2.12-3.5.1_0.20.4.jar")
    .getOrCreate()
)

# Target layout of the combined result, built from (column name, Spark type) pairs;
# every field is nullable.
# NOTE(review): AMT is declared as a string although the sheets shown in this
# notebook's output infer it as double — confirm which type is intended.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("NAV_DATE", TimestampType()),
        ("AMT", StringType()),
        ("FUND_CODE", StringType()),
        ("SHEET_NAME", StringType()),
    )
])

# Zero-row DataFrame with that layout; the per-sheet frames are appended to it.
final_df = spark.createDataFrame([], schema)

# Read every worksheet, conform it to `schema`, and append it to final_df.
for sheet_name in sheet_names:
    # The sheet name is single-quoted so names containing spaces form a valid address.
    data_address = f"'{sheet_name}'!A1:M1500"
    print(f"Reading sheet: {sheet_name}")
    print(f"here the sheet name {data_address}")

    df = (
        spark.read.format("com.crealytics.spark.excel")
        .option("header", "true")
        .option("inferSchema", "true")
        .option("sheetName", sheet_name)
        .option("dataAddress", data_address)
        .load(file_path)
    )

    # Tag each row with the worksheet it came from.
    df = df.withColumn("SHEET_NAME", lit(sheet_name))

    # Pick the target columns by name and cast them to the target types explicitly.
    # DataFrame.union matches columns by position, so a sheet whose columns are
    # ordered differently (or that carries extra columns inside A:M) would be
    # misaligned or rejected; selecting by name makes the contract explicit and
    # surfaces a missing column as an error naming that column.
    conformed_df = df.select(
        [df[field.name].cast(field.dataType).alias(field.name) for field in schema.fields]
    )
    final_df = final_df.unionByName(conformed_df)

    # Diagnostics still show the sheet exactly as Spark inferred it.
    df.printSchema()
    df.show()

final_df.show()

print(sheet_names)

Reading sheet: PSMA Fund
here the sheet name 'PSMA Fund'!A1:M1500
root
 |-- NAV_DATE: timestamp (nullable = true)
 |-- AMT: double (nullable = true)
 |-- FUND_CODE: string (nullable = true)
 |-- SHEET_NAME: string (nullable = false)

+-------------------+---------+---------+----------+
|           NAV_DATE|      AMT|FUND_CODE|SHEET_NAME|
+-------------------+---------+---------+----------+
|2025-08-01 00:00:00| 500000.0|     PSMA| PSMA Fund|
|2025-08-02 00:00:00|1200000.0|     PSMA| PSMA Fund|
|2025-08-03 00:00:00| 200000.0|     PSMA| PSMA Fund|
+-------------------+---------+---------+----------+

Reading sheet: PSAE Fund Lapsed 
here the sheet name 'PSAE Fund Lapsed '!A1:M1500
root
 |-- NAV_DATE: timestamp (nullable = true)
 |-- AMT: double (nullable = true)
 |-- FUND_CODE: string (nullable = true)
 |-- SHEET_NAME: string (nullable = false)

+-------------------+--------+---------+-----------------+
|           NAV_DATE|     AMT|FUND_CODE|       SHEET_NAME|
+-------------------+-----

In [None]:
from pyspark.sql import SparkSession

# Location of the Excel workbook to read
file_path = "/mnt/apps/Files/Excel/Rebate Fee.xlsx"

# Get (or reuse) the Spark session; the spark-excel JAR is assumed to be configured already
spark = (
    SparkSession.builder
    .appName("ReadSpecificSheet")
    .getOrCreate()
)

# Configure an Excel reader for the 'PSAE' sheet with the header and inferSchema
# options enabled, limited to the range A1:M1500.
# NOTE(review): the sheet names printed earlier in this notebook include
# 'PSAE Fund Lapsed ' — confirm that a sheet named exactly 'PSAE' exists.
psae_reader = (
    spark.read.format("com.crealytics.spark.excel")
    .option("header", "true")
    .option("inferSchema", "true")
    .option("sheetName", "PSAE")
    .option("dataAddress", "'PSAE'!A1:M1500")
)
df_psae = psae_reader.load(file_path)

# Display the rows first, then the schema of the loaded DataFrame
print("Content of the 'PSAE' sheet:")
df_psae.show()
df_psae.printSchema()