In [0]:
from pyspark.sql import SparkSession

In [0]:
# Obtain the Spark session for this notebook under the "CSVtoDelta" app name;
# getOrCreate() returns the already-running session when one exists.
spark = (
    SparkSession.builder
    .appName("CSVtoDelta")
    .getOrCreate()
)

In [0]:
# Pipeline: read the employees CSV, drop rows that contain nulls, write the
# result to a Delta location, register that location as a SQL table, and
# preview the data. Any failure is reported and then re-raised so the cell
# (and any job running this notebook) is marked as failed instead of
# silently continuing into code that depends on `df_cleaned`.
try:
    # 1. Read the CSV file
    # Replace with your actual CSV file path
    input_path = "dbfs:/FileStore/tables/employees.csv"
    df = (
        spark.read
        .option("header", "true")        # first line holds the column names
        .option("inferSchema", "true")   # let Spark infer column types
        .csv(input_path)
    )

    # Show initial row count
    print(f"Initial row count: {df.count()}")

    # 2. Drop rows with any null values
    # NOTE(review): the recorded output shows "-" placeholders in
    # COMMISSION_PCT and MANAGER_ID (both inferred as string) and an
    # unchanged row count, so this step currently removes nothing. If "-"
    # is meant to represent a missing value, consider reading with
    # .option("nullValue", "-") and dropping on a chosen subset of columns;
    # confirm the intended semantics before changing this.
    df_cleaned = df.na.drop()

    # Show cleaned row count
    print(f"Row count after dropping nulls: {df_cleaned.count()}")

    # 3. Save as Delta table
    # Replace with your desired output path and table name
    output_path = "dbfs:/delta/employees_table"
    table_name = "employees_table"

    (
        df_cleaned.write
        .format("delta")
        .mode("overwrite")   # replaces any data already at output_path
        .save(output_path)
    )

    # 4. Register the Delta table for SQL queries
    spark.sql(f"CREATE TABLE IF NOT EXISTS {table_name} USING DELTA LOCATION '{output_path}'")

    # 5. Verify the table creation
    print("Delta table created successfully!")
    display(df_cleaned.limit(10))  # Show first 10 rows

    # Optional: Run a SQL query
    result = spark.sql(f"SELECT * FROM {table_name} LIMIT 5")
    display(result)

except Exception as e:
    print(f"An error occurred: {str(e)}")
    # Re-raise: swallowing the error here would let execution fall through
    # to the schema print below, which fails with a misleading NameError
    # when `df_cleaned` was never assigned, and would hide the failure from
    # whoever (or whatever scheduler) runs this notebook.
    raise

# Optional: Show table schema (only reached when the pipeline succeeded)
df_cleaned.printSchema()

Initial row count: 50
Row count after dropping nulls: 50
Delta table created successfully!


EMPLOYEE_ID,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,HIRE_DATE,JOB_ID,SALARY,COMMISSION_PCT,MANAGER_ID,DEPARTMENT_ID
198,Donald,OConnell,DOCONNEL,650.507.9833,21-JUN-07,SH_CLERK,2600,-,124,50
199,Douglas,Grant,DGRANT,650.507.9844,13-JAN-08,SH_CLERK,2600,-,124,50
200,Jennifer,Whalen,JWHALEN,515.123.4444,17-SEP-03,AD_ASST,4400,-,101,10
201,Michael,Hartstein,MHARTSTE,515.123.5555,17-FEB-04,MK_MAN,13000,-,100,20
202,Pat,Fay,PFAY,603.123.6666,17-AUG-05,MK_REP,6000,-,201,20
203,Susan,Mavris,SMAVRIS,515.123.7777,07-JUN-02,HR_REP,6500,-,101,40
204,Hermann,Baer,HBAER,515.123.8888,07-JUN-02,PR_REP,10000,-,101,70
205,Shelley,Higgins,SHIGGINS,515.123.8080,07-JUN-02,AC_MGR,12008,-,101,110
206,William,Gietz,WGIETZ,515.123.8181,07-JUN-02,AC_ACCOUNT,8300,-,205,110
100,Steven,King,SKING,515.123.4567,17-JUN-03,AD_PRES,24000,-,-,90


EMPLOYEE_ID,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,HIRE_DATE,JOB_ID,SALARY,COMMISSION_PCT,MANAGER_ID,DEPARTMENT_ID
198,Donald,OConnell,DOCONNEL,650.507.9833,21-JUN-07,SH_CLERK,2600,-,124,50
199,Douglas,Grant,DGRANT,650.507.9844,13-JAN-08,SH_CLERK,2600,-,124,50
200,Jennifer,Whalen,JWHALEN,515.123.4444,17-SEP-03,AD_ASST,4400,-,101,10
201,Michael,Hartstein,MHARTSTE,515.123.5555,17-FEB-04,MK_MAN,13000,-,100,20
202,Pat,Fay,PFAY,603.123.6666,17-AUG-05,MK_REP,6000,-,201,20


root
 |-- EMPLOYEE_ID: integer (nullable = true)
 |-- FIRST_NAME: string (nullable = true)
 |-- LAST_NAME: string (nullable = true)
 |-- EMAIL: string (nullable = true)
 |-- PHONE_NUMBER: string (nullable = true)
 |-- HIRE_DATE: string (nullable = true)
 |-- JOB_ID: string (nullable = true)
 |-- SALARY: integer (nullable = true)
 |-- COMMISSION_PCT: string (nullable = true)
 |-- MANAGER_ID: string (nullable = true)
 |-- DEPARTMENT_ID: integer (nullable = true)

