### Options for handling double quotes

id,name,remarks<br>
1,"Ramesh, K.P","Good performer"<br>
2,"Manoj","Needs "special" attention"<br>

In [0]:
# Baseline read: the first line supplies the column names and column types are
# inferred. No quote/escape options are set, so Spark's defaults apply.
df = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load('/Volumes/inceptez_catalog/inputdb/empdata/emp_perf.csv')
)

df.display()

In [0]:
# Re-read the same file with explicit quoting rules: the double-quote character
# is configured as both the quote and the escape character.
df = (
    spark.read.format("csv")
    .options(
        header="True",       # take column names from the first line
        inferSchema="True",  # let Spark work out the column types
        escape='"',          # character that escapes a quote inside a quoted value
        quote='"',           # character that encloses field values
    )
    .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf.csv")
)
df.display()

### Multiple line read
id,name,remarks<br>
1,"Ramesh, K.P","Good performer"<br>
2,"Manoj","This person<br>
has feedback written<br>
across multiple lines"<br>
3,"Test User","Normal one line remark"<br>

In [0]:
# Read the multi-line sample with the quote/escape settings only; the multiLine
# option is deliberately not set in this cell.
df = (
    spark.read.format("csv")
    .options(
        header="True",       # take column names from the first line
        inferSchema="True",  # let Spark work out the column types
        escape='"',          # character that escapes a quote inside a quoted value
        quote='"',           # character that encloses field values
    )
    .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf_multiline.csv")
)
df.display()

In [0]:
# Read the multi-line sample again, this time with multiLine enabled alongside
# the same quote/escape settings.
df = (
    spark.read
    .format("csv")
    .options(
        header="true",
        inferSchema="true",
        quote='"',
        escape='"',
        multiLine="true",  # let a quoted value continue across line breaks
    )
    .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf_multiline.csv")
)
df.display()

### Read modes in csv

id,name,remarks<br>
1,"Ramesh, K.P.","Good performer"<br>
2,"Manoj","Needs "special" attention"<br>
3,"Test ""double quotes"" inside field"<br>
4,"Incomplete row<br>
5,"Extra","Column","Here"<br>

### There are 3 typical read modes and the default read mode is permissive.
##### 1. permissive — Malformed fields are set to null and the raw corrupted record is placed in a string column called _corrupt_record (for CSV, this column is kept only when it is declared in a user-supplied schema)
##### 2. dropMalformed — Drops all rows containing corrupt records.
##### 3. failFast — Fails when corrupt records are encountered.

In [0]:
# PERMISSIVE mode: load the read-modes sample with the same quote, escape and
# multiLine settings used elsewhere in this notebook.
df_permissive = (
    spark.read
    .format("csv")
    .options(
        header="true",
        inferSchema="true",
        quote='"',
        escape='"',
        multiLine="true",
        mode="PERMISSIVE",
    )
    .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf_modes.csv")
)
df_permissive.display()

In [0]:
# dropMalformed mode: same file and parsing options as the PERMISSIVE read,
# with only the mode switched.
df_dropmal = (
    spark.read
    .format("csv")
    .options(
        header="true",
        inferSchema="true",
        quote='"',
        escape='"',
        multiLine="true",
        mode="dropMalformed",
    )
    .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf_modes.csv")
)
df_dropmal.display()

In [0]:
# failFast mode: same file and parsing options as the other read-mode cells.
# The result is bound to its own name (df_failfast) so that it no longer
# overwrites the dropMalformed DataFrame, df_dropmal.
# In this mode Spark is expected to raise on a malformed record; that error is
# the demonstration, so it is caught and summarised instead of aborting a
# "Run All" of the notebook.
try:
    df_failfast = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .option("quote", "\"")
        .option("escape", "\"")
        .option("multiLine", "true")
        .option("mode", "failFast")
        .load("dbfs:/Volumes/inceptez_catalog/inputdb/empdata/emp_perf_modes.csv")
    )
    df_failfast.display()
except Exception as err:  # Spark may surface the failure via wrapper exception types
    # Only the first line is shown; the rest is typically a long JVM stack trace.
    first_line = str(err).partition("\n")[0]
    print(f"Read in failFast mode raised {type(err).__name__}: {first_line}")