In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.types import TimestampType, StructType, StringType, IntegerType, FloatType, StructField, DateType
from pyspark.sql.functions import lit

In [5]:
# Obtain the notebook's SparkSession: local master using every available
# core ("local[*]"), registered under the app name "us_import_sample".
# getOrCreate() hands back an already-running session if one exists.
ss = (
    SparkSession.builder
    .master("local[*]")
    .appName("us_import_sample")
    .getOrCreate()
)

In [6]:
# Explicit schema for the header CSV, declared as (column name, Spark type)
# pairs and expanded into StructFields. Nullability is left at StructField's
# default for every column.
header_schema = StructType([
    StructField(column_name, column_type)
    for column_name, column_type in (
        ("identifier", StringType()),
        ("carrier_code", StringType()),
        ("vessel_country_code", StringType()),
        ("vessel_name", StringType()),
        ("port_of_unlading", StringType()),
        ("estimated_arrival_date", DateType()),
        ("foreign_port_of_lading_qualifier", StringType()),
        ("foreign_port_of_lading", StringType()),
        ("manifest_quantity", IntegerType()),
        ("manifest_unit", StringType()),
        ("weight", IntegerType()),
        ("weight_unit", StringType()),
        ("record_status_indicator", StringType()),
        ("place_of_receipt", StringType()),
        ("port_of_destination", StringType()),
        ("foreign_port_of_destination_qualifier", StringType()),
        ("foreign_port_of_destination", StringType()),
        ("conveyance_id_qualifier", StringType()),
        ("conveyance_id", StringType()),
        ("mode_of_transportation", StringType()),
        ("actual_arrival_date", DateType()),
    )
])

In [8]:
# Load the header CSV into a DataFrame using the explicit schema rather than
# inference. The first row is treated as a header line, and a double quote is
# the escape character for quotes inside quoted values.
header = (
    ss.read
    .schema(header_schema)
    .option("header", True)
    .option("escape", '"')
    .csv('./ams/2020/202001201500/ams__header_2020__202001201500.csv')
)

In [9]:
# Print the DataFrame's column names, types and nullability as a tree so the
# applied schema can be checked by eye.
header.printSchema()

root
 |-- identifier: string (nullable = true)
 |-- carrier_code: string (nullable = true)
 |-- vessel_country_code: string (nullable = true)
 |-- vessel_name: string (nullable = true)
 |-- port_of_unlading: string (nullable = true)
 |-- estimated_arrival_date: date (nullable = true)
 |-- foreign_port_of_lading_qualifier: string (nullable = true)
 |-- foreign_port_of_lading: string (nullable = true)
 |-- manifest_quantity: integer (nullable = true)
 |-- manifest_unit: string (nullable = true)
 |-- weight: integer (nullable = true)
 |-- weight_unit: string (nullable = true)
 |-- record_status_indicator: string (nullable = true)
 |-- place_of_receipt: string (nullable = true)
 |-- port_of_destination: string (nullable = true)
 |-- foreign_port_of_destination_qualifier: string (nullable = true)
 |-- foreign_port_of_destination: string (nullable = true)
 |-- conveyance_id_qualifier: string (nullable = true)
 |-- conveyance_id: string (nullable = true)
 |-- mode_of_transportation: string (n