# Inicio

In [2]:
# Install the packages this notebook imports into the current runtime.
!pip install findspark
!pip install pyspark
# NOTE(review): the PyPI package named `spark` is presumably unrelated to Apache Spark
# (the Spark bindings come from `pyspark` above) — confirm whether it is needed at all.
!pip install spark

Collecting findspark
  Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)
Installing collected packages: findspark
Successfully installed findspark-2.0.1
Collecting pyspark
  Downloading pyspark-3.4.1.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... done
  Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285387 sha256=801a4f430ad75d5ced9a36acbfd6873ddd80c558b9d94d136cd0d27618df7542
  Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.1
Collecting spark
  Downloading spark-0.2.1.tar.gz (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

In [64]:
# findspark must locate the Spark installation and extend sys.path *before*
# pyspark is imported, so it is initialised first.
import findspark

findspark.init()

import pyspark

# NOTE: the wildcard import from pyspark.sql.functions shadows Python builtins
# such as min, max, sum, round and abs for the rest of the notebook; later
# cells call min()/max() expecting the Spark column functions.
from pyspark.sql import *
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Entry point for the DataFrame / SQL API (reuses an already-active session).
# The former `import spark` was removed: that name was rebound to this session
# before the imported module was ever used.
spark = SparkSession.builder.getOrCreate()

# Underlying SparkContext of the session.
sc = spark.sparkContext

# Manejo de Fechas

In [16]:
# Parquet files store their own schema, so the CSV-style 'inferSchema' option
# had no effect here and has been dropped.
data = spark.read.parquet('/content/convertir/part-00001-6b84f509-ffb8-43d4-820e-866fb80c0d08-c000.snappy.parquet')

In [14]:
data.show()

+----------+--------------------+----------+----------------+
|      date|           timestamp|  date_str|          ts_str|
+----------+--------------------+----------+----------------+
|2021-01-01|2021-01-01 20:10:...|01-01-2021|18-08-2021 46:58|
+----------+--------------------+----------+----------------+



In [17]:
data.printSchema()

root
 |-- date: string (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- date_str: string (nullable = true)
 |-- ts_str: string (nullable = true)



In [21]:
# Convert the four string columns into real date / timestamp columns.
# 'date' and 'timestamp' are parsed without a pattern; 'date_str' and 'ts_str'
# need explicit patterns (in 'mm:ss', mm is minutes and ss is seconds).
data1 = data.select([
    to_date('date').alias('date1'),
    to_timestamp('timestamp').alias('ts1'),
    to_date('date_str', 'dd-MM-yyyy').alias('date2'),
    to_timestamp('ts_str', 'dd-MM-yyyy mm:ss').alias('ts2'),
])

In [22]:
data1.printSchema()

root
 |-- date1: date (nullable = true)
 |-- ts1: timestamp (nullable = true)
 |-- date2: date (nullable = true)
 |-- ts2: timestamp (nullable = true)



In [23]:
# Render the parsed date back to text in day-month-year order.
data1.select(date_format('date1', 'dd-MM-yyyy')).show()

+------------------------------+
|date_format(date1, dd-MM-yyyy)|
+------------------------------+
|                    01-01-2021|
+------------------------------+



In [24]:
df = spark.read.parquet('/content/calculo/calculo.parquet')

In [25]:
df.show()

+------+-------------+------------+-------------------+
|nombre|fecha_ingreso|fecha_salida|       baja_sistema|
+------+-------------+------------+-------------------+
|  Jose|   2021-01-01|  2021-11-14|2021-10-14 15:35:59|
|Mayara|   2021-02-06|  2021-11-25|2021-11-25 10:35:55|
+------+-------------+------------+-------------------+



In [27]:
# Per person: days and (fractional) months between entry and exit dates,
# plus the last calendar day of the exit month.
df.select([
    col('nombre'),
    datediff(col('fecha_salida'), col('fecha_ingreso')).alias('dias'),
    months_between(col('fecha_salida'), col('fecha_ingreso')).alias('meses'),
    last_day(col('fecha_salida')).alias('ultimo_dia_mes'),
]).show()

+------+----+-----------+--------------+
|nombre|dias|      meses|ultimo_dia_mes|
+------+----+-----------+--------------+
|  Jose| 317|10.41935484|    2021-11-30|
|Mayara| 292| 9.61290323|    2021-11-30|
+------+----+-----------+--------------+



In [28]:
# Shift the entry date 14 days forward and 1 day back.
df.select([
    col('nombre'),
    col('fecha_ingreso'),
    date_add(col('fecha_ingreso'), 14).alias('mas_14_dias'),
    date_sub(col('fecha_ingreso'), 1).alias('menos_1_dia'),
]).show()

+------+-------------+-----------+-----------+
|nombre|fecha_ingreso|mas_14_dias|menos_1_dia|
+------+-------------+-----------+-----------+
|  Jose|   2021-01-01| 2021-01-15| 2020-12-31|
|Mayara|   2021-02-06| 2021-02-20| 2021-02-05|
+------+-------------+-----------+-----------+



In [29]:
# Show the timestamp next to each of its calendar / clock components.
# The extractor functions are applied in the same order as the original columns.
df.select(
    'baja_sistema',
    *[parte('baja_sistema')
      for parte in (year, month, dayofmonth, dayofyear, hour, minute, second)]
).show()

+-------------------+------------------+-------------------+------------------------+-----------------------+------------------+--------------------+--------------------+
|       baja_sistema|year(baja_sistema)|month(baja_sistema)|dayofmonth(baja_sistema)|dayofyear(baja_sistema)|hour(baja_sistema)|minute(baja_sistema)|second(baja_sistema)|
+-------------------+------------------+-------------------+------------------------+-----------------------+------------------+--------------------+--------------------+
|2021-10-14 15:35:59|              2021|                 10|                      14|                    287|                15|                  35|                  59|
|2021-11-25 10:35:55|              2021|                 11|                      25|                    329|                10|                  35|                  55|
+-------------------+------------------+-------------------+------------------------+-----------------------+------------------+-----------------

# Manejo de Strings

In [4]:
data = spark.read.parquet('data.parquet')

In [6]:
data.show()

+-------+
| nombre|
+-------+
| Spark |
+-------+



In [7]:
# Compare the three trimming variants: left only, right only, both sides.
data.select(
    *[recorte('nombre') for recorte in (ltrim, rtrim, trim)]
).show()

+-------------+-------------+------------+
|ltrim(nombre)|rtrim(nombre)|trim(nombre)|
+-------------+-------------+------------+
|       Spark |        Spark|       Spark|
+-------------+-------------+------------+



In [11]:
# Strip surrounding spaces first, then pad the result to 8 characters:
# with '-' on the left and with '=' on the right.
(
    data
    .select(trim('nombre').alias('trim'))
    .select(lpad('trim', 8, '-'), rpad('trim', 8, '='))
    .show()
)

+----------------+----------------+
|lpad(trim, 8, -)|rpad(trim, 8, =)|
+----------------+----------------+
|        ---Spark|        Spark===|
+----------------+----------------+



In [12]:
df1 = spark.createDataFrame([('Spark', 'es', 'maravilloso')], ['sujeto','verbo','adjetivo'])

In [13]:
df1.show()

+------+-----+-----------+
|sujeto|verbo|   adjetivo|
+------+-----+-----------+
| Spark|   es|maravilloso|
+------+-----+-----------+



In [15]:
# Join the three words into one space-separated sentence, then show it in
# lower case, upper case, capitalised per word, and reversed.
(
    df1
    .select(concat_ws(' ', col('sujeto'), col('verbo'), col('adjetivo')).alias('frase'))
    .select(
        lower(col('frase')).alias('minuscula'),
        upper(col('frase')).alias('mayuscula'),
        initcap(col('frase')).alias('initcap'),
        reverse(col('frase')).alias('reverse'),
    )
    .show()
)

+--------------------+--------------------+--------------------+--------------------+
|           minuscula|           mayuscula|             initcap|             reverse|
+--------------------+--------------------+--------------------+--------------------+
|spark es maravilloso|SPARK ES MARAVILLOSO|Spark Es Maravilloso|osollivaram se krapS|
+--------------------+--------------------+--------------------+--------------------+



In [4]:
df2 = spark.createDataFrame([(' voy a casa por mis llaves',)], ['frase'])

In [20]:
df2.show(truncate=False)

+--------------------------+
|frase                     |
+--------------------------+
| voy a casa por mis llaves|
+--------------------------+



In [6]:
# Replace every match of the regex alternation "voy|por" with "ir".
df2.select(regexp_replace('frase', 'voy|por', 'ir').alias('nueva_frase')).show(truncate=False)

+------------------------+
|nueva_frase             |
+------------------------+
| ir a casa ir mis llaves|
+------------------------+



# Trabajo con colecciones



In [7]:
data = spark.read.parquet('/content/part-00000-96f39196-ef97-4a14-926e-b24a86c2e32d-c000.snappy.parquet')

In [10]:
data.show(truncate=False)

+-----+--------------------------------------------+
|dia  |tareas                                      |
+-----+--------------------------------------------+
|lunes|[hacer la tarea, buscar agua, lavar el auto]|
+-----+--------------------------------------------+



In [11]:
data.printSchema()

root
 |-- dia: string (nullable = true)
 |-- tareas: array (nullable = true)
 |    |-- element: string (containsNull = true)



In [15]:
# Array helpers on the 'tareas' column: element count, sorted copy,
# and a membership test for the task 'buscar agua'.
data.select(
    size('tareas').alias('tamaño'),
    sort_array('tareas').alias('array_ordenado'),
    # Alias spelling corrected (it previously read 'busacr_agua').
    array_contains('tareas', 'buscar agua').alias('buscar_agua')
).show(truncate=False)

+------+--------------------------------------------+-----------+
|tamaño|array_ordenado                              |busacr_agua|
+------+--------------------------------------------+-----------+
|3     |[buscar agua, hacer la tarea, lavar el auto]|true       |
+------+--------------------------------------------+-----------+



In [16]:
# Turn each element of the 'tareas' array into its own row, keeping the day.
data.select(col('dia'), explode(col('tareas')).alias('tareas')).show()

+-----+--------------+
|  dia|        tareas|
+-----+--------------+
|lunes|hacer la tarea|
|lunes|   buscar agua|
|lunes| lavar el auto|
+-----+--------------+



# Formato JSON

In [19]:
jsondf = spark.read.parquet('/content/part-00000-9f0e2b16-0b50-4212-9b83-654e1b8dd137-c000.snappy.parquet')

In [21]:
jsondf.show(truncate=False)

+---------------------------------------------------------------------------+
|tareas_str                                                                 |
+---------------------------------------------------------------------------+
|{"dia": "lunes","tareas": ["hacer la tarea","buscar agua","lavar el auto"]}|
+---------------------------------------------------------------------------+



In [22]:
jsondf.printSchema()

root
 |-- tareas_str: string (nullable = true)



In [28]:
# Schema used to parse the JSON text in 'tareas_str':
# a nullable string 'dia' and a nullable array of strings 'tareas'.
schema_json = (
    StructType()
    .add('dia', StringType(), True)
    .add('tareas', ArrayType(StringType()), True)
)

In [31]:
# Parse the JSON string column into a single struct column named 'por_hacer'.
json_df = jsondf.select(from_json(col('tareas_str'), schema_json).alias('por_hacer'))

In [32]:
json_df.printSchema()

root
 |-- por_hacer: struct (nullable = true)
 |    |-- dia: string (nullable = true)
 |    |-- tareas: array (nullable = true)
 |    |    |-- element: string (containsNull = true)



In [35]:
# Drill into the parsed struct: one field, the whole array, and the
# array's first element (index 0).
json_df.select(
    col('por_hacer')['dia'],
    col('por_hacer')['tareas'],
    col('por_hacer')['tareas'][0].alias('primer_tarea')
).show(truncate=False)

+-------------+--------------------------------------------+--------------+
|por_hacer.dia|por_hacer.tareas                            |primer_tarea  |
+-------------+--------------------------------------------+--------------+
|lunes        |[hacer la tarea, buscar agua, lavar el auto]|hacer la tarea|
+-------------+--------------------------------------------+--------------+



In [37]:
# Serialise the struct column back to a JSON string.
json_df.select(to_json(col('por_hacer'))).show(truncate=False)

+-------------------------------------------------------------------------+
|to_json(por_hacer)                                                       |
+-------------------------------------------------------------------------+
|{"dia":"lunes","tareas":["hacer la tarea","buscar agua","lavar el auto"]}|
+-------------------------------------------------------------------------+



# When, coalesce, lit

In [38]:
data = spark.read.parquet('/content/part-00000-a9b42845-6edf-4329-996e-2528aa78bb4a-c000.snappy.parquet')

In [39]:
data.printSchema()

root
 |-- nombre: string (nullable = true)
 |-- pago: long (nullable = true)



In [40]:
data.show()

+------+----+
|nombre|pago|
+------+----+
|  Jose|   1|
| Julia|   2|
| Katia|   1|
|  null|   3|
|  Raul|   3|
+------+----+



In [42]:
# Translate the numeric payment code into a label:
# 1 -> 'Pagado', 2 -> 'Sin Pagar', any other value -> 'Sin Iniciar'.
data.select(
    col('nombre'),
    (
        when(col('pago') == 1, 'Pagado')
        .when(col('pago') == 2, 'Sin Pagar')
        .otherwise('Sin Iniciar')
        .alias('Pago')
    )
).show()

+------+-----------+
|nombre|       Pago|
+------+-----------+
|  Jose|     Pagado|
| Julia|  Sin Pagar|
| Katia|     Pagado|
|  null|Sin Iniciar|
|  Raul|Sin Iniciar|
+------+-----------+



In [45]:
# Substitute the literal 'sin nombre' wherever 'nombre' is null.
data.select(coalesce(col('nombre'), lit('sin nombre')).alias('nombre')).show()

+----------+
|    nombre|
+----------+
|      Jose|
|     Julia|
|     Katia|
|sin nombre|
|      Raul|
+----------+



# Funciones definidas por el usuario

In [46]:
def f_cubo(n):
  """Return the cube of ``n``, i.e. ``n`` multiplied by itself three times."""
  cuadrado = n * n
  return cuadrado * n

In [47]:
# Expose f_cubo to Spark SQL under the name 'cubo', returning a long.
spark.udf.register(name='cubo', f=f_cubo, returnType=LongType())

<function __main__.f_cubo(n)>

In [49]:
spark.range(1,10).createOrReplaceTempView('df_temp')

In [50]:
spark.sql('select id, cubo(id) as cubo from df_temp').show()

+---+----+
| id|cubo|
+---+----+
|  1|   1|
|  2|   8|
|  3|  27|
|  4|  64|
|  5| 125|
|  6| 216|
|  7| 343|
|  8| 512|
|  9| 729|
+---+----+



In [55]:
def bienvenida(nombre):
  """Build the greeting 'Hola <nombre>' for the given name."""
  saludo = f'Hola {nombre}'
  return saludo

In [57]:
# Wrap bienvenida as a Spark UDF that returns a string.
bienvenida_udf = udf(f=bienvenida, returnType=StringType())

In [58]:
df_nombre = spark.createDataFrame([('Jose',), ('Julia',)], ['nombre'])

In [59]:
df_nombre.show()

+------+
|nombre|
+------+
|  Jose|
| Julia|
+------+



In [60]:
# Apply the greeting UDF to every name.
df_nombre.select(
    col('nombre'),
    bienvenida_udf(col('nombre')).alias('bienvenida'),
).show()

+------+----------+
|nombre|bienvenida|
+------+----------+
|  Jose| Hola Jose|
| Julia|Hola Julia|
+------+----------+



In [61]:
@udf(returnType=StringType())
def mayuscula(s):
  """Spark UDF: return ``s`` in upper case.

  A null input arrives in Python as ``None``; it is passed through as ``None``
  (SQL NULL) instead of raising ``AttributeError`` on ``None.upper()``.
  """
  if s is None:
    return None
  return s.upper()

In [62]:
# Apply the upper-casing UDF to every name.
df_nombre.select(
    col('nombre'),
    mayuscula(col('nombre')).alias('mayuscula_nombre'),
).show()

+------+----------------+
|nombre|mayuscula_nombre|
+------+----------------+
|  Jose|            JOSE|
| Julia|           JULIA|
+------+----------------+



In [63]:
import pandas as pd

In [65]:
def cubo_pandas(a: pd.Series) -> pd.Series:
  """Return the element-wise cube of the Series ``a``."""
  cuadrado = a * a
  return cuadrado * a

In [66]:
# Wrap cubo_pandas as a pandas UDF that returns longs.
cubo_udf = pandas_udf(f=cubo_pandas, returnType=LongType())

In [67]:
x = pd.Series([1,2,3])

In [68]:
print(cubo_pandas(x))

0     1
1     8
2    27
dtype: int64


In [69]:
df = spark.range(5)

In [70]:
# Apply the pandas UDF to the id column.
df.select(
    col('id'),
    cubo_udf(col('id')).alias('cubo_pandas'),
).show()

+---+-----------+
| id|cubo_pandas|
+---+-----------+
|  0|          0|
|  1|          1|
|  2|          8|
|  3|         27|
|  4|         64|
+---+-----------+



# Funciones de Ventana (window)

In [71]:
df = spark.read.parquet('/content/funciones_ventana.parquet')

In [72]:
df.show()

+-------+----+------------+----------+
| nombre|edad|departamento|evaluacion|
+-------+----+------------+----------+
| Lazaro|  45|      letras|        98|
|   Raul|  24|  matemática|        76|
|  Maria|  34|  matemática|        27|
|   Jose|  30|     química|        78|
| Susana|  51|     química|        98|
|   Juan|  44|      letras|        89|
|  Julia|  55|      letras|        92|
|  Kadir|  38|arquitectura|        39|
| Lilian|  23|arquitectura|        94|
|   Rosa|  26|      letras|        91|
|   Aian|  50|  matemática|        73|
|Yaneisy|  29|      letras|        89|
|Enrique|  40|     química|        92|
|    Jon|  25|arquitectura|        78|
|  Luisa|  39|arquitectura|        94|
+-------+----+------------+----------+



In [73]:
from pyspark.sql.window import Window

In [74]:
# One window per department, rows ordered from highest to lowest evaluation.
windowSpec = Window.partitionBy('departamento').orderBy(col('evaluacion').desc())

In [77]:
# row_number: keep the two top-evaluated rows of each department.
# Tied evaluations still receive distinct consecutive numbers, so which of the
# tied rows comes first is not determined by the window ordering alone.
(
    df
    .withColumn('row_number', row_number().over(windowSpec))
    .where(col('row_number') <= 2)
    .show()
)

+-------+----+------------+----------+----------+
| nombre|edad|departamento|evaluacion|row_number|
+-------+----+------------+----------+----------+
| Lilian|  23|arquitectura|        94|         1|
|  Luisa|  39|arquitectura|        94|         2|
| Lazaro|  45|      letras|        98|         1|
|  Julia|  55|      letras|        92|         2|
|   Raul|  24|  matemática|        76|         1|
|   Aian|  50|  matemática|        73|         2|
| Susana|  51|     química|        98|         1|
|Enrique|  40|     química|        92|         2|
+-------+----+------------+----------+----------+



In [78]:
# rank: tied evaluations share a rank and the following rank is skipped
# (1, 1, 3, ...).
(
    df
    .withColumn('rank', rank().over(windowSpec))
    .show()
)

+-------+----+------------+----------+----+
| nombre|edad|departamento|evaluacion|rank|
+-------+----+------------+----------+----+
| Lilian|  23|arquitectura|        94|   1|
|  Luisa|  39|arquitectura|        94|   1|
|    Jon|  25|arquitectura|        78|   3|
|  Kadir|  38|arquitectura|        39|   4|
| Lazaro|  45|      letras|        98|   1|
|  Julia|  55|      letras|        92|   2|
|   Rosa|  26|      letras|        91|   3|
|   Juan|  44|      letras|        89|   4|
|Yaneisy|  29|      letras|        89|   4|
|   Raul|  24|  matemática|        76|   1|
|   Aian|  50|  matemática|        73|   2|
|  Maria|  34|  matemática|        27|   3|
| Susana|  51|     química|        98|   1|
|Enrique|  40|     química|        92|   2|
|   Jose|  30|     química|        78|   3|
+-------+----+------------+----------+----+



In [80]:
# dense_rank: tied evaluations share a rank and no rank is skipped
# (1, 1, 2, ...).
(
    df
    .withColumn('dense_rank', dense_rank().over(windowSpec))
    .show()
)

+-------+----+------------+----------+----------+
| nombre|edad|departamento|evaluacion|dense_rank|
+-------+----+------------+----------+----------+
| Lilian|  23|arquitectura|        94|         1|
|  Luisa|  39|arquitectura|        94|         1|
|    Jon|  25|arquitectura|        78|         2|
|  Kadir|  38|arquitectura|        39|         3|
| Lazaro|  45|      letras|        98|         1|
|  Julia|  55|      letras|        92|         2|
|   Rosa|  26|      letras|        91|         3|
|   Juan|  44|      letras|        89|         4|
|Yaneisy|  29|      letras|        89|         4|
|   Raul|  24|  matemática|        76|         1|
|   Aian|  50|  matemática|        73|         2|
|  Maria|  34|  matemática|        27|         3|
| Susana|  51|     química|        98|         1|
|Enrique|  40|     química|        92|         2|
|   Jose|  30|     química|        78|         3|
+-------+----+------------+----------+----------+



In [81]:
windowSpecAgg = Window.partitionBy('departamento')

In [82]:
# Department-level min / max / average (unordered window) shown beside each
# row's position in the department ranking (ordered window).
# min and max here are the pyspark.sql.functions versions brought in by the
# wildcard import, not the Python builtins.
(
    df
    .withColumn('min', min(col('evaluacion')).over(windowSpecAgg))
    .withColumn('max', max(col('evaluacion')).over(windowSpecAgg))
    .withColumn('avg', avg(col('evaluacion')).over(windowSpecAgg))
    .withColumn('row_number', row_number().over(windowSpec))
    .show()
)

+-------+----+------------+----------+---+---+------------------+----------+
| nombre|edad|departamento|evaluacion|min|max|               avg|row_number|
+-------+----+------------+----------+---+---+------------------+----------+
| Lilian|  23|arquitectura|        94| 39| 94|             76.25|         1|
|  Luisa|  39|arquitectura|        94| 39| 94|             76.25|         2|
|    Jon|  25|arquitectura|        78| 39| 94|             76.25|         3|
|  Kadir|  38|arquitectura|        39| 39| 94|             76.25|         4|
| Lazaro|  45|      letras|        98| 89| 98|              91.8|         1|
|  Julia|  55|      letras|        92| 89| 98|              91.8|         2|
|   Rosa|  26|      letras|        91| 89| 98|              91.8|         3|
|   Juan|  44|      letras|        89| 89| 98|              91.8|         4|
|Yaneisy|  29|      letras|        89| 89| 98|              91.8|         5|
|   Raul|  24|  matemática|        76| 27| 76|58.666666666666664|         1|

# Catalyst optimizer

In [86]:
# Parquet is self-describing (schema and column names are stored in the file);
# 'inferSchema' and 'header' are CSV reader options with no effect on a
# Parquet read, so they have been removed.
data = spark.read.parquet('vuelos.parquet')

In [87]:
data.show()

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|YEAR|MONTH|DAY|DAY_OF_WEEK|AIRLINE|FLIGHT_NUMBER|TAIL_NUMBER|ORIGIN_AIRPORT|DESTINATION_AIRPORT|SCHEDULED_DEPARTURE|DEPARTURE_TIME|DEPARTURE_DELAY|TAXI_OUT|WHEELS_OFF|SCHEDULED_TIME|ELAPSED_TIME|AIR_TIME|DISTANCE|WHEELS_ON|TAXI_IN|SCHEDULED_ARRIVAL|ARRIVAL_TIME|ARRIVAL_DELAY|DIVERTED|CANCELLED|CANCELLATION_REASON|AIR_SYSTEM_DELAY|SECURITY_DELAY|AIRLINE_DELAY|LATE_AIRCRAFT_DELAY|WEATHER_DELAY|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

In [89]:
data.printSchema()

root
 |-- YEAR: integer (nullable = true)
 |-- MONTH: integer (nullable = true)
 |-- DAY: integer (nullable = true)
 |-- DAY_OF_WEEK: integer (nullable = true)
 |-- AIRLINE: string (nullable = true)
 |-- FLIGHT_NUMBER: integer (nullable = true)
 |-- TAIL_NUMBER: string (nullable = true)
 |-- ORIGIN_AIRPORT: string (nullable = true)
 |-- DESTINATION_AIRPORT: string (nullable = true)
 |-- SCHEDULED_DEPARTURE: integer (nullable = true)
 |-- DEPARTURE_TIME: integer (nullable = true)
 |-- DEPARTURE_DELAY: integer (nullable = true)
 |-- TAXI_OUT: integer (nullable = true)
 |-- WHEELS_OFF: integer (nullable = true)
 |-- SCHEDULED_TIME: integer (nullable = true)
 |-- ELAPSED_TIME: integer (nullable = true)
 |-- AIR_TIME: integer (nullable = true)
 |-- DISTANCE: integer (nullable = true)
 |-- WHEELS_ON: integer (nullable = true)
 |-- TAXI_IN: integer (nullable = true)
 |-- SCHEDULED_ARRIVAL: integer (nullable = true)
 |-- ARRIVAL_TIME: integer (nullable = true)
 |-- ARRIVAL_DELAY: integer (null

In [90]:
# Flights in months 6-8 for airlines AA, DL and AS, reduced to the airline and
# the ratio DISTANCE / AIR_TIME.
# 'month' resolves to the MONTH column because Spark matches column names
# case-insensitively by default.
nuevodf = (
    data
    .filter(col('month').isin(6, 7, 8))
    .withColumn('dis_tiempo_aire', col('DISTANCE') / col('AIR_TIME'))
    .select('AIRLINE', 'dis_tiempo_aire')
    .where(col('AIRLINE').isin('AA', 'DL', 'AS'))
)

In [92]:
nuevodf.explain(True)

== Parsed Logical Plan ==
'Filter 'AIRLINE IN (AA,DL,AS)
+- Project [AIRLINE#521, dis_tiempo_aire#705]
   +- Project [YEAR#517, MONTH#518, DAY#519, DAY_OF_WEEK#520, AIRLINE#521, FLIGHT_NUMBER#522, TAIL_NUMBER#523, ORIGIN_AIRPORT#524, DESTINATION_AIRPORT#525, SCHEDULED_DEPARTURE#526, DEPARTURE_TIME#527, DEPARTURE_DELAY#528, TAXI_OUT#529, WHEELS_OFF#530, SCHEDULED_TIME#531, ELAPSED_TIME#532, AIR_TIME#533, DISTANCE#534, WHEELS_ON#535, TAXI_IN#536, SCHEDULED_ARRIVAL#537, ARRIVAL_TIME#538, ARRIVAL_DELAY#539, DIVERTED#540, ... 8 more fields]
      +- Filter month#518 IN (6,7,8)
         +- Relation [YEAR#517,MONTH#518,DAY#519,DAY_OF_WEEK#520,AIRLINE#521,FLIGHT_NUMBER#522,TAIL_NUMBER#523,ORIGIN_AIRPORT#524,DESTINATION_AIRPORT#525,SCHEDULED_DEPARTURE#526,DEPARTURE_TIME#527,DEPARTURE_DELAY#528,TAXI_OUT#529,WHEELS_OFF#530,SCHEDULED_TIME#531,ELAPSED_TIME#532,AIR_TIME#533,DISTANCE#534,WHEELS_ON#535,TAXI_IN#536,SCHEDULED_ARRIVAL#537,ARRIVAL_TIME#538,ARRIVAL_DELAY#539,DIVERTED#540,... 7 more fields]