In [72]:
from pyspark.sql.types import StructType, StructField, FloatType, LongType, StringType
from pyspark.shell import spark

# Build the Spark schema from features.txt, which lists one column name per line.
# The first line is typed as a long (timestamp), the second as a string (geohash),
# and every remaining line as a nullable float feature.
feats = []
# Use a context manager so the file handle is closed even if a line fails to parse;
# the original left the handle open for the lifetime of the kernel.
with open('features.txt') as f:
    for line_num, line in enumerate(f):
        if line_num == 0:
            # Timestamp
            feats.append(StructField(line.strip(), LongType(), True))
        elif line_num == 1:
            # Geohash
            feats.append(StructField(line.strip(), StringType(), True))
        else:
            # Other features
            feats.append(StructField(line.strip(), FloatType(), True))

schema = StructType(feats)

In [73]:
# Load the tab-separated input file using the explicit schema built above
# (no schema inference is requested).
df = spark.read.csv('inputs/nam_mini.tdv', schema=schema, sep='\t')

In [74]:
# Number of columns in the loaded DataFrame (one per field in the schema).
len(df.columns)

58

In [75]:
import pyspark.sql.functions as sf
# Skip columns 0 and 1 (typed as timestamp and geohash in the schema cell);
# every remaining column is treated as a numeric feature.
feature_names = list(df.columns[2:])

# One DataFrame per feature, produced by df.select(...) with the aggregate
# columns Max, Min, Avg and Std_Dev; kept in the same order as feature_names.
summary_values = [
    df.select(
        sf.max(col_name).alias("Max"),
        sf.min(col_name).alias("Min"),
        sf.avg(col_name).alias("Avg"),
        sf.stddev(col_name).alias("Std_Dev"),
    )
    for col_name in feature_names
]

In [76]:
# Print each feature's name followed by its Max/Min/Avg/Std_Dev summary table.
for idx, feature_summary in enumerate(summary_values):
    print("Feature: " + feature_names[idx] + "\n")
    feature_summary.select("Max", "Min", "Avg", "Std_Dev").show()

Feature: geopotential_height_lltw

+--------+---------+------------------+------------------+
|     Max|      Min|               Avg|           Std_Dev|
+--------+---------+------------------+------------------+
|4902.578|-5817.172|1571.8196034524215|1850.1112628212659|
+--------+---------+------------------+------------------+

Feature: water_equiv_of_accum_snow_depth_surface

+------+---+------------------+-----------------+
|   Max|Min|               Avg|          Std_Dev|
+------+---+------------------+-----------------+
|5501.0|0.0|20.543379364668635|68.09250334632227|
+------+---+------------------+-----------------+

Feature: drag_coefficient_surface

+-------+---+-------------------+------------------+
|    Max|Min|                Avg|           Std_Dev|
+-------+---+-------------------+------------------+
|11800.0|0.0|0.08109455386769765|18.554648814462873|
+-------+---+-------------------+------------------+

Feature: sensible_heat_net_flux_surface

+---------+----------+----

+---+---+-------------------+------------------+
|Max|Min|                Avg|           Std_Dev|
+---+---+-------------------+------------------+
|1.0|0.0|0.09887382416511918|0.2984926729769393|
+---+---+-------------------+------------------+

Feature: downward_long_wave_rad_flux_surface

+---------+---------+-----------------+-----------------+
|      Max|      Min|              Avg|          Std_Dev|
+---------+---------+-----------------+-----------------+
|477.64667|128.84128|317.2842871058952|66.07543316435003|
+---------+---------+-----------------+-----------------+

Feature: planetary_boundary_layer_height_surface

+-------+-----+------------------+------------------+
|    Max|  Min|               Avg|           Std_Dev|
+-------+-----+------------------+------------------+
|16691.5|49.75|1474.2216218054582|1204.0192087738408|
+-------+-----+------------------+------------------+

Feature: soil_type_as_in_zobler_surface

+----+---+-----------------+------------------+
| Max|M

+---------+---------+-----------------+------------------+
|      Max|      Min|              Avg|           Std_Dev|
+---------+---------+-----------------+------------------+
|59.385254|0.7078148|16.54948786552142|10.593789793864588|
+---------+---------+-----------------+------------------+

Feature: u-component_of_wind_pblri

+---------+----------+-----------------+-----------------+
|      Max|       Min|              Avg|          Std_Dev|
+---------+----------+-----------------+-----------------+
|27.820816|-30.641708|0.514368695782223|6.255974012101941|
+---------+----------+-----------------+-----------------+

Feature: direct_evaporation_cease_soil_moisture_surface

+----------+---+--------------------+-------------------+
|       Max|Min|                 Avg|            Std_Dev|
+----------+---+--------------------+-------------------+
|0.13499999|0.0|0.031106272933598897|0.03738218278900426|
+----------+---+--------------------+-------------------+

