## Imports

In [189]:
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from pathlib import Path
from datetime import datetime
import metpy.io as mtio
import metpy.calc as mtcl
import metpy.units as mtun
import pandas as pd
from tqdm import tqdm
import math

# Apply seaborn's default theme to every matplotlib figure in this notebook.
# `set_theme()` is the preferred spelling; `set()` is only kept as an alias for it.
sns.set_theme()

## Reading data

In [190]:
# The intermediate data lives in <parent of the working directory>/data/interm.
data_dir = Path.cwd().parent / "data" / "interm"
main_ds = pl.read_csv(
    data_dir / "train_val.csv",
    null_values="NA",  # cells containing the literal text "NA" are read as null
)

In [191]:
# Preview the first rows of the freshly loaded dataset.
print(main_ds.head())

shape: (5, 10)
┌─────────────────────┬────────────────────┬────────┬─────────┬───┬────────────────────┬────────────────────┬────────────────────┬────────┐
│ flightid            ┆ hora_ref           ┆ origem ┆ destino ┆ … ┆ metar              ┆ prev_troca_cabecei ┆ troca_cabeceira_ho ┆ espera │
│ ---                 ┆ ---                ┆ ---    ┆ ---     ┆   ┆ ---                ┆ ra                 ┆ ra_anterior        ┆ ---    │
│ str                 ┆ str                ┆ str    ┆ str     ┆   ┆ str                ┆ ---                ┆ ---                ┆ i64    │
│                     ┆                    ┆        ┆         ┆   ┆                    ┆ i64                ┆ i64                ┆        │
╞═════════════════════╪════════════════════╪════════╪═════════╪═══╪════════════════════╪════════════════════╪════════════════════╪════════╡
│ 504a62621cd231d6ab6 ┆ 2022-06-01T01:00:0 ┆ SBCF   ┆ SBFL    ┆ … ┆ METAR SBFL 010000Z ┆ 0                  ┆ 1                  ┆ 0      │
│ 7e6

In [192]:
# Column names and the dtypes read_csv inferred for them (no explicit schema was passed).
main_ds.schema

OrderedDict([('flightid', String),
             ('hora_ref', String),
             ('origem', String),
             ('destino', String),
             ('url_img_satelite', String),
             ('metaf', String),
             ('metar', String),
             ('prev_troca_cabeceira', Int64),
             ('troca_cabeceira_hora_anterior', Int64),
             ('espera', Int64)])

## Description of each column

In [193]:
# Per-column summary statistics (counts, nulls, ...) for the whole dataset.
print(main_ds.describe())

shape: (9, 11)
┌────────────┬────────────────────┬───────────────────┬────────┬───┬───────────────────┬───────────────────┬───────────────────┬──────────┐
│ statistic  ┆ flightid           ┆ hora_ref          ┆ origem ┆ … ┆ metar             ┆ prev_troca_cabece ┆ troca_cabeceira_h ┆ espera   │
│ ---        ┆ ---                ┆ ---               ┆ ---    ┆   ┆ ---               ┆ ira               ┆ ora_anterior      ┆ ---      │
│ str        ┆ str                ┆ str               ┆ str    ┆   ┆ str               ┆ ---               ┆ ---               ┆ f64      │
│            ┆                    ┆                   ┆        ┆   ┆                   ┆ f64               ┆ f64               ┆          │
╞════════════╪════════════════════╪═══════════════════╪════════╪═══╪═══════════════════╪═══════════════════╪═══════════════════╪══════════╡
│ count      ┆ 211679             ┆ 211679            ┆ 211679 ┆ … ┆ 210055            ┆ 211679.0          ┆ 211679.0          ┆ 211679.0 │
│ nul

## Learning to process 'hora_ref'

In [194]:
main_ds["hora_ref"][0] # inspect the first raw 'hora_ref' value (still a string at this point)

'2022-06-01T01:00:00Z'

In [195]:
main_ds["hora_ref"].str.ends_with("Z").all() # True when every value ends in "Z"; this only checks the suffix, not the rest of the timestamp layout

True

### Casting to datetime

In [196]:
# Swap the string timestamps for real datetimes so their date/time parts can be read directly.
main_ds = main_ds.with_columns(pl.col("hora_ref").cast(pl.Datetime))
print(main_ds.head())

shape: (5, 10)
┌──────────────────────┬──────────────┬────────┬─────────┬───┬──────────────────────┬──────────────────────┬─────────────────────┬────────┐
│ flightid             ┆ hora_ref     ┆ origem ┆ destino ┆ … ┆ metar                ┆ prev_troca_cabeceira ┆ troca_cabeceira_hor ┆ espera │
│ ---                  ┆ ---          ┆ ---    ┆ ---     ┆   ┆ ---                  ┆ ---                  ┆ a_anterior          ┆ ---    │
│ str                  ┆ datetime[μs] ┆ str    ┆ str     ┆   ┆ str                  ┆ i64                  ┆ ---                 ┆ i64    │
│                      ┆              ┆        ┆         ┆   ┆                      ┆                      ┆ i64                 ┆        │
╞══════════════════════╪══════════════╪════════╪═════════╪═══╪══════════════════════╪══════════════════════╪═════════════════════╪════════╡
│ 504a62621cd231d6ab67 ┆ 2022-06-01   ┆ SBCF   ┆ SBFL    ┆ … ┆ METAR SBFL 010000Z   ┆ 0                    ┆ 1                   ┆ 0      │
│ e67

### Visualizing some possible (pre-)features

In [197]:
# Take one arbitrary timestamp and print the date/time parts that could be turned into features.
dt = main_ds["hora_ref"][200]
# Order printed: year, month, day, weekday (Monday = 0), hour, minute.
print(dt.year, dt.month, dt.day, dt.weekday(), dt.hour, dt.minute)

2022 6 1 2 11 0


## Learning to process 'metar' and 'metaf'

In [198]:
# Parse one report. The year and month are passed separately, taken from the row's 'hora_ref'.
i = 0
mtio.metar.parse_metar(main_ds["metar"][i], main_ds["hora_ref"][i].year, main_ds["hora_ref"][i].month)

metar(station_id='SBFL', latitude=-27.67, longitude=-48.53, elevation=5, date_time=datetime.datetime(2022, 6, 1, 0, 0), wind_direction=170, wind_speed=9.0, wind_gust=nan, visibility=9999, current_wx1=nan, current_wx2=nan, current_wx3=nan, skyc1='BKN', skylev1=3000.0, skyc2=nan, skylev2=nan, skyc3=nan, skylev3=nan, skyc4=nan, skylev4=nan, cloudcover=6, temperature=14.0, dewpoint=7.0, altimeter=30.1205829670303, current_wx1_symbol=0, current_wx2_symbol=0, current_wx3_symbol=0, remarks='')

In [199]:
# Same report (uses `i` from the previous cell) with year=0 and month=0:
# in the output below only date_time changes (it becomes nan); every other field is parsed as before.
mtio.metar.parse_metar(main_ds["metar"][i], 0, 0)

metar(station_id='SBFL', latitude=-27.67, longitude=-48.53, elevation=5, date_time=nan, wind_direction=170, wind_speed=9.0, wind_gust=nan, visibility=9999, current_wx1=nan, current_wx2=nan, current_wx3=nan, skyc1='BKN', skylev1=3000.0, skyc2=nan, skylev2=nan, skyc3=nan, skylev3=nan, skyc4=nan, skylev4=nan, cloudcover=6, temperature=14.0, dewpoint=7.0, altimeter=30.1205829670303, current_wx1_symbol=0, current_wx2_symbol=0, current_wx3_symbol=0, remarks='')

In [200]:
# Parse a placeholder report with a made-up station and no weather groups to see what an
# "empty" result looks like: per the output below, the fields come back as nan (cloudcover=10).
mtio.metar.parse_metar("METAR ZZZZ 000000Z", 0, 0)

metar(station_id='ZZZZ', latitude=nan, longitude=nan, elevation=nan, date_time=nan, wind_direction=nan, wind_speed=nan, wind_gust=nan, visibility=nan, current_wx1=nan, current_wx2=nan, current_wx3=nan, skyc1=nan, skylev1=nan, skyc2=nan, skylev2=nan, skyc3=nan, skylev3=nan, skyc4=nan, skylev4=nan, cloudcover=10, temperature=nan, dewpoint=nan, altimeter=nan, current_wx1_symbol=0, current_wx2_symbol=0, current_wx3_symbol=0, remarks='')

In [201]:
# Replace null 'metar' entries with empty strings.
main_ds = main_ds.with_columns(pl.col("metar").fill_null(""))

In [221]:
def get_parsed_metars(dataset):
    """Parse every report in ``dataset["metar"]`` into per-field Python lists.

    Parameters
    ----------
    dataset
        Frame with a ``"metar"`` column (report text; null or ``""`` means
        "no report") and a ``"hora_ref"`` column whose values expose ``.year``
        and ``.month`` (passed to the parser alongside the report text).

    Returns
    -------
    tuple of 18 lists, one entry per row of ``dataset``, in this order:
        elevation, temperature, dew point, visibility, wind direction
        (converted from degrees to radians), wind speed, wind gust,
        skyc1, skylev1, skyc2, skylev2, skyc3, skylev3, skyc4, skylev4,
        cloud cover, altimeter, station pressure (in atm).

    Missing values (NaN from the parser, a missing report, or a report that
    fails to parse) are stored as ``None``. Parsing errors are printed and do
    not abort the run.
    """
    n_fields = 18
    missing_row = (None,) * n_fields
    # Tokens removed from the report text before it is handed to the parser
    # (presumably because the parser does not accept them -- TODO confirm).
    stripped_tokens = (" COR ", " AUTO ", " AO1 ", " AO2 ")

    def _nan_to_none(value):
        """Return None for a NaN number, otherwise the value unchanged."""
        return None if math.isnan(value) else value

    def _str_or_none(value):
        """Return the value if it is a string (a sky-cover code), otherwise None."""
        return value if isinstance(value, str) else None

    def _parse_row(raw_metar, hora_ref):
        """Parse one report and return its 18 fields as a tuple (may raise)."""
        for token in stripped_tokens:
            raw_metar = raw_metar.replace(token, " ")
        parsed = mtio.metar.parse_metar(raw_metar, hora_ref.year, hora_ref.month)

        wind_dir = _nan_to_none(parsed.wind_direction)
        if wind_dir is not None:
            wind_dir = np.deg2rad(wind_dir)

        # Temperature is not an input of the conversion below, but the pressure is
        # only computed when it is present too; kept so results stay unchanged.
        if math.isnan(parsed.altimeter) or math.isnan(parsed.elevation) or math.isnan(parsed.temperature):
            pressure = None
        else:
            pressure = mtcl.altimeter_to_station_pressure(
                mtun.pint.Quantity(parsed.altimeter, "inHg"),
                mtun.pint.Quantity(parsed.elevation, "meters")
            ).to("hPa").magnitude / 1_013.25  # hPa -> atm

        return (
            _nan_to_none(parsed.elevation),
            _nan_to_none(parsed.temperature),
            _nan_to_none(parsed.dewpoint),
            _nan_to_none(parsed.visibility),
            wind_dir,
            _nan_to_none(parsed.wind_speed),
            _nan_to_none(parsed.wind_gust),
            _str_or_none(parsed.skyc1),
            _nan_to_none(parsed.skylev1),
            _str_or_none(parsed.skyc2),
            _nan_to_none(parsed.skylev2),
            _str_or_none(parsed.skyc3),
            _nan_to_none(parsed.skylev3),
            _str_or_none(parsed.skyc4),
            _nan_to_none(parsed.skylev4),
            _nan_to_none(parsed.cloudcover),
            _nan_to_none(parsed.altimeter),
            pressure,
        )

    # Read from the frame that was passed in (not a notebook global), and pull each
    # column out once instead of re-indexing the frame on every iteration.
    metars = dataset["metar"].to_list()
    hora_refs = dataset["hora_ref"].to_list()

    rows = []
    for raw_metar, hora_ref in tqdm(zip(metars, hora_refs), total=len(metars)):
        if not raw_metar:  # null or empty string: no report for this row
            rows.append(missing_row)
            continue
        try:
            # The whole row is built before anything is stored, so a failure
            # half-way through can never leave the output lists misaligned.
            rows.append(_parse_row(raw_metar, hora_ref))
        except Exception as e:
            print(e)
            rows.append(missing_row)

    if not rows:
        return tuple([] for _ in range(n_fields))
    # Transpose the list of row tuples into one list per field.
    return tuple(list(field_values) for field_values in zip(*rows))

In [222]:
# Output column names, in the same order as the lists returned by get_parsed_metars.
# The low/medium/high/highest "*_cloud_type" and "*_cloud_level" columns hold the parser's
# skyc1..skyc4 and skylev1..skylev4 fields, in that order.
columns = [
    "elevation", "air_temperature", "dew_point_temp", "visibility", "wind_direction_rad", "wind_speed", "wind_gust", 
    "low_cloud_type", "low_cloud_level", "medium_cloud_type", "medium_cloud_level", "high_cloud_type", 
    "high_cloud_level", "highest_cloud_type", "highest_cloud_level", "cloud_coverage_oktas", "altimeter", "pressure_station_level_atm"
]
parsed_metar_fields = get_parsed_metars(main_ds)
assert len(parsed_metar_fields) == len(columns), "column names and parsed fields are out of sync"
# Build the frame from a name -> values mapping so every list is unambiguously a column;
# a bare sequence of lists would leave row/column orientation to be inferred.
metar_data = pl.DataFrame(dict(zip(columns, parsed_metar_fields)))
metar_data

 71%|█████████████████████████████████████████████████████████████████                           | 149805/211679 [03:14<01:19, 779.62it/s]

Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBSV 051300 10012KT 9999 FEW021 SCT050 28/24 Q1014=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBSV 051300 10012KT 9999 FEW021 SCT050 28/24 Q1014=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBSV 051400 09013KT 9999 VCSH FEW021 SCT050 29/24 Q1013=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBSV 051400 09013KT 9999 VCSH FEW021 SCT050 29/24 Q1013=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBSV 051400 09013KT 9999 VCSH FEW021 SCT050 29/24 Q1013=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
  

 78%|███████████████████████████████████████████████████████████████████████▌                    | 164575/211679 [03:33<00:59, 793.58it/s]

Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBFL 221300 17006KT 9999 BKN020 24/16 Q1017=
                          ^
Line 1: expected one of:

    - [\d] from METAR::datetime
    - "Z" from METAR::datetime

     1 | METAR SBFL 221300 17006KT 9999 BKN020 24/16 Q1017=
                          ^


100%|████████████████████████████████████████████████████████████████████████████████████████████| 211679/211679 [04:40<00:00, 755.45it/s]


elevation,air_temperature,dew_point_temp,visibility,wind_direction_rad,wind_speed,wind_gust,low_cloud_type,low_cloud_level,medium_cloud_type,medium_cloud_level,high_cloud_type,high_cloud_level,highest_cloud_type,highest_cloud_level,cloud_coverage_oktas,altimeter,pressure_station_level_atm
i64,f64,f64,i64,f64,f64,f64,str,f64,str,f64,str,f64,str,f64,i64,f64,f64
5,14.0,7.0,9999,2.96706,9.0,,"""BKN""",3000.0,,,,,,,6,30.120583,1.006362
5,14.0,7.0,9999,2.96706,9.0,,"""BKN""",3000.0,,,,,,,6,30.120583,1.006362
828,21.0,14.0,10000,2.094395,6.0,,,,,,,,,,0,30.061523,0.910229
908,10.0,10.0,7000,1.745329,6.0,,"""BKN""",400.0,"""OVC""",1200.0,,,,,8,30.091053,0.902375
6,27.0,21.0,9999,1.919862,8.0,,"""FEW""",2300.0,,,,,,,2,29.943403,1.000324
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
803,23.0,17.0,9999,6.108652,14.0,,"""SCT""",3000.0,,,,,,,4,29.854813,0.906588
19,29.0,23.0,9999,1.745329,7.0,,"""BKN""",2000.0,,,,,,,6,29.854813,0.995828
3,29.0,22.0,9999,5.585054,10.0,,"""FEW""",3500.0,"""BKN""",10000.0,,,,,6,29.677633,0.9918
661,24.0,19.0,9999,6.283185,12.0,,"""FEW""",2300.0,,,,,,,2,29.854813,0.922252
