# Parse Satellite Data

In [41]:
from pathlib import Path
import pandas as pd

def load_lst_folder_with_datetime(
    folder_path,
    column_names,
    target_columns,
    datetime_cols=("year", "month", "day", "hour", "minute", "second"),
    file_extension=".lst",
    skiprows=0,
    fill_value=1.0e35,
    datetime_col_name="datetime"
):
    """Load every whitespace-delimited listing file in a folder into one DataFrame.

    Each file is parsed without a header row, a timezone-naive timestamp
    column is assembled from the date/time component columns, the component
    columns are dropped, and all files are concatenated in sorted filename
    order. Fill values are then turned into NaN and rows without any usable
    measurement are removed.

    Parameters
    ----------
    folder_path : str or Path
        Directory that is searched (non-recursively) for input files.
    column_names : list of str
        Names assigned to the columns of every file, in file order. Must
        contain a ``"year"`` column and every name in ``datetime_cols``.
    target_columns : list of str
        Rows in which all of these columns are missing are dropped.
    datetime_cols : sequence of str
        Component columns handed to ``pd.to_datetime`` to build the timestamp.
    file_extension : str
        Suffix used in the ``*{file_extension}`` glob pattern.
    skiprows : int
        Forwarded to ``pd.read_csv``.
    fill_value : float
        Sentinel that marks a missing measurement; replaced by NaN.
    datetime_col_name : str
        Name of the assembled timestamp column.

    Returns
    -------
    pandas.DataFrame
        The non-datetime columns followed by ``datetime_col_name``, with a
        fresh ``0..n-1`` index.

    Raises
    ------
    FileNotFoundError
        If no file in ``folder_path`` matches ``file_extension``.
    """
    folder_path = Path(folder_path)
    files = sorted(folder_path.glob(f"*{file_extension}"))

    if not files:
        raise FileNotFoundError(f"No {file_extension} files found in {folder_path}")

    datetime_cols = list(datetime_cols)

    frames = []
    for file in files:
        df = pd.read_csv(
            file,
            sep=r"\s+",  # raw string: '\s' in a plain literal is an invalid escape
            names=column_names,
            header=None,
            skiprows=skiprows,
        )
        # Two-digit years are offsets from 1900; values that already carry a
        # century (>= 100) are left untouched instead of being pushed out of range.
        df["year"] = df["year"].where(df["year"] >= 100, df["year"] + 1900)
        # Assemble in UTC, then strip the timezone so the column is tz-naive.
        # Written under datetime_col_name so a non-default name works.
        df[datetime_col_name] = pd.to_datetime(
            df[datetime_cols],
            errors="coerce",
            utc=True,
        ).dt.tz_localize(None)
        frames.append(df.drop(columns=datetime_cols))

    full_df = pd.concat(frames, ignore_index=True)

    # Coerce measurement columns to numeric first, so a fill value that was
    # read as text (object-dtype column) is still recognised below.
    for col in column_names:
        if col not in datetime_cols:
            full_df[col] = pd.to_numeric(full_df[col], errors="coerce")

    # NaN (not pd.NA) keeps the float columns as float64.
    full_df = full_df.replace(fill_value, float("nan"))

    # Everything except the timestamp (and a source_file column, if present).
    data_cols = [
        c for c in full_df.columns
        if c not in {datetime_col_name, "source_file"}
    ]

    # Drop rows with no data at all, then rows with no target measurement.
    full_df = full_df.dropna(subset=data_cols, how="all")
    full_df = full_df.dropna(subset=target_columns, how="all")

    return full_df.reset_index(drop=True)

  sep='\s+',


## AE-C

In [42]:
# Parse every AE-C listing file into a single frame.
# NOTE(review): the folder is an absolute, machine-specific path.
ae_c = load_lst_folder_with_datetime(
    folder_path="/Users/elliotdable/Documents/PhD/research/fpi_ml/data/atmospheric_explorer/ae_c",
    column_names=[
        # timestamp components
        "year", "month", "day", "hour", "minute", "second",
        # position / geometry
        "altitude", "latitude", "longitude",
        "local_time", "local_magnetic_time",
        "magnetic_inclination", "solar_zenith_angle",
        "orbit_number",
        # measurements
        "mesa_wind_component", "neutral_temp", "vertical_winds",
    ],
    # A row is kept only if at least one of these is present.
    target_columns=[
        "neutral_temp",
        "vertical_winds",
        "mesa_wind_component",
    ],
)

In [43]:
# Spot-check five randomly chosen rows of the parsed AE-C frame.
ae_c.sample(5)

Unnamed: 0,altitude,latitude,longitude,local_time,local_magnetic_time,magnetic_inclination,solar_zenith_angle,orbit_number,mesa_wind_component,neutral_temp,vertical_winds,datetime
72782,243.7,12.78,51.43,13.36,13.77,9.661,38.7,5118.0,,945.8,,1975-01-19 10:06:15
16418,283.0,35.13,-78.82,11.11,11.0,66.71,59.74,4657.0,,799.5,,1974-12-21 16:20:00
72394,252.2,-22.64,-90.13,14.79,14.66,-23.1,38.89,5093.0,,1058.0,,1975-01-17 20:58:15
67325,405.3,-58.71,-67.72,22.37,22.28,-54.87,112.0,17490.0,,,-1.0,1977-03-04 03:04:45
66810,279.8,3.937,-71.28,7.443,7.443,30.09,68.63,3408.0,,721.2,,1974-09-30 12:01:45


## AE-D

In [44]:
# Parse every AE-D listing file into a single frame.
# NOTE(review): the folder is an absolute, machine-specific path.
ae_d = load_lst_folder_with_datetime(
    folder_path="/Users/elliotdable/Documents/PhD/research/fpi_ml/data/atmospheric_explorer/ae_d",
    column_names=[
        # timestamp components
        "year", "month", "day", "hour", "minute", "second",
        # position / geometry
        "altitude", "latitude", "longitude",
        "local_time", "local_magnetic_time",
        "magnetic_inclination", "solar_zenith_angle",
        "orbit_number",
        # measurements
        "mesa_wind_component", "neutral_temp", "zonal_winds",
    ],
    # A row is kept only if at least one of these is present.
    target_columns=[
        "neutral_temp",
        "zonal_winds",
        "mesa_wind_component",
    ],
)

In [45]:
# Spot-check five randomly chosen rows of the parsed AE-D frame.
ae_d.sample(5)

Unnamed: 0,altitude,latitude,longitude,local_time,local_magnetic_time,magnetic_inclination,solar_zenith_angle,orbit_number,mesa_wind_component,neutral_temp,zonal_winds,datetime
23338,260.8,-30.05,114.0,7.523,7.211,-64.95,59.6,980.0,,803.5,,1975-12-28 23:57:00
5075,198.1,85.09,175.9,23.12,20.22,,111.6,392.0,,766.3,,1975-11-09 11:07:00
9444,150.7,64.66,-44.32,9.797,11.12,78.89,89.3,603.0,,0.04921,101.4,1975-11-27 12:32:30
4436,169.0,50.83,-69.97,23.21,23.13,77.0,144.1,365.0,,0.003397,-141.4,1975-11-07 03:36:00
3850,219.6,37.12,-76.7,23.34,23.22,68.39,156.8,342.0,,698.3,-128.1,1975-11-05 04:10:30


## AE-E

In [46]:
# Parse every AE-E listing file into a single frame.
# NOTE(review): the folder is an absolute, machine-specific path.
ae_e = load_lst_folder_with_datetime(
    folder_path="/Users/elliotdable/Documents/PhD/research/fpi_ml/data/atmospheric_explorer/ae_e",
    column_names=[
        # timestamp components
        "year", "month", "day", "hour", "minute", "second",
        # position / geometry
        "altitude", "latitude", "longitude",
        "local_time", "local_magnetic_time",
        "magnetic_inclination", "solar_zenith_angle",
        "orbit_number",
        # measurements
        "mesa_wind_component", "neutral_temp",
        "meridional_winds", "vertical_winds",
    ],
    # A row is kept only if at least one of these is present.
    target_columns=[
        "neutral_temp",
        "meridional_winds",
        "vertical_winds",
        "mesa_wind_component",
    ],
)

# Flip the sign so that northward meridional wind is positive.
ae_e['meridional_winds'] = -ae_e['meridional_winds']

In [47]:
# Spot-check five randomly chosen rows of the parsed AE-E frame.
ae_e.sample(5)

Unnamed: 0,altitude,latitude,longitude,local_time,local_magnetic_time,magnetic_inclination,solar_zenith_angle,orbit_number,mesa_wind_component,neutral_temp,meridional_winds,vertical_winds,datetime
128398,220.2,5.972,173.8,23.49,23.67,4.173,165.1,5041.0,,,-15.45,,1976-11-16 11:39:15
143784,248.9,-8.755,35.68,4.694,4.374,-40.75,105.6,6227.0,,703.8,-4.686,,1977-01-29 02:32:00
22182,376.8,9.798,-134.3,17.39,17.21,25.08,80.66,15770.0,,1094.0,84.27,,1978-09-19 02:14:15
170231,260.2,19.48,99.66,23.02,23.04,23.39,147.4,7460.0,,732.0,-40.73,,1977-04-15 16:22:30
113607,188.8,5.249,138.5,2.025,2.03,-5.462,148.5,4055.0,,605.5,,,1976-09-14 16:42:45


## DE-2

In [48]:
# Parse every DE-2 listing file into a single frame.
# NOTE(review): in the rendered output of this notebook, "mesa_wind_component"
# holds values around 1000-1400, "neutral_temp" holds signed wind-like values
# and "neutral_temperature_2" is empty in every displayed row. That pattern
# suggests the names below may be shifted by one column relative to the files.
# TODO: confirm the column order against the DE-2 file format description.
# NOTE(review): the folder is an absolute, machine-specific path.
de_2 = load_lst_folder_with_datetime(
    folder_path="/Users/elliotdable/Documents/PhD/research/fpi_ml/data/atmospheric_explorer/ae_de2",
    column_names=[
        # timestamp components
        "year", "month", "day", "hour", "minute", "second",
        # position / geometry
        "altitude", "latitude", "longitude",
        "local_time", "local_magnetic_time",
        "magnetic_inclination", "solar_zenith_angle",
        "orbit_number",
        # measurements
        "mesa_wind_component", "neutral_temp",
        "zonal_winds", "meridional_winds", "vertical_winds",
        "neutral_temperature_2",
    ],
    # A row is kept only if at least one of these is present.
    target_columns=[
        "neutral_temp",
        "zonal_winds",
        "meridional_winds",
        "vertical_winds",
        "mesa_wind_component",
        "neutral_temperature_2",
    ],
)

In [49]:
# Spot-check five randomly chosen rows of the parsed DE-2 frame.
de_2.sample(5)

Unnamed: 0,altitude,latitude,longitude,local_time,local_magnetic_time,magnetic_inclination,solar_zenith_angle,orbit_number,mesa_wind_component,neutral_temp,zonal_winds,meridional_winds,vertical_winds,neutral_temperature_2,datetime
63119,369.95,-3.06,8.94,18.3,18.28,18.637,93.508,1750.0,1294.1,61.352,20.345,,,,1981-11-29 17:32:16
397513,298.0,-79.32,-16.18,2.4533,0.26,66.543,78.184,8088.0,1275.7,-12.349,18.986,285.89,1227.3,,1983-01-19 03:42:08
32802,557.82,10.707,165.87,9.1,8.8933,13.623,48.347,1144.0,1362.3,-59.377,,,,,1981-10-19 21:46:08
369043,491.64,-58.833,-86.533,4.8333,5.4667,47.214,78.63,7588.0,1487.1,-145.61,2.6329,,,,1982-12-18 10:33:20
397162,277.34,-61.613,51.82,2.5,0.59333,65.155,92.581,8085.0,1233.5,-166.25,-3.3225,,,,1983-01-18 23:14:08


In [50]:
# Restrict every satellite frame to rows whose latitude lies strictly between
# 65 and 75 and whose altitude lies strictly between 100 and 300, then rebind
# the four names to the filtered frames.
dfs = [
    frame[
        frame['latitude'].between(65, 75, inclusive="neither")
        & frame['altitude'].between(100, 300, inclusive="neither")
    ]
    for frame in (ae_c, ae_d, ae_e, de_2)
]

ae_c, ae_d, ae_e, de_2 = dfs

In [51]:
# Inspect the DE-2 frame after the latitude/altitude filter.
de_2

Unnamed: 0,altitude,latitude,longitude,local_time,local_magnetic_time,magnetic_inclination,solar_zenith_angle,orbit_number,mesa_wind_component,neutral_temp,zonal_winds,meridional_winds,vertical_winds,neutral_temperature_2,datetime
140092,299.89,67.047,94.720,11.1,11.527,61.764,70.517,3299.0,1211.5,-15.9570,-10.894,,,,1982-03-13 04:59:28
140093,299.26,65.973,94.660,11.1,11.513,60.736,69.458,3299.0,1184.3,-9.4145,-19.537,,,,1982-03-13 04:59:44
140252,299.71,67.860,-48.980,11.1,12.560,75.166,71.249,3305.0,1371.6,-427.9500,33.480,,,,1982-03-13 14:32:48
140253,299.13,66.780,-49.040,11.1,12.480,74.259,70.199,3305.0,1387.2,-246.7000,11.897,,,,1982-03-13 14:33:04
140254,298.63,65.700,-49.100,11.1,12.400,73.323,69.151,3305.0,1351.0,-221.9100,12.395,,,,1982-03-13 14:33:20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419573,251.72,71.360,-50.780,12.6,15.700,78.938,84.207,8528.0,,,,175.64,867.88,,1983-02-15 16:13:36
419574,250.21,69.213,-50.873,12.6,15.167,77.065,82.072,8528.0,1059.9,-458.1900,-42.828,,,,1983-02-15 16:14:08
419575,249.44,68.140,-50.920,12.6,14.900,76.066,81.005,8528.0,1012.5,-527.8400,-52.548,137.95,955.63,,1983-02-15 16:14:24
419576,248.66,67.067,-50.967,12.6,14.633,75.032,79.937,8528.0,1033.4,-498.2400,-12.483,,,,1983-02-15 16:14:40


In [None]:
# Compare the column sets of the four frames before selecting a common subset.
de_2.columns, ae_e.columns, ae_d.columns, ae_c.columns

(Index(['altitude', 'latitude', 'longitude', 'local_time',
        'local_magnetic_time', 'magnetic_inclination', 'solar_zenith_angle',
        'orbit_number', 'mesa_wind_component', 'neutral_temp', 'zonal_winds',
        'meridional_winds', 'vertical_winds', 'neutral_temperature_2',
        'datetime'],
       dtype='object'),
 Index(['altitude', 'latitude', 'longitude', 'local_time',
        'local_magnetic_time', 'magnetic_inclination', 'solar_zenith_angle',
        'orbit_number', 'mesa_wind_component', 'neutral_temp',
        'meridional_winds', 'vertical_winds', 'datetime'],
       dtype='object'),
 Index(['altitude', 'latitude', 'longitude', 'local_time',
        'local_magnetic_time', 'magnetic_inclination', 'solar_zenith_angle',
        'orbit_number', 'mesa_wind_component', 'neutral_temp', 'zonal_winds',
        'datetime'],
       dtype='object'),
 Index(['altitude', 'latitude', 'longitude', 'local_time',
        'local_magnetic_time', 'magnetic_inclination', 'solar_zenith_a

In [53]:
# Keep only the timestamp, position and the measurement columns each frame has.
de_2 = de_2.loc[:, [
    "datetime", "altitude", "latitude", "longitude",
    "neutral_temp", 'zonal_winds', 'meridional_winds', 'vertical_winds',
]]

ae_e = ae_e.loc[:, [
    "datetime", "altitude", "latitude", "longitude",
    "neutral_temp", 'meridional_winds', 'vertical_winds',
]]

ae_d = ae_d.loc[:, [
    "datetime", "altitude", "latitude", "longitude",
    "neutral_temp", 'zonal_winds',
]]

ae_c = ae_c.loc[:, [
    "datetime", "altitude", "latitude", "longitude",
    "neutral_temp", 'vertical_winds',
]]

In [54]:
# Inspect the AE-C frame after the column selection.
ae_c

Unnamed: 0,datetime,altitude,latitude,longitude,neutral_temp,vertical_winds
8430,1974-02-16 11:27:15,296.9,65.67,-38.960,,2.696000e-33
8431,1974-02-16 11:27:30,286.2,65.18,-36.690,,9.630000e-34
8496,1974-02-17 15:23:15,295.9,65.93,-102.300,870.5,-3.574000e+02
8497,1974-02-17 15:23:30,285.2,65.46,-99.930,878.3,-3.386000e+02
8555,1974-03-10 03:26:15,220.7,67.86,9.151,1008.0,
...,...,...,...,...,...,...
86640,1975-03-30 20:43:30,234.9,66.91,-114.300,939.5,
86641,1975-03-30 20:43:45,234.8,66.58,-112.000,947.0,
86642,1975-03-30 20:44:00,234.8,66.21,-109.700,946.8,
86643,1975-03-30 20:44:15,234.7,65.80,-107.500,921.5,


In [55]:
# Stack the four per-satellite frames row-wise under a fresh 0..n-1 index.
# Wind columns that a given frame lacks come out as NaN for its rows.
satellite_frames = [de_2, ae_e, ae_d, ae_c]
combined_df = pd.concat(objs=satellite_frames, axis=0, ignore_index=True, sort=False)
del satellite_frames

In [56]:
# Inspect the combined frame of all four satellites.
combined_df

Unnamed: 0,datetime,altitude,latitude,longitude,neutral_temp,zonal_winds,meridional_winds,vertical_winds
0,1982-03-13 04:59:28,299.89,67.047,94.72,-15.9570,-10.894,,
1,1982-03-13 04:59:44,299.26,65.973,94.66,-9.4145,-19.537,,
2,1982-03-13 14:32:48,299.71,67.860,-48.98,-427.9500,33.480,,
3,1982-03-13 14:33:04,299.13,66.780,-49.04,-246.7000,11.897,,
4,1982-03-13 14:33:20,298.63,65.700,-49.10,-221.9100,12.395,,
...,...,...,...,...,...,...,...,...
9211,1975-03-30 20:43:30,234.90,66.910,-114.30,939.5000,,,
9212,1975-03-30 20:43:45,234.80,66.580,-112.00,947.0000,,,
9213,1975-03-30 20:44:00,234.80,66.210,-109.70,946.8000,,,
9214,1975-03-30 20:44:15,234.70,65.800,-107.50,921.5000,,,


In [60]:
# Show the rows whose longitude lies strictly between 5 and 30.
combined_df[combined_df['longitude'].between(5, 30, inclusive="neither")]

Unnamed: 0,datetime,altitude,latitude,longitude,neutral_temp,zonal_winds,meridional_winds,vertical_winds
10,1982-03-14 09:39:12,299.72,70.320,23.660,-84.209,-21.178,,
11,1982-03-14 09:39:28,299.20,69.247,23.607,-76.707,-23.483,,
12,1982-03-14 09:39:44,298.76,68.173,23.553,-64.860,-19.488,,
13,1982-03-14 09:40:00,298.40,67.100,23.500,-61.536,-22.976,,
14,1982-03-14 09:40:16,298.13,66.020,23.433,-61.335,-24.540,,
...,...,...,...,...,...,...,...,...
8760,1975-02-20 00:02:15,246.30,65.180,12.460,854.500,,,
8858,1975-02-27 20:27:15,259.30,68.100,5.630,652.800,,,
8859,1975-02-27 20:27:45,259.30,67.920,10.830,506.800,,,
8860,1975-02-27 20:29:45,258.70,65.610,29.870,741.300,,,
