In [None]:
# Install the package that is imported below as `fcx_playground`.
# NOTE(review): no version is pinned, so a fresh run may pick up a different
# release than the one this notebook was written against -- consider pinning.
%pip install fcx-playground

In [None]:
import os
import shutil
import boto3
import numpy as np
import pandas as pd
import xarray as xr
import zarr

from typing import Generator

from fcx_playground.fcx_dataprocess.tiles_rad_range import RadRangeTilesPointCloudDataProcess

In [None]:
class HIWRAPTilesPointCloudDataProcess(RadRangeTilesPointCloudDataProcess):
    """Turn an HS3 HIWRAP radar granule into a flat point-cloud table.

    Only the HIWRAP-specific steps are implemented here. ``self.url``,
    ``self.to_rad``, ``self._add24hr`` and ``self._down_vector`` are not
    defined in this class; they are expected to be provided by
    ``RadRangeTilesPointCloudDataProcess``.
    """

    def _cleaning(self, data: xr.Dataset) -> xr.Dataset:
        """Keep only the dataset variables that ``_transformation`` reads.

        Args:
            data: the ingested dataset.

        Returns:
            A dataset restricted to time, reflectivity, aircraft position,
            aircraft attitude and range.
        """
        return data[['time', 'ref', 'lat', 'lon', 'alt', 'roll', 'pitch', 'head', 'range']]

    def _transformation(self, data: xr.Dataset) -> pd.DataFrame:
        """Flatten the 2-D reflectivity field into one row per radar sample.

        Args:
            data: dataset as returned by ``_cleaning``. ``ref`` must be 2-D
                with one row per entry of the per-profile variables (time,
                lat, lon, ...) and one column per entry of ``range``.

        Returns:
            A DataFrame with columns ``time`` (int64 seconds since
            1970-01-01), ``lon``, ``lat``, ``alt`` and ``ref``, ordered by
            ``time``. Samples whose ``ref`` is not finite or whose computed
            ``alt`` is not positive are dropped.

        Raises:
            ValueError: if ``ref`` is not 2-D or its second dimension does
                not match the number of ``range`` values, or if the flight
                date cannot be read from ``self.url``.
        """
        hour = data['time'].values  # treated as hours of day (multiplied by 3600 below)
        lat = data['lat'].values
        lon = data['lon'].values
        alt = data['alt'].values  # altitude of aircraft in meters
        roll = data["roll"].values
        pitch = data["pitch"].values
        head = data["head"].values
        ref = data['ref'].values  # HIWRAP radar reflectivity, 2-D
        rad_range = data["range"].values  # 1-D, one value per column of ref

        # Fail early with a readable message instead of a broadcasting error
        # deep inside the curtain computation.
        if ref.ndim != 2 or rad_range.size != ref.shape[1]:
            raise ValueError(
                "expected 'ref' to be 2-D with one column per 'range' value, "
                "got ref shape {} and range shape {}".format(ref.shape, rad_range.shape)
            )

        # Time correction and conversion: hours of day + flight date -> epoch seconds.
        base_time = self._get_date_from_url(self.url)
        # Inherited helper; presumably unwraps hours that roll past midnight -- TODO confirm.
        hour = self._add24hr(hour)
        delta = (hour * 3600).astype('timedelta64[s]') + base_time
        time = (delta - np.datetime64('1970-01-01')).astype('timedelta64[s]').astype(np.int64)

        # Flatten ref row by row and expand every other variable to the same
        # length: per-profile values are repeated once per range gate, the
        # range vector is tiled once per profile.
        num_profiles, num_gates = ref.shape

        time = np.repeat(time, num_gates)
        lon = np.repeat(lon, num_gates)
        lat = np.repeat(lat, num_gates)
        alt = np.repeat(alt, num_gates)
        roll = np.repeat(roll * self.to_rad, num_gates)
        pitch = np.repeat(pitch * self.to_rad, num_gates)
        head = np.repeat(head * self.to_rad, num_gates)
        rad_range = np.tile(rad_range, num_profiles)
        ref = ref.flatten()

        # Curtain creation: offset every sample from the aircraft position
        # along the vector returned by the inherited _down_vector helper.
        # 111000 is used as the number of meters per degree of latitude; the
        # longitude offset is additionally scaled by cos(latitude).
        # NOTE(review): this assumes 'range' is in meters -- confirm.
        meters_per_degree = 111000

        x, y, z = self._down_vector(roll, pitch, head)
        x = np.multiply(x, np.divide(rad_range, meters_per_degree * np.cos(lat * self.to_rad)))
        y = np.multiply(y, np.divide(rad_range, meters_per_degree))
        z = np.multiply(z, rad_range)
        lon = np.add(-x, lon)
        lat = np.add(-y, lat)
        alt = np.add(z, alt)

        # Drop unusable samples. This is done only after the curtain has been
        # built so that the geometry is computed on the complete arrays.
        mask = np.logical_and(np.isfinite(ref), alt > 0)
        time = time[mask]
        ref = ref[mask]
        lon = lon[mask]
        lat = lat[mask]
        alt = alt[mask]

        # Sort by time. Many samples share a timestamp (one per range gate,
        # and times are whole seconds), so a stable sort is used to keep the
        # original gate order inside each timestamp and make the output
        # deterministic. Masking first means fewer elements to sort; with a
        # stable sort the result is the same as sorting first.
        order = np.argsort(time, kind='stable')

        return pd.DataFrame(data={
            'time': time[order],
            'lon': lon[order],
            'lat': lat[order],
            'alt': alt[order],
            'ref': ref[order],
        })

    def _get_date_from_url(self, url: str) -> np.datetime64:
        """Read the flight date from an HS3 HIWRAP file name.

        The date is the ``YYYYMMDD`` token that follows ``HS3_HIWRAP_``,
        e.g. ``HS3_HIWRAP_20130925_...`` gives ``2013-09-25``.

        Args:
            url: path or URL of the granule.

        Returns:
            The flight date as ``numpy.datetime64``.

        Raises:
            ValueError: if ``url`` is not a string containing
                ``HS3_HIWRAP_``, or if the token after it does not form a
                valid date.
        """
        marker = "HS3_HIWRAP_"
        # Without this guard a missing marker surfaces as a bare
        # "IndexError: list index out of range" with no hint about the cause.
        if not isinstance(url, str) or marker not in url:
            raise ValueError(
                "cannot read the flight date: expected '{}YYYYMMDD_' in the "
                "file name, got {!r}".format(marker, url)
            )
        date = url.split(marker)[1].split("_")[0]
        return np.datetime64('{}-{}-{}'.format(date[:4], date[4:6], date[6:]))

In [None]:
# Create the HIWRAP processor using the constructor's default arguments.
obj = HIWRAPTilesPointCloudDataProcess()

In [None]:
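# Optional: uncomment and run the line below if opening the .nc file in the
# next cell fails because the netCDF4 package is not installed.
# %pip install netCDF4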

In [6]:
# Ingest a local HS3 HIWRAP sample granule. The path is relative, so it
# resolves against the directory the notebook kernel was started in.
data = obj.ingest("../../../../test_data/HS3_HIWRAP_20130925_kuinnerchirp_175902-183052_v03.nc")

In [7]:
# Display the ingested object to inspect what was loaded.
data

In [8]:
# Run the processor's preprocess step on the ingested data.
# NOTE(review): the saved output of this cell is
# "IndexError: list index out of range". One candidate source is
# _get_date_from_url, which indexes the result of splitting the URL on
# "HS3_HIWRAP_" -- confirm what self.url holds at this point.
pre_processed_data = obj.preprocess(data)

IndexError: list index out of range