In [6]:
import json
import os
from math import radians, sin, cos, sqrt, atan2

import numpy as np
import pandas as pd

def haversine(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two points given in degrees.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371, the constant this function has always used
        (Earth's mean radius in kilometres).

    Returns
    -------
    float
        Distance along the surface of the sphere. NaN if any input is NaN.
    """
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise "math domain error".
    # The comparison is False for NaN, so NaN inputs still propagate as NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius * c

def load_data_from_feather(daily_ds,
                           data_dir='model_collocated_10d_filled/',
                           target_points=None,
                           n_row=46,
                           min_max_path='min_max_values.json'):
    """Build normalised model inputs/targets for the profiles nearest to a set of points.

    The function reads ``<data_dir>/<daily_ds>.feather``, keeps the row(s)
    closest to each target point, reshapes the per-height ``ASAL_height*`` /
    ``CTEMP_height*`` columns into long form (one row per height), min-max
    scales every column listed in the JSON file, and stacks the complete
    profiles into arrays.

    Parameters
    ----------
    daily_ds : str or bytes
        Name of the feather file without extension. The part of its basename
        before the first ``.`` is parsed as ``%Y%m%d`` to derive the day of
        year stored in the ``DATE`` column.
    data_dir : str, optional
        Directory containing the feather files.
    target_points : iterable of (lat, lon), optional
        Points (degrees) whose nearest rows are extracted. Defaults to
        ``[(0, -130), (30, -40), (-50, 30), (-35, 100)]``.
    n_row : int, optional
        Number of height levels a profile must have to be kept.
    min_max_path : str, optional
        JSON file with ``min_values`` and ``max_values`` mappings
        (column name -> bound) used for min-max scaling.

    Returns
    -------
    X : numpy.ndarray, shape (n_profiles, n_row, 10)
        Columns in order: LATITUDE, LONGITUDE, HEIGHT, SSS, SST, SSH, MLD,
        UO, VO, DATE.
    y : numpy.ndarray, shape (n_profiles, n_row, 2)
        Columns in order: ASAL, CTEMP.

    Profiles are ordered by (LATITUDE, LONGITUDE) as produced by ``groupby``,
    not by the order of ``target_points``.

    Raises
    ------
    ValueError
        If the file has no ``ASAL_height*`` / ``CTEMP_height*`` columns, or if
        no profile with exactly ``n_row`` levels survives the filtering.
    """
    if isinstance(daily_ds, bytes):
        daily_ds = daily_ds.decode('utf-8')
    if target_points is None:
        target_points = [(0, -130), (30, -40), (-50, 30), (-35, 100)]

    df = pd.read_feather(os.path.join(data_dir, daily_ds + '.feather'))
    asal_height_cols = [col for col in df.columns if col.startswith('ASAL_height')]
    ctemp_height_cols = [col for col in df.columns if col.startswith('CTEMP_height')]
    if not asal_height_cols or not ctemp_height_cols:
        raise ValueError(
            "no 'ASAL_height*' / 'CTEMP_height*' columns found in " + daily_ds + '.feather'
        )

    # For every target keep all rows tied for the minimum distance. The frames
    # are gathered in a list and concatenated once: concatenating onto an empty
    # DataFrame inside the loop is deprecated in pandas and emits a
    # FutureWarning. Distances live in a standalone Series so `df` itself is
    # never modified.
    latitudes = df['LATITUDE'].tolist()
    longitudes = df['LONGITUDE'].tolist()
    closest_frames = []
    for target_lat, target_lon in target_points:
        distances = pd.Series(
            [haversine(lat, lon, target_lat, target_lon)
             for lat, lon in zip(latitudes, longitudes)],
            index=df.index,
            dtype=float,
        )
        closest_frames.append(df.loc[distances == distances.min()])
    closest_points_df = pd.concat(closest_frames, ignore_index=True)

    other_vars = ['LATITUDE', 'LONGITUDE', 'SSS', 'SST', 'SSH', 'MLD', 'UO', 'VO']
    new_df = closest_points_df[other_vars]

    # Wide -> long: one copy of the surface variables per height level. The
    # level label is whatever follows '_height' in the ASAL column name; ASAL
    # and CTEMP columns are paired purely by their order in the file.
    height_dfs = [
        new_df.assign(
            HEIGHT=asal_col.split('_height')[-1],
            ASAL=closest_points_df[asal_col],
            CTEMP=closest_points_df[ctemp_col],
        )
        for asal_col, ctemp_col in zip(asal_height_cols, ctemp_height_cols)
    ]
    final_df = pd.concat(height_dfs, ignore_index=True).dropna()

    # pivot_table's default aggregation (mean) collapses duplicate
    # (LATITUDE, LONGITUDE, HEIGHT) rows, e.g. when two targets share a row.
    pivot_table = final_df.pivot_table(
        index=['LATITUDE', 'LONGITUDE', 'HEIGHT'],
        values=['SSS', 'SST', 'SSH', 'MLD', 'UO', 'VO', 'ASAL', 'CTEMP'],
    )

    filename = os.path.basename(daily_ds)
    date_str = filename.split('.')[0]  # drop anything after the first dot
    day_of_year = pd.to_datetime(date_str, format='%Y%m%d').dayofyear

    pivot_table['DATE'] = day_of_year
    # astype(float) also turns the HEIGHT labels (strings) into numbers, so the
    # per-profile sort below is numeric.
    pivot_table = pivot_table.reset_index().astype(float)
    pivot_table = pivot_table[['LATITUDE', 'LONGITUDE', 'HEIGHT', 'SSS', 'SST', 'SSH',
                               'MLD', 'UO', 'VO', 'DATE', 'ASAL', 'CTEMP']]

    with open(min_max_path, 'r') as f:
        min_max_values = json.load(f)

    arrays = []
    for _, group in pivot_table.groupby(['LATITUDE', 'LONGITUDE']):
        # Only complete profiles can be stacked into a rectangular array.
        if len(group) != n_row:
            continue
        group_sorted = group.sort_values(by='HEIGHT')
        for column in min_max_values['min_values'].keys():
            min_value = min_max_values['min_values'][column]
            max_value = min_max_values['max_values'][column]
            group_sorted[column] = (group_sorted[column] - min_value) / (max_value - min_value)

        array = group_sorted.values
        # Skip profiles that are zero everywhere after scaling.
        if np.any(array):
            arrays.append(array)

    if not arrays:
        raise ValueError(
            'no complete profile with ' + str(n_row) + ' levels found for ' + daily_ds
        )

    all_arr = np.stack(arrays)
    X = all_arr[:, :, 0:10]
    y = all_arr[:, :, 10:12]
    return X, y

In [8]:
import pandas as pd
import os

# Same directory string as the feather location used by load_data_from_feather.
# NOTE(review): nothing in the visible cells reads this variable.
input_data = 'model_collocated_10d_filled/'

import xarray as xr
import pandas as pd
import numpy as np
from keras.models import load_model
import json 

# --- Inference: predict ASAL / CTEMP profiles for a single date -------------
model = load_model('model-3.keras')

date = '20221029'

# Only the inputs are needed here; the targets returned alongside are dropped.
profiles, _ = load_data_from_feather(date)
print(profiles.shape)
p = model.predict(profiles)

# Bounds used to undo the min-max scaling applied to the training targets.
with open('min_max_values.json', 'r') as f:
    min_max_values = json.load(f)


def _denormalise(scaled, variable):
    """Invert min-max scaling for `variable` using the loaded bounds."""
    low = min_max_values['min_values'][variable]
    high = min_max_values['max_values'][variable]
    return scaled * (high - low) + low


# Output channel 0 is rescaled with the ASAL bounds, channel 1 with CTEMP.
asal_pred = _denormalise(p[:, :, 0], 'ASAL')
ctemp_pred = _denormalise(p[:, :, 1], 'CTEMP')

# One row per profile: all ASAL levels first, followed by all CTEMP levels.
ds = np.concatenate([asal_pred, ctemp_pred], axis=1)
print(ds.shape)
np.savetxt('model-3_prof.csv', ds, delimiter=",", fmt='%f', header="")

  closest_points_df = pd.concat([closest_points_df, df.loc[closest_indices]], ignore_index=True)


(4, 46, 10)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 525ms/step
(4, 92)
