In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import boto3
#import configuration location and filepath
from myconfig import *


In [None]:
# Locate and load every input CSV used by this notebook.
# The output_path_* prefixes come from the star-import of myconfig and are
# joined by plain string concatenation (so they presumably already end with
# a path separator -- confirm in myconfig).

# --- ECCO trench-dive files ---
data_location_01 = output_path_ecco + "trench_dive_temperature.csv"
data_location_02 = output_path_ecco + "trench_dive_salinity.csv"
data_location_03 = output_path_ecco + "trench_dive_current_U.csv"
data_location_04 = output_path_ecco + "trench_dive_current_V.csv"
data_location_05 = output_path_ecco + "trench_dive_current_W.csv"

df_temp, df_salinity, df_U, df_V, df_W = [
    pd.read_csv(csv_path)
    for csv_path in (
        data_location_01,
        data_location_02,
        data_location_03,
        data_location_04,
        data_location_05,
    )
]

# --- Near-real-time files: buoy, seismic, rain, GOES cloud time series ---
data_location_06 = output_path_nrt + "buoy_nrt.csv"
data_location_07 = output_path_nrt + "seismic_7200.csv"
data_location_09 = output_path_nrt + "nrt_raining.csv"
data_location_10 = output_path_nrt + "goes_timeseries_nrt.csv"

df_buoy, df_seismic, df_rain, df_clouds = [
    pd.read_csv(csv_path)
    for csv_path in (
        data_location_06,
        data_location_07,
        data_location_09,
        data_location_10,
    )
]

# --- Dolphin files ---
data_location_08 = output_path_dolphin + "dolphins_01.csv"
data_location_11 = output_path_dolphin + "dolphins.csv"

df_creatures, df_dolphins = [
    pd.read_csv(csv_path)
    for csv_path in (data_location_08, data_location_11)
]

In [None]:
def min_max_normalize_column(df):
    """Rescale every column of ``df`` independently to the range [0, 1].

    Each column is mapped with ``(value - min) / (max - min)``, where min and
    max are that column's own extremes (pandas skips NaN when computing them).

    A constant column has ``max == min``; dividing by that zero range would
    turn the whole column into NaN, so such a column is returned as all
    zeros instead (entries that were NaN stay NaN).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns support subtraction and division (numeric data).

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns; ``df`` is not modified.
    """
    def _scale(col):
        lowest = col.min()
        span = col.max() - lowest
        if span == 0:
            # Zero range: avoid 0/0 and report the column as sitting at its minimum.
            return (col - lowest).astype(float)
        return (col - lowest) / span

    return df.apply(_scale, axis=0)

# Build fixed-length, min-max normalized versions of the five ECCO frames.
#
# df_temp, df_salinity, df_U, df_V and df_W were already read from
# data_location_01..05 by the CSV-loading cell, so they are not re-read here.

# Layout of every padded output frame.
TOTAL_ROWS = 7200  # number of rows in each padded frame
LEAD_ROWS = 1800   # leading rows that repeat the first normalized value

# Hydrostatic pressure constants
rho = 1025  # kg/m³, density of seawater
g = 9.81  # m/s², gravitational acceleration
P0 = 101325  # Pa, atmospheric pressure at sea level

# Pressure in Pascals from the absolute depth: P = P0 + rho * g * |depth|.
# This adds a 'Pressure' column to df_temp in place; re-running the cell
# simply overwrites that column with the same values.
df_temp['Pressure'] = P0 + rho * g * abs(df_temp['depth'])

# List of the original DataFrames
dfs = [df_temp, df_salinity, df_U, df_V, df_W]

# Process each DataFrame.
# Note: df, normalized_df, first_values and new_df stay bound after the loop
# (holding the values from the last frame, df_W).
new_dfs = []
for df in dfs:
    # Normalize each column in the DataFrame using min-max normalization
    normalized_df = min_max_normalize_column(df)

    # First row of the *normalized* frame; it is repeated over the lead-in rows
    first_values = normalized_df.iloc[0]

    # Empty frame of TOTAL_ROWS rows with the same columns as the input
    new_df = pd.DataFrame(index=range(TOTAL_ROWS), columns=df.columns)

    # Lead-in: the first LEAD_ROWS rows all hold the first normalized value
    new_df.iloc[:LEAD_ROWS] = first_values

    # Normalized data follows directly after the lead-in
    new_df.iloc[LEAD_ROWS:LEAD_ROWS + len(normalized_df), :] = normalized_df.values

    # The frame above is created with object dtype. Cast to float explicitly
    # before forward-filling instead of relying on pandas' deprecated silent
    # downcasting of object columns in ffill; the trailing rows (and any NaN
    # gaps) then take the last preceding value of their column.
    new_df = new_df.astype(float).ffill()

    # Append the new DataFrame to the list
    new_dfs.append(new_df)

# Unpack the new DataFrames into respective variables
new_df_temp, new_df_salinity, new_df_U, new_df_V, new_df_W = new_dfs


In [None]:
#df_temp['Pressure'].max()

In [None]:
#new_df_W.head()

In [None]:
import matplotlib.cm as cm  # kept so the `cm` name remains defined; no longer used below

# Plot every column of df_temp (raw, un-normalized values) on one set of axes.

# One colour per column, sampled from the 'tab20' colormap.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9.
colors = plt.get_cmap('tab20', len(df_temp.columns))

# Create a single plot for all columns
fig, ax1 = plt.subplots(figsize=(16, 4))

# Loop through each column and plot on the same graph
for idx, column in enumerate(df_temp.columns):
    ax1.plot(df_temp.index, df_temp[column], color=colors(idx), label=column)

# Set labels for the x and y axes
ax1.set_xlabel('Index')
ax1.set_ylabel('Values')

# Combined legend below the plot, laid out with one legend column per plotted
# df_temp column. (This previously read `normalized_df.columns`, a variable
# left over from another cell, so the layout depended on execution order.)
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(df_temp.columns))

# Set a title for the plot
plt.title('All Channels on the Same Graph')

# Adjust layout to make room for the legend
plt.tight_layout()

# Save the figure
#plt.savefig(output_path_figures+"dive.png")

# Display the plot
#plt.show()

In [None]:
#new_df_salinity.head()

In [None]:
#df_buoy.head()

In [None]:
# Dolphin side quest: turn the three dolphin angle columns into a
# fixed-length (7200-row) block that is zero everywhere except where the
# observed rows are placed.
dolphin_angle_columns = ["Lizzy_angle", "Perry_angle", "BentHighNick_angle"]

# Keep only the angle columns, as floats, with missing values replaced by 0.0
df_dolphins = df_dolphins[dolphin_angle_columns].astype(float).fillna(0.0)
df_dolphins_normalized = min_max_normalize_column(df_dolphins)

# Drop rows in which all three normalized angles equal 0.0, then renumber.
# NOTE(review): missing values are filled with 0.0 *before* normalization, and
# a normalized 0.0 marks a column's minimum -- confirm this filter removes the
# rows that were intended.
all_three_zero = (df_dolphins_normalized[dolphin_angle_columns] == 0.0).all(axis=1)
filtered_df_dolphins = df_dolphins_normalized[~all_three_zero].reset_index(drop=True)

# Zero-filled frame of 7200 rows with the same columns as the filtered data
buffered_df_dolphins = pd.DataFrame(0.0, index=range(7200), columns=filtered_df_dolphins.columns)

# Place the filtered rows starting at row 2541; every other row stays 0.0
dolphin_start_row = 2541
dolphin_stop_row = dolphin_start_row + len(filtered_df_dolphins)
buffered_df_dolphins.iloc[dolphin_start_row:dolphin_stop_row] = filtered_df_dolphins.values

# The final DataFrame `buffered_df_dolphins` will have the required structure
#buffered_df_dolphins.shape

In [None]:
# Assemble the final multi-channel frame `normalized_df`.

# --- ECCO model channels, taken from the padded min-max normalized frames ---
col1 = new_df_temp['depth']
col2 = new_df_temp['Pressure']
col3 = new_df_temp['temperature']
col4 = new_df_salinity['salinity']
col5 = new_df_U['U']
# NOTE(review): the U and W frames use 'U' / 'W' -- confirm the V file really
# names this column 'current'.
col6 = new_df_V['current']
col7 = new_df_W['W']

# --- Buoy and seismic channels ---
col8 = df_buoy['WVHT']
col9 = df_buoy['ATMP']
col10 = df_buoy['WSPD']
col11 = df_seismic['Sample']
# "glitter": sun exposure multiplied by wind speed
col12 = df_buoy['sun_exposure'] * df_buoy['WSPD']

# --- Rain, clouds (eight compass directions) and dolphins ---
col13 = df_rain['rain']
cloud_directions = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
col14 = df_clouds[cloud_directions]
col15 = buffered_df_dolphins[["Lizzy_angle","Perry_angle","BentHighNick_angle"]]

# These four channels are min-max normalized here, column by column
merged_df = pd.DataFrame({'depth': col1, 'pressure': col2, 'temperature': col3, 'glitter':col12})
normalized_df = merged_df.apply(
    lambda series: (series - series.min()) / (series.max() - series.min())
)

# The remaining channels are attached as-is, without further scaling (the
# notebook treats them as already normalized). Insertion order determines the
# column order of the exported CSV.
passthrough_channels = {
    'salinity': col4,
    'U_current': col5,
    'V_current': col6,
    'W_current': col7,
    "buoy_height": col8,
    "buoy_temp": col9,
    "buoy_windspeed": col10,
    "seismic": col11,
    "rain": col13,
}
for channel_name, channel_values in passthrough_channels.items():
    normalized_df[channel_name] = channel_values

# Cloud columns keep their names; dolphin columns are renamed positionally
# (Lizzy_angle -> Lizzy, Perry_angle -> Perry, BentHighNick_angle -> BentHighNick)
normalized_df[cloud_directions] = col14
normalized_df[["Lizzy","Perry","BentHighNick"]] = col15

# Any value still missing after index alignment becomes 0.0
normalized_df = normalized_df.fillna(0.0)

In [None]:
#df_buoy.head(10)

In [None]:
import matplotlib.cm as cm  # kept so the `cm` name remains defined; no longer used below

# Plot every column of normalized_df on one set of axes and save the figure.

# One colour per channel, sampled from the 'tab20' colormap.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9.
colors = plt.get_cmap('tab20', len(normalized_df.columns))

# Create a single plot for all columns
fig, ax1 = plt.subplots(figsize=(16, 4))

# Loop through each column and plot on the same graph.
# `colors` and `idx` stay bound after this loop; the single-channel plot
# cells further down reuse them.
for idx, column in enumerate(normalized_df.columns):
    ax1.plot(normalized_df.index, normalized_df[column], color=colors(idx), label=column)

# Set labels for the x and y axes
ax1.set_xlabel('Index')
ax1.set_ylabel('Values')

# Set a title for the plot
plt.title('All Channels on the Same Graph')

# Tighten the layout (no legend is drawn on this figure)
plt.tight_layout()

# Save the figure
plt.savefig(output_path_figures+"dive_nrt.png")

# Display the plot
#plt.show()


In [None]:
# Stand-alone figure showing only the 'glitter' channel of normalized_df.
glitter_fig, glitter_ax = plt.subplots(figsize=(16, 4))

# `colors` and `idx` are left over from the all-channels plotting cell, so the
# line takes the colour of the last channel drawn there.
glitter_ax.plot(
    normalized_df.index,
    normalized_df["glitter"],
    color=colors(idx),
    label="Glitter",
)

# Axis labels, title and legend
glitter_ax.set_xlabel('Index')
glitter_ax.set_ylabel('Values')
glitter_ax.set_title('Glitter Only')
glitter_ax.legend()

# Tighten the layout around the axes
glitter_fig.tight_layout()

# Save the figure (uncomment the line below to save the image)
# plt.savefig(output_path_figures+"dive.png")

# Display the plot
#plt.show()

In [None]:
# Stand-alone figure showing only the 'salinity' channel of normalized_df.
plt.figure(figsize=(16, 4))

# Plot the 'salinity' column from normalized_df.
# `colors` and `idx` are left over from the all-channels plotting cell, so the
# line takes the colour of the last channel drawn there.
plt.plot(normalized_df.index, normalized_df["salinity"], color=colors(idx), label="Salinity")

# Set labels for the x and y axes
plt.xlabel('Index')
plt.ylabel('Values')

# Title now matches the plotted channel (it previously read 'Glitter Only',
# copied from the glitter cell)
plt.title('Salinity Only')

# Add a legend to the plot
plt.legend()

# Adjust layout to make room for the legend
plt.tight_layout()

# Save the figure (uncomment the line below to save the image)
# plt.savefig(output_path_figures+"dive.png")

# Display the plot
#plt.show()

In [None]:
#normalized_df.head(10)

In [None]:
# Write the assembled channels to disk; the row index is kept as the first
# CSV column. `file_name` is reused by the S3 upload cell.
file_name = output_path_nrt + '12min_pass_v08.csv'
normalized_df.to_csv(path_or_buf=file_name, index=True)

In [None]:


# Publish the exported CSV to S3.
# No credentials are passed explicitly to the client created here.
s3 = boto3.client(service_name='s3')

# Destination bucket and object key
bucket_name = 'heart-ocean-public'
file_name_out = 'nrt-data/12min_pass_v08.csv'

# Upload the local file written by the export cell
s3.upload_file(Filename=file_name, Bucket=bucket_name, Key=file_name_out)
