# Figure 3 (a)-(b)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import cmocean
import gsw
from scipy.ndimage import uniform_filter1d

# Path to the Argo profile CSV export analysed in this cell
file_path = '/mnt/d/Data/Argo/Argo/PR_PF_7900334.csv'

# The float number is the third underscore-separated token of the file name
# with its extension removed ('PR_PF_7900334.csv' -> '7900334')
file_name = os.path.basename(file_path)
argo_float_number = file_name.split('_')[2].split('.')[0]

# Load the CSV file
df = pd.read_csv(file_path)

# Parse the date column and coerce the measurement columns to numbers;
# anything unparsable becomes NaN and is removed by the dropna below.
# Note that 'PRES (decibar)' and 'TEMP (degree_Celsius)' are overwritten with
# the *_ADJUSTED values.
date_column = 'DATE (YYYY-MM-DDTHH:MI:SSZ)'
df[date_column] = pd.to_datetime(df[date_column])
df['PRES (decibar)'] = pd.to_numeric(df['PRES_ADJUSTED (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP_ADJUSTED (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')
df['PSAL_ADJUSTED (psu)'] = pd.to_numeric(df['PSAL_ADJUSTED (psu)'], errors='coerce')

# Bounds of the study rectangle (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# tricontourf cannot triangulate NaN values, so drop incomplete samples
df = df.dropna(subset=['PRES (decibar)', 'TEMP (degree_Celsius)', 'PSAL_ADJUSTED (psu)'])

# Keep only samples inside the rectangle (bounds inclusive). The explicit
# copy makes the result an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning.
inside_box = (
    df['longitude'].between(rect_min_lon, rect_max_lon)
    & df['latitude'].between(rect_min_lat, rect_max_lat)
)
df = df[inside_box].copy()

# Express dates as POSIX seconds so they can be used as a numeric plot axis,
# then discard everything after the end of the analysis period
df['timestamp'] = df[date_column].map(lambda stamp: stamp.timestamp())
cutoff_timestamp = pd.Timestamp('2017-03-30').timestamp()
df = df[df['timestamp'] <= cutoff_timestamp].copy()

# One entry per distinct acquisition time; used to mark the profile positions
unique_profiles = df['timestamp'].drop_duplicates()

# TEOS-10 quantities: Absolute Salinity (SA), Conservative Temperature (CT)
# and potential density anomaly referenced to 0 dbar
df['SA'] = gsw.SA_from_SP(
    df['PSAL_ADJUSTED (psu)'],
    df['PRES (decibar)'],
    df['longitude'],
    df['latitude'],
)
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP (degree_Celsius)'], df['PRES (decibar)'])
df['density'] = gsw.sigma0(df['SA'], df['CT'])  # potential density anomaly

# --------------------------
# Calculate Mixed Layer Depth (MLD)
# --------------------------
def calculate_mld(profile, threshold=0.03):
    """Return the mixed layer depth of one profile, expressed as a pressure.

    The mixed layer depth is the pressure of the first row whose density
    exceeds the density of the profile's first row by more than ``threshold``.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of a single profile with 'density' and 'PRES (decibar)' columns.
        The first row is used as the surface reference, so the rows are
        assumed to be ordered from the surface downward.
    threshold : float, optional
        Density increase relative to the first row that marks the base of
        the mixed layer (default 0.03 kg/m^3).

    Returns
    -------
    float
        Pressure at the first threshold crossing, or NaN when the profile is
        empty or its density never exceeds the threshold.
    """
    if profile.empty:
        return np.nan

    surface_density = profile['density'].iloc[0]
    exceeds = (profile['density'] > surface_density + threshold).to_numpy()

    # idxmax() on an all-False mask would silently return the first row, so
    # the "no crossing" case has to be tested explicitly.
    if not exceeds.any():
        return np.nan

    # Positional lookup: independent of the index labels (a label of 0 is a
    # perfectly valid row and must not be mistaken for "not found").
    return profile['PRES (decibar)'].iloc[exceeds.argmax()]

# Identify each profile by its acquisition time. Grouping by position would
# merge distinct profiles that happen to share the same latitude/longitude.
df['profile'] = df.groupby('timestamp').ngroup()

# One MLD per profile, indexed by the profile id so it can be mapped back
mld_values = df.groupby('profile')[['density', 'PRES (decibar)']].apply(calculate_mld)

# One representative row per profile, carrying that profile's MLD
u_profiles = df.drop_duplicates(subset=['profile']).copy()  # copy avoids SettingWithCopyWarning
u_profiles['mld'] = u_profiles['profile'].map(mld_values)

# Series used to overlay the MLD on the sections
timestamps = u_profiles['timestamp']
mld = u_profiles['mld']
# 5-point running mean of the MLD
mld_smoothed = uniform_filter1d(mld, size=5)

# --------------------------
# Define Vertical Lines and Date Ticks
# --------------------------
vertical_lines_dates = ['2016-08-08', '2016-09-02']
vertical_lines_timestamps = [pd.Timestamp(date).timestamp() for date in vertical_lines_dates]

# Time span covered by the data, computed once and reused for both tick sets
sample_times = pd.to_datetime(df['timestamp'], unit='s')
first_time, last_time = sample_times.min(), sample_times.max()

# Minor ticks: one per month end.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the installed version before changing it.
date_ticks_months = pd.date_range(start=first_time, end=last_time, freq='M')
month_timestamps = date_ticks_months.map(lambda x: x.timestamp())

# Major ticks: one per year start, labelled with the year
date_ticks_years = pd.date_range(start=first_time, end=last_time, freq='YS')
year_timestamps = date_ticks_years.map(lambda x: x.timestamp())
year_labels = date_ticks_years.strftime('%Y')

# --------------------------
# Plot Temperature and Salinity Contours with MLD Overlay
# --------------------------
def _draw_section(row, field, levels, cmap, colorbar_label):
    """Draw one time-pressure section with the MLD overlay in subplot ``row``.

    Reads the module-level ``df``, ``timestamps``, ``mld``,
    ``vertical_lines_timestamps``, ``year_timestamps``, ``year_labels``,
    ``month_timestamps`` and ``unique_profiles``.

    Parameters
    ----------
    row : int
        1-based row of the 2x1 subplot grid to draw into.
    field : str
        Column of ``df`` to contour.
    levels : int or array-like
        Passed to ``tricontourf`` as ``levels``.
    cmap : matplotlib colormap
        Colormap for the filled contours.
    colorbar_label : str
        Label written next to the colorbar.

    Returns
    -------
    tuple
        ``(axes, top_axes, contour_set, colorbar)``.
    """
    plt.subplot(2, 1, row)
    section_ax = plt.gca()

    contour = plt.tricontourf(df['timestamp'], df['PRES (decibar)'], df[field],
                              levels=levels, cmap=cmap)
    plt.ylim(2000, 0)  # reversed limits: pressure increases downward
    cbar = plt.colorbar(contour)
    cbar.set_label(colorbar_label, fontsize=15)
    cbar.ax.tick_params(labelsize=14)

    # Mixed layer depth overlay and event markers
    plt.plot(timestamps, mld, color='white', linestyle='-', label='MLD')
    plt.ylabel('Depth (dbar)', fontsize=15)
    plt.yticks(np.arange(0, 2001, 500), fontsize=15)
    for vline in vertical_lines_timestamps:
        plt.axvline(x=vline, color='red', linestyle='--', linewidth=1.5)

    # Year labels as major ticks, unlabelled month ticks as minor ticks
    plt.xticks(year_timestamps, year_labels, fontsize=14, rotation=0)
    plt.tick_params(axis='x', which='minor', direction='out', length=5, color='black')
    section_ax.set_xticks(month_timestamps, minor=True)

    # Unlabelled ticks along the top edge mark the time of every profile
    top_ax = section_ax.twiny()
    top_ax.set_xlim(section_ax.get_xlim())
    top_ax.set_xticks(unique_profiles)
    top_ax.tick_params(axis='x', direction='out', length=5, color='black')
    top_ax.set_xticklabels([])

    return section_ax, top_ax, contour, cbar


plt.figure(figsize=(16, 10))

# Conservative Temperature section (subplot 1)
l_temp = np.arange(-1.9, 2, 0.1)
ax_temp, ax2_temp, contour_temp, cbar_temp = _draw_section(
    1, 'CT', l_temp, cmocean.cm.thermal, 'Temperature (°C)')

# Absolute Salinity section (subplot 2); tricontourf picks 50 levels itself.
# The plotted column is SA (Absolute Salinity), so the colorbar is labelled
# in g/kg rather than psu.
ax, ax2, contour_sal, cbar_sal = _draw_section(
    2, 'SA', 50, cmocean.cm.haline, 'Absolute Salinity (g/kg)')

# Label the bottom axis explicitly: after twiny() the top twin is pyplot's
# current axes, so plt.xlabel would put the label on the top edge instead.
ax.set_xlabel('Date', fontsize=15)
plt.tight_layout()
# plt.savefig(f'/home/soumya/Backup/Plots/Argo/{argo_float_number}_temp_salinity.png', dpi=300)
plt.show()


# Figure 3 (c)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
import gsw

# Path to the Argo profile CSV export analysed in this cell
file_path = '/mnt/d/Data/Argo/Argo/PR_PF_7900334.csv'

# Load the CSV file
df = pd.read_csv(file_path)

# Parse the date column and coerce the measurement columns to numbers
# (unparsable entries become NaN). Pressure is taken from the ADJUSTED column
# so that it is consistent with the adjusted temperature and salinity used
# in the same calculations.
date_column = 'DATE (YYYY-MM-DDTHH:MI:SSZ)'
df[date_column] = pd.to_datetime(df[date_column])
df['PRES (decibar)'] = pd.to_numeric(df['PRES_ADJUSTED (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP_ADJUSTED (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')
df['PSAL_ADJUSTED (psu)'] = pd.to_numeric(df['PSAL_ADJUSTED (psu)'], errors='coerce')

# Bounds of the study rectangle (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# Drop incomplete samples: a NaN in any of these columns yields a NaN density,
# which would corrupt the surface reference used for the mixed layer depth
df = df.dropna(subset=['PRES (decibar)', 'TEMP (degree_Celsius)', 'PSAL_ADJUSTED (psu)'])

# Keep only samples inside the rectangle (bounds inclusive). The explicit
# copy makes the result an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning.
inside_box = (
    df['longitude'].between(rect_min_lon, rect_max_lon)
    & df['latitude'].between(rect_min_lat, rect_max_lat)
)
df = df[inside_box].copy()

# Express dates as POSIX seconds, then discard everything after the end of
# the analysis period
df['timestamp'] = df[date_column].map(lambda stamp: stamp.timestamp())
cutoff_timestamp = pd.Timestamp('2017-03-30').timestamp()
df = df[df['timestamp'] <= cutoff_timestamp].copy()

# TEOS-10 quantities: Absolute Salinity (SA), Conservative Temperature (CT)
# and potential density anomaly referenced to 0 dbar
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'], df['PRES (decibar)'], df['longitude'], df['latitude'])
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP (degree_Celsius)'], df['PRES (decibar)'])
df['density'] = gsw.sigma0(df['SA'], df['CT'])  # potential density anomaly

# Dates marked with vertical lines in the figure, also as POSIX seconds
vertical_lines_dates = ['2016-08-08', '2016-09-02']
vertical_lines_timestamps = [pd.Timestamp(date).timestamp() for date in vertical_lines_dates]

# Function to calculate the Mixed Layer Depth (MLD) based on density
def calculate_mld(profile, threshold=0.03):
    """Return the mixed layer depth of one profile, expressed as a pressure.

    The mixed layer depth is the pressure of the first row whose density
    exceeds the density of the profile's first row by more than ``threshold``.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of a single profile with 'density' and 'PRES (decibar)' columns.
        The first row is used as the surface reference, so the rows are
        assumed to be ordered from the surface downward.
    threshold : float, optional
        Density increase relative to the first row that marks the base of
        the mixed layer (default 0.03 kg/m^3).

    Returns
    -------
    float
        Pressure at the first threshold crossing, or NaN when the profile is
        empty or its density never exceeds the threshold.
    """
    if profile.empty:
        return np.nan

    surface_density = profile['density'].iloc[0]
    exceeds = (profile['density'] > surface_density + threshold).to_numpy()

    # idxmax() on an all-False mask would silently return the first row, so
    # the "no crossing" case has to be tested explicitly.
    if not exceeds.any():
        return np.nan

    # Positional lookup: independent of the index labels (a label of 0 is a
    # perfectly valid row and must not be mistaken for "not found").
    return profile['PRES (decibar)'].iloc[exceeds.argmax()]

# Identify each profile by its acquisition time. Grouping by position would
# merge distinct profiles that happen to share the same latitude/longitude.
# assign() returns a new frame, so nothing is written into a filtered slice.
df = df.assign(profile=df.groupby('timestamp').ngroup())

# One MLD per profile, indexed by the profile id
mld_values = df.groupby('profile')[['density', 'PRES (decibar)']].apply(calculate_mld)

# Attach each profile's MLD to every one of its rows, keyed on the profile id
df = df.assign(mld=df['profile'].map(mld_values))

# One representative row per profile for the line plot
u_profiles = df.drop_duplicates(subset=['profile'])
timestamps = pd.to_datetime(u_profiles['timestamp'], unit='s')
mld = u_profiles['mld']

# Calculate the salinity at the mixed layer and 250m depth
def get_mixed_layer_and_250m_salinity(profile, target_pressure=250):
    """Return the mixed-layer mean salinity and the salinity near 250 dbar.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of a single profile with 'PRES (decibar)', 'PSAL_ADJUSTED (psu)'
        and 'mld' columns; the 'mld' value of the first row is used for the
        whole profile.
    target_pressure : float, optional
        Pressure (dbar) at which the second salinity is sampled (default 250).

    Returns
    -------
    pandas.Series
        Two values: the mean salinity of all rows at or above the mixed layer
        depth, and the salinity of the row whose pressure is closest to
        ``target_pressure``. Either value is NaN when it cannot be determined
        (empty profile, NaN mixed layer depth, or no valid pressure).
    """
    if profile.empty:
        return pd.Series([np.nan, np.nan])

    pressure = profile['PRES (decibar)']
    salinity = profile['PSAL_ADJUSTED (psu)']

    # Mean salinity over the mixed layer; a NaN MLD selects no rows at all
    mld_value = profile['mld'].iloc[0]
    in_mixed_layer = pressure <= mld_value
    mixed_layer_sal = salinity[in_mixed_layer].mean() if in_mixed_layer.any() else np.nan

    # Salinity of the row nearest to the target pressure. idxmin() fails when
    # every pressure is NaN, so that case is handled explicitly and the row
    # is looked up by position rather than by index label.
    offset = (pressure - target_pressure).abs().to_numpy()
    if np.isnan(offset).all():
        sal_250m = np.nan
    else:
        sal_250m = salinity.iloc[np.nanargmin(offset)]

    return pd.Series([mixed_layer_sal, sal_250m])

# Per-profile table with the mixed-layer salinity and the salinity near 250 dbar
salinity_values = df.groupby('profile').apply(get_mixed_layer_and_250m_salinity)
salinity_values = salinity_values.set_axis(['mixed_layer_sal', 'sal_250m'], axis=1)

# Attach both salinities to the one-row-per-profile table
df = u_profiles.merge(salinity_values, left_on='profile', right_index=True)

# Mixed-layer salinity minus the salinity near 250 dbar
df['salinity_difference'] = df['mixed_layer_sal'].sub(df['sal_250m'])

# Time series of the salinity difference
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(
    timestamps,
    df['salinity_difference'],
    color='purple',
    marker='s',
    label='Salinity Difference (MLD - 250m)',
)
ax.set_ylabel('Salinity Difference (PSU)', fontsize=14)
# plt.xlabel('Date', fontsize=14)
# plt.legend()
# plt.grid(True)

# Dashed red markers at the event dates; only a date equal to the first
# entry of the list receives a legend label
for vline in vertical_lines_dates:
    event_label = f'Event: {vline}' if vline in vertical_lines_dates[:1] else None
    ax.axvline(
        x=pd.Timestamp(vline),
        color='red',
        linestyle='--',
        linewidth=1.5,
        label=event_label,
    )

# Clamp the x-axis to the data span so no padding is drawn on either side
x_min, x_max = timestamps.min(), timestamps.max()
ax.set_xlim([x_min, x_max])

# One major tick per month, labelled as year-month
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.tick_params(axis='both', which='major', labelsize=14)
plt.xticks(rotation=45)

fig.tight_layout()
fig.savefig('/home/soumya/Backup/Plots/Argo/mld_sal_250m_diff.png', dpi=400)
plt.show()
