# Accessing EM-APEX & ANDRO Data

In [1]:
#All the imports I utalized to allow any code to run across any of my notebooks
import xarray as xr
xr.set_options(display_style="html", display_expand_attrs=False);
from matplotlib import pyplot as plt
plt.style.use('default')
import cartopy
import cartopy.crs as ccrs
import argopy
import numpy as np
import os, shutil
import pandas as pd
from pathlib import Path
import seawater as sw

## Accessing EM-APEX Data

As was mentioned in the .README file, you must first download the EM-APEX data files you wish to use directly to your directory. The following code will instead focus on learning how to navigate through this data, or at least provide a quick overview as to how I structured it to work with each of my graphs!

The EM-APEX Data provided in the google drive link is divided into four different types, each organized and explained here.

### Type 1 - All CTD data of EM-APEX Experiments

One thing to note here is that the CTD files that have all the data for SMILE had more data points in general than the files with velocity data. This is why I use the overall GPS file for trajectories, the overall CTD file for CTD and T/S diagrams, and then the interpolated velocity-gps file for my velocity graphs.

This shows the raw form of the CTD dataset

In [2]:
EM_APEX_CTD = r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\ctd_points_allexperiments.parquet" #Make sure to replace the path with your own depending on where you stored the file in your directories!

EM_APEX_CTDdata_df = pd.read_parquet(EM_APEX_CTD)

EM_APEX_CTDdata_df

Unnamed: 0,deployment,experiment,dive_nbr,P,T,S,pc,UXT,UXT_DT
0,4976a,DIMES,0001,25.30,5.3550,34.0960,39,1293711649,2010-12-30 12:20:49+00:00
1,4976a,DIMES,0001,27.35,5.3550,34.0950,39,1293711664,2010-12-30 12:21:04+00:00
2,4976a,DIMES,0001,29.58,5.3570,34.0950,39,1293711680,2010-12-30 12:21:20+00:00
3,4976a,DIMES,0001,31.74,5.3560,34.0950,39,1293711695,2010-12-30 12:21:35+00:00
4,4976a,DIMES,0001,34.44,5.3540,34.0950,39,1293711714,2010-12-30 12:21:54+00:00
...,...,...,...,...,...,...,...,...,...
4518820,4971n1,NISKINE,0560,10.06,1.6540,33.9020,115,1617404024,2021-04-02 22:53:44+00:00
4518821,4971n1,NISKINE,0560,7.76,0.7556,33.6520,117,1617404049,2021-04-02 22:54:09+00:00
4518822,4971n1,NISKINE,0560,5.56,0.6208,33.5651,119,1617404072,2021-04-02 22:54:32+00:00
4518823,4971n1,NISKINE,0560,3.38,0.5671,33.5349,121,1617404095,2021-04-02 22:54:55+00:00


This is how you slice the dataset to only include all rows whose experiment column has the value, SMILE, meaning limiting the data to only that of SMILE

In [3]:
EM_SMILE_df = EM_APEX_CTDdata_df.loc[EM_APEX_CTDdata_df["experiment"] == 'SMILE'] 
EM_SMILE_df

Unnamed: 0,deployment,experiment,dive_nbr,P,T,S,pc,UXT,UXT_DT
2585963,4968s1,SMILE,0001,36.12,20.413,35.216,9,1488948659,2017-03-08 04:50:59+00:00
2585964,4968s1,SMILE,0001,39.18,20.387,35.207,9,1488948678,2017-03-08 04:51:18+00:00
2585965,4968s1,SMILE,0001,41.48,20.364,35.206,9,1488948693,2017-03-08 04:51:33+00:00
2585966,4968s1,SMILE,0001,44.41,20.340,35.205,9,1488948712,2017-03-08 04:51:52+00:00
2585967,4968s1,SMILE,0001,47.33,20.325,35.205,9,1488948731,2017-03-08 04:52:11+00:00
...,...,...,...,...,...,...,...,...,...
3021752,7808s3,SMILE,0224,11.42,15.252,33.947,152,1490844693,2017-03-30 03:31:33+00:00
3021753,7808s3,SMILE,0224,8.91,15.353,33.946,152,1490844713,2017-03-30 03:31:53+00:00
3021754,7808s3,SMILE,0224,6.47,15.461,33.946,152,1490844733,2017-03-30 03:32:13+00:00
3021755,7808s3,SMILE,0224,4.23,15.482,33.942,152,1490844751,2017-03-30 03:32:31+00:00


In [4]:
#Generally if I use this dataset, I will include the following in the beginning of the notebook:
EM_APEX_CTD = r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\ctd_points_allexperiments.parquet" #Make sure to replace the path with your own depending on where you stored the file in your directories!
EM_APEX_CTDdata_df = pd.read_parquet(EM_APEX_CTD)
EM_SMILE_df = EM_APEX_CTDdata_df.loc[EM_APEX_CTDdata_df["experiment"] == 'SMILE'] 

In [5]:
#Here are some other examples of limiting columns with numeric values:

EM_SMILE_df2 = EM_APEX_CTDdata_df.loc[(EM_APEX_CTDdata_df["experiment"] == 'SMILE') & (EM_APEX_CTDdata_df["S"] >= 33)] #This limits the data to everything with Salinity greater than or equal to 33.
EM_SMILE_df2

Unnamed: 0,deployment,experiment,dive_nbr,P,T,S,pc,UXT,UXT_DT
2585963,4968s1,SMILE,0001,36.12,20.413,35.216,9,1488948659,2017-03-08 04:50:59+00:00
2585964,4968s1,SMILE,0001,39.18,20.387,35.207,9,1488948678,2017-03-08 04:51:18+00:00
2585965,4968s1,SMILE,0001,41.48,20.364,35.206,9,1488948693,2017-03-08 04:51:33+00:00
2585966,4968s1,SMILE,0001,44.41,20.340,35.205,9,1488948712,2017-03-08 04:51:52+00:00
2585967,4968s1,SMILE,0001,47.33,20.325,35.205,9,1488948731,2017-03-08 04:52:11+00:00
...,...,...,...,...,...,...,...,...,...
3021752,7808s3,SMILE,0224,11.42,15.252,33.947,152,1490844693,2017-03-30 03:31:33+00:00
3021753,7808s3,SMILE,0224,8.91,15.353,33.946,152,1490844713,2017-03-30 03:31:53+00:00
3021754,7808s3,SMILE,0224,6.47,15.461,33.946,152,1490844733,2017-03-30 03:32:13+00:00
3021755,7808s3,SMILE,0224,4.23,15.482,33.942,152,1490844751,2017-03-30 03:32:31+00:00


In [6]:
#This is how you split the dataset up into sections of the SMILE experiment.

Section_1 = ["4968s1", "4970s1", "6669s1", "6677s1", "6680s1", "7493s1", "7495s1", "7805s1", "7805s1a", "7806s1"]

Section_2 = ["4392s2", "4968s2", "4970s2", "6668s2", "6669s2", "6670s2", "6677s2", "6679s2", "6680s2", "7493s2", "7494s2", "7495s2", "7800s2", "7802s2", "7804s2", "7805s2", "7806s2"]

Section_3 = ["4392s3", "4968s3", "4970s3", "6665s3", "6668s3", "6669s3", "6670s3", "6673s3", "6676s3", "6677s3", "6679s3", "7494s3", "7493s3", "7800s3", "7801s3", "7802s3", "7803s3", "7804s3", "7805s3", "7806s3", "7807s3", "7808s3"]

EM_SMILECTD1_df = EM_SMILE_df.loc[EM_SMILE_df['deployment'].isin(Section_1)]
EM_SMILECTD2_df = EM_SMILE_df.loc[EM_SMILE_df['deployment'].isin(Section_2)]
EM_SMILECTD3_df = EM_SMILE_df.loc[EM_SMILE_df['deployment'].isin(Section_3)]

### Type 2 - All GPS data of EM-APEX Experiments

In [7]:
#This is how you access and read the file!
EM_APEX_GPS = r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\gps_points_allexperiments.parquet" #Make sure to replace the path with your own depending on where you stored the file in your directories!
EM_APEX_GPSdata_df = pd.read_parquet(EM_APEX_GPS) #This reads the raw dataframe
EM_SMILEGPS_df = EM_APEX_GPSdata_df.loc[EM_APEX_GPSdata_df["experiment"] == 'SMILE'] #This limits the data to only the SMILE experiment

EM_SMILEGPS_df #This represents all GPS data of just the SMILE EM-APEX experiment

Unnamed: 0,deployment,experiment,dive_nbr,LAT,LON,STAT,NSAT,HDOP,ALT,AGE,REFSTN,UXT_GPS,UXT_APF9,UXT_DT
47899,4968s1,SMILE,0002,26.238247,-146.275868,1,3,1.5,,-1,-1,1488952431,1488952439,2017-03-08 05:53:59+00:00
47900,4968s1,SMILE,0002,26.238247,-146.275868,1,3,1.5,,-1,-1,1488952431,1488952439,2017-03-08 05:53:59+00:00
47901,4968s1,SMILE,0002,26.238247,-146.275868,1,3,1.5,,-1,-1,1488952431,1488952440,2017-03-08 05:54:00+00:00
47902,4968s1,SMILE,0002,26.238247,-146.275868,1,3,1.5,,-1,-1,1488952431,1488952440,2017-03-08 05:54:00+00:00
47903,4968s1,SMILE,0002,26.238247,-146.275868,1,3,1.5,,-1,-1,1488952431,1488952441,2017-03-08 05:54:01+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339220,7808s3,SMILE,0236,35.230860,-139.652022,1,9,0.9,-5.1,-1,-1,1490851905,1490851916,2017-03-30 05:31:56+00:00
339221,7808s3,SMILE,0236,35.230860,-139.652022,1,9,0.9,-5.1,-1,-1,1490851905,1490851916,2017-03-30 05:31:56+00:00
339222,7808s3,SMILE,0236,35.230860,-139.652022,1,9,0.9,-5.1,-1,-1,1490851905,1490851917,2017-03-30 05:31:57+00:00
339223,7808s3,SMILE,0236,35.230860,-139.652022,1,9,0.9,-5.1,-1,-1,1490851905,1490851917,2017-03-30 05:31:57+00:00


In [8]:
#This is how you split the file up into sections of the SMILE experiment.

Section_1 = ["4968s1", "4970s1", "6669s1", "6677s1", "6680s1", "7493s1", "7495s1", "7805s1", "7805s1a", "7806s1"]

Section_2 = ["4392s2", "4968s2", "4970s2", "6668s2", "6669s2", "6670s2", "6677s2", "6679s2", "6680s2", "7493s2", "7494s2", "7495s2", "7800s2", "7802s2", "7804s2", "7805s2", "7806s2"]

Section_3 = ["4392s3", "4968s3", "4970s3", "6665s3", "6668s3", "6669s3", "6670s3", "6673s3", "6676s3", "6677s3", "6679s3", "7494s3", "7493s3", "7800s3", "7801s3", "7802s3", "7803s3", "7804s3", "7805s3", "7806s3", "7807s3", "7808s3"]

EM_SMILEGPS1_df = EM_APEX_GPSdata_df.loc[EM_APEX_GPSdata_df['deployment'].isin(Section_1)]
EM_SMILEGPS2_df = EM_APEX_GPSdata_df.loc[EM_APEX_GPSdata_df['deployment'].isin(Section_2)]
EM_SMILEGPS3_df = EM_APEX_GPSdata_df.loc[EM_APEX_GPSdata_df['deployment'].isin(Section_3)]


### Type 3 - All Velocity data of EM-APEX Experiments

In [9]:
#This represents all velocity data of just the SMILE EM-APEX experiment
EM_APEX_Velocity = r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\vel_points_allexperiments.parquet"
EM_APEX_Vdata1_df = pd.read_parquet(EM_APEX_Velocity)
EM_APEX_Vdata2_df = EM_APEX_Vdata1_df.loc[EM_APEX_Vdata1_df["experiment"] == 'SMILE']

Then I did end up modifying the table a little bit; since there were two measurements of both Eastward and Northward velocity, I averaged them and then plotted the average velocity!

In [10]:
EM_APEX_Vdata2_df['u'] = EM_APEX_Vdata2_df.apply(lambda row: (row['u1'] + row['u2']) / 2, axis=1)
EM_APEX_Vdata2_df['v'] = EM_APEX_Vdata2_df.apply(lambda row: (row['v1'] + row['v2']) / 2, axis=1)
EM_APEX_Vdata2_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  EM_APEX_Vdata2_df['u'] = EM_APEX_Vdata2_df.apply(lambda row: (row['u1'] + row['u2']) / 2, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  EM_APEX_Vdata2_df['v'] = EM_APEX_Vdata2_df.apply(lambda row: (row['v1'] + row['v2']) / 2, axis=1)


Unnamed: 0,deployment,experiment,dive_nbr,P,T,S,u1,v1,verr1,u2,v2,verr2,W,e1mean,e2mean,piston,uxt,UXT_DT,u,v
1150805,4968s1,SMILE,0001,38.4,20.394,35.209,0.237,-0.158,0.008,0.219,-0.115,0.007,0.159,1360.9,-1049.4,9.0,1.488949e+09,2017-03-08 04:51:13+00:00,0.2280,-0.1365
1150806,4968s1,SMILE,0001,42.6,20.355,35.205,0.207,-0.157,0.008,0.202,-0.122,0.007,0.154,1360.3,-1049.3,9.0,1.488949e+09,2017-03-08 04:51:40+00:00,0.2045,-0.1395
1150807,4968s1,SMILE,0001,46.4,20.330,35.205,0.176,-0.150,0.007,0.178,-0.122,0.007,0.154,1359.7,-1049.3,9.0,1.488949e+09,2017-03-08 04:52:05+00:00,0.1770,-0.1360
1150808,4968s1,SMILE,0001,50.2,20.317,35.204,0.175,-0.148,0.005,0.165,-0.119,0.005,0.154,1359.5,-1049.0,9.0,1.488949e+09,2017-03-08 04:52:30+00:00,0.1700,-0.1335
1150809,4968s1,SMILE,0001,54.1,20.300,35.203,0.160,-0.152,0.008,0.154,-0.123,0.007,0.153,1359.4,-1048.4,9.0,1.488949e+09,2017-03-08 04:52:55+00:00,0.1570,-0.1375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1491077,7808s3,SMILE,0224,14.6,15.188,33.948,-0.067,0.065,0.014,-0.083,0.101,0.018,-0.127,1110.7,-97.8,152.0,1.490845e+09,2017-03-30 03:31:08+00:00,-0.0750,0.0830
1491078,7808s3,SMILE,0224,11.3,15.257,33.947,-0.052,0.041,0.020,-0.099,0.042,0.019,-0.125,1111.3,-98.7,152.0,1.490845e+09,2017-03-30 03:31:34+00:00,-0.0755,0.0415
1491079,7808s3,SMILE,0224,8.2,15.385,33.946,-0.030,0.048,0.027,-0.060,-0.006,0.025,-0.123,1110.8,-99.6,152.0,1.490845e+09,2017-03-30 03:31:59+00:00,-0.0450,0.0210
1491080,7808s3,SMILE,0224,5.0,15.475,33.943,-0.022,0.031,0.033,0.011,-0.035,0.032,-0.122,1110.5,-99.9,152.0,1.490845e+09,2017-03-30 03:32:25+00:00,-0.0055,-0.0020


You can also check the maximum and minimum timestamps...

In [11]:
EM_APEX_Vdata2_df = EM_APEX_Vdata2_df.loc[EM_APEX_Vdata2_df['deployment'] == '4968s1'] #Limited the velocity data to section 1 of the SMILE experiment, or Region 1!
print(EM_APEX_Vdata2_df['UXT_DT'].max(), EM_APEX_Vdata2_df['UXT_DT'].min())

2017-03-11 19:31:02+00:00 2017-03-08 04:51:13+00:00


In theory this means that there are no values in 2019, which is good since that would have been bad data! So, I technically could have used this dataset for Region 1 comparisons, but I decided to go with the set that interpolated GPS data that was provided by my mentor.

### Type 4 - Velocity and GPS data of EM-APEX SMILE in 2017

I did the same thing as I did to type 3 to type 4; it came limited to SMILE and 2017 data, but I still needed to add a column with averaged Eastward and Northward velocities!

In [12]:
EM_APEX_Velocity2017 = r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\vel_gps_interpolated_smile2017only.parquet"
EM_APEX_Vdata3_df = pd.read_parquet(EM_APEX_Velocity2017)
EM_APEX_Vdata3_df['u'] = EM_APEX_Vdata3_df.apply(lambda row: (row['u1'] + row['u2']) / 2, axis=1)
EM_APEX_Vdata3_df['v'] = EM_APEX_Vdata3_df.apply(lambda row: (row['v1'] + row['v2']) / 2, axis=1)
EM_APEX_Vdata3_df

Unnamed: 0_level_0,LAT,LON,deployment,experiment,dive_nbr,P,T,S,u1,v1,...,u2,v2,verr2,W,e1mean,e2mean,piston,uxt,u,v
UXT_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-03-08 04:51:13+00:00,26.236990,-146.269022,4968s1,SMILE,0001,38.40000,20.394000,35.209000,0.237,-0.158000,...,0.219,-0.115000,0.007,0.159000,1360.9,-1049.4,9.0,1.488949e+09,0.2280,-0.136500
2017-03-08 04:51:40+00:00,26.237970,-146.269244,4968s1,SMILE,0001,42.60000,20.355000,35.205000,0.207,-0.157000,...,0.202,-0.122000,0.007,0.154000,1360.3,-1049.3,9.0,1.488949e+09,0.2045,-0.139500
2017-03-08 04:52:05+00:00,26.238877,-146.269449,4968s1,SMILE,0001,46.40000,20.330000,35.205000,0.176,-0.150000,...,0.178,-0.122000,0.007,0.154000,1359.7,-1049.3,9.0,1.488949e+09,0.1770,-0.136000
2017-03-08 04:52:30+00:00,26.239784,-146.269654,4968s1,SMILE,0001,50.20000,20.317000,35.204000,0.175,-0.148000,...,0.165,-0.119000,0.005,0.154000,1359.5,-1049.0,9.0,1.488949e+09,0.1700,-0.133500
2017-03-08 04:52:55+00:00,26.240218,-146.269709,4968s1,SMILE,0001,54.10000,20.300000,35.203000,0.160,-0.152000,...,0.154,-0.123000,0.007,0.153000,1359.4,-1048.4,9.0,1.488949e+09,0.1570,-0.137500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-03-30 03:31:08+00:00,35.199656,-139.709843,7808s3,SMILE,0224,14.60000,15.188000,33.948000,-0.067,0.065000,...,-0.083,0.101000,0.018,-0.127000,1110.7,-97.8,152.0,1.490845e+09,-0.0750,0.083000
2017-03-30 03:31:34+00:00,35.201414,-139.723036,7808s3,SMILE,0224,11.30000,15.257000,33.947000,-0.052,0.041000,...,-0.099,0.042000,0.019,-0.125000,1111.3,-98.7,152.0,1.490845e+09,-0.0755,0.041500
2017-03-30 03:31:59+00:00,35.203105,-139.735722,7808s3,SMILE,0224,8.20000,15.385000,33.946000,-0.030,0.048000,...,-0.060,-0.006000,0.025,-0.123000,1110.8,-99.6,152.0,1.490845e+09,-0.0450,0.021000
2017-03-30 03:32:25+00:00,35.204864,-139.748914,7808s3,SMILE,0224,5.00000,15.475000,33.943000,-0.022,0.031000,...,0.011,-0.035000,0.032,-0.122000,1110.5,-99.9,152.0,1.490845e+09,-0.0055,-0.002000


I mostly focused on velocity from Region 1 of EM-APEX because that is all I had time to explore and cross reference with ANDRO data, but if I had more time I would encourage more exploration!

In [13]:
EM_APEX_Vdata_dfS1 = EM_APEX_Vdata3_df.loc[(EM_APEX_Vdata3_df["deployment"] == '4968s1')]
EM_APEX_Vdata_dfS1

Unnamed: 0_level_0,LAT,LON,deployment,experiment,dive_nbr,P,T,S,u1,v1,...,u2,v2,verr2,W,e1mean,e2mean,piston,uxt,u,v
UXT_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-03-08 04:51:13+00:00,26.236990,-146.269022,4968s1,SMILE,0001,38.4,20.394,35.209,0.237,-0.158,...,0.219,-0.115,0.007,0.159,1360.9,-1049.4,9.0,1.488949e+09,0.2280,-0.1365
2017-03-08 04:51:40+00:00,26.237970,-146.269244,4968s1,SMILE,0001,42.6,20.355,35.205,0.207,-0.157,...,0.202,-0.122,0.007,0.154,1360.3,-1049.3,9.0,1.488949e+09,0.2045,-0.1395
2017-03-08 04:52:05+00:00,26.238877,-146.269449,4968s1,SMILE,0001,46.4,20.330,35.205,0.176,-0.150,...,0.178,-0.122,0.007,0.154,1359.7,-1049.3,9.0,1.488949e+09,0.1770,-0.1360
2017-03-08 04:52:30+00:00,26.239784,-146.269654,4968s1,SMILE,0001,50.2,20.317,35.204,0.175,-0.148,...,0.165,-0.119,0.005,0.154,1359.5,-1049.0,9.0,1.488949e+09,0.1700,-0.1335
2017-03-08 04:52:55+00:00,26.240218,-146.269709,4968s1,SMILE,0001,54.1,20.300,35.203,0.160,-0.152,...,0.154,-0.123,0.007,0.153,1359.4,-1048.4,9.0,1.488949e+09,0.1570,-0.1375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-03-11 19:29:21+00:00,25.668011,-145.547272,4968s1,SMILE,0128,22.9,21.706,35.222,0.092,0.220,...,0.121,0.220,0.012,-0.122,1393.8,-1828.6,138.0,1.489261e+09,0.1065,0.2200
2017-03-11 19:29:46+00:00,25.666689,-145.546777,4968s1,SMILE,0128,19.8,21.782,35.219,0.049,0.231,...,0.095,0.209,0.012,-0.123,1391.9,-1828.3,138.0,1.489261e+09,0.0720,0.2200
2017-03-11 19:30:12+00:00,25.665315,-145.546262,4968s1,SMILE,0128,16.6,21.814,35.218,0.042,0.249,...,0.127,0.229,0.014,-0.126,1390.4,-1827.7,138.0,1.489261e+09,0.0845,0.2390
2017-03-11 19:30:37+00:00,25.663994,-145.545768,4968s1,SMILE,0128,13.5,21.853,35.215,0.061,0.251,...,0.121,0.209,0.012,-0.122,1388.9,-1826.9,138.0,1.489261e+09,0.0910,0.2300


## Accesing ANDRO Data

Remember to download the ANDRO .dat file to your computer, further instructions on this in the .README file, but once you have that done move on to the following:

In [14]:
import netCDF4
import numpy as np
import xarray as xr
import pandas as pd
import re
from matplotlib import pyplot as plt
%matplotlib inline

The Argo data was all formatted in .netCDF where we accessed the data using argopy! Now, we will use pandas to access ANDRO which is read in a .csv data format.

In [16]:
#Make sure to replace the file path with that of your own directory!
ANDROFullData=r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\ANDRO2022-91950.dat"

In [17]:
#Then we restructured the file to be readable with the proper columns! This includes ALL data.
ANDRO_data = pd.read_csv(ANDROFullData, sep='\s+', header=None) #You can add nrows=10 to limit how many rows you see

The next step I followed was to sort through this table in search of any of the nine Argo floats that I identified in AccessingArgoDataP1 that intersected EM-APEX SMILE Region 1 or 2 in 2017, and got the following results, as a refresher these were our focus Argo floats:

- Region #1 = 5903608, 5904128, 5904977, 4902149, 4902947, 4902935, 5903603
- Region #2 = 4900816, 4902251

In [18]:
#We know column 34 represents WMO float IDs...
filtered_rows = ANDRO_data[ANDRO_data[34] == 5903608]
filtered_rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36
1007756,-999.9999,-99.9999,1103.6,3.428,-99.999,-9999.999,-999.99,-999.99,-999.99,-999.99,...,-145.924,27.517,4322.299,-146.002,27.524,4322.793,14,5903608,1,1
1007757,-146.0580,27.5230,1005.6,3.646,-99.999,4327.627,-1.32,-0.03,0.21,0.10,...,-146.114,27.522,4332.461,-146.209,27.488,4332.932,14,5903608,2,2
1007758,-146.2570,27.4860,1004.0,3.652,-99.999,4337.811,-1.13,-0.05,0.11,0.06,...,-146.305,27.484,4342.690,-146.315,27.488,4343.090,14,5903608,3,3
1007759,-146.3970,27.5320,996.3,3.689,-99.999,4347.953,-1.93,1.16,0.06,0.14,...,-146.479,27.576,4352.817,-146.521,27.643,4353.299,15,5903608,4,4
1007760,-146.6065,27.6600,999.8,3.687,-99.999,4358.162,-2.01,0.45,0.20,0.18,...,-146.692,27.677,4363.026,-146.750,27.728,4363.438,11,5903608,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,1001.8,3.768,-99.999,6920.182,-1.03,0.04,0.17,0.04,...,-143.804,25.992,6925.068,-143.853,25.980,6925.315,13,5903608,257,257
1008013,-143.8955,25.9685,1001.7,3.769,-99.999,6930.270,-0.99,-0.30,0.05,0.11,...,-143.938,25.957,6935.225,-143.965,25.999,6935.644,10,5903608,258,258
1008014,-144.0005,26.0210,999.2,3.779,-99.999,6940.604,-0.83,0.57,0.03,0.04,...,-144.036,26.043,6945.563,-144.015,26.062,6945.801,9,5903608,259,259
1008015,-144.0415,26.0785,1003.1,3.778,-99.999,6950.677,-0.63,0.43,0.10,0.12,...,-144.068,26.095,6955.554,-144.056,26.112,6955.869,13,5903608,260,260


In [19]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 5904128]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [20]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 5904977]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [21]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 4902149]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [22]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 4902947]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [23]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 4902935]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [24]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 5903603]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


In [25]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 4900816]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36
315513,-999.9999,-99.9999,776.2,4.243,-99.999,-9999.999,-999.99,-999.99,-999.99,-999.99,...,-126.273,39.290,2701.987,-126.193,39.209,2702.371,10,4900816,1,1
315514,-125.9140,39.2840,1001.3,3.658,-99.999,2707.524,5.41,1.87,0.24,0.38,...,-125.635,39.359,2712.677,-125.557,39.305,2713.013,11,4900816,2,2
315515,-125.5440,39.4235,1001.9,3.733,-99.999,2718.172,0.25,2.95,0.05,0.11,...,-125.531,39.542,2723.331,-125.536,39.541,2723.637,9,4900816,3,3
315516,-125.5870,39.7575,1010.2,3.670,-99.999,2728.771,-0.99,5.42,0.08,0.01,...,-125.638,39.974,2733.905,-125.690,40.027,2734.294,10,4900816,4,4
315517,-125.7515,40.1655,1003.8,3.781,-99.999,2739.431,-1.18,3.47,0.07,0.16,...,-125.813,40.304,2744.567,-125.810,40.177,2745.037,14,4900816,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315855,-138.5750,34.6760,1005.6,3.810,-99.999,6344.880,2.13,-3.12,0.10,0.07,...,-138.471,34.550,6350.064,-138.461,34.557,6350.327,15,4900816,343,343
315856,-138.5645,34.4300,1007.3,3.848,-99.999,6355.485,-2.13,-3.16,0.04,0.01,...,-138.668,34.303,6360.644,-138.669,34.285,6360.982,12,4900816,344,344
315857,-138.9215,34.2350,994.7,3.773,-99.999,6366.081,-5.28,-1.26,0.11,0.13,...,-139.174,34.185,6371.179,-139.204,34.121,6371.696,16,4900816,345,345
315858,-139.3975,34.1915,999.8,3.760,-99.999,6376.825,-4.03,1.76,0.10,0.07,...,-139.591,34.262,6381.954,-139.601,34.347,6382.321,16,4900816,346,346


In [26]:
filtered_rows1 = ANDRO_data[ANDRO_data[34] == 4902251]
filtered_rows1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36


The Results were as follows:
The ANDRO 2022 dataset, which represents Argo floats which have undergone the ANDRO algorithmn to calculate velocity at the surface and at 1000m, included data for the following Argo floats in 2017:

- Region #1 = 5903608
- Region #3 = 4900816

Then I proceeded to actually label the columns I found most important to my project, and to get rid of those deemed unnecessary of a chosen float that I included in my poster presentation and had time to run comparisons with EM-APEX:

In [27]:
filtered_rows.rename(columns={0:"Longitude XD", 1:"Latitude YD", 6:"EastwardVelocityDeep", 7:"NorthwardVelocityDeep", 8:"EastwardErrorDeep", 9:"NorthwardErrorDeep", 10:"Longitude XS1", 11:"Latitude YS1", 13:"EastwardVelocitySurface1", 14:"NorthwardVelocitySurface1", 15:"EastwardErrorSurface1", 16:"NorthwardErrorSurface1", 17:"Longitude XS2", 18:"Latitude YS2", 20:"EastwardVelocitySurface2", 21:"NorthwardVelocitySurface2", 22:"EastwardErrorSurface2", 23:"NorthwardErrorSurface2", 34:"WMO Float Number"}, inplace=True)
filtered_rows #These were the re-labeled columns
filtered_columns = filtered_rows.loc[:,["Longitude XD", "Latitude YD", "EastwardVelocityDeep", "NorthwardVelocityDeep", "EastwardErrorDeep", "NorthwardErrorDeep", "Longitude XS1", "Latitude YS1", "EastwardVelocitySurface1", "NorthwardVelocitySurface1", "EastwardErrorSurface1", "NorthwardErrorSurface1", "Longitude XS2", "Latitude YS2", "EastwardVelocitySurface2", "NorthwardVelocitySurface2", "EastwardErrorSurface2", "NorthwardErrorSurface2", "WMO Float Number"]]
filtered_columns #These got rid of any un-labeled columns that were still numbers!

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rows.rename(columns={0:"Longitude XD", 1:"Latitude YD", 6:"EastwardVelocityDeep", 7:"NorthwardVelocityDeep", 8:"EastwardErrorDeep", 9:"NorthwardErrorDeep", 10:"Longitude XS1", 11:"Latitude YS1", 13:"EastwardVelocitySurface1", 14:"NorthwardVelocitySurface1", 15:"EastwardErrorSurface1", 16:"NorthwardErrorSurface1", 17:"Longitude XS2", 18:"Latitude YS2", 20:"EastwardVelocitySurface2", 21:"NorthwardVelocitySurface2", 22:"EastwardErrorSurface2", 23:"NorthwardErrorSurface2", 34:"WMO Float Number"}, inplace=True)


Unnamed: 0,Longitude XD,Latitude YD,EastwardVelocityDeep,NorthwardVelocityDeep,EastwardErrorDeep,NorthwardErrorDeep,Longitude XS1,Latitude YS1,EastwardVelocitySurface1,NorthwardVelocitySurface1,EastwardErrorSurface1,NorthwardErrorSurface1,Longitude XS2,Latitude YS2,EastwardVelocitySurface2,NorthwardVelocitySurface2,EastwardErrorSurface2,NorthwardErrorSurface2,WMO Float Number
1007756,-999.9999,-99.9999,-999.99,-999.99,-999.99,-999.99,-145.9533,27.5265,-19.45,10.77,2.24,1.81,-145.9819,27.5263,-17.90,-3.87,1.85,1.98,5903608
1007757,-146.0580,27.5230,-1.32,-0.03,0.21,0.10,-146.1388,27.5128,-18.79,-13.05,6.83,2.14,-146.1844,27.4941,-20.08,-4.76,4.14,2.05,5903608
1007758,-146.2570,27.4860,-1.13,-0.05,0.11,0.06,-146.3038,27.4824,0.10,-5.33,2.93,2.53,-146.3136,27.4786,-8.58,3.68,3.48,3.71,5903608
1007759,-146.3970,27.5320,-1.93,1.16,0.06,0.14,-146.4866,27.5915,-5.67,21.26,2.27,1.53,-146.5085,27.6304,-14.83,14.20,2.82,1.09,5903608
1007760,-146.6065,27.6600,-2.01,0.45,0.20,0.18,-146.7184,27.6955,-21.97,16.62,2.05,2.05,-146.7374,27.7116,-10.74,15.79,1.39,1.69,5903608
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,-1.03,0.04,0.17,0.04,-143.8285,25.9847,-23.38,-6.82,1.38,0.48,-143.8285,25.9847,-23.38,-6.82,1.38,0.48,5903608
1008013,-143.8955,25.9685,-0.99,-0.30,0.05,0.11,-143.9300,25.9617,12.35,25.29,5.13,7.49,-143.9548,26.0012,-17.45,-2.40,15.22,5.98,5903608
1008014,-144.0005,26.0210,-0.83,0.57,0.03,0.04,-144.0283,26.0516,10.76,9.58,0.97,0.62,-144.0283,26.0516,10.76,9.58,0.97,0.62,5903608
1008015,-144.0415,26.0785,-0.63,0.43,0.10,0.12,-144.0609,26.1081,3.89,11.40,1.23,1.55,-144.0592,26.1114,3.33,4.83,1.25,1.74,5903608


But remember, we still have to limit the profiles in this given float, 5903608, to include JUST the profiles that occured within 2017, which I did by taking a look at my own trajectory code and printing the max_lat_1, min_lat_1, max_lon_1, and min_lon_1 values for float 5903608, both steps in which I will include below:

In [28]:
#Determining the bounding box of float 5903608 in 2017.

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from argopy import OceanOPSDeployments
from argopy import DataFetcher as ArgoDataFetcher

argo_loader = ArgoDataFetcher(src='erddap', parallel=True) # This limits the data to the highest quality control flag (QC = 1)

sliced_data_dict = {}
a_values = [5903608] #This was the only part changed from the code above to represent floats in Region 2.

for a in a_values:
    apDS = argo_loader.float(a).load().data
    data1 = apDS.argo.point2profile()
    data2 = data1.set_xindex("TIME")
    sliced_data = data2.sel(TIME=slice('2017-01-01','2017-12-31'))
    sliced_data_dict[a] = sliced_data

    # Get the max and min latitudes and longitudes from the CTD bounds
    max_lat_1 = sliced_data.LATITUDE.max().values
    min_lat_1 = sliced_data.LATITUDE.min().values
    max_lon_1 = sliced_data.LONGITUDE.max().values
    min_lon_1 = sliced_data.LONGITUDE.min().values

    print(max_lat_1, min_lat_1, max_lon_1, min_lon_1)

26.666 25.261 -145.326 -147.485


In [29]:
#And then I went ahead and filtered the ANDRO float data for 5903608 to be within that bounding box range!

lon_min = -147.485
lon_max = -142.057
lat_min = 24.9
lat_max = 27.16

#filtered_GPS = filtered_columns[(filtered_columns['Longitude XD', 'Longitude XS1', 'Longitude XS2'] >= lon_min) & (filtered_columns['Longitude XD', 'Longitude XS1', 'Longitude XS2'] <= lon_max) & (filtered_columns['Latitude XD', 'Latitude XS1', 'Latitude XS2'] >= lat_min) & (filtered_columns['Latitude XD', 'Latitude XS1', 'Latitude XS2'] <= lat_max)]
filtered_GPS = filtered_columns[
    (filtered_columns['Longitude XD'] >= lon_min) &
    (filtered_columns['Longitude XD'] <= lon_max) &
    (filtered_columns['Longitude XS1'] >= lon_min) &
    (filtered_columns['Longitude XS1'] <= lon_max) &
    (filtered_columns['Longitude XS2'] >= lon_min) &
    (filtered_columns['Longitude XS2'] <= lon_max) &
    (filtered_columns['Latitude YD'] >= lat_min) &
    (filtered_columns['Latitude YD'] <= lat_max) &
    (filtered_columns['Latitude YS1'] >= lat_min) &
    (filtered_columns['Latitude YS1'] <= lat_max) &
    (filtered_columns['Latitude YS2'] >= lat_min) &
    (filtered_columns['Latitude YS2'] <= lat_max)
]

filtered_GPS

Unnamed: 0,Longitude XD,Latitude YD,EastwardVelocityDeep,NorthwardVelocityDeep,EastwardErrorDeep,NorthwardErrorDeep,Longitude XS1,Latitude YS1,EastwardVelocitySurface1,NorthwardVelocitySurface1,EastwardErrorSurface1,NorthwardErrorSurface1,Longitude XS2,Latitude YS2,EastwardVelocitySurface2,NorthwardVelocitySurface2,EastwardErrorSurface2,NorthwardErrorSurface2,WMO Float Number
1007937,-147.2250,25.8975,3.05,2.58,0.02,0.02,-147.1019,25.9936,-7.42,-2.90,0.73,0.44,-147.1096,25.9919,-10.02,-1.49,0.98,0.44,5903608
1007938,-147.1445,26.1890,-0.58,5.22,0.12,0.03,-147.1887,26.3914,-11.72,6.46,5.43,1.57,-147.1730,26.3884,12.49,-14.38,1.70,4.61,5903608
1007939,-147.2025,26.5230,-0.94,3.98,0.08,0.02,-147.2440,26.7047,-0.90,26.29,0.97,1.55,-147.2380,26.7357,10.21,25.86,2.58,1.94,5903608
1007940,-147.1535,26.8325,1.54,1.88,0.01,0.18,-147.0975,26.9105,-9.26,7.45,0.88,1.23,-147.1017,26.9139,-10.78,12.32,0.90,0.58,5903608
1007941,-147.0530,26.9045,1.45,-0.56,0.05,0.14,-146.9933,26.8877,4.86,10.58,2.79,1.04,-146.9705,26.8968,3.66,4.64,1.84,0.48,5903608
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,-1.03,0.04,0.17,0.04,-143.8285,25.9847,-23.38,-6.82,1.38,0.48,-143.8285,25.9847,-23.38,-6.82,1.38,0.48,5903608
1008013,-143.8955,25.9685,-0.99,-0.30,0.05,0.11,-143.9300,25.9617,12.35,25.29,5.13,7.49,-143.9548,26.0012,-17.45,-2.40,15.22,5.98,5903608
1008014,-144.0005,26.0210,-0.83,0.57,0.03,0.04,-144.0283,26.0516,10.76,9.58,0.97,0.62,-144.0283,26.0516,10.76,9.58,0.97,0.62,5903608
1008015,-144.0415,26.0785,-0.63,0.43,0.10,0.12,-144.0609,26.1081,3.89,11.40,1.23,1.55,-144.0592,26.1114,3.33,4.83,1.25,1.74,5903608


The next step I took to alter the ANDRO dataset to make it graphable was to add two columns for pressure, one for the deeper pressure of 1000m and one for surface pressure at 0m.

In [30]:
filtered_GPS['DeepPressure'] = [1000] * 76
filtered_GPS['SurfacePressure'] = [0] * 76
filtered_GPS

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_GPS['DeepPressure'] = [1000] * 76
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_GPS['SurfacePressure'] = [0] * 76


Unnamed: 0,Longitude XD,Latitude YD,EastwardVelocityDeep,NorthwardVelocityDeep,EastwardErrorDeep,NorthwardErrorDeep,Longitude XS1,Latitude YS1,EastwardVelocitySurface1,NorthwardVelocitySurface1,...,NorthwardErrorSurface1,Longitude XS2,Latitude YS2,EastwardVelocitySurface2,NorthwardVelocitySurface2,EastwardErrorSurface2,NorthwardErrorSurface2,WMO Float Number,DeepPressure,SurfacePressure
1007937,-147.2250,25.8975,3.05,2.58,0.02,0.02,-147.1019,25.9936,-7.42,-2.90,...,0.44,-147.1096,25.9919,-10.02,-1.49,0.98,0.44,5903608,1000,0
1007938,-147.1445,26.1890,-0.58,5.22,0.12,0.03,-147.1887,26.3914,-11.72,6.46,...,1.57,-147.1730,26.3884,12.49,-14.38,1.70,4.61,5903608,1000,0
1007939,-147.2025,26.5230,-0.94,3.98,0.08,0.02,-147.2440,26.7047,-0.90,26.29,...,1.55,-147.2380,26.7357,10.21,25.86,2.58,1.94,5903608,1000,0
1007940,-147.1535,26.8325,1.54,1.88,0.01,0.18,-147.0975,26.9105,-9.26,7.45,...,1.23,-147.1017,26.9139,-10.78,12.32,0.90,0.58,5903608,1000,0
1007941,-147.0530,26.9045,1.45,-0.56,0.05,0.14,-146.9933,26.8877,4.86,10.58,...,1.04,-146.9705,26.8968,3.66,4.64,1.84,0.48,5903608,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,-1.03,0.04,0.17,0.04,-143.8285,25.9847,-23.38,-6.82,...,0.48,-143.8285,25.9847,-23.38,-6.82,1.38,0.48,5903608,1000,0
1008013,-143.8955,25.9685,-0.99,-0.30,0.05,0.11,-143.9300,25.9617,12.35,25.29,...,7.49,-143.9548,26.0012,-17.45,-2.40,15.22,5.98,5903608,1000,0
1008014,-144.0005,26.0210,-0.83,0.57,0.03,0.04,-144.0283,26.0516,10.76,9.58,...,0.62,-144.0283,26.0516,10.76,9.58,0.97,0.62,5903608,1000,0
1008015,-144.0415,26.0785,-0.63,0.43,0.10,0.12,-144.0609,26.1081,3.89,11.40,...,1.55,-144.0592,26.1114,3.33,4.83,1.25,1.74,5903608,1000,0


Finally, I had to convert all velocities from cm/s to m/s!

In [31]:
#Have to convert velocities to m/s from cm/s because EM-APEX data is in m/s!
columns_to_convert = ['EastwardVelocityDeep','NorthwardVelocityDeep', 'EastwardErrorDeep', 'NorthwardErrorDeep', 'EastwardVelocitySurface1','NorthwardVelocitySurface1', 'EastwardErrorSurface1',
       'NorthwardErrorSurface1','EastwardVelocitySurface2', 'NorthwardVelocitySurface2',
       'EastwardErrorSurface2', 'NorthwardErrorSurface2']

filtered_GPS[columns_to_convert] = filtered_GPS[columns_to_convert] / 100

filtered_GPS

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_GPS[columns_to_convert] = filtered_GPS[columns_to_convert] / 100


Unnamed: 0,Longitude XD,Latitude YD,EastwardVelocityDeep,NorthwardVelocityDeep,EastwardErrorDeep,NorthwardErrorDeep,Longitude XS1,Latitude YS1,EastwardVelocitySurface1,NorthwardVelocitySurface1,...,NorthwardErrorSurface1,Longitude XS2,Latitude YS2,EastwardVelocitySurface2,NorthwardVelocitySurface2,EastwardErrorSurface2,NorthwardErrorSurface2,WMO Float Number,DeepPressure,SurfacePressure
1007937,-147.2250,25.8975,0.0305,0.0258,0.0002,0.0002,-147.1019,25.9936,-0.0742,-0.0290,...,0.0044,-147.1096,25.9919,-0.1002,-0.0149,0.0098,0.0044,5903608,1000,0
1007938,-147.1445,26.1890,-0.0058,0.0522,0.0012,0.0003,-147.1887,26.3914,-0.1172,0.0646,...,0.0157,-147.1730,26.3884,0.1249,-0.1438,0.0170,0.0461,5903608,1000,0
1007939,-147.2025,26.5230,-0.0094,0.0398,0.0008,0.0002,-147.2440,26.7047,-0.0090,0.2629,...,0.0155,-147.2380,26.7357,0.1021,0.2586,0.0258,0.0194,5903608,1000,0
1007940,-147.1535,26.8325,0.0154,0.0188,0.0001,0.0018,-147.0975,26.9105,-0.0926,0.0745,...,0.0123,-147.1017,26.9139,-0.1078,0.1232,0.0090,0.0058,5903608,1000,0
1007941,-147.0530,26.9045,0.0145,-0.0056,0.0005,0.0014,-146.9933,26.8877,0.0486,0.1058,...,0.0104,-146.9705,26.8968,0.0366,0.0464,0.0184,0.0048,5903608,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,-0.0103,0.0004,0.0017,0.0004,-143.8285,25.9847,-0.2338,-0.0682,...,0.0048,-143.8285,25.9847,-0.2338,-0.0682,0.0138,0.0048,5903608,1000,0
1008013,-143.8955,25.9685,-0.0099,-0.0030,0.0005,0.0011,-143.9300,25.9617,0.1235,0.2529,...,0.0749,-143.9548,26.0012,-0.1745,-0.0240,0.1522,0.0598,5903608,1000,0
1008014,-144.0005,26.0210,-0.0083,0.0057,0.0003,0.0004,-144.0283,26.0516,0.1076,0.0958,...,0.0062,-144.0283,26.0516,0.1076,0.0958,0.0097,0.0062,5903608,1000,0
1008015,-144.0415,26.0785,-0.0063,0.0043,0.0010,0.0012,-144.0609,26.1081,0.0389,0.1140,...,0.0155,-144.0592,26.1114,0.0333,0.0483,0.0125,0.0174,5903608,1000,0


And thus, the final dataset I ended up with for ANDRO data was this ANDROArgoVelocity with limited columns, float ID, time to 2017, and unit conversion (not to mention I added in a pressure column to expand the capability of my velocity graphs).

In [32]:
ANDROArgoVelocity = filtered_GPS

From here on out you will see each of these steps in one large block:

In [34]:
import netCDF4
import numpy as np
import xarray as xr
import pandas as pd
import re
from matplotlib import pyplot as plt
%matplotlib inline

ANDROFullData=r"C:\Users\lily\Documents\ElizabethFiles2023\DINO SIP Documents\ANDRO2022-91950.dat"
ANDRO_data = pd.read_csv(ANDROFullData, sep='\s+', header=None)
filtered_rows = ANDRO_data[ANDRO_data[34] == 5903608]

filtered_rows.rename(columns={0:"Longitude XD", 1:"Latitude YD", 6:"EastwardVelocityDeep", 7:"NorthwardVelocityDeep", 8:"EastwardErrorDeep", 9:"NorthwardErrorDeep", 10:"Longitude XS1", 11:"Latitude YS1", 13:"EastwardVelocitySurface1", 14:"NorthwardVelocitySurface1", 15:"EastwardErrorSurface1", 16:"NorthwardErrorSurface1", 17:"Longitude XS2", 18:"Latitude YS2", 20:"EastwardVelocitySurface2", 21:"NorthwardVelocitySurface2", 22:"EastwardErrorSurface2", 23:"NorthwardErrorSurface2", 34:"WMO Float Number"}, inplace=True)

filtered_columns = filtered_rows.loc[:,["Longitude XD", "Latitude YD", "EastwardVelocityDeep", "NorthwardVelocityDeep", "EastwardErrorDeep", "NorthwardErrorDeep", "Longitude XS1", "Latitude YS1", "EastwardVelocitySurface1", "NorthwardVelocitySurface1", "EastwardErrorSurface1", "NorthwardErrorSurface1", "Longitude XS2", "Latitude YS2", "EastwardVelocitySurface2", "NorthwardVelocitySurface2", "EastwardErrorSurface2", "NorthwardErrorSurface2", "WMO Float Number"]]

#And then I went ahead and filtered the ANDRO float data for 5903608 to be within that bounding box range!

lon_min = -147.485
lon_max = -142.057
lat_min = 24.9
lat_max = 27.16

#filtered_GPS = filtered_columns[(filtered_columns['Longitude XD', 'Longitude XS1', 'Longitude XS2'] >= lon_min) & (filtered_columns['Longitude XD', 'Longitude XS1', 'Longitude XS2'] <= lon_max) & (filtered_columns['Latitude XD', 'Latitude XS1', 'Latitude XS2'] >= lat_min) & (filtered_columns['Latitude XD', 'Latitude XS1', 'Latitude XS2'] <= lat_max)]
filtered_GPS = filtered_columns[
    (filtered_columns['Longitude XD'] >= lon_min) &
    (filtered_columns['Longitude XD'] <= lon_max) &
    (filtered_columns['Longitude XS1'] >= lon_min) &
    (filtered_columns['Longitude XS1'] <= lon_max) &
    (filtered_columns['Longitude XS2'] >= lon_min) &
    (filtered_columns['Longitude XS2'] <= lon_max) &
    (filtered_columns['Latitude YD'] >= lat_min) &
    (filtered_columns['Latitude YD'] <= lat_max) &
    (filtered_columns['Latitude YS1'] >= lat_min) &
    (filtered_columns['Latitude YS1'] <= lat_max) &
    (filtered_columns['Latitude YS2'] >= lat_min) &
    (filtered_columns['Latitude YS2'] <= lat_max)
]

filtered_GPS['DeepPressure'] = [1000] * 76
filtered_GPS['SurfacePressure'] = [0] * 76

#Have to convert velocities to m/s from cm/s because EM-APEX data is in m/s!
columns_to_convert = ['EastwardVelocityDeep','NorthwardVelocityDeep', 'EastwardErrorDeep', 'NorthwardErrorDeep', 'EastwardVelocitySurface1','NorthwardVelocitySurface1', 'EastwardErrorSurface1',
       'NorthwardErrorSurface1','EastwardVelocitySurface2', 'NorthwardVelocitySurface2',
       'EastwardErrorSurface2', 'NorthwardErrorSurface2']

filtered_GPS[columns_to_convert] = filtered_GPS[columns_to_convert] / 100

ANDROArgoVelocity = filtered_GPS

ANDROArgoVelocity

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rows.rename(columns={0:"Longitude XD", 1:"Latitude YD", 6:"EastwardVelocityDeep", 7:"NorthwardVelocityDeep", 8:"EastwardErrorDeep", 9:"NorthwardErrorDeep", 10:"Longitude XS1", 11:"Latitude YS1", 13:"EastwardVelocitySurface1", 14:"NorthwardVelocitySurface1", 15:"EastwardErrorSurface1", 16:"NorthwardErrorSurface1", 17:"Longitude XS2", 18:"Latitude YS2", 20:"EastwardVelocitySurface2", 21:"NorthwardVelocitySurface2", 22:"EastwardErrorSurface2", 23:"NorthwardErrorSurface2", 34:"WMO Float Number"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filter

Unnamed: 0,Longitude XD,Latitude YD,EastwardVelocityDeep,NorthwardVelocityDeep,EastwardErrorDeep,NorthwardErrorDeep,Longitude XS1,Latitude YS1,EastwardVelocitySurface1,NorthwardVelocitySurface1,...,NorthwardErrorSurface1,Longitude XS2,Latitude YS2,EastwardVelocitySurface2,NorthwardVelocitySurface2,EastwardErrorSurface2,NorthwardErrorSurface2,WMO Float Number,DeepPressure,SurfacePressure
1007937,-147.2250,25.8975,0.0305,0.0258,0.0002,0.0002,-147.1019,25.9936,-0.0742,-0.0290,...,0.0044,-147.1096,25.9919,-0.1002,-0.0149,0.0098,0.0044,5903608,1000,0
1007938,-147.1445,26.1890,-0.0058,0.0522,0.0012,0.0003,-147.1887,26.3914,-0.1172,0.0646,...,0.0157,-147.1730,26.3884,0.1249,-0.1438,0.0170,0.0461,5903608,1000,0
1007939,-147.2025,26.5230,-0.0094,0.0398,0.0008,0.0002,-147.2440,26.7047,-0.0090,0.2629,...,0.0155,-147.2380,26.7357,0.1021,0.2586,0.0258,0.0194,5903608,1000,0
1007940,-147.1535,26.8325,0.0154,0.0188,0.0001,0.0018,-147.0975,26.9105,-0.0926,0.0745,...,0.0123,-147.1017,26.9139,-0.1078,0.1232,0.0090,0.0058,5903608,1000,0
1007941,-147.0530,26.9045,0.0145,-0.0056,0.0005,0.0014,-146.9933,26.8877,0.0486,0.1058,...,0.0104,-146.9705,26.8968,0.0366,0.0464,0.0184,0.0048,5903608,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008012,-143.7605,25.9905,-0.0103,0.0004,0.0017,0.0004,-143.8285,25.9847,-0.2338,-0.0682,...,0.0048,-143.8285,25.9847,-0.2338,-0.0682,0.0138,0.0048,5903608,1000,0
1008013,-143.8955,25.9685,-0.0099,-0.0030,0.0005,0.0011,-143.9300,25.9617,0.1235,0.2529,...,0.0749,-143.9548,26.0012,-0.1745,-0.0240,0.1522,0.0598,5903608,1000,0
1008014,-144.0005,26.0210,-0.0083,0.0057,0.0003,0.0004,-144.0283,26.0516,0.1076,0.0958,...,0.0062,-144.0283,26.0516,0.1076,0.0958,0.0097,0.0062,5903608,1000,0
1008015,-144.0415,26.0785,-0.0063,0.0043,0.0010,0.0012,-144.0609,26.1081,0.0389,0.1140,...,0.0155,-144.0592,26.1114,0.0333,0.0483,0.0125,0.0174,5903608,1000,0
