# Phytoplankton data

In [2]:
import xarray as xr
import pandas as pd
import numpy as np

In [3]:
# Open the phytoplankton NetCDF file; the path is relative, so it resolves
# against the directory the notebook kernel is running in.
phyto = xr.open_dataset("../../VeryNEWESTCariacoData/phytoplankton.netcdf")#, decode_cf=True)

In [4]:
# List the dataset's data variables with their dimensions and dtypes.
phyto.data_vars

Data variables:
    Cruise                   (unlimited) |S513 ...
    Cruise_ID_2              (unlimited) |S513 ...
    Leg                      (unlimited) |S513 ...
    Cast                     (unlimited) |S513 ...
    Day_local                (unlimited) |S513 ...
    Month_local              (unlimited) |S513 ...
    Year_local               (unlimited) |S513 ...
    Latitude                 (unlimited) |S513 ...
    Longitude                (unlimited) |S513 ...
    Time_start_local         (unlimited) |S513 ...
    Time_end_local           (unlimited) |S513 ...
    Datetime_local           (unlimited) |S513 ...
    Datetime_UTC             (unlimited) |S513 ...
    SpeciesNameOriginal      (unlimited) |S513 ...
    SpeciesNameCleaned       (unlimited) |S513 ...
    ScientificName_accepted  (unlimited) |S513 ...
    AphiaID                  (unlimited) |S513 ...
    d_1m                     (unlimited) |S513 ...
    d_7m                     (unlimited) |S513 ...
    d_15m      

# Convert and clean up datasets

In [5]:
def convert_str_data(data, var):
    """Return the ``var`` entry of ``data`` with its byte strings decoded as UTF-8.

    Parameters
    ----------
    data
        Container indexable by ``var``; the selected entry must expose a
        ``.str`` accessor that provides ``decode``.
    var
        Key of the entry to decode.

    Returns
    -------
    The result of ``.str.decode('utf-8')`` on the selected entry.
    """
    raw_field = data[var]
    text_accessor = raw_field.str
    return text_accessor.decode('utf-8')

In [6]:
def convert_numeric_data(data, var):
    """Parse the ``var`` entry of ``data`` as numbers using ``pandas.to_numeric``.

    Parameters
    ----------
    data
        Container indexable by ``var``.
    var
        Key of the entry to convert.

    Returns
    -------
    The output of ``pd.to_numeric`` for the selected entry. Because
    ``errors='coerce'`` is used, values that cannot be parsed become NaN
    instead of raising.
    """
    raw_field = data[var]
    parsed = pd.to_numeric(raw_field, errors='coerce')
    return parsed

In [7]:
# Cast the cruise identifier and the two local clock-time variables to the
# 'str' dtype, replacing each variable in the dataset with its converted form.
for _text_var in ('Cruise_ID_2', 'Time_start_local', 'Time_end_local'):
    phyto[_text_var] = phyto[_text_var].astype('str')

In [8]:
# Decode the three species-name variables with convert_str_data and store
# the decoded result back under the same variable name.
for _name_var in ('SpeciesNameOriginal', 'SpeciesNameCleaned', 'ScientificName_accepted'):
    phyto[_name_var] = convert_str_data(phyto, _name_var)

In [9]:
# Cast the cruise/leg/cast fields and then the local day/month/year fields
# to the 'int' dtype, in the same order as before.
for _int_var in ('Cruise', 'Leg', 'Cast', 'Day_local', 'Month_local', 'Year_local'):
    phyto[_int_var] = phyto[_int_var].astype('int')

In [10]:
# Parse both timestamp variables: each is cast to 'str', its values are handed
# to pd.to_datetime, and the parsed result is assigned back through `.values`
# on the existing variable.
#
# One pass per variable is sufficient. These two statements previously appeared
# twice verbatim; the repeated pass only cast the already-parsed timestamps
# back to strings and parsed them again.
phyto.Datetime_local.values = pd.to_datetime(phyto.Datetime_local.astype('str').values)
phyto.Datetime_UTC.values = pd.to_datetime(phyto.Datetime_UTC.astype('str').values)

In [11]:
# AphiaID goes through the numeric helper and is then cast to int.
# NOTE(review): convert_numeric_data coerces unparseable entries to NaN before
# this integer cast — confirm AphiaID has no blank/non-numeric entries.
phyto['AphiaID'].values = convert_numeric_data(phyto, 'AphiaID').astype(int)

# The remaining numeric variables are parsed with the helper and written back
# unchanged, in the same order as before.
_numeric_vars = (
    'Latitude', 'Longitude',
    'd_1m', 'd_7m', 'd_15m', 'd_25m', 'd_35m', 'd_55m', 'd_75m', 'd_100m',
    'Total_sum',
)
for _num_var in _numeric_vars:
    phyto[_num_var].values = convert_numeric_data(phyto, _num_var)

In [12]:
# Display the dataset to inspect the result of the conversion cells above.
phyto

In [13]:
# Convert the xarray dataset into a pandas DataFrame.
phyto_df = phyto.to_dataframe()

In [16]:
# Disabled spot-check of the local-datetime column in the DataFrame:
#phyto_df['Datetime_local']

In [17]:
# Show the frame in long form (one entry per row/column pair).
# Display only: the stacked result is not assigned to a name.
phyto_df.stack()

unlimited             
0          Cruise               1
           Cruise_ID_2    CAR-001
           Leg                  2
           Cast                 2
           Day_local            8
                           ...   
96840      d_35m                0
           d_55m                0
           d_75m                0
           d_100m               0
           Total_sum            0
Length: 2516627, dtype: object

In [18]:
# Write the converted table to a CSV file; the relative filename resolves
# against the kernel's current working directory.
phyto_df.to_csv("phytoplankton_newest_full.csv")