# Fetch Argovis data

Using functions from https://github.com/sanil72900/argo_notebook/blob/main/lesson_one.ipynb

## Import packages

In [1]:
import requests
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

## Define the Argovis data importer class

In [2]:
class ArgovisDataImporter:
    """Fetch Argo profiles from the Argovis selection API and tabulate them.

    Typical use is to construct an importer with a date range and either an
    explicit polygon (`shape`) or the name of a predefined `region`, then call
    `get_cleaned_profile()` to obtain one pandas DataFrame row per measurement.
    """

    def __init__(self, startDate, endDate, presRange=None, shape=None, region=None):
        """
        Store the query parameters and resolve the polygon to search.

        - `startDate`: string formatted as 'YYYY-MM-DD'
        - `endDate`: string formatted as 'YYYY-MM-DD'
        - `presRange`: a string of a list formatted as '[minimum pres,maximum pres]' (no spaces)
        - `shape`: polygon as a list of rings of [lon, lat] pairs,
          e.g. [[[lon1, lat1], [lon2, lat2], ...]]; lon in -180..180, lat in -90..90
        - `region`: name of a predefined polygon (see `get_regions`); only used
          when `shape` is not given
        """
        self.startDate = startDate
        self.endDate = endDate
        self.presRange = presRange
        self.shape = shape
        self.region = region

        # An explicit shape always wins; the named region is only a fallback.
        if self.region is not None and self.shape is None:
            self.shape = self.get_regions(region)

        # The API expects the polygon as a compact string without spaces.
        self.strShape = str(self.shape).replace(' ', '')

    def get_selection_profiles(self, printUrl=True):
        """
        Query Argovis for all profiles inside the stored shape and date range.

        The request URL is composed from `startDate`, `endDate`, `strShape` and,
        when set, `presRange`.

        - `printUrl`: boolean that prints the request URL if equal to True

        Returns the decoded JSON body of the response (the code downstream
        treats it as a list of profile dictionaries).

        Raises `ValueError` when no shape is available, and
        `requests.HTTPError` when the server answers with a non-2xx status.
        """
        if self.shape is None:
            # Without this guard the literal text 'None' would be sent as the shape.
            raise ValueError(
                'No shape to query: pass `shape` or a valid `region` when '
                'constructing ArgovisDataImporter'
            )

        url = 'https://argovis.colorado.edu/selection/profiles'
        url += '?startDate={}'.format(self.startDate)
        url += '&endDate={}'.format(self.endDate)
        url += '&shape={}'.format(self.strShape)

        if self.presRange:
            url += '&presRange=' + self.presRange

        if printUrl:
            print(url)

        resp = requests.get(url)

        # Fail loudly on HTTP errors instead of trying to parse an error page.
        resp.raise_for_status()

        return resp.json()

    def parse_into_df(self, profiles):
        """
        Flatten a list of profile dictionaries into a single DataFrame.

        `profiles` is the list returned by `get_selection_profiles()`. Each
        profile contributes one row per entry of its 'measurements' list; the
        measurement keys become columns (the original notebook lists pressure
        [dbar], temperature [Celsius] and salinity [psu]). The following
        profile-level columns are added to every row:

        - cycle_number
        - profile_id (taken from the profile's '_id')
        - lat
        - lon
        - date

        The per-profile row index is kept, so index values repeat across
        profiles. Profiles without measurements contribute no rows; if nothing
        has measurements an empty DataFrame is returned.
        """
        frames = []

        for profile in profiles:
            measurements = profile['measurements']
            if not measurements:
                continue

            profileDf = pd.DataFrame(measurements)
            profileDf['cycle_number'] = profile['cycle_number']
            profileDf['profile_id'] = profile['_id']
            profileDf['lat'] = profile['lat']
            profileDf['lon'] = profile['lon']
            profileDf['date'] = profile['date']
            frames.append(profileDf)

        if not frames:
            return pd.DataFrame()

        # Concatenate once: growing a DataFrame inside the loop copies all
        # previously collected rows on every iteration.
        return pd.concat(frames, sort=False)

    def get_regions(self, region):
        """
        Return the predefined polygon for `region`, or None if it is unknown.

        Valid names are 'south_coords', 'gulf_coords', 'pacific_coords',
        'atlantic_coords' and 'labrador_coords'. An unknown name prints a
        message and returns None. A fresh list is built on every call, so
        callers may modify the result freely.
        """
        regions = {
            'south_coords': [[[-149.238281, -36.456636], [-141.879737, -37.077133],
                              [-134.445218, -37.237608], [-127.024817, -36.93345],
                              [-119.707031, -36.173357], [-120.058594, -59.977005],
                              [-127.546527, -60.582449], [-135.216859, -60.756782],
                              [-142.865732, -60.492308], [-150.292969, -59.800634],
                              [-149.238281, -36.456636]]],
            'gulf_coords': [[[-94.35249, 27.365753], [-97.097603, 24.402577],
                             [-93.332877, 20.489146], [-87.124507, 22.099636],
                             [-80.783791, 23.47067], [-86.195584, 29.161741],
                             [-94.35249, 27.365753]]],
            'pacific_coords': [[[164.355469, 29.840644], [164.882812, -29.840644],
                                [172.623113, -29.990522], [-180, -29.701812],
                                [-180, -29.701812], [-179.648438, -29.688053],
                                [179.648437, 29.688053], [172.007811, 29.985384],
                                [164.355469, 29.840644]]],
            'atlantic_coords': [[[-40.078125, 29.840644], [-33.368671, 30.338837],
                                 [-26.614528, 30.492027], [-19.863281, 30.297018],
                                 [-20.039063, -30.145127], [-26.724822, -30.384017],
                                 [-33.419918, -30.281826], [-40.078125, -29.840644],
                                 [-40.078125, 29.840644]]],
            # NOTE(review): these longitudes (about -145 to -127) look like the
            # north-east Pacific rather than the Labrador Sea - confirm the polygon.
            'labrador_coords': [[[-144.84375, 36.031332], [-136.038755, 36.210925],
                                 [-127.265625, 35.746512], [-128.144531, 22.755921],
                                 [-136.543795, 24.835311], [-145.195313, 26.431228],
                                 [-144.84375, 36.031332]]],
        }
        # Earlier versions only matched this misspelling; keep it as an alias so
        # existing callers continue to work.
        regions['labrardor_coords'] = regions['labrador_coords']

        coords = regions.get(region) if isinstance(region, str) else None
        if coords is None:
            print('Region is not one of south_coords, gulf_coords, pacific_coords, atlantic_coords, labrador_coords')
            return None

        return coords

    def create_selection_df(self, selectionProfiles):
        """
        Build the measurements DataFrame for a query result.

        Returns an empty DataFrame when `selectionProfiles` is empty, otherwise
        the result of `parse_into_df`.
        """
        if len(selectionProfiles) > 0:
            return self.parse_into_df(selectionProfiles)

        return pd.DataFrame()

    def data_cleaning(self, selectionDf):
        """
        Replace the -999 placeholder with NaN.

        The frame is modified in place and also returned for convenience.
        """
        selectionDf.replace(-999, np.nan, inplace=True)

        return selectionDf

    def get_cleaned_profile(self):
        """
        Run the whole pipeline: query, tabulate, clean, and add date parts.

        Adds integer 'year', 'month' and 'day' columns derived from 'date',
        stores the result on `self.cleanedProfile` and returns it. When the
        query yields no rows, an empty DataFrame is stored and returned.
        """
        selectionProfiles = self.get_selection_profiles()

        selectionDf = self.create_selection_df(selectionProfiles)

        cleanedProfile = self.data_cleaning(selectionDf)

        # An empty result has no 'date' column to derive the date parts from.
        if 'date' in cleanedProfile.columns:
            dates = pd.DatetimeIndex(cleanedProfile['date'])
            cleanedProfile['year'] = dates.year
            cleanedProfile['month'] = dates.month
            cleanedProfile['day'] = dates.day

        self.cleanedProfile = cleanedProfile

        return cleanedProfile

    def get_number_recorded_observations(self):
        """Return the number of measurement rows in `self.cleanedProfile`.

        Requires `get_cleaned_profile()` to have been called first.
        """
        return len(self.cleanedProfile)

    def get_number_of_profiles(self):
        """Return the number of distinct profile ids in `self.cleanedProfile`.

        Requires `get_cleaned_profile()` to have been called first. Returns 0
        when the stored frame has no 'profile_id' column (empty query result).
        """
        if 'profile_id' not in self.cleanedProfile.columns:
            return 0

        return len(np.unique(self.cleanedProfile['profile_id']))


