## **ATOC4500 Data Science Lab: Final Project**
## **Using rapid ice loss events to predict when CESM1 ensemble members go ice free**
#### **Author: Daphne Quint, daqu2831@colorado.edu**
#### **Last updated: April 14, 2022**

---------------------------------------------------------------------------------------

### Import packages

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from random import randint

### Define functions

In [4]:
def define_holdout_data(x, y, verbose=False):
    """Split predictors and predictands into random 80/20 train/test sets.

    A new seed is drawn with ``randint(0, 1000)`` on every call, so repeated
    calls give different splits.

    Parameters
    ----------
    x : array-like
        Predictors, passed straight to ``train_test_split``.
    y : array-like
        Predictands, passed straight to ``train_test_split``.
    verbose : bool, optional
        When truthy, print the shape of each of the four returned pieces.
        Defaults to False.

    Returns
    -------
    x_train, x_test, y_train, y_test
        The four outputs of ``train_test_split`` with ``test_size=0.20``.
    """
    # Draw the seed per call so the split is re-randomized each time.
    random_state = randint(0, 1000)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.20, random_state=random_state
    )

    if verbose:
        print("Prior to scaling and rebalacing...")
        print("Shape of training predictors: " + str(np.shape(x_train)))
        print("Shape of testing predictors: " + str(np.shape(x_test)))
        print("Shape of training predictands: " + str(np.shape(y_train)))
        print("Shape of testing predictands: " + str(np.shape(y_test)))
        print(" ")

    return x_train, x_test, y_train, y_test

In [5]:
def find_year(data, member, start_year=2020, threshold=1):
    """Return the first year in which one member is no longer above a threshold.

    Parameters
    ----------
    data : object with ``.sel(member=...)``
        Selecting a member must give an iterable of values. The first value
        is taken to belong to ``start_year`` and each following value to the
        next year (assumes one value per year -- confirm against the caller).
    member
        Label handed to ``data.sel(member=...)``.
    start_year : int, optional
        Year assigned to the first value. Defaults to 2020.
    threshold : float, optional
        A value strictly greater than this means "keep searching".
        Defaults to 1 (the original note described this as 1 million
        square km).

    Returns
    -------
    int or None
        ``start_year`` plus the position of the first value that is not
        greater than ``threshold``; ``None`` when every value is above it.
    """
    series = data.sel(member=member)

    for offset, value in enumerate(series):
        # Written as "not greater than" (rather than "<=") so that a NaN
        # value also ends the search, exactly as the original else-branch did.
        if not value > threshold:
            return start_year + offset

    # The threshold was never reached within the supplied values.
    return None

In [6]:
def create_df(month):
    """Build the per-member table of ice-free year and RILE statistics.

    Reads the module-level datasets ``SIE``, ``nb_ext_data`` and
    ``length_data`` and uses ``find_year`` to locate the ice-free year.

    Parameters
    ----------
    month : int
        Month number used to select from all three datasets
        (e.g. 9 for September).

    Returns
    -------
    pandas.DataFrame
        One row per member 1-40 with the columns ``Member``, ``Month``,
        ``Ice Free Year``, ``Max Ice Lost`` and ``Longest Duration``.
    """
    members = np.arange(1, 41, 1)
    member_ids = members.tolist()

    #### ice-free year for the requested month

    # Sea ice extent restricted to the requested month and members 1-40.
    sie_month = SIE['CESM1'].sel(time=SIE['time.month'] == month).sel(member=members)

    # 5-step running mean along time for every member at once. It is taken
    # BEFORE slicing so the first kept year still averages over the steps
    # that precede it.
    five_year_mean = sie_month.rolling(time=5).mean()
    five_year_mean = five_year_mean.sel(time=slice('2020', '2100'))

    ice_free_year = np.array([find_year(five_year_mean, m) for m in member_ids])

    #### largest ice loss in the month
    # The minimum of the indicator is taken and negated below, so
    # presumably losses are stored as negative numbers -- confirm.
    rile_indicator = nb_ext_data['RILE Indicator']
    ice_lost_max = np.array([
        float(rile_indicator.sel(member=m).sel(month=month).min().values)
        for m in member_ids
    ])

    #### longest duration for the month
    rile_length = length_data['Length']
    length_max = np.array([
        float(rile_length.sel(member=m).sel(month=month).max().values)
        for m in member_ids
    ])

    #### assemble the table (column order matters to downstream code)
    this_month_df = pd.DataFrame({
        'Member': members,
        # zeros + month keeps this column floating point, as before
        'Month': np.zeros(len(members)) + month,
        'Ice Free Year': ice_free_year,
        'Max Ice Lost': ice_lost_max * -1,
        'Longest Duration': length_max,
    })

    return this_month_df

## Step 1: Read in Data

In [3]:
# Directory that holds the NetCDF input files
data_path = '/home/daphne/Documents/School/research/icefreeproject/Data/'

# Per-RILE dataset: amount of sea ice lost and sea ice extent
nb_ext_data = xr.open_dataset(f'{data_path}RILE_nbext_CESM.nc')

# RILE length dataset: number of consecutive years with a RILE in a given month
length_data = xr.open_dataset(f'{data_path}CESM_rile_length.nc')

# Sea ice extent dataset: used to find each member's ice-free year
SIE = xr.open_dataset(f'{data_path}CLIVAR_SIE_1850_2100_RCP85.nc')

## Step 2: Munge Data

In [9]:
# Per-member table for month 9 (September)
sept_df = create_df(month=9)

In [10]:
# sept_df  # uncomment to display the September table

## Step 3: Apply Data Science Method

In [11]:
# Predictors: drop the bookkeeping columns and the target
x = sept_df.drop(columns=['Month', 'Member', 'Ice Free Year'])
# Predictand: drop the bookkeeping columns and the two predictors
y = sept_df.drop(columns=['Month', 'Member', 'Longest Duration', 'Max Ice Lost'])

## Step 4: Present results visually using 2-3 graphs

## Summary