# Run in Colab with a couple of images from the data uploaded to Google Drive.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
import os
import pandas as pd
import seaborn as sns
import pickle

import tensorflow as tf
from tensorflow import keras
from keras import metrics


from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# mount Google Drive at /content/drive (Colab only) so files stored there are visible to this notebook
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# generate a list of dates
def generate_dates(dates_list, num_dates, random_state=0):
  """
  draw a reproducible random sample of dates

  dates_list: pandas DataFrame (or Series) of candidate dates
  num_dates: number of rows to draw, without replacement
  random_state: seed forwarded to .sample(); the default 0 keeps the
    selection identical from run to run

  return the sampled rows, still carrying their original index labels
  raise ValueError (from pandas) if num_dates exceeds the rows available

  """
  return dates_list.sample(n=num_dates, random_state=random_state)

In [None]:
# month number (1-12) -> three-letter month abbreviation
months = dict(enumerate(
  ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
  start=1,
))

# root folder of the data set, relative to the working directory
path = '207_data/'

# call images from google folder and create list of input image data
def open_images(list_dates):
  """
  load and crop the image belonging to every date in list_dates

  list_dates: DataFrame with 'Year', 'Month' and 'Day' columns; every Month
    value must be a key of the module-level months dictionary
  each image is read from <path>/<Year>/<month name>/<Year>_<Month>_<Day>.png
  and must contain exactly 430 * 400 pixels, otherwise the reshape raises
  ValueError

  return array of shape (number of dates, 338, 280, 3) holding the RGB values
  of the cropped images, in the row order of list_dates

  """
  image_data = []

  # walk the three columns in lockstep instead of looking values up by index
  # label, so a frame with repeated labels still gives one scalar date per row
  for year, month_num, day in zip(list_dates['Year'], list_dates['Month'], list_dates['Day']):
    year = str(year)
    month = months[month_num]
    file_name = f"{year}_{month_num}_{day}.png"

    # the context manager closes the file once the pixel values have been copied out
    with Image.open(os.path.join(path, year, month, file_name)) as im:
      im_dat = np.array(im.convert('RGB').getdata())

    # getdata() yields one (R, G, B) entry per pixel; restore the 2-D layout
    im_dat = np.reshape(im_dat, (430, 400, 3))
    # crop to rows 2-339 and columns 50-329, i.e. 338 x 280 pixels
    im_dat = im_dat[2:340, 50:330]

    image_data.append(im_dat)

  image_data = np.reshape(image_data, (len(image_data), 338, 280, 3))
  return image_data


In [None]:
# reference table pairing each color with a sea ice concentration percentage
sic_color_mapping = np.load('207_data/saved_data/sic_mapping.npy')

# keep only the first three columns and the last column of the loaded array
sic_color_mapping = np.hstack([sic_color_mapping[:, :3], sic_color_mapping[:, -1:]])

sic_color_df = pd.DataFrame(data=sic_color_mapping, columns=["Red", "Green", "Blue", "Percent"])

# plain nested list of [Red, Green, Blue] rows, in data frame order
list_mst = sic_color_df.loc[:, ['Red', 'Green', 'Blue']].to_numpy().tolist()

# sea ice concentration image array
def sic_output(x_data):
  """
  convert RGB images into sea ice concentration images

  x_data: array of shape (number of images, height, width, 3)
  a pixel whose color equals a row of the module-level list_mst receives that
  row's 'Percent' value from sic_color_df (the first matching row wins);
  every other pixel receives 0

  return array of shape (number of images, height, width)

  """
  height, width = x_data.shape[1:3]

  # build the color -> percent table once per call so each pixel costs a single
  # dictionary lookup instead of a scan over every reference color;
  # setdefault keeps the first row when a color is listed more than once
  percent_column = sic_color_df['Percent'].values
  percent_by_color = {}
  for row_idx, rgb in enumerate(list_mst):
    percent_by_color.setdefault(tuple(rgb), percent_column[row_idx])

  sic_images_arr = []

  for image in x_data:
    # one [R, G, B] list of plain Python numbers per pixel, row by row
    flattened_pix = image.reshape(-1, 3).tolist()

    sc_image = [percent_by_color.get(tuple(pixel), 0) for pixel in flattened_pix]

    sic_images_arr.append(np.reshape(sc_image, (height, width)))

  sic_images_arr = np.reshape(sic_images_arr, (len(sic_images_arr), height, width))

  return sic_images_arr

In [None]:
# csv of valid dates from NOAA website
# only the first three columns are read and they are labelled Year / Month / Day
dates_data = pd.read_csv(
  '207_data/saved_data/N_seaice_extent_daily_v3.0.csv',
  usecols=[0, 1, 2],
  header=0,
  skiprows=1,
  names=['Year', 'Month', 'Day'],
)

In [None]:
# create training data

# placeholder sample size -- set this to the number of training images you want
number_of_images = 2

# sample the training dates from everything except the last 2961 rows of dates_data
# TODO: need to change to 3500 for training to randomly select
# (save 1500 most recent images for testing for 5000 total)
train_inputs = generate_dates(dates_data.iloc[:-2961], number_of_images)

# same month and day as each input date, with the year moved 4 years forward;
# assign() builds a new frame, so nothing is written into a slice of train_inputs
train_sic_transform = train_inputs[['Month', 'Day']].assign(Year=train_inputs['Year'] + 4)

# dates that occur both as an input date and as a shifted date
intersection = pd.merge(train_inputs, train_sic_transform, on=['Year', 'Month', 'Day'])

# stacking a frame on the intersection and dropping every duplicated row
# leaves only the dates that are unique to that frame
train_inputs_im_only = pd.concat([train_inputs, intersection]).drop_duplicates(keep=False)
train_inputs_sic_only = pd.concat([train_sic_transform, intersection]).drop_duplicates(keep=False)

# chronological order for both date sets
train_inputs_sorted = train_inputs.sort_values(by=['Year', 'Month', 'Day'])
train_sic_dates_sorted = train_sic_transform.sort_values(by=['Year', 'Month', 'Day'])

In [None]:
# build ANN

def build_ann():
  """
  add hidden layer to regression model
  """
