<a href="https://colab.research.google.com/github/halldm2000/NOAA-AI-2020-TUTORIAL/blob/master/curve_fitting_neural_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Curve Fitting with PyTorch


**Download Data**

In [None]:
# URL of the daily northern sea-ice extent CSV (NOAA dataset G02135)
source = "ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/north/daily/data/N_seaice_extent_daily_v3.0.csv"
# directory the file is saved into; load_data() reads the CSV from this same directory
dest    = "/content/data/"
# -c resumes a partial download, -e robots=off ignores robots.txt, -P sets the output directory
!wget  -c -e robots=off -P {dest} {source}

**Data Loading Routine**


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
import torch, numpy as np

# make directory to store images: plot() saves one frame per call into ./images
# (-p: do not fail if the directory already exists)
!mkdir -p images

def load_data(csv = "/content/data/N_seaice_extent_daily_v3.0.csv"):
  """Read the daily sea-ice extent CSV and return standardized (x, y) tensors.

  Side effects: sets the notebook-level globals
    X, Y, year        un-scaled NumPy column vectors (day of year, extent, year)
    xscaler, yscaler  fitted StandardScaler objects, used later to undo the scaling

  Args:
    csv: path of the CSV file. The first two rows are skipped as header and
         columns 0-3 are read as year, month, day, extent.

  Returns:
    (x, y): torch tensors of shape (nrows, 1) holding the standardized day of
    the year and the standardized extent.
  """
  global xscaler, yscaler, X,Y, year

  # read sea-ice extent data from CSV file
  # ndmin=2 keeps the array two-dimensional even when the file holds a single data row
  data   = np.loadtxt(csv, skiprows=2, delimiter=',', usecols=(0,1,2,3,4), ndmin=2)
  extent = data[:,3]
  year   = data[:,0]

  # get day of the year (1..366) from each (year, month, day) triple
  dates  = data[:,0:3].astype(int)
  yday   = np.zeros_like(year)
  for i, (yr, month, day) in enumerate(dates):
    yday[i] = datetime(yr,month,day).timetuple().tm_yday

  # put variables in column format
  X    = np.reshape(yday,  (-1,1))
  Y    = np.reshape(extent,(-1,1))
  year = np.reshape(year,  (-1,1))

  # standardize inputs and targets to zero mean and unit variance
  # (StandardScaler does not bound the values to a fixed range)
  xscaler = StandardScaler()
  yscaler = StandardScaler()
  x = torch.from_numpy(xscaler.fit_transform(X))
  y = torch.from_numpy(yscaler.fit_transform(Y))

  return x,y
  
def shuffle_blocks(blocksize = 365):
  """Shuffle the data set in contiguous blocks of `blocksize` rows.

  Rows inside a block keep their original order; only the order of the blocks
  is randomized (torch.randperm, so torch.manual_seed controls it). The same
  permutation is applied to the notebook-level globals x, y (torch tensors)
  and X, Y, year (NumPy arrays), which are rebound to the shuffled copies.

  When len(x) is not a multiple of `blocksize`, the leftover rows form one
  final, shorter block that is shuffled along with the others, so no sample
  is discarded.

  Args:
    blocksize: number of consecutive rows per block; must be >= 1.

  Raises:
    ValueError: if blocksize is smaller than 1.
  """
  global x,y,X,Y, year

  if blocksize < 1:
    raise ValueError("blocksize must be a positive integer")

  nsamples = len(x)
  if nsamples == 0: return   # nothing to shuffle

  # contiguous index blocks; the last one may be shorter than blocksize
  blocks = torch.split(torch.arange(nsamples), blocksize)
  order  = torch.randperm(len(blocks)).tolist()
  inds   = torch.cat([blocks[i] for i in order])

  # index the NumPy arrays with a NumPy index array, the tensors with the tensor
  np_inds = inds.numpy()
  x,y,X,Y, year = x[inds], y[inds], X[np_inds], Y[np_inds], year[np_inds]
  

**Plotting Routine**

In [None]:
def plot(interval = 1):
  """Draw the current fit and the loss history, and save the figure as a video frame.

  Reads notebook-level state: epoch, loss, prediction, train_loss, val_loss,
  x_train, y_train, x_val, y_val (set by the training cell) and xscaler,
  yscaler, Y (set by load_data). On epoch 0 it caches the un-scaled
  training/validation points in the globals X_train, Y_train, X_val, Y_val.

  Args:
    interval: only plot on epochs that are a multiple of this value. The saved
              frame is numbered epoch//interval so frames are consecutive.
  """
  global X,Y, X_train, Y_train, X_val, Y_val

  # exit if it's not time to plot
  if epoch%interval != 0: return

  # undo the scaling of the data points once; they do not change between epochs
  if(epoch==0):
    X_train = xscaler.inverse_transform(x_train)
    Y_train = yscaler.inverse_transform(y_train)
    X_val   = xscaler.inverse_transform(x_val)
    Y_val   = yscaler.inverse_transform(y_val)

  # plot observations
  fig = plt.figure(figsize=(10,9),dpi=72*1.25)

  plt.subplot(2,1,1)
  plt.scatter(X_train,Y_train,alpha=0.2, s=5, label="training data")
  plt.scatter(X_val,Y_val,alpha=0.2, s=5, label="validation data")
  plt.legend()

  plt.ylim(Y.min(), Y.max())

  # plot curve-fit
  # prediction is still attached to the autograd graph; detach it before handing
  # it to scikit-learn / NumPy, which cannot convert tensors that require grad
  Y_pred = yscaler.inverse_transform(prediction.detach().cpu().numpy())
  plt.scatter(X_train,Y_pred,c='black',s=5)

  # set title, axis labels
  plt.xlabel("day of the year", fontsize=12)
  plt.ylabel("million sq km ", fontsize=12)
  plt.title(f"Annual Arctic Sea-ice Extent   Epoch={epoch}   Training loss={loss.item():.3f}",fontsize=14)

  # plot train and validation loss
  plt.subplot(2,1,2)
  epochs = range( len(train_loss) )
  plt.semilogy(epochs, train_loss, label="training loss")
  plt.semilogy(epochs, val_loss,'g', label="validation loss")
  plt.xlabel('epoch')
  plt.ylabel('average error')
  plt.ylim(bottom=1e-2)
  plt.legend()

  # save figure to file; the explicit .png extension matches the img_%04d.png
  # pattern the video step reads, independent of matplotlib's default format
  plt.savefig(f'./images/img_{epoch//interval:04d}.png',bbox_inches='tight')
  plt.show()

  # release the figure so repeated calls in the training loop do not accumulate
  plt.close(fig)

**Training and Validation**

In [None]:
import torch, numpy as np
# fix the torch RNG: shuffle_blocks (torch.randperm) and the weight
# initialisation (torch.randn) below both draw from it
torch.manual_seed(0)

# DATA
# x, y are the standardized inputs/targets; shuffle_blocks reorders them, and the
# un-scaled globals X, Y, year, in blocks of 100 consecutive rows
x,y = load_data()
shuffle_blocks(blocksize=100)
# split sizes for 50% train / 25% validation / 25% test, converted to integers
ntrain, nval, ntest = (len(x)*torch.tensor([0.50, 0.25, 0.25])).int()
# validation is everything between the training head and the test tail (nval itself
# is not used); xtest / ytest are set aside and not used further in this cell
x_train, x_val, xtest = x[:ntrain], x[ntrain:-ntest], x[-ntest:]
y_train, y_val, ytest = y[:ntrain], y[ntrain:-ntest], y[-ntest:]

# MODEL
# despite its name this is a leaky ReLU: identity for x>0, slope 0.1 for x<0
def relu(x): return x*(x>0)+ 0.1*x*(x<0)
# one hidden layer with N units: w1 and b[0:N] are the hidden weights and biases,
# w2 the output weights and b[N] the output bias
N = 20
b = torch.randn(N+1, requires_grad=True)
w1= torch.randn(N,   requires_grad=True)
w2= torch.randn(N,   requires_grad=True)
# y(x) = sum_i w2[i]*relu(w1[i]*x + b[i]) + b[N], applied element-wise to x
def model(x): return sum(w2[i]*relu(w1[i]*x+b[i]) for i in range(N))+b[N]

# CONFIGURE
optimizer = torch.optim.AdamW(params = [b, w1, w2], lr=1e-2)
loss_fcn  = torch.nn.MSELoss()

# per-epoch loss histories; plot() reads these lists by name
train_loss, val_loss=[],[]
for epoch in range(1000+1):

  # TRAIN
  # full-batch step; the recorded loss is the one computed before the parameter update
  prediction = model(x_train)
  optimizer.zero_grad()
  loss = loss_fcn(prediction, y_train)
  loss.backward()
  optimizer.step()
  train_loss.append(loss.item())

  # VALIDATE
  # no_grad: the validation pass is not tracked by autograd
  with torch.no_grad():

    p_val = model(x_val)
    loss_val = loss_fcn(p_val, y_val)
    val_loss.append(loss_val.item())
    # plot() reads epoch, loss, prediction and the loss lists from the notebook
    # namespace and returns immediately unless epoch is a multiple of 20
    plot(interval=20)



**Combine images into a video**

In [None]:
# stitch the numbered frames ./images/img_0000.png, img_0001.png, ... into out.mp4
#   -loglevel warning   only print warnings and errors
#   -vf scale=1280:-2   width 1280, height follows the aspect ratio (kept even)
#   -pix_fmt yuv420p    pixel format requested for the output video
#   -y                  overwrite out.mp4 without asking
!ffmpeg -loglevel warning -i ./images/img_%04d.png -vf scale=1280:-2 -pix_fmt yuv420p -y out.mp4

**Display video in the notebook**

In [None]:
from IPython.display import HTML
from base64 import b64encode
# read the encoded video; the with-block closes the file handle as soon as the bytes are loaded
with open('out.mp4','rb') as video_file:
  mp4 = video_file.read()

# embed the video in the page as a base64 data URL so no separate file has to be served
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

# last expression of the cell: the notebook renders this HTML as the cell output
HTML("""
<video width=800 controls><source src="%s" type="video/mp4"></video>
""" % data_url)

In [None]:
# remove the frame images written by plot(); out.mp4 is left in place
!rm ./images/img_*.png