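# Ridge and Lasso regression of gridded rainfall ('r') on the leading EOF time series, with the regularisation strength alpha chosen separately for every month and grid point by cross-validation.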

In [2]:
import numpy as np
from netCDF4 import Dataset
from sklearn import linear_model
import matplotlib.pyplot as plt

In [3]:
# Number of leading 'eof_ts' series used as regression predictors.
n_features = 10

# Input files: f1 provides the 'eof_ts' predictors, f2 the target field 'r'.
f1 = Dataset('../multi_eof.nc', 'r')
f2 = Dataset('../gpcc.r.1x1.nc', 'r')
r = f2.variables['r']

# Keep the first n_features rows of 'eof_ts', then swap the two axes so the
# selected series become the columns of the design matrix x.
x0 = f1.variables['eof_ts'][:n_features, :]
x = x0.transpose()
#np.shape(x)

In [4]:
# Coordinate axes of the target file.
lat = f2.variables['lat'][:]
lon = f2.variables['lon'][:]

# Bounding box of the study region; both ends are exclusive because the
# comparisons below are strict.
lat_bnds = [-18, -2]
lon_bnds = [287, 313]

# Positions of the coordinate values that fall strictly inside the box.
lat_inside = (lat > lat_bnds[0]) & (lat < lat_bnds[1])
lon_inside = (lon > lon_bnds[0]) & (lon < lon_bnds[1])
lat_inds = np.where(lat_inside)[0]
lon_inds = np.where(lon_inside)[0]

# Read 'r' over its full first two axes, restricted to the box on the last two.
y = f2.variables['r'][:, :, lat_inds, lon_inds]

In [5]:
# Sizes of the four axes that the regression loops iterate over.
nyear, nmonth = (len(f2.variables[axis]) for axis in ('year', 'month'))
nlat, nlon = len(lat_inds), len(lon_inds)

# Ridge regression: find the best alpha for every month and grid point by cross-validation over blocks of years

In [6]:
# Preview the fold start indices: every third year index, beginning at 0.
list(range(0, nyear, 3))

[0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]

In [11]:
# Candidate regularisation strengths: n_alphas values, log-spaced from 1e-1 to 1e15.
n_alphas = 200
alphas = np.logspace(-1, 15, n_alphas)

# Fold boundaries along the year axis: a boundary every 3 years, with the
# last boundary moved to nyear so the final fold runs through the last year.
k_list = list(range(0, nyear, 3))
k_list[-1] = nyear

# The train/validation split depends only on k_list, so build the index lists
# (and the matching predictor rows) once instead of once per
# month/location/alpha combination.
cv_folds = []
for fold_start, fold_stop in zip(k_list[:-1], k_list[1:]):
    val_set = list(range(fold_start, fold_stop))
    held_out = set(val_set)  # constant-time membership test
    train_set = [i for i in range(nyear) if i not in held_out]
    cv_folds.append((train_set, val_set, x[train_set, :], x[val_set, :]))

# Selected alpha for every (month, lat, lon) model.
alpha_map = np.zeros((nmonth, nlat, nlon))

for imonth in range(nmonth):  # each month has its own model
    for ilat in range(nlat):  # each location has its own model
        for ilon in range(nlon):
            tRSS = []  # validation RSS summed over all folds, one entry per alpha

            for alpha in alphas:
                val_RSS = 0.

                for train_set, val_set, x_train, x_val in cv_folds:
                    reg = linear_model.Ridge(alpha=alpha)
                    reg.fit(x_train, y[train_set, imonth, ilat, ilon])
                    val_residuals = y[val_set, imonth, ilat, ilon] - reg.predict(x_val)
                    val_RSS = (val_residuals**2).sum() + val_RSS

                tRSS.append(val_RSS)

            # np.argmin returns the first minimum, so ties go to the smallest alpha.
            alpha_map[imonth, ilat, ilon] = alphas[np.argmin(tRSS)]
print("completed!")

completed!


In [12]:
# Predicted values for every (year, month, lat, lon), filled one model at a time.
predictY = np.zeros((nyear, nmonth, nlat, nlon))

# One Ridge model per (month, lat, lon): refit on all years with the alpha
# stored in alpha_map, then predict those same years.
for imonth, ilat, ilon in np.ndindex(nmonth, nlat, nlon):
    cell_alpha = alpha_map[imonth, ilat, ilon]
    cell_model = linear_model.Ridge(alpha=cell_alpha).fit(x, y[:, imonth, ilat, ilon])
    predictY[:, imonth, ilat, ilon] = cell_model.predict(x)
print("completed!")

completed!


In [13]:
# Write the Ridge predictions to a new NetCDF file. The context manager
# closes the file even if one of the writes below raises.
with Dataset('predict.r.ridge.v2.nc', 'w', format='NETCDF3_64BIT') as pr:
    pr.description = 'predicted rainfall anomalies using Ridge regression'

    # Dimensions mirror the axes of predictY.
    pr.createDimension('year', nyear)
    pr.createDimension('month', nmonth)
    pr.createDimension('lat', nlat)
    pr.createDimension('lon', nlon)

    fyear = pr.createVariable('year', 'f', ('year',))
    fmonth = pr.createVariable('month', 'f', ('month',))
    flat = pr.createVariable('lat', 'f', ('lat',))
    flon = pr.createVariable('lon', 'f', ('lon',))
    # fill_value is left at the library default on purpose: declaring a real
    # prediction such as predictY[0,0,0,0] as _FillValue would make readers
    # treat that cell, and any cell equal to it, as missing data.
    newr = pr.createVariable('r', 'f4', ('year', 'month', 'lat', 'lon'))

    # Attach the metadata before writing any data.
    flat.units = "degrees_north"
    flat.long_name = "Latitude"
    flon.units = "degrees_east"
    flon.long_name = "Longitude"
    newr.long_name = 'predicted rainfall anomalies using Ridge regression'

    # Copy the coordinate values explicitly ([:] reads the data out of the
    # source variables); lat/lon are restricted to the selected region.
    fyear[:] = f2.variables['year'][:]
    fmonth[:] = f2.variables['month'][:]
    flat[:] = f2.variables['lat'][lat_inds]
    flon[:] = f2.variables['lon'][lon_inds]
    newr[:, :, :, :] = predictY[:, :, :, :]

# Lasso regression: find the best alpha for every month and grid point by cross-validation over blocks of years

In [None]:
# Candidate regularisation strengths: n_alphas values, log-spaced from 1e-10 to 1e4.
n_alphas = 200
alphas = np.logspace(-10, 4, n_alphas)

# Fold boundaries along the year axis: a boundary every 3 years, with the
# last boundary moved to nyear so the final fold runs through the last year.
k_list = list(range(0, nyear, 3))
k_list[-1] = nyear

# The train/validation split depends only on k_list, so build the index lists
# (and the matching predictor rows) once instead of once per
# month/location/alpha combination.
cv_folds = []
for fold_start, fold_stop in zip(k_list[:-1], k_list[1:]):
    val_set = list(range(fold_start, fold_stop))
    held_out = set(val_set)  # constant-time membership test
    train_set = [i for i in range(nyear) if i not in held_out]
    cv_folds.append((train_set, val_set, x[train_set, :], x[val_set, :]))

# Selected alpha for every (month, lat, lon) model.
alpha_map = np.zeros((nmonth, nlat, nlon))

for imonth in range(nmonth):  # each month has its own model
    for ilat in range(nlat):  # each location has its own model
        for ilon in range(nlon):
            tRSS = []  # validation RSS summed over all folds, one entry per alpha

            for alpha in alphas:
                val_RSS = 0.

                for train_set, val_set, x_train, x_val in cv_folds:
                    reg = linear_model.Lasso(alpha=alpha)
                    reg.fit(x_train, y[train_set, imonth, ilat, ilon])
                    val_residuals = y[val_set, imonth, ilat, ilon] - reg.predict(x_val)
                    val_RSS = (val_residuals**2).sum() + val_RSS

                tRSS.append(val_RSS)

            # np.argmin returns the first minimum, so ties go to the smallest alpha.
            alpha_map[imonth, ilat, ilon] = alphas[np.argmin(tRSS)]
print("completed!")

In [None]:
# Predicted values for every (year, month, lat, lon), filled one model at a time.
predictY = np.zeros((nyear, nmonth, nlat, nlon))

# One Lasso model per (month, lat, lon): refit on all years with the alpha
# stored in alpha_map, then predict those same years.
for imonth, ilat, ilon in np.ndindex(nmonth, nlat, nlon):
    cell_alpha = alpha_map[imonth, ilat, ilon]
    cell_model = linear_model.Lasso(alpha=cell_alpha).fit(x, y[:, imonth, ilat, ilon])
    predictY[:, imonth, ilat, ilon] = cell_model.predict(x)
print("completed!")

In [None]:
# Write the Lasso predictions to a new NetCDF file. The context manager
# closes the file even if one of the writes below raises.
with Dataset('predict.r.lasso.v2.nc', 'w', format='NETCDF3_64BIT') as pr:
    pr.description = 'predicted rainfall anomalies using Lasso regression'

    # Dimensions mirror the axes of predictY.
    pr.createDimension('year', nyear)
    pr.createDimension('month', nmonth)
    pr.createDimension('lat', nlat)
    pr.createDimension('lon', nlon)

    fyear = pr.createVariable('year', 'f', ('year',))
    fmonth = pr.createVariable('month', 'f', ('month',))
    flat = pr.createVariable('lat', 'f', ('lat',))
    flon = pr.createVariable('lon', 'f', ('lon',))
    # fill_value is left at the library default on purpose: declaring a real
    # prediction such as predictY[0,0,0,0] as _FillValue would make readers
    # treat that cell, and any cell equal to it, as missing data.
    newr = pr.createVariable('r', 'f4', ('year', 'month', 'lat', 'lon'))

    # Attach the metadata before writing any data. The long_name matches the
    # model that produced this file (Lasso).
    flat.units = "degrees_north"
    flat.long_name = "Latitude"
    flon.units = "degrees_east"
    flon.long_name = "Longitude"
    newr.long_name = 'predicted rainfall anomalies using Lasso regression'

    # Copy the coordinate values explicitly ([:] reads the data out of the
    # source variables); lat/lon are restricted to the selected region.
    fyear[:] = f2.variables['year'][:]
    fmonth[:] = f2.variables['month'][:]
    flat[:] = f2.variables['lat'][lat_inds]
    flon[:] = f2.variables['lon'][lon_inds]
    newr[:, :, :, :] = predictY[:, :, :, :]