# COMPOSITE ANALYSIS

<div class="alert alert-block alert-success" style="font-size:16px">In this exercise, you will composite wintertime surface temperature anomalies across the Northern Hemisphere that are associated with the North Atlantic Oscillation (NAO), a meridional pressure dipole between Iceland and the Azores. You will choose composite criteria and then compute the composite difference in surface temperatures, along with significance testing.

More information on the NAO can be found here:
(1) https://en.wikipedia.org/wiki/North_Atlantic_oscillation
(2) https://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/nao.shtml</div>

<div style="font-size:16px"><b>Make sure to follow the directions written in the document. <font color=green>Green</font> boxes contain important information and have <u>completed</u> code after them. <font color=gold>Yellow</font> boxes contain options that you need to select or make to continue on with the problem. <font color=blue>Blue</font> boxes contain instructions, and the cells after them are for YOU to complete in-class.</b></div>

In [None]:
#=========================================================================
# Libraries to import.
#=========================================================================
import os,warnings
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from netCDF4 import Dataset,num2date
from datetime import datetime
from dateutil.rrule import rrule, MONTHLY
from scipy import stats
from mpl_toolkits.basemap import Basemap,maskoceans,interp,shiftgrid

# LOAD THE MONTHLY-MEAN NAO INDEX

<div class="alert alert-block alert-success" style="font-size:16px">Load the time series into a 1-D NumPy array called NAOIndex.</div>

In [None]:
# Read the monthly NAO table and flatten it into one continuous time series.
# Column 0 is skipped; columns 1-12 hold the twelve monthly values of each row.
NAOFile = 'NAO_Monthly_Index_1950_Present.txt'
monthColumns = tuple(range(1,13))

# Values are already standardized; ravel() turns the 2-D table (row-major)
# into a single vector.
NAOIndex = np.loadtxt(NAOFile,usecols=monthColumns).ravel()

<div class="alert alert-block alert-success" style="font-size:16px">Make arrays for date information for the NAO index.</div>

In [None]:
# Build one date per NAO value, stepping monthly from January 1950.
numValues = NAOIndex.size
start_date = datetime(1950, 1, 1)
NAODate = np.asarray(list(rrule(MONTHLY,dtstart=start_date,count=numValues)))

# Month and year of every date, as arrays aligned with NAOIndex.
NAOMonth,NAOYear = (
    np.asarray([getattr(stamp,part) for stamp in NAODate])
    for part in ('month','year')
)

<div class="alert alert-block alert-success" style="font-size:16px">Make a plot of the index to make sure it looks "good."</div>

In [None]:
# Quick-look time series of the NAO index to sanity-check the loaded data.
fig,ax = plt.subplots(figsize=(12,12))
ax.set_position([0.1,0.1,0.9,0.6])
ax.plot(NAODate,NAOIndex,'r-',lw=2)

# Title and axis labels share the same bold Arial styling.
labelStyle = dict(name='Arial',weight='bold')
ax.set_title('Monthly-Mean NAO Index',size=18,**labelStyle)
ax.set_xlabel('Year',size=16,**labelStyle)
ax.set_ylabel('Index - [std]',size=16,**labelStyle)

# Major ticks (labelled with the 4-digit year) every 5 years; minor ticks
# once per year, placed in February (MonthLocator's first argument is bymonth).
ax.xaxis.set_major_locator(mdates.YearLocator(5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(mdates.MonthLocator(2))

# Apply a uniform font to the tick labels on both axes.
for tickLabel in ax.get_xticklabels() + ax.get_yticklabels():
    tickLabel.set_family('Arial')
    tickLabel.set_size(14)

# Rotate/align the date labels so they do not overlap.
fig.autofmt_xdate()

# LOAD THE MONTHLY-MEAN SURFACE TEMPERATURES
<div class="alert alert-block alert-success" style="font-size:16px">Load these from the provided netCDF file.</div>

In [None]:
# Read the monthly-mean surface temperature field, its coordinates, and its
# calendar information from the netCDF file.
ncFile = 'surfT.mon.mean.nc'
surfT = {}
with Dataset(ncFile,'r') as nc: # Read-only; the file is closed when the block exits.
    ncVars = nc.variables

    # Coordinates. Longitudes are offset by -360 here (LandOnly adds 360 back).
    lons = ncVars['lon'][:]-360.
    lats = ncVars['lat'][:]
    ilat = np.where(lats>=10)[0] # Keep latitudes >= 10 (Northern Hemisphere outside of tropics).
    lat,lon = np.meshgrid(lats[ilat],lons) # 2-D coordinate grids needed for plotting.

    # Time axis values and the units string that defines them.
    time = ncVars['time'][:]
    timeUnits = ncVars['time'].units

    # Convert the numeric times to dates, then pull out the month and year of each.
    sfcTDate = num2date(time,timeUnits,calendar='standard')
    sfcTMonth = np.asarray([stamp.month for stamp in sfcTDate])
    sfcYear = np.asarray([stamp.year for stamp in sfcTDate])

    # Temperature subset (time x lat x lon) and its units.
    sfcT = ncVars['surfT'][:,ilat,:]
    sfcTUnits = ncVars['surfT'].units


<div class="alert alert-block alert-success" style="font-size:16px">Get a sense of the data structure.</div>

In [None]:
# Structural check: array shape (time x lat x lon), then the last 50 dates in the file.
print(sfcT.shape,sfcTDate[-50:],sep='\n')

# LOAD THE MONTHLY-MEAN SURFACE AIR TEMPERATURE CLIMATOLOGY
<div class="alert alert-block alert-success" style="font-size:16px">Data are dimensioned 12 (Jan-Dec) x lat x lon</div>

In [None]:
# Read the long-term monthly means using the same latitude criterion
# (lats >= 10) that was applied to the monthly temperature field.
ncFile = 'surfT.mon.ltm.nc'
with Dataset(ncFile,'r') as nc:
    ltmVars = nc.variables
    lats = ltmVars['lat'][:]
    ilat = np.where(lats>=10)[0] # Northern Hemisphere outside of tropics only.
    climo = ltmVars['surfT'][:,ilat,:] # Long-term mean, month x lat x lon.

# Confirm the dimensions (time x lat x lon).
print(climo.shape)

# CALCULATE MONTHLY ANOMALIES
<div class="alert alert-block alert-success" style="font-size:16px">
e.g., Subtract the long-term January value (climo[0,:,:]) from ALL January values in sfcT.</div>

In [None]:
# Remove the seasonal cycle: for each calendar month, subtract that month's
# long-term mean map from every time step that falls in that month.
ano = np.ones_like(sfcT)*np.nan # Start from NaN so any unassigned time step stays flagged.
for month in range(1,13):
    inMonth = sfcTMonth==month # True wherever the time step belongs to this calendar month.
    ano[inMonth] = sfcT[inMonth] - climo[month-1] # climo is ordered Jan-Dec, hence month-1.

# SET THE CRITERIA FOR THE COMPOSITES AND FIND EVENTS
<div class="alert alert-block alert-warning" style="font-size:16px"> Define positive NAO conditions to be when the index exceeds some value, and define negative NAO conditions to be when the index is less than a value. You can start with +1 and -1 for now, but go back later and change these to see how sensitive your results are to the values chosen. Note that they don't have to be "symmetric" (e.g., you can choose +2 and -1). Whatever you choose, ONLY CONSIDER VALUES FROM November through April from the NAO index (and hence temperature). Use np.where() to find the indices in the NAO index where this is true. </div>

In [None]:
# EXERCISE: replace each "?" with the indices into NAOIndex for November-April
# months above (posNAO) or below (negNAO) your chosen thresholds.
# np.where() returns a tuple of index arrays; take element [0] so that len()
# below counts months rather than array dimensions.
posNAO = ?
negNAO = ?

# Report how many months fall into each composite.
print('Number of Positive NAO Months: %d'%len(posNAO))
print('Number of Negative NAO Months: %d'%len(negNAO))

<div class="alert alert-block alert-info" style="font-size:16px"> Find the corresponding dates for each composite type in the surface temperature array. You can use a loop OR look up the numpy method in1d(). </div>

In [None]:
# EXERCISE: replace each "?" with the positions along the surface temperature
# time axis (sfcTDate) whose dates match the positive / negative NAO months.
posCompDates = ?
negCompDates = ?

# COMPOSITES AND SIGNIFICANCE TESTING
<div class="alert alert-block alert-info" style="font-size:16px"> Use np.nanmean() to calculate the composite mean surface temperature ANOMALIES for positive and for negative NAO cases.</div>

In [None]:
# EXERCISE: replace each "?" with the time-mean (np.nanmean) of the temperature
# anomalies over the positive / negative NAO months.
posComp = ?
negComp = ? 

<div class="alert alert-block alert-info" style="font-size:16px"> Calculate the composite DIFFERENCE (positive - negative)</div>

In [None]:
# EXERCISE: replace "?" with the positive-minus-negative composite difference.
compDiff = ?

<div class="alert alert-block alert-info" style="font-size:16px">Calculate the significance of your composite difference using the t-test (difference of means). This will be done grid point by grid point. See notes for the equations & definitions.</div>

In [None]:
# EXERCISE: replace each "?" below using the difference-of-means t-test from the notes.
n1 = ? # Number of positive NAO events
n2 = ? # Number of negative NAO events
dof = ? # Degrees of freedom

s1 = ? # Sample standard deviation of temperature for positive NAO events.
s2 = ? # Sample standard deviation of temperature for negative NAO events.
sigma = ? # Pooled variance --> See notes.

tStatistic = ? # See notes. Will be dimensioned lat x lon (i.e., a "map" of t-scores)

# Returns the p-value (lat x lon) for a two-sided t-test corresponding to each t-score.
# cdf(-|t|) is the lower-tail probability; doubling it accounts for both tails.
# dof*np.ones(...) expands the degrees of freedom to the shape of the t-score map.
pval = 2*stats.t.cdf(-abs(tStatistic),dof*np.ones(tStatistic.shape))


# PLOT YOUR RESULTS
<div class="alert alert-block alert-success" style="font-size:16px">Function to plot surface air temperatures and significance only over land.</div>

In [None]:
#=========================================================================
# Function to plot surface air temperatures and significance only over land.
#=========================================================================
def LandOnly(field,lon,lat):
    """Regrid a field onto a finer lon/lat grid and mask out the ocean points.

    Parameters
    ----------
    field : 2-D array
        Field to process. It is transposed before use, so its first axis must
        run along lon[:,0] (presumably lon x lat -- confirm against the caller).
    lon, lat : 2-D arrays
        Coordinate grids; longitudes are read from lon[:,0] (360 is added back
        to them) and latitudes from lat[0,:].

    Returns
    -------
    tuple
        (masked field from maskoceans, 2-D target longitudes, 2-D target latitudes).
    """
    # Source latitudes in reversed order; the data rows are flipped below to match.
    srcLat = lat[0,::-1]

    # Shift the longitude axis with shiftgrid (lon0=180, start=False), then
    # reverse the row (latitude) order of the shifted data.
    shifted,srcLon = shiftgrid(180.,field.T,lon[:,0]+360.,start=False)
    shifted = shifted[::-1]

    # Target grid: 3x the source longitude count and 5x the source latitude count.
    fineLon = np.linspace(-180.,178.5,srcLon.size*3)
    fineLat = np.linspace(-90,90.,srcLat.size*5)
    fineLon,fineLat = np.meshgrid(fineLon,fineLat)

    # order=1 interpolation onto the target grid; bounds checking is disabled.
    regridded = interp(shifted,srcLon,srcLat,fineLon,fineLat,checkbounds=False,masked=False,order=1)

    # Mask ocean points (inland lakes too) so only land values remain.
    landField = maskoceans(fineLon,fineLat,regridded,resolution='c',grid=10,inlands=True)

    return landField,fineLon,fineLat


<div class="alert alert-block alert-success" style="font-size:16px">Apply the LandOnly function to the composite difference and p-value arrays for plotting.</div>

In [None]:
# Land-only composite difference, plus the finer lon/lat grids used for plotting.
landT,lon2,lat2 = LandOnly(np.transpose(compDiff),lon,lat)

# Land-only p-values; only the masked field (first return value) is needed.
landPval = LandOnly(np.transpose(pval),lon,lat)[0]

<div class="alert alert-block alert-warning" style="font-size:16px">Set a significance value (0.01, 0.05, or 0.1) that you wish to show on your plot.</div>

In [None]:
# EXERCISE: replace "?" with the significance level to show on the plot.
alpha = ?

# Boolean array used to plot stippling where the result is significant over land
# (i.e., where the p-value is at or below alpha AND the point is not masked).
stipple = (landPval<=alpha) & (~landPval.mask) 

<div class="alert alert-block alert-info" style="font-size:16px">Make the plot.</div>

In [None]:
# Contour information for the colorbar. You may need to change this, depending on your composite criteria chosen.
# np.arange excludes cmax, so the levels stop at the last step below it; nlevs is the
# number of intervals between consecutive levels.
cmin = -3; cmax = 3.1; cint = 0.2; clevs = np.round(np.arange(cmin,cmax,cint),1)
nlevs = len(clevs)-1

# Feel free to change the actual colorbar to whatever you want. Make sure to use a diverging colorbar.
# lut=nlevs requests a colormap with nlevs entries.
cmap = plt.get_cmap(name='seismic',lut=nlevs) 

# Add an appropriate title.
# EXERCISE: replace "?" with the title string.
titleText = ? 


plt.figure()

# Set up the projection and map.
m = Basemap(projection='npstere',lon_0=-100.,boundinglat=10.,round=True)
x2,y2 = m(lon2,lat2) # Convert the lon/lat grids to map projection coordinates.
m.drawcoastlines(linewidth=3)
m.drawmapboundary(linewidth=2)

# Contour compDiff. You may need to transpose the array (compDiff.T) for plotting.
# EXERCISE: replace "?" with the contourf arguments.
cs = m.contourf(?)
# NOTE(review): _lut is a private matplotlib attribute and this edit runs after
# contourf() -- confirm the filled contours (not only the colorbar) show the white
# band on your matplotlib version.
cmap._lut[nlevs//2-1:nlevs//2+1] = [1.,1.,1.,1.] # Puts white in the middle (i.e., 0 value)

# Adds stippling over statistically significant composite difference values.
m.plot(x2[stipple][::3],y2[stipple][::3],'o',color='Gold',markersize=1.5) # [::3] keeps every third significant point.

plt.title(titleText,name='Arial',weight='bold',size=20)

cbar = m.colorbar(cs,size='4%')

# Set the proper units for the composite field.
# EXERCISE: replace "?" with the colorbar label text.
cbar.ax.set_ylabel(?) 