http://ww2.amstat.org/publications/jse/v21n1/witt.pdf

https://noaadata.apps.nsidc.org/NOAA/G02135/north/monthly/data/N_08_extent_v4.0.csv

In [None]:
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt

import pandas as pd

from scipy import stats

## Read Data

In [None]:
# Load the extent table from the CSV.  The rest of the notebook reads the
# extent column as ' extent' (with a leading space), so the dtype key has to
# carry that space too; a key of 'extent' names no column in the table and
# therefore would not apply the requested dtype to anything.
data = pd.read_csv('N_08_extent_v4.0.csv',
                   dtype={'year': np.int32, ' extent': np.double})

In [None]:
# Echo the loaded table as this cell's output.
data

In [None]:
# Echo each column's dtype to check how the CSV was parsed.
data.dtypes

In [None]:
# Keep the two columns used throughout the notebook under short names.
# The extent column's name starts with a space.
year, extent = data['year'], data[' extent']

In [None]:
# Scatter the raw observations: one marker per row of the table.
figure_size = (8, 4)
plt.figure(figsize=figure_size)
plt.plot(year, extent, 'o')

## Try a linear fit

In [None]:
# Pull the two columns used for the regression out of the table.
year, extent = data['year'], data[' extent']

# Least-squares straight line through (year, extent); the result unpacks
# into the five values named below.
fit = stats.linregress(year, extent)
slope, intercept, rvalue, pvalue, stderr = fit
yfit = slope*year + intercept

# Fitted line in red, drawn first, with the observations on top.
plt.figure(figsize=(6, 3))
plt.plot(year, yfit, 'r-')
plt.plot(year, extent, 'o')

# Report the fitted parameters, then rvalue and its square.
for value in (slope, intercept, rvalue, rvalue**2):
    print(value)

### How far off is this fit?

In [None]:
plt.figure(figsize=(6,3))

# Residuals of the fit: observed extent minus the fitted values in `yfit`.
plt.plot(year, extent - yfit, 'o')
# A notebook cell echoes only its final expression, so show both fitted
# parameters together as a tuple instead of leaving `slope` on its own line,
# where its value is computed and then discarded.
slope, intercept

In [None]:
# Echo `stderr`, the fifth value unpacked from the stats.linregress result.
stderr

### How did the linear fit "fit" as time progresses?

In [None]:
plt.figure(figsize=(20,8))
# Refit the straight line on a growing prefix of the record (the first 22
# rows, then 23, ... up to every row) and overlay all of the fitted lines.
for y in range(22, len(extent)+1):
    slope, intercept, rvalue, pvalue, stderr = \
    stats.linregress(year[:y], extent[:y])
    yfit = slope*year[:y] + intercept

    # Label each line with the last year actually included in that fit,
    # read from the data instead of computed from a hard-coded start year.
    plt.plot(year[:y], yfit, '-', label='%d' % year.iloc[y-1])

# Plot the complete record directly.  This matches what the final loop
# iteration's slice covers, but does not depend on the loop variable still
# being bound afterwards (it would not be if the loop body never ran).
plt.plot(year, extent, 'o')
plt.legend()

### Let's try a quadratic fit

In [None]:
# Least-squares parabola; the three unpacked names follow polyfit's
# highest-power-first coefficient order.
quadratic, linear, intercept = np.polyfit(year, extent, 2)
yfit = quadratic*year**2 + linear*year + intercept

# Fitted curve in red, drawn first, with the observations on top.
plt.figure(figsize=(6, 3))
plt.plot(year, yfit, 'r-')
plt.plot(year, extent, 'o')

# rvalue = sqrt(1 - SS_res / SS_tot): residual sum of squares relative to
# the total sum of squares about the mean extent.
ss_res = ((yfit - extent)**2).sum()
ss_tot = ((extent - np.mean(extent))**2).sum()
rvalue = np.sqrt(1 - ss_res / ss_tot)

print(rvalue)

In [None]:
# Residuals against the current `yfit`: observed minus fitted values.
residual = extent - yfit
plt.plot(year, residual, 'o')

## What about a cubic?

What goes wrong here?

In [None]:
# NOTE(review): this self-assignment is a no-op — presumably a placeholder
# where `year` is meant to be transformed before fitting; confirm intent.
year = year
# Least-squares cubic; the four unpacked names follow polyfit's
# highest-power-first coefficient order.
cubic, quadratic, linear, intercept = np.polyfit(year, extent, 3)
# NOTE(review): `year` was requested as np.int32 when the CSV was read, so
# year**3 is presumably evaluated in 32-bit integers and would wrap around
# for values in the thousands — likely the failure the heading above asks
# about; verify against the actual dtype and values.
yfit = cubic*year**3 + quadratic*year**2 + linear*year + intercept

# Fitted curve in red, drawn first, with the observations on top.
plt.figure(figsize=(4,3))
plt.plot(year, yfit, 'r-')
plt.plot(year, extent, 'o')

# Same formula as the quadratic cell: sqrt(1 - SS_res / SS_tot).
# NOTE(review): if the residual sum exceeds the total sum, the square root
# argument is negative and the result is presumably NaN — confirm.
rvalue = np.sqrt(1 - ((yfit - extent)**2).sum() / ((extent - np.mean(extent))**2).sum())

print(rvalue)