In [None]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

plt.rcParams['figure.figsize'] = (8, 8)

Credits for baseline data and code: Marc Harper, GA LA <br>
ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt <br>
http://earthobservatory.nasa.gov/blogs/climateqa/mauna-loa-co2-record/


In [None]:
# Source of the monthly CO2 record (whitespace-delimited text file).
filename = "ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt"
columns = ["year", "month", "decimal date", "average", "interpolated", "trend", "?"]

# Skip the preamble by its comment marker instead of a hard-coded line count,
# which silently drops or mis-parses rows whenever the header length changes.
# NOTE(review): assumes every header line in the file starts with '#' -- confirm.
# sep=r"\s+" is the supported replacement for the deprecated delim_whitespace=True.
co2 = pd.read_csv(filename, sep=r"\s+", comment="#", names=columns)

# Rebind instead of mutating in place so re-running the cell is predictable.
co2 = co2.dropna()
co2.head()

In [None]:
def make_date(row):
    """Return a ``YYYY-MM-01`` date string built from a row's year and month.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'year'`` and ``'month'`` whose values can be
        converted with ``int()`` (e.g. a DataFrame row or a dict).

    Returns
    -------
    str
        The year, the month zero-padded to two digits, and a fixed day of
        ``01``, joined with hyphens.
    """
    year_number = int(row['year'])
    month_number = int(row['month'])
    # {:02d} zero-pads single-digit months; the day is always the first.
    return "{}-{:02d}-01".format(year_number, month_number)

# Build one 'YYYY-MM-01' string per row, then parse them into timestamps.
date_strings = co2.apply(make_date, axis=1)
co2['Date'] = pd.to_datetime(date_strings, format='%Y-%m-%d')

# Keep a regular 'date' column as well, because 'Date' is about to become the index.
co2['date'] = co2['Date']
co2 = co2.set_index('Date')
co2.head()

In [None]:
# Plot the interpolated CO2 series against the datetime index.
fig, ax = plt.subplots()
ax.plot(co2.index, co2["interpolated"])
ax.set_ylabel("CO_2")
ax.set_xlabel("Date")
plt.show()

## Build DataFrame

In [None]:
# Modeling frame indexed by date:
#   y -- the interpolated CO2 values
#   x -- each date converted to an integer via Timestamp.toordinal()
df = pd.DataFrame({
    'y': co2["interpolated"],
    'x': co2["date"].map(lambda stamp: stamp.toordinal()),
    'Date': co2["date"],
}).set_index('Date')

df.head()

## Modeling

In [None]:
from sklearn import linear_model

# Quadratic design matrix in the ordinal date: columns are x**2, x, 1.
X = np.vander(df['x'], 3)
# Fit in log space, i.e. model CO2 as exp(quadratic in time).
y = np.log(df['y'])

model = linear_model.LinearRegression()
model.fit(X, y)

# Map the fitted values back from log space to the scale of the data (computed once).
predicted = np.exp(model.predict(X))

# Plot against the datetime index so the axis labelled "Date" shows real dates
# rather than integer ordinals; linewidth is numeric, as matplotlib documents.
plt.plot(df.index, df['y'], label="Raw Data")
plt.plot(df.index, predicted, label="Predicted", color='r', linewidth=2)
plt.ylabel("CO_2")
plt.xlabel("Date")
plt.legend()
# End with show() so the cell renders the figure instead of echoing the Legend repr.
plt.show()

## Subtract off the Predicted Trend

In [None]:
# Residual series: observed values minus the fitted trend (back-transformed from log space).
fitted_trend = np.exp(model.predict(X))
df['y2'] = df['y'].sub(fitted_trend)
plt.plot(df['x'], df['y2'])

## Analyzing the Time Series

### We can use autocorrelation to measure the self-similarity of the time series. In this case we expect a yearly cycle.



In [None]:
# Monthly: autocorrelation of the detrended series at a lag of one row.
detrended = df['y2']
detrended.autocorr(lag=1)

In [None]:
# Yearly: autocorrelation of the detrended series at a lag of twelve rows.
detrended = df['y2']
detrended.autocorr(lag=12)

### As you can see, there is a very strong yearly autocorrelation, as expected. We can take a look at a range of lag values with a plot from statsmodels.

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation plot of the detrended series; 13 lags is enough to include lag 12.
detrended = df['y2']
max_lag = 13
plot_acf(detrended, lags=max_lag)
plt.show()