# Example data analysis notebook

This notebook downloads and analyses some surface air temperature anomaly data from [Berkeley Earth](http://berkeleyearth.org/).

Import the required libraries.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import requests

from data_analysis import download_data, moving_average

Build the URL of the Berkeley Earth data file for a given country (for example, Australia). The file itself is downloaded with the [requests](http://docs.python-requests.org/) library.

In [2]:
def build_url(country):
    """Return the Berkeley Earth download URL for a country's TAVG trend file.

    The country name is lower-cased and inserted as the file-name prefix;
    no other normalisation is applied to it.
    """
    base = 'http://berkeleyearth.lbl.gov/auto/Regional/TAVG/Text'
    return f'{base}/{country.lower()}-TAVG-Trend.txt'

Download and load the data for Australia with `download_data`, which is imported from the `data_analysis` module.

In [3]:
# Load the Australia data with the `download_data` helper imported from `data_analysis`.
data = download_data("Australia")
# Bare last expression so the notebook displays the loaded array.
data

array([[ 1.852e+03,  7.000e+00, -1.840e-01, ...,        nan,        nan,
               nan],
       [ 1.852e+03,  8.000e+00, -5.900e-01, ...,        nan,        nan,
               nan],
       [ 1.852e+03,  9.000e+00, -4.500e-01, ...,        nan,        nan,
               nan],
       ...,
       [ 2.013e+03,  7.000e+00,  1.633e+00, ...,        nan,        nan,
               nan],
       [ 2.013e+03,  8.000e+00,  1.828e+00, ...,        nan,        nan,
               nan],
       [ 2.013e+03,  9.000e+00,        nan, ...,        nan,        nan,
               nan]])

Extract the monthly temperature anomaly and calculate an approximate "decimal year" to use in plotting.

In [None]:
def extract_monthly_anomaly(data):
    """Split a 2D data array into a decimal-year axis and the monthly anomaly.

    Column 0 is read as the year, column 1 as the month number and column 2
    as the anomaly. The decimal year places month 1 exactly on the year and
    advances by 1/12 for each later month.

    Returns
    -------
    tuple
        ``(decimal_year, anomaly)``, each with one entry per row of ``data``.
    """
    # Fraction of a year elapsed at the given month (month 1 -> 0).
    month_offset = 1/12*(data[:, 1] - 1)
    return data[:, 0] + month_offset, data[:, 2]

In [None]:
# Unpack the two arrays returned by extract_monthly_anomaly: decimal year and anomaly.
decimal_year, temperature_anomaly = extract_monthly_anomaly(data)

Plot the data so we can see what it's like.

In [None]:
# Draw the raw monthly anomaly against decimal year on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Temperature anomaly for Australia")
ax.plot(decimal_year, temperature_anomaly)
ax.set_xlabel('year')
ax.set_ylabel('temperature anomaly (C)')
ax.grid()
# Pin the x-axis to the span of the data so there is no padding on either side.
ax.set_xlim(decimal_year.min(), decimal_year.max())

The data are kind of noisy at this scale so let's calculate a 12-month moving average for a smoother time series.

In [None]:
def moving_average(data, window_size):
    """Calculate a centred moving average over 1D data.

    NOTE: this notebook also imports a ``moving_average`` from
    ``data_analysis``; this definition replaces it for the cells that follow.

    Parameters
    ----------
    data : array-like
        1D sequence of values. NaNs propagate into every window that
        contains them.
    window_size : int
        Number of samples in each window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array with ``data.size`` entries. Entry ``i`` holds the mean of the
        ``window_size`` samples starting at ``i - window_size // 2``. The first
        and last ``window_size // 2`` entries are left as NaN.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    data = np.asarray(data)
    average = np.full(data.size, np.nan)
    half_window = window_size // 2
    # An odd window needs one extra trailing sample so that it really spans
    # `window_size` values (half before, the centre, half after). Even windows
    # keep the original `i - half : i + half` span.
    extra = window_size % 2
    for i in range(half_window, data.size - half_window):
        average[i] = np.mean(data[i - half_window : i + half_window + extra])
    return average

In [None]:
# Smooth the monthly anomaly with a 12-sample window (12 months for monthly data).
moving_avg = moving_average(temperature_anomaly, window_size=12)

In [None]:
# Overlay the smoothed series on the raw monthly anomaly.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Temperature anomaly for Australia")
ax.plot(decimal_year, temperature_anomaly, label="anomaly")
ax.plot(decimal_year, moving_avg, label="12-month moving average", linewidth=3)
ax.set_xlabel('year')
ax.set_ylabel('temperature anomaly (C)')
ax.legend()
ax.grid()
# Pin the x-axis to the span of the data so there is no padding on either side.
ax.set_xlim(decimal_year.min(), decimal_year.max())

In [None]:
def test_moving_average():
    """Check moving_average against hand-computed values for an even window.

    A ramp input is used instead of a constant one so that a window that is
    shifted or has the wrong width changes the result and fails the check.
    With a window of 4 over 0..9, entry ``i`` averages samples ``i-2 .. i+1``
    (i.e. ``i - 0.5``) and the two entries at each end stay NaN.
    """
    nan = np.nan
    avg = moving_average(np.arange(10, dtype=float), 4)
    expected = np.array([nan, nan, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, nan, nan])
    # assert_allclose treats NaNs in matching positions as equal by default,
    # so this checks both the NaN padding and the averaged values.
    np.testing.assert_allclose(avg, expected)

In [None]:
# Run the check; an AssertionError here means moving_average is misbehaving.
test_moving_average()