In [None]:
import pandas as pd
import datacube
import matplotlib.pyplot as plt
import xarray as xr
import datetime as dt
import numpy as np

In [None]:
# Read the NSW yield table from the ancillary data folder, parsing the
# 'time' column into datetimes while loading.
yield_nsw = pd.read_csv(
    "ancillary_data/NSW_Yield_Data.csv",
    parse_dates=['time'],
)

In [None]:
# Display the loaded yield table so its columns and parsed 'time' values can be checked.
yield_nsw

In [None]:
# Bounding box of the study area as (min, max) latitude and longitude.
harden_lat = (-34.7, -34.6)
harden_lon = (148.3, 148.4)

# Time window as year strings, the form shown in the datacube query
# documentation.
# NOTE(review): bare integers such as 1989 are not a documented form and may
# be interpreted as an epoch offset rather than a calendar year.
time_range = ('1989', '2017')

In [None]:
# Connect to the datacube. The app label only identifies this session, so it
# is named after this analysis.
dc = datacube.Datacube(app='NDVI_yield_regression')

# Query shared by all three sensor loads: spatial bounds, time window and the
# output grid.
query = {
    'y': harden_lat,
    'x': harden_lon,
    'time': time_range,
    # GDA94 / MGA zone 55 covers 144°E-150°E, which contains the 148.3-148.4°E
    # study area. (EPSG:28352 is MGA zone 52, 126°E-132°E, and would place the
    # output grid far outside its zone.)
    'output_crs': 'EPSG:28355',
    # Pixel size as (y, x) in output CRS units; negative y means north-up rows.
    'resolution': (-25, 25)
}

# The same four bands are requested from every sensor so the datasets can be
# concatenated later.
bands = ['red', 'green', 'blue', 'nir']

# Landsat 8 annual geomedian composites for the query area and time window.
# This may take several minutes, so please be patient.
landsat8_ds = dc.load(
    product='ls8_nbart_geomedian_annual',
    measurements=bands,
    **query
)

# Landsat 7 annual geomedian composites for the same query.
landsat7_ds = dc.load(
    product='ls7_nbart_geomedian_annual',
    measurements=bands,
    **query
)

# Landsat 5 annual geomedian composites for the same query.
landsat5_ds = dc.load(
    product='ls5_nbart_geomedian_annual',
    measurements=bands,
    **query
)

In [None]:
# Show the time coordinate of each sensor's dataset (Landsat 5, 7, then 8),
# one after another, to see which time steps each one supplies.
print(landsat5_ds.time, landsat7_ds.time, landsat8_ds.time, sep='\n')

In [None]:
# Boundaries used to stitch the sensors into one time series.
ls7_window_start = np.datetime64('2001-01-01')
ls7_window_end = np.datetime64('2004-01-01')

# Landsat 5 supplies everything before the Landsat 7 window ...
landsat5_filtered_1 = landsat5_ds.sel(time=landsat5_ds.time < ls7_window_start)
# ... and resumes for time steps after 2003-01-01.
landsat5_filtered_2 = landsat5_ds.sel(time=landsat5_ds.time > np.datetime64('2003-01-01'))

# Landsat 7 is restricted to 2001-01-01 <= t < 2004-01-01. Without the lower
# bound, any Landsat 7 time step before 2001 would duplicate a time already
# supplied by landsat5_filtered_1 and appear twice in the combined series.
ls7_in_window = (landsat7_ds.time >= ls7_window_start) & (landsat7_ds.time < ls7_window_end)
landsat7_filtered = landsat7_ds.sel(time=ls7_in_window)

# Concatenate along time in chronological order: LS5 (early), LS7, LS5 (late), LS8.
landsat_combined = xr.concat(
    [landsat5_filtered_1, landsat7_filtered, landsat5_filtered_2, landsat8_ds],
    dim='time',
)

In [None]:
# Show the combined time coordinate as a plain NumPy array.
landsat_combined.time.values

In [None]:
# NDVI = (NIR - red) / (NIR + red), stored as a new 'ndvi' variable on the
# combined dataset.
landsat_combined['ndvi'] = (
    (landsat_combined['nir'] - landsat_combined['red'])
    / (landsat_combined['nir'] + landsat_combined['red'])
)

# Average over both spatial dimensions, leaving one value per variable per time step.
landsat_mean = landsat_combined.mean(dim=['x', 'y'])

In [None]:
# Convert the spatially averaged NDVI series to a DataFrame.
# The previous source name, `ndvi_array`, is not defined anywhere in the
# notebook and raised NameError on a fresh kernel; the series is taken
# directly from `landsat_mean` instead.
ndvi_df = landsat_mean['ndvi'].to_dataframe()

In [None]:
# Join the yield records to the NDVI values on their shared 'time' key
# (pandas' default inner join), then display the result.
ndvi_yield = yield_nsw.merge(ndvi_df, on='time')
ndvi_yield

In [None]:
# Scatter plot of yield per hectare against mean NDVI.
ndvi_yield.plot(kind='scatter', x='ndvi', y='Yield_per_hectare_t')

In [None]:
from sklearn import linear_model

# Single-feature design matrix: NDVI values as a column of shape (n, 1).
X = np.asarray(ndvi_yield['ndvi'])[:, np.newaxis]
# Regression target: yield per hectare.
y = ndvi_yield['Yield_per_hectare_t']

# Fit a linear regression of yield on NDVI. fit() returns the estimator
# itself, so `model` and `lm` refer to the same fitted object.
lm = linear_model.LinearRegression()
model = lm.fit(X, y)

# Fitted values for the same rows the model was trained on.
predictions = model.predict(X)
print(predictions)

In [None]:
# Draw the fitted values against the NDVI inputs on the current axes.
plt.gca().plot(X, predictions)

In [None]:
# Score the fitted model on the same data it was trained on
# (for LinearRegression, score() reports R^2).
lm.score(X=X, y=y)

In [None]:
# Fractional cover query: same bounds and time window as the geomedian
# query, with no output CRS or resolution specified.
fc_query = dict(y=harden_lat, x=harden_lon, time=time_range)

# Load the Landsat 8 fractional cover product for that query.
fc = dc.load(product='ls8_fc_albers', **fc_query)