In [None]:
!pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting scikit-learn
  Downloading scikit_learn-0.24.2-cp38-cp38-manylinux2010_x86_64.whl (24.9 MB)
[K     |████████████████████████████████| 24.9 MB 12.7 MB/s eta 0:00:01
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)
Collecting joblib>=0.11
  Downloading joblib-1.0.1-py3-none-any.whl (303 kB)
[K     |████████████████████████████████| 303 kB 74.1 MB/s eta 0:00:01
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1316 sha256=2fd7e58442a1c424caea62eda204f3385305ceafd3d32ab978d1d72d5f995fb2
  Stored in directory: /root/.cache/pip/wheels/22/0b/40/fd3f795caaa1fb4c6cb738bc1f56100be1e57da95849bfc897
Successfully built sklearn
Installing collected packages: threadpoolctl, joblib, scikit-learn, sklearn


In [58]:
### Environment setup
import sys
sys.path.append('/content')
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import geopandas as gpd
from pystac_client import Client
import planetary_computer as pc
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

# Load `KEY = VALUE` pairs from the local .env file into the process environment.
# The file is read with plain Python instead of a shell escape, so a missing file
# surfaces as FileNotFoundError rather than as an unparsable line of `cat` output.
with open('/content/.env') as env_file:
    env_vars = env_file.read().splitlines()

for var in env_vars:
    entry = var.strip()
    # Blank lines and `#` comment lines carry no assignment; unpacking them would raise.
    if not entry or entry.startswith('#'):
        continue
    # Split on the first delimiter only, so a value may itself contain ' = '.
    key, value = entry.split(' = ', 1)
    os.environ[key] = value

# Credentials handed to pandas/fsspec when reading `az://` paths.
# Both variables must be present in the environment; a missing one raises KeyError here.
storage_options = {
    'account_name': os.environ['ACCOUNT_NAME'],
    'account_key': os.environ['BLOB_KEY'],
}


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim

### Load the data

# Previous data source, kept for reference:
#d = pd.read_csv("https://donaldpinckney.com/books/pytorch/book/ch2-linreg/code/linreg-multi-synthetic-2.csv", header=None)

# Read the modeling table from blob storage, then keep rows whose chip cloud
# percentage is below 20.
d = pd.read_csv("az://modeling-data/fluvius_data.csv", storage_options=storage_options)
d = d[d['Chip Cloud Pct']<20]

# Four predictor columns followed by the target column, in that order.
feature_columns = ['sentinel-2-l2a_R', 'sentinel-2-l2a_G', 'sentinel-2-l2a_B', 'julian_date']
target_column = 'SSC (mg/L)'
ds = d[feature_columns + [target_column]]
D = torch.tensor(ds.values, dtype=torch.float)

# Number of input columns; `n` is the name the model definition reads.
dims = 4
n = dims

# Inputs: every row, first `dims` columns, transposed so each row of x_dataset is one feature.
x_dataset = D[:, :dims].t()

# Targets: every row, the column right after the inputs.
# (`.t()` returns a 1-D tensor as is, so this stays 1-D.)
y_dataset = D[:, dims].t()

### Feature Scaling computations

# Per-feature mean and standard deviation, kept as column vectors (keepdim=True)
# so they broadcast against x_dataset inside the model.
means = x_dataset.mean(dim=1, keepdim=True)
deviations = x_dataset.std(dim=1, keepdim=True)

### Model definition ###

# First we define the trainable parameters A (1 x n weight row) and b (bias).
# The initial values come from a dedicated, seeded generator so that re-running
# the cell starts from the same point without altering the global RNG state.
init_rng = torch.Generator().manual_seed(0)
A = torch.randn((1, n), generator=init_rng, requires_grad=True)
b = torch.randn(1, generator=init_rng, requires_grad=True)

# Then we define the prediction model
def model(x_input):
    """Return the linear prediction for `x_input` after feature scaling.

    The input is shifted by the global `means` and divided by the global
    `deviations`, then multiplied by the weight matrix `A` with the bias `b`
    added to the product.
    """
    centered = x_input - means
    scaled = centered / deviations
    return A.mm(scaled) + b

### Loss function definition ###

def loss(y_predicted, y_target):
    """Sum of squared differences between predictions and targets.

    The two tensors are subtracted elementwise (with broadcasting), squared,
    and reduced to a single scalar tensor.
    """
    residual = y_predicted - y_target
    squared = residual ** 2
    return squared.sum()
### Training the model ###

# Training hyperparameters.
learning_rate = 0.01
n_steps = 20000

# Setup the optimizer object, so it optimizes A and b.
optimizer = optim.Adam([A, b], lr=learning_rate)

# Main optimization loop: every step uses the full dataset.
for t in tqdm(range(n_steps)):
    # Set the gradients to 0.
    optimizer.zero_grad()
    # Compute the current predicted y's from x_dataset
    y_predicted = model(x_dataset)
    # See how far off the prediction is
    current_loss = loss(y_predicted, y_dataset)
    # Compute the gradient of the loss with respect to A and b.
    current_loss.backward()
    # Update A and b accordingly.
    optimizer.step()

# The prediction left over from the loop was made *before* the last optimizer
# step. Recompute it with the final A and b so downstream evaluation scores the
# trained model; no_grad because this pass is for evaluation only.
with torch.no_grad():
    y_predicted = model(x_dataset)

100%|██████████| 20000/20000 [00:06<00:00, 3114.79it/s]


In [96]:
from sklearn.metrics import r2_score

# Coefficient of determination of the fitted model.
# Note: this is scored on the same data the model was trained on.
observed = y_dataset.numpy()
# y_predicted has a leading axis of length 1; take its single row.
fitted = y_predicted.detach().numpy()[0]
r2_score(y_true=observed, y_pred=fitted)

0.25049799665750605