In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so that the
# files referenced under /content/drive/MyDrive are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

features_filepath = '/content/drive/MyDrive/LakeRegression/340_Veri_toplam_temiz.xlsx'
selected_feature = 'Klorofil-a (µg/L)'

# Keep only the columns this notebook uses. The explicit .copy() makes
# feats_df an independent frame rather than a selection derived from the
# raw sheet, so the later in-place assignment to the target column does
# not trigger pandas' SettingWithCopyWarning.
feats_df = pd.read_excel(features_filepath)
feats_df = feats_df[['Date', 'Station', selected_feature, 'X', 'Y']].copy()
feats_df.head()

Unnamed: 0,Date,Station,Klorofil-a (µg/L),X,Y
0,2017-04-27,1,86.14,235,537
1,2017-04-27,2,61.24,280,427
2,2017-04-27,3,48.4,325,340
3,2017-04-27,4,39.7,345,263
4,2017-04-27,5,72.52,398,165


In [None]:
from sklearn.preprocessing import StandardScaler

# First date that no longer belongs to the training period. This must stay
# equal to the train/validation cutoff used when samples are split by date.
TRAIN_END_DATE = '2019-03-15'

scaler = StandardScaler()
target_values = feats_df[selected_feature].to_numpy().reshape(-1, 1)
is_train_row = (pd.to_datetime(feats_df['Date']) < pd.to_datetime(TRAIN_END_DATE)).to_numpy()

# Fit the scaler on training-period rows only: fitting on every row would
# let validation/test statistics leak into the standardised target.
scaler.fit(target_values[is_train_row])
# Apply the train-fitted scaling to all rows (train, validation and test).
feats_df[selected_feature] = scaler.transform(target_values).ravel()
feats_df.head()

Unnamed: 0,Date,Station,Klorofil-a (µg/L),X,Y
0,2017-04-27,1,2.643996,235,537
1,2017-04-27,2,1.361115,280,427
2,2017-04-27,3,0.699582,325,340
3,2017-04-27,4,0.251347,345,263
4,2017-04-27,5,1.942276,398,165


In [None]:
import os
import h5py
import numpy as np
from tqdm import tqdm

np.random.seed(0)

unique_dates = feats_df['Date'].unique()

# Boundaries of the chronological split: train | validation | test.
val_start = pd.to_datetime('2019-03-15')
test_start = pd.to_datetime('2019-05-01')

X_train, y_train = [], []
X_val, y_val = [], []
X_test, y_test = [], []
patch_size = 9
for i in tqdm(range(1, 35)):
    # Folders 22 and 23 are left out of the dataset.
    if i in (22, 23):
        continue
    folder_path = f'/content/drive/MyDrive/LakeRegression/data/{i}'
    for j in range(10):
        file_path = f'{folder_path}/station_{j}_{patch_size}.h5'
        # Not every station has a patch in every folder; skip the gaps.
        if not os.path.isfile(file_path):
            continue

        # The patch is stored under the first key of the HDF5 file.
        with h5py.File(file_path, 'r') as f:
            a_group_key = list(f.keys())[0]
            image = np.array(f[a_group_key])

        # Folder i maps to the i-th unique date; file index j maps to station j + 1.
        unique_date = unique_dates[i - 1]
        unique_station = j + 1
        row_mask = (feats_df['Date'] == unique_date) & (feats_df['Station'] == unique_station)
        label = feats_df.loc[row_mask, selected_feature].values[0]

        # Route the sample to a split according to its acquisition date.
        date = pd.to_datetime(unique_date)
        if date < val_start:
            X_train.append(image)
            y_train.append(label)
        elif date < test_start:
            X_val.append(image)
            y_val.append(label)
        else:
            X_test.append(image)
            y_test.append(label)

print(f'Train % {len(X_train) / (len(X_train) + len(X_val) + len(X_test))} | Val % {len(X_val) / (len(X_train) + len(X_val) + len(X_test))} | Test % {len(X_test) / (len(X_train) + len(X_val) + len(X_test))}')

# Convert the accumulated Python lists into arrays.
X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

print(f'Train shape: {X_train.shape} | {y_train.shape} | Val shape: {X_val.shape} | {y_val.shape} | Test shape: {X_test.shape} | {y_test.shape}')

100%|██████████| 34/34 [03:57<00:00,  7.00s/it]

Train % 0.8125 | Val % 0.09375 | Test % 0.09375
Train shape: (260, 12, 9, 9) | (260,) | Val shape: (30, 12, 9, 9) | (30,) | Test shape: (30, 12, 9, 9) | (30,)





In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, SequentialSampler
from torchvision import transforms

torch.manual_seed(0)

class LakeDataset(Dataset):
    """Map-style dataset pairing image patches with scalar regression targets.

    Both arrays are cast to float32 on construction. If a ``transform`` is
    given, it is applied to the image (not the label) on every access.
    """

    def __init__(self, X, y, transform=None):
        self.transform = transform
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32)

    def __len__(self):
        # The number of samples is defined by the label array.
        return len(self.y)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        sample = self.transform(sample) if self.transform else sample
        return sample, target

# Per-channel mean / standard deviation, computed on the training split only.
means = [np.mean(X_train[:, band, :, :]) for band in range(12)]
stds = [np.std(X_train[:, band, :, :]) for band in range(12)]


def _to_float_tensor(x):
    """Convert a numpy patch into a float32 torch tensor."""
    return torch.from_numpy(x).float()


transform = transforms.Compose([
    transforms.Lambda(_to_float_tensor),
    transforms.Normalize(means, stds),
])

train_dataset = LakeDataset(X_train, y_train, transform=transform)
val_dataset = LakeDataset(X_val, y_val, transform=transform)
test_dataset = LakeDataset(X_test, y_test, transform=transform)


def _sequential_loader(dataset):
    """Batch ``dataset`` in its stored order (32 samples per batch)."""
    return DataLoader(dataset, batch_size=32, sampler=SequentialSampler(dataset))


# Sequential sampling keeps the samples in date order.
train_loader = _sequential_loader(train_dataset)
val_loader = _sequential_loader(val_dataset)
test_loader = _sequential_loader(test_dataset)

print(f'Train loader: {len(train_loader)} | Val loader: {len(val_loader)} | Test loader: {len(test_loader)}')

Train loader: 9 | Val loader: 1 | Test loader: 1


In [None]:
import torch
from torch import nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Three-layer convolutional regressor producing one value per patch.

    All convolutions use 3x3 kernels with stride 1 and padding 1, so the
    spatial size of the input is preserved up to the flatten step. The
    flattened feature size is therefore ``256 * patch_size * patch_size``.

    Args:
        in_channels: Number of channels in the input patch. Defaults to 12.
        patch_size: Height and width of the (square) input patch.
            Defaults to 9, which gives the original 20736 flattened features.

    Input shape:  (batch, in_channels, patch_size, patch_size)
    Output shape: (batch, 1)
    """

    def __init__(self, in_channels=12, patch_size=9):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.dropout = nn.Dropout(0.1)
        # Derived from the conv3 width and the preserved spatial size
        # instead of a hard-coded 20736 (= 256 * 9 * 9).
        self.fc1 = nn.Linear(256 * patch_size * patch_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.dropout(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [None]:
from torch import optim
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

def train(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    """Train ``model`` and print train/validation metrics after every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` and then
    evaluates on ``val_loader`` with gradients disabled.

    Args:
        model: Network to optimise; it is updated in place.
        train_loader: Iterable of ``(X, y)`` batches used for optimisation.
        val_loader: Iterable of ``(X, y)`` batches used for evaluation only.
        criterion: Loss called as ``criterion(y_hat, y.unsqueeze(1))``.
        optimizer: Optimiser bound to the parameters of ``model``.
        epochs: Number of passes over ``train_loader``.
        device: Device that every batch is moved to before the forward pass.

    Returns:
        The same ``model`` instance after training.
    """
    print(f'Using device: {device}')
    for epoch in range(epochs):
        train_loss = 0.0
        train_predictions = []
        train_actuals = []
        model.train()
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            y_hat = model(X)
            # y is (batch,), the model output is (batch, 1): align the shapes.
            loss = criterion(y_hat, y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_predictions.extend(y_hat.cpu().detach().numpy())
            train_actuals.extend(y.cpu().numpy())
        # Reported loss is the mean of the per-batch losses.
        train_loss /= len(train_loader)

        train_r2 = r2_score(train_actuals, train_predictions)
        train_mse = mean_squared_error(train_actuals, train_predictions)
        # RMSE is derived from the MSE rather than requested through
        # `squared=False`, which was deprecated in scikit-learn 1.4 and
        # removed in later releases.
        train_rmse = train_mse ** 0.5

        val_loss = 0.0
        val_predictions = []
        val_actuals = []
        model.eval()
        with torch.no_grad():
            for X, y in val_loader:
                X = X.to(device)
                y = y.to(device)
                y_hat = model(X)
                loss = criterion(y_hat, y.unsqueeze(1))
                val_loss += loss.item()
                val_predictions.extend(y_hat.cpu().detach().numpy())
                val_actuals.extend(y.cpu().numpy())
        val_loss /= len(val_loader)

        val_r2 = r2_score(val_actuals, val_predictions)
        val_mse = mean_squared_error(val_actuals, val_predictions)
        val_rmse = val_mse ** 0.5

        print(f'Epoch {epoch + 1}/{epochs} | Train loss: {train_loss:.4f} R2: {train_r2:.4f} MSE: {train_mse:.4f} RMSE: {train_rmse:.4f} | Val loss: {val_loss:.4f} R2: {val_r2:.4f} MSE: {val_mse:.4f} RMSE: {val_rmse:.4f}')
    return model

# Fresh network placed on the selected device.
model = CNN().to(device)

# Mean-squared-error objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=3e-5)

epochs = 50
model = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    device=device,
)

Using device: cuda
Epoch 1/50 | Train loss: 1.1595 R2: -0.0257 MSE: 1.1874 RMSE: 1.0897 | Val loss: 0.5123 R2: -3.9842 MSE: 0.5123 RMSE: 0.7157
Epoch 2/50 | Train loss: 1.0489 R2: 0.0615 MSE: 1.0865 RMSE: 1.0423 | Val loss: 0.4198 R2: -3.0845 MSE: 0.4198 RMSE: 0.6479
Epoch 3/50 | Train loss: 0.9648 R2: 0.1187 MSE: 1.0202 RMSE: 1.0101 | Val loss: 0.2963 R2: -1.8829 MSE: 0.2963 RMSE: 0.5443
Epoch 4/50 | Train loss: 0.8656 R2: 0.1873 MSE: 0.9408 RMSE: 0.9699 | Val loss: 0.1614 R2: -0.5699 MSE: 0.1614 RMSE: 0.4017
Epoch 5/50 | Train loss: 0.8070 R2: 0.2292 MSE: 0.8923 RMSE: 0.9446 | Val loss: 0.0898 R2: 0.1267 MSE: 0.0898 RMSE: 0.2996
Epoch 6/50 | Train loss: 0.7837 R2: 0.2503 MSE: 0.8679 RMSE: 0.9316 | Val loss: 0.0799 R2: 0.2224 MSE: 0.0799 RMSE: 0.2827
Epoch 7/50 | Train loss: 0.7506 R2: 0.2824 MSE: 0.8308 RMSE: 0.9115 | Val loss: 0.0884 R2: 0.1395 MSE: 0.0884 RMSE: 0.2974
Epoch 8/50 | Train loss: 0.7183 R2: 0.3134 MSE: 0.7949 RMSE: 0.8916 | Val loss: 0.0878 R2: 0.1456 MSE: 0.0878 RMSE:

In [None]:
def test(model, test_loader, device):
    test_predictions = []
    test_actuals = []
    model.eval()
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            y_hat = model(X)
            test_predictions.extend(y_hat.cpu().detach().numpy())
            test_actuals.extend(y.cpu().numpy())
    test_r2 = r2_score(test_actuals, test_predictions)
    test_mse = mean_squared_error(test_actuals, test_predictions)
    test_rmse = mean_squared_error(test_actuals, test_predictions, squared=False)
    print(f'Test R2: {test_r2:.4f} MSE: {test_mse:.4f} RMSE: {test_rmse:.4f}')

# Report R2 / MSE / RMSE of the trained model on the held-out test loader.
test(model, test_loader, device)

Test R2: -1.0636 MSE: 0.4916 RMSE: 0.7012
