In [None]:
from sklearn.preprocessing import LabelEncoder
#Importing libraries and loading dataset
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from tqdm import tqdm
import torch
import torch.nn as nn

In [None]:
# Read the training CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/df_train_watering.csv")

In [None]:
# Column labels of the training frame, in their on-disk order.
dataset_columns_df = list(df.columns)
dataset_columns_df

['Hour',
 'Minute',
 'Light',
 'Temp',
 'Humid',
 'Soil',
 'Light2',
 'Temp2',
 'Humid2',
 'Soil2',
 'Light3',
 'Temp3',
 'Humid3',
 'Soil3',
 'Light4',
 'Temp4',
 'Humid4',
 'Soil4']

In [None]:


# Merge the first two columns (Hour, Minute) into a single fractional-hour
# value, e.g. hour 13 and minute 30 become 13.5.
values_org = df.values

hours, minutes = values_org[:, 0], values_org[:, 1]
combined_time = hours + minutes / 60

In [None]:
# Feature matrix: the fractional-hour column first, followed by every column
# of the raw array from index 2 onwards (the Hour and Minute columns are dropped).
values = np.concatenate((combined_time[:, np.newaxis], values_org[:, 2:]), axis=1)
values, values.shape

In [None]:
# Rescale every column of the training matrix to the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

# Fit on the training matrix, then transform that same matrix.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(values)
scaled_dataset = scaler.transform(values)

In [None]:
# Build the test feature matrix.
# NOTE: this cell rebinds `hours`, `minutes` and `combined_time`, which held
# the training values until now.
df_test = pd.read_csv("/content/df_test_watering.csv")
values_org_test = df_test.values

hours, minutes = values_org_test[:, 0], values_org_test[:, 1]
combined_time = hours + minutes / 60

# Regroup the remaining columns in blocks of 4 and repeat each block 4 times,
# so that every output row carries 16 sensor values.
data_of_test = np.repeat(
    values_org_test[:, 2:].reshape((-1, 1, 4)), 4, axis=1
).reshape((-1, 16))

values_test = np.column_stack((combined_time, data_of_test))

In [None]:
# Apply the already-fitted scaler to the test matrix (transform only, no re-fit).
test_scaled_dataset = scaler.transform(X=values_test)

In [None]:
import numpy as np
from scipy.stats import spearmanr, kendalltau

class Graph:
    """Undirected weighted graph backed by a dense adjacency matrix.

    The matrix is a list of lists initialised to 0. Every operation that takes
    vertex indices silently ignores indices outside ``[0, num_vertices)``.
    """

    def __init__(self, num_vertices):
        self.num_vertices = num_vertices
        self.adj_matrix = [[0] * num_vertices for _ in range(num_vertices)]

    def _in_range(self, src, dest):
        """Return True when both indices address an existing vertex."""
        n = self.num_vertices
        return 0 <= src < n and 0 <= dest < n

    def add_edge(self, src, dest, weight=1):
        """Set the weight of edge ``src``-``dest`` in both directions."""
        if not self._in_range(src, dest):
            return
        # The graph is undirected, so the matrix is kept symmetric.
        self.adj_matrix[src][dest] = self.adj_matrix[dest][src] = weight

    def remove_edge(self, src, dest):
        """Reset edge ``src``-``dest`` to 0 in both directions."""
        if not self._in_range(src, dest):
            return
        self.adj_matrix[src][dest] = self.adj_matrix[dest][src] = 0

    def get_weight(self, src, dest):
        """Return the stored weight, or None when an index is out of range."""
        if not self._in_range(src, dest):
            return None
        return self.adj_matrix[src][dest]

    def get_adj_matrix(self):
        """Return the adjacency matrix as a float32 NumPy array."""
        return np.array(self.adj_matrix, dtype=np.float32)


def corr_sensor(sen1, sen2):
    """Return Kendall's tau between two series; the p-value is discarded."""
    tau, _p_value = kendalltau(sen1, sen2)
    return tau

def corr_station(station1, station2):
    """Average the per-column correlations of two stations.

    Columns 0 to 3 of each 2-D array are compared pairwise with
    ``corr_sensor`` and the arithmetic mean of the four results is returned.
    """
    per_column = [
        corr_sensor(station1[:, col], station2[:, col])
        for col in range(4)
    ]
    return sum(per_column) / len(per_column)

from itertools import combinations

# The four stations, numbered from 1.
elements = [1, 2, 3, 4]

# All 2-element combinations (unordered pairs) of the stations.
combinations_2 = list(combinations(elements, 2))

# Print the result.
print(combinations_2)


# Sensor columns of each station inside scaled_dataset. Column 0 is the time
# feature and station k (1-based) owns columns 4*k - 3 .. 4*k.
# The slices run front-to-back so that graph vertex i is station i + 1, which
# is the node order GCN._preprocessing produces when it views columns 1.. as a
# (4, 4) block. Slicing from the end (`[:, -4:]` first) reversed the vertex
# order relative to the node features fed to the GCN.
data_sensor1 = scaled_dataset[:, 1:5]
data_sensor2 = scaled_dataset[:, 5:9]
data_sensor3 = scaled_dataset[:, 9:13]
data_sensor4 = scaled_dataset[:, 13:17]

data_sensors = [data_sensor1, data_sensor2, data_sensor3, data_sensor4]

# Same pairs as the literal list used before:
# [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)].
set_of_sensors = combinations_2

num_vertices = 4
graph = Graph(num_vertices)

# Edge weight = mean Kendall correlation between the two stations' sensors.
for set_of_sensor in set_of_sensors:
    # Station numbers are 1-based; graph vertices are 0-based.
    index1, index2 = set_of_sensor[0] - 1, set_of_sensor[1] - 1

    corr_station_result = corr_station(data_sensors[index1], data_sensors[index2])

    graph.add_edge(index1, index2, corr_station_result)

adj_matrix_np = graph.get_adj_matrix()

In [None]:
def to_supervised(train, window_size=6):
  """Slice a 2-D series into (window, next-step target) pairs.

  Args:
    train: 2-D array indexed as ``train[row, column]``.
    window_size: number of consecutive rows in each input window.
      Defaults to 6, the value that used to be hard-coded.

  Returns:
    ``(X, Y)`` as two lists of equal length. ``X[j]`` holds rows
    ``j .. j + window_size - 1`` with all columns; ``Y[j]`` holds row
    ``j + window_size`` without its first column. Both lists are empty when
    ``train`` has no more than ``window_size`` rows.

  Raises:
    ValueError: if ``window_size`` is smaller than 1.
  """
  if window_size < 1:
    raise ValueError("window_size must be at least 1")

  target_rows = range(window_size, len(train))
  X = [train[i - window_size:i, :] for i in target_rows]
  # Column 0 is dropped from the target.
  Y = [train[i, 1:] for i in target_rows]
  return X, Y

In [None]:
# Window both splits and turn the returned lists into NumPy arrays.
X_train, Y_train = map(np.array, to_supervised(scaled_dataset))
X_test, Y_test = map(np.array, to_supervised(test_scaled_dataset))

# Report the shape of every array.
for _name, _array in (
    ('X_train', X_train),
    ('Y_train', Y_train),
    ('X_test', X_test),
    ('Y_test', Y_test),
):
    print(_name, _array.shape)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
import math

def scaled_dot_product(q, k, v, mask=None):
    """Scaled dot-product attention.

    Args:
        q, k, v: query, key and value tensors; the last two dimensions of
            ``q`` and ``k`` are used for the dot product.
        mask: optional tensor; positions where ``mask == 0`` receive a very
            large negative logit before the softmax.

    Returns:
        ``(values, attention)``: the attention-weighted values and the
        softmax weights over the last dimension.
    """
    scale = math.sqrt(q.shape[-1])
    scores = torch.matmul(q, k.transpose(-2, -1)) / scale
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -9e15)
    weights = F.softmax(scores, dim=-1)
    return torch.matmul(weights, v), weights

class MultiheadAttention(nn.Module):
    """Multi-head self-attention over a ``[Batch, SeqLen, input_dim]`` tensor.

    Args:
        input_dim: size of the last dimension of the input.
        embed_dim: total width of the projected queries/keys/values; must be
            divisible by ``num_heads``.
        num_heads: number of attention heads.
    """

    def __init__(self, input_dim, embed_dim, num_heads):
        super().__init__()
        assert embed_dim % num_heads == 0, "Embedding dimension must be 0 modulo number of heads."
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        # Stack all weight matrices 1...h together for efficiency
        # Note that in many implementations you see "bias=False" which is optional
        self.qkv_proj = nn.Linear(input_dim, 3*embed_dim)
        self.o_proj = nn.Linear(embed_dim, embed_dim)

        self._reset_parameters()

    def _reset_parameters(self):
        # Original Transformer initialization, see PyTorch documentation
        nn.init.xavier_uniform_(self.qkv_proj.weight)
        self.qkv_proj.bias.data.fill_(0)
        nn.init.xavier_uniform_(self.o_proj.weight)
        self.o_proj.bias.data.fill_(0)

    @staticmethod
    def _expand_mask(mask):
        """Bring a mask to 4 dimensions so it broadcasts over the attention logits.

        A 2-D mask gains leading batch and head axes, a 3-D mask gains a head
        axis at position 1, and a 4-D mask is returned unchanged.

        Raises:
            ValueError: if the mask has fewer than 2 dimensions.
        """
        if mask.ndim < 2:
            raise ValueError("Mask must be at least 2-dimensional with seq_length x seq_length")
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)
        while mask.ndim < 4:
            mask = mask.unsqueeze(0)
        return mask

    def forward(self, x, mask=None, return_attention=False, dot_att=False):
        """Apply self-attention to ``x``.

        Args:
            x: tensor of shape ``[Batch, SeqLen, input_dim]``.
            mask: optional attention mask with 2 to 4 dimensions; entries
                equal to 0 are masked out.
            return_attention: when True, also return the attention weights.
            dot_att: when True, the attention weights are applied to the raw
                input (split across heads) instead of the projected values,
                and the output projection is skipped. This requires
                ``input_dim`` to be divisible by ``num_heads``.

        Returns:
            The attended tensor, or ``(tensor, attention)`` when
            ``return_attention`` is True.
        """
        batch_size, seq_length, _ = x.size()
        if mask is not None:
            # The module-level helper `expand_mask` this used to call is not
            # defined in the notebook, so the expansion lives on the class.
            mask = self._expand_mask(mask)
        qkv = self.qkv_proj(x)

        # Separate Q, K, V from linear output
        qkv = qkv.reshape(batch_size, seq_length, self.num_heads, 3*self.head_dim)
        qkv = qkv.permute(0, 2, 1, 3) # [Batch, Head, SeqLen, Dims]
        q, k, v = qkv.chunk(3, dim=-1)

        if dot_att:
            # Use the un-projected input, split across heads, as the values.
            x_other = x.reshape(batch_size, seq_length, self.num_heads, -1)
            x_other = x_other.permute(0, 2, 1, 3)
            values, attention = scaled_dot_product(q, k, x_other, mask=mask)
        else:
            values, attention = scaled_dot_product(q, k, v, mask=mask)

        values = values.permute(0, 2, 1, 3) # [Batch, SeqLen, Head, Dims]
        values = values.reshape(batch_size, seq_length, -1)

        # The output projection is only applied on the regular (non dot_att) path.
        out = values if dot_att else self.o_proj(values)

        if return_attention:
            return out, attention
        return out

class GraphConvolutionLayer(nn.Module):
    """Single graph-convolution step: project, aggregate, normalise.

    Args:
        in_features: width of each node's input feature vector.
        out_features: width of each node's output feature vector.
    """

    def __init__(self, in_features, out_features):
        super(GraphConvolutionLayer, self).__init__()
        self.projection = nn.Linear(in_features, out_features)

    def forward(self, x, adj_matrix):
        """Aggregate projected node features over the graph.

        Args:
            x: node features, shape ``[Batch, Nodes, in_features]``.
            adj_matrix: batched adjacency matrices, shape
                ``[Batch, Nodes, Nodes]`` (``torch.bmm`` requires 3-D input).

        Returns:
            Tensor of shape ``[Batch, Nodes, out_features]``: for every node,
            the adjacency-weighted sum of the projected features divided by
            the sum of that node's adjacency row.
        """
        # Row sums of the adjacency matrix act as the normaliser.
        weight_sum = adj_matrix.sum(dim=-1, keepdim=True)
        # A row that sums to zero (e.g. an isolated node) would give 0/0 = NaN,
        # so divide by 1 there and leave the aggregate unnormalised.
        safe_weight_sum = torch.where(
            weight_sum == 0, torch.ones_like(weight_sum), weight_sum
        )
        node_feats = self.projection(x)
        node_feats = torch.bmm(adj_matrix, node_feats)
        return node_feats / safe_weight_sum

class GCN(nn.Module):
    """Two graph-convolution layers applied separately to every time step.

    The flat per-step feature vector is first reshaped into 4 nodes, each
    holding the shared first feature plus 4 features of its own.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Per-node width: an equal share of the non-time features plus the time feature.
        node_dim = (input_dim - 1) // 4 + 1
        self.gc1 = GraphConvolutionLayer(node_dim, hidden_dim)
        self.gc2 = GraphConvolutionLayer(hidden_dim, output_dim)

        self.output_dim = output_dim

    def forward(self, x_seq, adj_matrix):
        """Run both layers on each time step and flatten the node axis.

        Returns a tensor whose first two dimensions are batch and time and
        whose last dimension concatenates the outputs of all nodes.
        """
        node_seq = self._preprocessing(x_seq)
        per_step = [
            self._step(node_seq[:, t], adj_matrix)
            for t in range(node_seq.shape[1])
        ]
        stacked = torch.stack(per_step, dim=1)
        batch, steps = stacked.shape[:2]
        return stacked.view(batch, steps, -1)

    def _step(self, x, adj_matrix):
        """Apply gc1 -> ReLU -> dropout -> gc2 -> ReLU to one time step."""
        hidden = F.relu(self.gc1(x, adj_matrix))
        hidden = F.dropout(hidden, training=self.training)
        return F.relu(self.gc2(hidden, adj_matrix))

    def _preprocessing(self, x):
        """Reshape ``[b, t, c]`` input into per-node features ``[b, t, 4, 5]``."""
        b, t, c = x.shape
        time_feat = x[:, :, :1].view(b, t, 1, 1)
        node_feat = x[:, :, 1:].view(b, t, 4, 4)

        # Give every node a copy of the time feature, placed ahead of its own features.
        time_per_node = time_feat.repeat(1, 1, node_feat.shape[2], 1)
        return torch.cat((time_per_node, node_feat), dim=3)



class SqueezeExcitationBlock(nn.Module):
    """Channel-wise gating for ``[Batch, Channels, Length]`` feature maps.

    Args:
        in_channels: number of channels of the input.
        reduction_ratio: divisor used for the width of the bottleneck layer.
    """

    def __init__(self, in_channels, reduction_ratio=16):
        super().__init__()

        self.in_channels = in_channels
        self.reduction_ratio = reduction_ratio

        bottleneck = in_channels // reduction_ratio

        # Squeeze: average over the length axis, one value per channel.
        self.squeeze = nn.AdaptiveAvgPool1d(1)

        # Excitation: bottleneck MLP that ends in a sigmoid gate per channel.
        self.excitation = nn.Sequential(
            nn.Linear(in_channels, bottleneck),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, in_channels),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` with every channel multiplied by its learned gate."""
        batch_size, channels, _ = x.shape

        pooled = self.squeeze(x).view(batch_size, channels)
        gates = self.excitation(pooled).view(batch_size, channels, 1)

        return x * gates


class CNNBlock(nn.Module):
    """Two Conv1d layers followed by optional channel dropout, SE gating and a skip sum.

    Expects input of shape ``[Batch, in_channels, Length]`` (the layout
    ``nn.Conv1d`` uses).

    Args:
        in_channels: channels of the input.
        out_channels: channels produced by both convolutions.
        kernel_size, stride, padding: passed to both convolutions.
        use_batchnorm: apply ``BatchNorm1d`` after each convolution.
        use_se_block: gate the result with a ``SqueezeExcitationBlock``.
        use_residual: add the pre-gating activations back onto the output.
        dropout_prob: channel-dropout probability; dropout is skipped when
            this is not greater than 0.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, use_batchnorm=True, use_se_block=True, use_residual=True, dropout_prob=0.2):
        super(CNNBlock, self).__init__()

        # First Convolutional layer
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)

        # Second Convolutional layer
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)

        # Batch normalization (optional)
        self.use_batchnorm = use_batchnorm
        if use_batchnorm:
            self.batchnorm1 = nn.BatchNorm1d(out_channels)
            self.batchnorm2 = nn.BatchNorm1d(out_channels)

        # Activation function
        self.activation = nn.ReLU(inplace=True)

        self.use_se_block = use_se_block
        if use_se_block:
            self.se_block = SqueezeExcitationBlock(out_channels)

        self.dropout_prob = dropout_prob
        # The activations here are 3-D Conv1d outputs. nn.Dropout2d on 3-D
        # input emits a warning and is documented to change meaning in a
        # future release; nn.Dropout1d is the channel-wise dropout intended
        # for this layout. Fall back to Dropout2d on PyTorch versions that
        # predate Dropout1d.
        dropout_cls = getattr(nn, "Dropout1d", nn.Dropout2d)
        self.dropout = dropout_cls(p=dropout_prob)

        self.use_residual = use_residual

    def forward(self, x):
        """Return the block output, shape ``[Batch, out_channels, Length']``."""
        x = self.conv1(x)

        # Apply batch normalization if enabled
        if self.use_batchnorm:
            x = self.batchnorm1(x)

        x = self.activation(x)

        x = self.conv2(x)

        # Apply batch normalization if enabled
        if self.use_batchnorm:
            x = self.batchnorm2(x)

        x = self.activation(x)

        if self.dropout_prob > 0:
            x = self.dropout(x)

        # The skip branch is taken after the convolutions, so it carries the
        # un-gated activations rather than the block's original input.
        identity = x

        if self.use_se_block:
            x = self.se_block(x)

        if self.use_residual:
            x = x + identity

        return x

class GCNCNNSELSTMATTModel(nn.Module):
    """GCN -> CNN block -> LSTM -> multi-head attention -> two-layer head.

    Args:
        input_size: number of features per time step of the raw input.
        hidden_size: width used by every stage; must be divisible by 8, the
            number of attention heads.
        num_layers: number of stacked LSTM layers.
        drop_out: dropout passed to ``nn.LSTM``. The dropout in front of the
            last linear layer is fixed at 0.2 and does not use this value.
        seq_len: number of time steps in each input window. It sizes the
            first linear layer, which consumes the flattened sequence.
            Defaults to 6, the value that used to be hard-coded.
        output_size: number of values predicted per sample. Defaults to 16,
            the value that used to be hard-coded.
    """

    def __init__(self, input_size, hidden_size, num_layers, drop_out, seq_len=6, output_size=16):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.output_size = output_size

        self.gcn = GCN(input_size, hidden_size, hidden_size)
        # The GCN concatenates the outputs of its 4 nodes along the feature axis.
        self.cnn = CNNBlock(hidden_size*4, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=drop_out, batch_first=True)

        num_heads = 8
        self.multihead_attn = MultiheadAttention(hidden_size, hidden_size, num_heads)

        self.dropout = nn.Dropout(0.2)
        # The attention output is flattened over time before this layer.
        self.fc = nn.Linear(hidden_size*seq_len, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x, adjs):
        """Predict ``output_size`` values for every window in the batch.

        Args:
            x: input windows; the GCN stage unpacks it as
                ``[Batch, Time, Features]`` and ``Time`` must equal ``seq_len``.
            adjs: one adjacency matrix per sample, ``[Batch, Nodes, Nodes]``.

        Returns:
            Tensor of shape ``[Batch, output_size]``.
        """
        # Explicit zero initial hidden and cell states for the LSTM.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        x = self.gcn(x, adjs)

        # Conv1d wants channels before time; the LSTM wants time before channels.
        x = x.permute(0,2,1)
        x = self.cnn(x)
        x = x.permute(0,2,1)

        out, _ = self.lstm(x, (h0, c0))
        out = self.multihead_attn(out)

        out = self.fc(torch.flatten(out, 1))

        out = self.dropout(out)
        out = self.fc2(torch.relu(out))
        return out


import torch
from torch.utils.data import Dataset, DataLoader

# Custom Dataset class
class MyDataset(Dataset):
    """Dataset yielding ``(X[idx], y[idx], adjacency)`` triples.

    The same NumPy adjacency matrix is attached to every sample and is
    wrapped as a tensor on each access.
    """

    def __init__(self, X, y, adj_matrix):
        self.X, self.y, self.adj_matrix = X, y, adj_matrix

    def __len__(self):
        """Number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        adjacency = torch.from_numpy(self.adj_matrix)
        return features, target, adjacency




In [None]:

# Creating an instance of the GCN-CNN-SE-LSTM-attention model
input_size = X_train.shape[2]
hidden_size = 64
num_layers = 3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The class defined in this notebook is GCNCNNSELSTMATTModel. The shorter name
# CNNSELSTMATTModel is not defined anywhere, so using it raised NameError.
model = GCNCNNSELSTMATTModel(input_size, hidden_size, num_layers, drop_out=0.2)

# Loss functions. Training optimises the MSE; the L1 (MAE) loss object is
# created here but `criterion` does not use it.
mse_loss_fun = nn.MSELoss()
mae_loss = nn.L1Loss()


def criterion(x, y):
    """Return the mean-squared-error loss between ``x`` and ``y``."""
    return mse_loss_fun(x, y)

# Adam with its default hyper-parameters.
optimizer = torch.optim.Adam(model.parameters())

# Training tensors are created without a device move; the loop moves each batch.
X_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(Y_train).float()

# The test tensors are moved to `device` in one piece, together with one copy
# of the adjacency matrix per test sample.
X_test_tensor = torch.from_numpy(X_test).float().to(device)
y_test_tensor = torch.from_numpy(Y_test).float().to(device)
adj_matrix_test = (
    torch.from_numpy(adj_matrix_np)
    .unsqueeze(0)
    .repeat(X_test_tensor.shape[0], 1, 1)
    .to(device)
)

# Training dataset and shuffled mini-batch loader.
dataset = MyDataset(X_train_tensor, y_train_tensor, adj_matrix_np)

batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = model.to(device)

# Training schedule and best-checkpoint bookkeeping.
num_epochs = 50
best_valid_loss = float('inf')  # any first validation loss will be lower

for epoch in tqdm(range(num_epochs)):

    # Sum of the per-batch training losses of this epoch.
    running_loss = 0.0
    model.train()

    for inputs, labels, adjs in dataloader:
        # Move the current batch to the model's device.
        inputs = inputs.to(device)
        labels = labels.to(device)
        adjs = adjs.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs, adjs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation: a single forward pass over the whole test split with
    # gradients disabled.
    # NOTE(review): the test split doubles as the validation set that drives
    # checkpoint selection below.
    model.eval()

    with torch.no_grad():

            outputs = model(X_test_tensor, adj_matrix_test)
            # valid_loss stays a tensor here; .item() is not called on it.
            valid_loss = criterion(outputs, y_test_tensor)



    # Mean training loss per batch.
    epoch_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.6f}, valid_loss: {valid_loss:.6f}")

    # Keep the weights with the lowest validation loss seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        # Save the model checkpoint
        torch.save(model.state_dict(), 'best_model_checkpoint.pt')
        print("Best model checkpoint saved!")


print("Training completed!")

# Restore the weights from the checkpoint file written during training.
model.load_state_dict(torch.load("best_model_checkpoint.pt"))
print("Best model state dictionary loaded!")