In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
import numpy as np 
import pandas as pd

In [3]:
import sklearn as sk

In [5]:
from sklearn.model_selection import train_test_split

In [8]:
sk.model_selection.check_cv

<function sklearn.model_selection._split.check_cv(cv=5, y=None, *, classifier=False)>

# 准备训练数据

In [10]:
data_feature = pd.read_csv('D:/data/crop_ai/ILLINOIS_data_feature_expanded_CORN.csv')
data_label = pd.read_csv('D:/data/crop_ai/ILLINOIS_data_label_expanded_CORN.csv')

print('Data Feature Shape:',data_feature.shape) # Verify Shape
print('Data Label Shape:',data_label.shape) # Verify Shape

train_X = data_feature
train_y = data_label.T

train_X = np.nan_to_num(train_X)
train_y = np.nan_to_num(train_y)

print(train_X.shape)
print(train_y.shape)

x = np.asarray(train_X, dtype=np.float32)
y = np.asarray(train_y).flatten()

# Training/Validation split 67%, 33% split
data_feature, X_test, y_train, y_test = sk.model_selection.train_test_split(x, y, test_size=0.33, random_state=42)
print(X_test.shape)
print(data_label.shape)
print(data_feature.shape)

Data Feature Shape: (924, 87312)
Data Label Shape: (924, 1)
(924, 87312)
(1, 924)
(305, 87312)
(924, 1)
(619, 87312)


In [15]:
test1 = pd.read_csv('D:/data/crop_ai/ILLINOIS_data_feature_expanded_CORN.csv')
test1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,87302,87303,87304,87305,87306,87307,87308,87309,87310,87311
0,1893.571429,830.095238,-640.714286,-1203.380952,-1524.333333,-1938.095238,-2273.190476,-2129.52381,-2538.761905,-2763.904762,...,-32738.238095,-32738.095238,-32737.285714,-32736.619048,-32736.619048,-32736.619048,-32736.285714,-32735.714286,-32735.714286,-32735.714286
1,5037.714286,2290.571429,1840.238095,1567.809524,1436.047619,1306.714286,1322.857143,2277.904762,1996.952381,1959.380952,...,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0,-32765.0
2,4286.238095,2491.809524,351.380952,-1746.095238,-2871.238095,-3344.0,-3540.0,-3955.142857,-3930.571429,-3837.619048,...,-32635.904762,-32586.142857,-32542.952381,-32506.238095,-32456.285714,-32409.428571,-32335.714286,-32175.238095,-31976.333333,-31766.0
3,-945.047619,-2167.952381,-3301.095238,-4338.190476,-5225.333333,-5914.142857,-6538.52381,-7752.380952,-8355.0,-8738.428571,...,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32758.52381,-32757.904762
4,3246.904762,2738.380952,1847.52381,1301.380952,1021.714286,849.714286,867.857143,663.238095,582.809524,721.095238,...,-29825.904762,-29741.619048,-29664.619048,-29592.47619,-29540.761905,-29538.761905,-29533.952381,-29531.761905,-29531.714286,-29531.571429


## reshape 训练数据

In [11]:
def dataReshape(dataIn,debug=True,runOnce=False):
  """Takes the 1D feature array and reshapes to 270x16"""
  print(dataIn.shape)

  dataOut = []
  for idx in range(len(dataIn)):
    dataTemp = dataIn[idx].reshape(5457, 16) #(216,15) 3240
    if debug: print(dataTemp.shape)
    dataOut.append(dataTemp)

    if runOnce: return -1

  return np.array(dataOut)

data_feature_rs = dataReshape(data_feature,debug=False,runOnce=False)
print(data_feature_rs.shape)

#Reshape test
X_test = dataReshape(X_test,debug=False,runOnce=False)
print(X_test.shape)

(619, 87312)
(619, 5457, 16)
(305, 87312)
(305, 5457, 16)


## 转化为 torch 数据集，设置训练超参数

In [12]:
from torch.utils.data import DataLoader, TensorDataset

# 网络参数
BATCH_SIZE = 32
NUM_EPOCHS = 200
SHUFFLE_BUFFER_SIZE = 64

# 数据标准化
scaler = sk.preprocessing.StandardScaler()
data_feature_norm = scaler.fit_transform(data_feature_rs)
X_test_norm = scaler.transform(X_test)

# 转换为 PyTorch 张量
data_feature_norm = torch.tensor(data_feature_norm, dtype=torch.float32)
y_train = torch.tensor(y_train.T, dtype=torch.float32)
X_test_norm = torch.tensor(X_test_norm, dtype=torch.float32)
y_test = torch.tensor(y_test.T, dtype=torch.float32)

# 创建 PyTorch 数据集
train_dataset = TensorDataset(data_feature_norm, y_train)
test_dataset = TensorDataset(X_test_norm, y_test)

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 数据集大小
DATASET_SIZE = len(train_dataset)

# 打印数据加载器信息
print(f"Train dataset size: {len(train_dataset)}, Test dataset size: {len(test_dataset)}")

ValueError: Found array with dim 3. StandardScaler expected <= 2.

In [16]:
data_feature_rs.shape

(619, 5457, 16)

In [None]:
# 假设数据形状为 (n_samples, n_timesteps, n_features)
n_samples, n_timesteps, n_features = data_feature_rs.shape

# 初始化 StandardScaler
scaler = StandardScaler()

# 将数据 reshape 为 (n_samples * n_timesteps, n_features)
data_reshaped = data_feature_rs.reshape(-1, n_features)

# 标准化
data_scaled = scaler.fit_transform(data_reshaped)

# 将数据 reshape 回原始形状
data_feature_norm = data_scaled.reshape(n_samples, n_timesteps, n_features)

# simple

In [None]:
class SimpleResNetModel(nn.Module):
    def __init__(self):
        super(SimpleResNetModel, self).__init__()
        self.input_channels = 16

        # First Pathway
        self.cnn1 = nn.Conv1d(16, 80, kernel_size=2)
        self.dense1 = nn.Linear(80, 64)
        self.pool1 = nn.MaxPool1d(kernel_size=128)

        # Second Pathway
        self.cnn2 = nn.Conv1d(16, 64, kernel_size=2)
        self.dense2 = nn.Linear(64, 64)
        self.pool2 = nn.MaxPool1d(kernel_size=128)

        # Third Pathway
        self.cnn3 = nn.Conv1d(16, 32, kernel_size=2)
        self.dense3 = nn.Linear(32, 64)
        self.pool3 = nn.MaxPool1d(kernel_size=128)

        # Fourth Pathway
        self.cnn4 = nn.Conv1d(16, 16, kernel_size=2)
        self.dense4 = nn.Linear(16, 64)
        self.pool4 = nn.MaxPool1d(kernel_size=128)

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 4, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        # Reshape input to match PyTorch's (batch_size, channels, sequence_length)
        x = x.permute(0, 2, 1)

        # First pathway
        x1 = F.relu(self.cnn1(x))
        x1 = x1.permute(0, 2, 1)  # Switch dimensions for Linear layer
        x1 = F.relu(self.dense1(x1))
        x1 = x1.permute(0, 2, 1)  # Switch back for pooling
        x1 = self.pool1(x1)

        # Second pathway
        x2 = F.relu(self.cnn2(x))
        x2 = x2.permute(0, 2, 1)
        x2 = F.relu(self.dense2(x2))
        x2 = x2.permute(0, 2, 1)
        x2 = self.pool2(x2)

        # Third pathway
        x3 = F.relu(self.cnn3(x))
        x3 = x3.permute(0, 2, 1)
        x3 = F.relu(self.dense3(x3))
        x3 = x3.permute(0, 2, 1)
        x3 = self.pool3(x3)

        # Fourth pathway
        x4 = F.relu(self.cnn4(x))
        x4 = x4.permute(0, 2, 1)
        x4 = F.relu(self.dense4(x4))
        x4 = x4.permute(0, 2, 1)
        x4 = self.pool4(x4)

        # Concatenate along the channel dimension
        x = torch.cat((x1, x2, x3, x4), dim=1)

        # Flatten and pass through fully connected layers
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model
model = SimpleResNetModel()
print(model)

# resnet

In [None]:
class ResidualBlock(nn.Module):
    def __init__(self, filters, kernel_size=2):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(filters, filters, kernel_size=kernel_size, padding='same')
        self.bn1 = nn.BatchNorm1d(filters)
        self.conv2 = nn.Conv1d(filters, filters, kernel_size=kernel_size, padding='same')
        self.bn2 = nn.BatchNorm1d(filters)
        self.relu = nn.ReLU()

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        return self.relu(out)

class ResNetModel(nn.Module):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.input_channels = 16

        # First ResNet Pathway
        self.cnn1 = nn.Conv1d(16, 80, kernel_size=2, padding='same')
        self.res1 = ResidualBlock(80)
        self.pool1 = nn.MaxPool1d(kernel_size=128)

        # Second ResNet Pathway
        self.cnn2 = nn.Conv1d(16, 64, kernel_size=2, padding='same')
        self.res2 = ResidualBlock(64)
        self.pool2 = nn.MaxPool1d(kernel_size=128)

        # Third ResNet Pathway
        self.cnn3 = nn.Conv1d(16, 32, kernel_size=2, padding='same')
        self.res3 = ResidualBlock(32)
        self.pool3 = nn.MaxPool1d(kernel_size=128)

        # Fourth ResNet Pathway
        self.cnn4 = nn.Conv1d(16, 16, kernel_size=2, padding='same')
        self.res4 = ResidualBlock(16)
        self.pool4 = nn.MaxPool1d(kernel_size=128)

        # Fully connected layers
        self.fc1 = nn.Linear(80 + 64 + 32 + 16, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        # Reshape input to match PyTorch's (batch_size, channels, sequence_length)
        x = x.permute(0, 2, 1)

        # First pathway
        x1 = F.relu(self.cnn1(x))
        x1 = self.res1(x1)
        x1 = self.pool1(x1)

        # Second pathway
        x2 = F.relu(self.cnn2(x))
        x2 = self.res2(x2)
        x2 = self.pool2(x2)

        # Third pathway
        x3 = F.relu(self.cnn3(x))
        x3 = self.res3(x3)
        x3 = self.pool3(x3)

        # Fourth pathway
        x4 = F.relu(self.cnn4(x))
        x4 = self.res4(x4)
        x4 = self.pool4(x4)

        # Concatenate along the channel dimension
        x = torch.cat((x1, x2, x3, x4), dim=1)

        # Flatten and pass through fully connected layers
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model
model = ResNetModel()
print(model)