# U-Net Training

In [1]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Move to your current working directory
%cd drive/MyDrive/image_processing

Mounted at /content/drive
/content/drive/MyDrive/image_processing


In [2]:
import torch
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print('Device', device)

Device cuda


In [3]:
import os
import pandas as pd
import numpy as np
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as T
import torch.optim as optim
import matplotlib.pyplot as plt
from PIL import Image
from torch.utils.data import Dataset, DataLoader

## Load Images

In [4]:
class CustomDataset(Dataset):
  def __init__(self, image_dir, transform):
    super().__init__()
    self.image_dir = image_dir
    self.transform = transform
    self.images = os.listdir(self.image_dir)

  def __len__(self):
    return len(self.images)

  def __getitem__(self, i):
    image_path = os.path.join(self.image_dir, self.images[i])
    image = Image.open(image_path).convert('L')
    image = self.transform(image)
    label = int(self.images[i][1])
    return (image, label)

In [5]:
training_data = CustomDataset('img_span12/training', T.Compose([T.Resize((224, 224)), T.ToTensor()]))
testing_data = CustomDataset('img_span12/testing', T.Compose([T.Resize((224, 224)), T.ToTensor()]))

In [6]:
print('training images:', len(training_data))
print('training images:', len(testing_data))

training images: 7746
training images: 2350


In [7]:
print(training_data[0])
print(testing_data[0])

(tensor([[[0.0000, 0.7059, 0.4902,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000]]]), 0)
(tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.7059, 0.4902,  ..., 0.2118, 0.3020, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.4196, 0.6039, 0.0000],
         [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000]]]), 0)


## Build U-Net Model

In [8]:
class UNetModel(nn.Module):
  def __init__(self):
    super().__init__()

    def conv_block(in_channels, out_channels):
      return nn.Sequential(
          nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
          nn.BatchNorm2d(out_channels),
          nn.ReLU(inplace=True),
          nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
          nn.BatchNorm2d(out_channels),
          nn.ReLU(inplace=True)
      )
    self.encoder1 = conv_block(1, 64)
    self.encoder2 = conv_block(64, 128)
    self.encoder3 = conv_block(128, 256)
    self.encoder4 = conv_block(256, 512)

    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.middle = conv_block(512, 1024)

    self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
    self.decoder4 = conv_block(1024, 512)
    self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
    self.decoder3 = conv_block(512, 256)
    self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
    self.decoder2 = conv_block(256, 128)
    self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
    self.decoder1 = conv_block(128, 64)

    self.final_conv = nn.Conv2d(64, 10, kernel_size=1)
    self.dropout = nn.Dropout(0.5)
    self.fc1 = nn.Linear(200704, 10)
    self.fc2 = nn.Linear(200704, 2)

  def forward(self, x):
    enc1 = self.encoder1(x)
    enc2 = self.encoder2(self.pool(enc1))
    enc3 = self.encoder3(self.pool(enc2))
    enc4 = self.encoder4(self.pool(enc3))

    middle = self.middle(self.pool(enc4))

    dec4 = self.upconv4(middle)
    dec4 = torch.cat((enc4, dec4), dim=1)
    dec4 = self.decoder4(dec4)
    dec3 = self.upconv3(dec4)
    dec3 = torch.cat((enc3, dec3), dim=1)
    dec3 = self.decoder3(dec3)
    dec2 = self.upconv2(dec3)
    dec2 = torch.cat((enc2, dec2), dim=1)
    dec2 = self.decoder2(dec2)
    dec1 = self.upconv1(dec2)
    dec1 = torch.cat((enc1, dec1), dim=1)
    dec1 = self.decoder1(dec1)

    final_output = self.final_conv(dec1)

    # Flatten the middle output for the fully connected layers
    middle_flat = torch.flatten(middle, 1)
    middle_flat = self.dropout(middle_flat)
    out1 = self.fc1(middle_flat)
    out2 = self.fc2(middle_flat)
    return out1, out2

In [9]:
model = UNetModel()
print(model)

model = model.cuda()

UNetModel(
  (encoder1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (encoder2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (encoder3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, aff

## Training

In [10]:
# Constants for model training process
BATCH_SIZE = 128
NUM_EPOCHS = 10
PRINT_EVERY = 10

In [11]:
train_loader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)

In [12]:
next(iter(train_loader))

[tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.4902, 0.7059, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.4902, 0.7059, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.4902, 0.7059, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000],
           [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000],
           [0.0000, 0.7059, 0.4902,  ..., 0.4902, 0.7059, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000

In [13]:
# Define loss function and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [14]:
for epoch in range(NUM_EPOCHS):
  training_acc = 0
  total = 0
  for counter, (x, y) in enumerate(train_loader):
    model.train()
    x, y = x.to(device), y.to(device)
    _, scores = model(x)
    loss = loss_function(scores, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    training_acc += scores.max(1)[1].eq(y).sum().item()
    total += y.size(0)
    if counter % PRINT_EVERY == 0:
      print(f'Epoch[{epoch+1}], Batch[{counter}], Loss: {loss.item()}, Training acc: {training_acc/total}')
  print(f'Epoch[{epoch+1}] completed. Training acc: {training_acc/total}')

Epoch[1], Batch[0], Loss: 0.8423539400100708, Training acc: 0.453125
Epoch[1], Batch[10], Loss: 5.606042861938477, Training acc: 0.5113636363636364
Epoch[1], Batch[20], Loss: 5.078327178955078, Training acc: 0.5145089285714286
Epoch[1], Batch[30], Loss: 3.4083056449890137, Training acc: 0.5158770161290323
Epoch[1], Batch[40], Loss: 2.2339415550231934, Training acc: 0.510670731707317
Epoch[1], Batch[50], Loss: 2.825643539428711, Training acc: 0.5065870098039216
Epoch[1], Batch[60], Loss: 2.997227430343628, Training acc: 0.513426284533953
Epoch[1] completed. Training acc: 0.513426284533953
Epoch[2], Batch[0], Loss: 2.2904162406921387, Training acc: 0.46875
Epoch[2], Batch[10], Loss: 1.9679107666015625, Training acc: 0.5007102272727273
Epoch[2], Batch[20], Loss: 2.4872095584869385, Training acc: 0.4929315476190476
Epoch[2], Batch[30], Loss: 1.7026805877685547, Training acc: 0.4972278225806452
Epoch[2], Batch[40], Loss: 0.8819288015365601, Training acc: 0.49028201219512196
Epoch[2], Batch[

## Output Result

In [None]:
class OutputDataset(Dataset):
  def __init__(self, image_dir, transform):
    super().__init__()
    self.image_dir = image_dir
    self.transform = transform
    self.images = os.listdir(self.image_dir)

  def __len__(self):
    return len(self.images)

  def __getitem__(self, i):
    image_path = os.path.join(self.image_dir, self.images[i])
    image = Image.open(image_path).convert('L')
    image = self.transform(image)
    label = int(self.images[i][1])
    stock = self.images[i].split('_')[1]
    date = self.images[i].split('_')[2].replace('.png', '')
    return (image, label, stock, date)

In [None]:
training_data = OutputDataset('img_span12/training', T.Compose([T.Resize((224, 224)), T.ToTensor()]))
testing_data = OutputDataset('img_span12/testing', T.Compose([T.Resize((224, 224)), T.ToTensor()]))

In [17]:
train_loader = DataLoader(training_data, batch_size=32, shuffle=False)
test_loader = DataLoader(testing_data, batch_size=32, shuffle=False)

In [18]:
def extract_vectors(model, dataloader):
  model.eval()
  all_records = []
  total_correct = 0
  total_samples = 0

  with torch.no_grad():
    for x, y, stocks, dates in dataloader:
      x, y = x.to(device), y.to(device)
      outputs, scores = model(x)
      outputs = outputs.cpu().numpy()

      predictions = scores.max(1)[1]
      total_correct += predictions.eq(y).sum().item()
      total_samples += y.size(0)

      for i in range(len(dates)):
        record = {
            'stock': stocks[i],
            'date': dates[i],
            'vector': ','.join(map(str, outputs[i])),
            'label': y[i].item()
        }
        all_records.append(record)
    print('Acc:', total_correct/total_samples)
  return all_records

In [19]:
# training datasets
records = extract_vectors(model, train_loader)

df = pd.DataFrame(records)
df.sort_values(by=['stock', 'date'])
print(df)

# Save the DataFrame to a CSV file
df.to_csv('output/UNet_output_vectors_training.csv', index=True)

Acc: 0.530596436870643
     stock        date                                             vector  \
0     2912  2017-02-10  0.0040679267,-0.00562575,0.00015535986,0.00173...   
1     2912  2017-03-02  0.0034180658,-0.004421537,0.0013091173,0.00111...   
2     2912  2017-03-20  0.0023538657,-0.0060548135,0.0021280174,0.0016...   
3     2912  2017-04-07  0.001751636,-0.0055192774,0.00029996253,5.1394...   
4     2912  2017-04-25  0.0010175621,-0.0065764887,-0.00026208215,-0.0...   
...    ...         ...                                                ...   
7741  1303  2018-05-24  0.0010178457,-0.00555434,0.0017703191,0.000347...   
7742  1303  2018-06-11  0.0022953446,-0.0051463256,0.002837468,0.00059...   
7743  1303  2018-06-28  0.0013173671,-0.004885475,0.0014933206,0.00092...   
7744  1303  2018-07-16  -0.000334862,-0.0051391097,0.0009242861,0.0004...   
7745  1303  2018-08-01  0.0017744921,-0.004698321,0.0012983327,0.00067...   

      label  
0         0  
1         1  
2         

In [20]:
# testing datasets
records = extract_vectors(model, test_loader)

df = pd.DataFrame(records)
df.sort_values(by=['stock', 'date'])
print(df)

# Save the DataFrame to a CSV file
df.to_csv('output/UNet_output_vectors_testing.csv', index=True)

Acc: 0.5093617021276595
     stock        date                                             vector  \
0     2615  2022-01-10  0.0020350954,-0.0053087,0.0013109308,-5.084907...   
1     2615  2022-01-26  0.0013939823,-0.0053972863,0.00015613076,0.001...   
2     2615  2022-02-22  0.001311478,-0.004333101,0.0006387511,0.000852...   
3     2615  2022-03-11  0.0025455626,-0.004579952,0.0024768005,-0.0003...   
4     2615  2022-03-29  0.0024324942,-0.0053463234,0.0014761207,0.0001...   
...    ...         ...                                                ...   
2345  1402  2024-04-09  0.0012798586,-0.0053905686,-0.00045118504,0.00...   
2346  1402  2024-04-25  -0.003468797,-0.006193837,0.009829329,0.006602...   
2347  1402  2024-05-14  0.0014957535,-0.0037313546,0.0012079848,0.0009...   
2348  1402  2024-05-30  0.0011740539,-0.004813588,0.003112194,0.000392...   
2349  1402  2024-06-18  0.001144834,-0.0050929664,0.0017370739,-2.3618...   

      label  
0         0  
1         1  
2        