<a href="https://colab.research.google.com/github/alexeyphilippov/NN_Grid_search/blob/master/earthquake_VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')

# !cp gdrive/'My Drive'/Python_for_colab/quake/wavelet_img/both.tar .
# !tar -xf both.tar

# !cp gdrive/'My Drive'/Python_for_colab/quake/wavelet_img/image_map.tar .
# !tar -xf image_map.tar

In [0]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle
import matplotlib.image as mpimg
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler


In [0]:
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7fb0cc3d14b0>

In [0]:
train_capacity = 8040
test_capacity = 1400
root_for_imgs = 'Users/aleksejfilippov/Desktop/Python_projects/NN_for_Tanurkov/wavelet_img/both'
root_for_csv = 'Users/aleksejfilippov/Desktop/Python_projects/NN_for_Tanurkov/wavelet_img/image_map.csv'
class EarthQuakeDataset(Dataset):

    def __init__(self, train:bool, both_path = None, transform=None):
        """
        Args:
            pos_dir (string): Directory with all the images with earthquakes.
            neg_dir (string): Directory with all the images without earthquakes.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.transform = transform
          
        self.map_ = pd.read_csv(root_for_csv, sep = '±')
                
        if train:
            self.img_paths = self.map_.path[:train_capacity]
            self.labels = self.map_.label[:train_capacity]
        elif train != True:
            self.img_paths = self.map_.path[train_capacity:train_capacity+test_capacity].tolist()
            self.labels = self.map_.label[train_capacity:train_capacity+test_capacity].tolist()
        else:
            raise Exception('Parameter train must be set bull')
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        
        image = mpimg.imread(os.path.join(root_for_imgs, self.img_paths[idx]))[:-3, :-3]
        label = self.labels[idx]

        return image, label

In [0]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,))])
train_set = EarthQuakeDataset(train = True, transform = transform)
test_set = EarthQuakeDataset(train = False, transform = transform)




In [0]:
#DataLoader takes in a dataset and a sampler for loading (num_workers deals with system level memory) 
def get_train_loader(batch_size):
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, num_workers=2)
    return(train_loader)

In [0]:
#Test loader has constant batch sizes, so we can define it directly
test_loader = torch.utils.data.DataLoader(test_set, batch_size=20, num_workers=2)

In [0]:
import torch.optim as optim

def createLossAndOptimizer(net, learning_rate=0.001):
    
    #Loss function
    loss = torch.nn.BCELoss()
    
    #Optimizer
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    return(loss, optimizer)

In [0]:
from torch.autograd import Variable
import torch.nn.functional as F
class VGG(torch.nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        # 80x80 
        self.conv1_1 = torch.nn.Conv2d(1,64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = torch.nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 40x40
        self.conv2_1 = torch.nn.Conv2d(64,128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = torch.nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 20x20
        self.conv3_1 = torch.nn.Conv2d(128,256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = torch.nn.Conv2d(256,256, kernel_size=3, stride=1, padding=1)
        self.pool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 10x10
        self.conv4_1 = torch.nn.Conv2d(256,512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = torch.nn.Conv2d(512,512, kernel_size=3, stride=1, padding=1)
        self.pool4 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 5x5
        self.fc1 = torch.nn.Linear(5 * 5 * 512, 5 * 5 * 512)
        self.fc2 = torch.nn.Linear(5 * 5 * 512, 1)
        
        self.softmax = torch.nn.Softmax()
    def forward(self, x):

        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.pool1(x)

        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.pool2(x)

        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.pool3(x)

        x = self.conv4_1(x)
        x = self.conv4_2(x)
        x = self.pool4(x)

        x = x.view(-1, 5 * 5 * 512)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.softmax(x)

        return x

In [0]:
from torch.autograd import Variable
import torch.nn.functional as F

class SimpleCNN(torch.nn.Module):
        
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        # 80x80 
        self.conv1_1 = torch.nn.Conv2d(1,64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = torch.nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 40x40
        self.conv2_1 = torch.nn.Conv2d(64,128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = torch.nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 20x20
        self.fc1 = torch.nn.Linear(128 * 20 * 20, 128)        
        self.dropout1 = torch.nn.Dropout(p = 0.6)
        self.fc2 = torch.nn.Linear(128, 2)
        
        self.softmax = torch.nn.Softmax()
        
    def forward(self, x):
        
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.pool1(x)

        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.pool2(x)
        
        x = x.view(-1, 128 * 20 * 20)
        
        x = F.relu(self.fc1(x))
        
        x = self.fc2(self.dropout1(x))
        
        x = self.softmax(x)
        return(x)

In [0]:
def ohe(inp:torch.tensor):
    x = []
    for v in inp:
        if v == 1:
            x.append([1,0])
        elif v==0:
            x.append([0,1])
    return torch.tensor(x)

In [0]:
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    train_loader = get_train_loader(batch_size)
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    test_losses = []
    train_losses = []
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader):
            
            #Get inputs
            inputs, labels = data
            labels = ohe(labels)
            inputs = inputs.view((inputs.shape[0],1, inputs.shape[2], inputs.shape[2]))
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs), Variable(labels.type(torch.FloatTensor))
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            inputs = inputs.to(device)
            outputs = net(inputs)
            labels = labels.to(device)
            outputs = outputs.to(device)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data.item()
            total_train_loss += loss_size.data.item()
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        train_losses.append(total_train_loss/len(train_loader))
        #At the end of the epoch, do a pass on the validation set
        total_test_loss = 0
        for inputs, labels in test_loader:
            
            labels = ohe(labels)
            #Wrap tensors in Variables
            inputs = inputs.view((inputs.shape[0],1, inputs.shape[2], inputs.shape[2]))
            inputs, labels = Variable(inputs), Variable(labels.type(torch.FloatTensor))
            
            #Forward pass
            inputs = inputs.to(device)
            labels = labels.to(device)
            test_outputs = net(inputs)
            test_outputs = test_outputs.to(device)
            test_loss_size = loss(test_outputs, labels)
            total_test_loss += test_loss_size.data.item()
            
        print("Test loss = {:.2f}".format(total_test_loss / len(test_loader)))
        test_losses.append(total_test_loss / len(test_loader))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))
    plt.plot(range(len(train_losses)), train_losses)
    plt.plot(range(len(train_losses)), test_losses)
    plt.show()
    return train_losses, test_losses, optimizer

In [13]:
cnn = SimpleCNN()
cnn.to(device)
tr, te ,optimis = trainNet(cnn, batch_size=80, n_epochs=150, learning_rate=0.0001)

===== HYPERPARAMETERS =====
batch_size= 80
epochs= 150
learning_rate= 0.0001




Epoch 1, 10% 	 train_loss: 0.72 took: 67.27s
Epoch 1, 21% 	 train_loss: 0.72 took: 65.49s
Epoch 1, 32% 	 train_loss: 0.69 took: 65.61s
Epoch 1, 43% 	 train_loss: 0.65 took: 65.44s
Epoch 1, 54% 	 train_loss: 0.61 took: 65.08s
Epoch 1, 65% 	 train_loss: 0.63 took: 65.28s
Epoch 1, 76% 	 train_loss: 0.60 took: 65.22s
Epoch 1, 87% 	 train_loss: 0.61 took: 65.55s
Epoch 1, 98% 	 train_loss: 0.57 took: 65.65s
Test loss = 0.51
Epoch 2, 10% 	 train_loss: 0.55 took: 66.40s
Epoch 2, 21% 	 train_loss: 0.56 took: 65.26s
Epoch 2, 32% 	 train_loss: 0.55 took: 65.53s
Epoch 2, 43% 	 train_loss: 0.51 took: 65.65s
Epoch 2, 54% 	 train_loss: 0.50 took: 65.38s
Epoch 2, 65% 	 train_loss: 0.52 took: 65.46s
Epoch 2, 76% 	 train_loss: 0.52 took: 65.44s
Epoch 2, 87% 	 train_loss: 0.52 took: 70.57s
Epoch 2, 98% 	 train_loss: 0.49 took: 70.75s
Test loss = 0.44
Epoch 3, 10% 	 train_loss: 0.47 took: 66.42s
Epoch 3, 21% 	 train_loss: 0.48 took: 65.27s
Epoch 3, 32% 	 train_loss: 0.47 took: 65.96s
Epoch 3, 43% 	 train_

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, b

KeyboardInterrupt: ignored

In [0]:
# vgg = VGG()
# vgg.to(device)
# tr, te ,optimis = trainNet(vgg, batch_size=80, n_epochs=150, learning_rate=0.0001)

In [0]:
model_save_name = 'cnn.pt'
path = F"/content/gdrive/My Drive/Python_for_colab/quake/models/{model_save_name}" 
torch.save(cnn.state_dict(), path)