In [1]:
# import necessary libraries
import os
import math
import pyautogui
import urllib.parse
import time
import pyperclip
import quopri
from bs4 import BeautifulSoup
from email import policy
from email.parser import BytesParser
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import math
import rembg
import pickle
import random
import warnings
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as T
from torchvision import models
from torch.utils.data import DataLoader, sampler, random_split
warnings.filterwarnings("ignore")

# 1. Data Collection

#### 1. Download the webpages

In [2]:
# simulate the operation
def auto_download(url,file_name):
    """Drive the browser through keyboard/mouse automation to save one page.

    The sequence is: switch tab, focus the address bar, paste `url`, load it,
    hold the mouse button at a fixed screen position for 30 seconds, then use
    the "save page" shortcut and paste `file_name` into the save dialog.

    Parameters:
    - url: address pasted into the browser's address bar.
    - file_name: text pasted into the save dialog as the file name.

    Returns: nothing; the only effects are GUI actions and clipboard writes.

    NOTE(review): every click uses absolute screen coordinates, so this
    presumably only works for one specific screen resolution / window
    layout -- confirm before running on another machine.
    """
    pyautogui.hotkey('ctrl', '2') # switch Google Chrome tabs
    time.sleep(0.5)

    pyautogui.click(207,55) # click the browser's address bar.
    time.sleep(0.5)

    pyautogui.hotkey('ctrl', 'a') # select all the content in address bar.
    pyperclip.copy(url) # put the url on the clipboard
    time.sleep(0.5)

    pyautogui.hotkey('ctrl', 'v') # paste the url
    time.sleep(0.5)

    pyautogui.hotkey('enter') # access the url
    time.sleep(3)

    # presumably (1910,1020) is the scrollbar's down arrow -- TODO confirm
    pyautogui.click(1910,1020)
    pyautogui.mouseDown() # scroll down the page to look through all guitars
    time.sleep(30) # keep the button pressed for 30 seconds
    pyautogui.mouseUp()# release the left mouse button

    pyautogui.hotkey('ctrl', 's') # save the page to local path
    time.sleep(2)

    pyperclip.copy(file_name) # put the file name on the clipboard
    pyautogui.hotkey('ctrl', 'v') # paste the file name
    time.sleep(2)

    pyautogui.hotkey('enter') # save the page with given name

In [3]:
# url for guitarcenter; it ends with "Nao=" so that download_pages_guitarcenter
# can append the record offset of each listing page
base_url = "https://www.guitarcenter.com/6-String-Acoustic-Guitars.gc?N=1076+18154&Ns=bM&pageName=subcategory-page&recsPerPage=96&profileCountryCode=US&profileCurrencyCode=USD&SPA=true&Nao="

# create this path in advance for downloading the webpages
# NOTE(review): auto_download only pastes a bare file name into the save dialog,
# so the dialog presumably has to point at this folder already -- confirm
path = 'guitar_pages'

if not os.path.exists(path):
    # If it does not exist, create it
    os.makedirs(path)

# download pages
def download_pages_guitarcenter(base_url, pages, step):
    """Save `pages` listing pages through the browser automation.

    Parameters:
    - base_url: listing url that ends right before the record offset.
    - pages: number of listing pages to save.
    - step: number of records per page; page k starts at offset k * step.

    Page k is saved under the name "guitar_<k>", with a 5 second pause
    after every page.
    """
    for page_number, offset in enumerate(range(0, pages * step, step)):
        page_url = base_url + str(offset)
        page_name = "guitar_" + str(page_number)
        auto_download(page_url, page_name)
        time.sleep(5)

# actual data (44 pages of 96 records each)
# download_pages_guitarcenter(base_url, 44, 96)

# use sample data for demonstration (a single page of 96 records)
download_pages_guitarcenter(base_url, 1, 96)

#### 2. Parse the webpages to read the url of guitar images

In [4]:
# parse the guitarcenter mhtml files
def parse_mhtml_guitarcenter(file_path):
    """Parse a saved guitarcenter MHTML page and return its product sections.

    Parameters:
    - file_path: path of the MHTML file to read.

    Returns: the list of <section> elements (one per product) found inside
    the product grid container.

    Raises:
    - ValueError: if the file has no text/html part, or the product grid
      container is missing from the HTML.
    """
    # Open the MHTML file in binary mode and parse it as a MIME message
    with open(file_path, 'rb') as file:
        msg = BytesParser(policy=policy.default).parse(file)

    # The first text/html part holds the page itself
    html_part = next(
        (part for part in msg.walk() if part.get_content_type() == 'text/html'),
        None,
    )
    if html_part is None:
        raise ValueError(f"no text/html part found in {file_path!r}")

    # Honour the charset declared by the part; fall back to utf-8 when the
    # header does not declare one (or declares one Python does not know)
    raw_html = html_part.get_payload(decode=True)
    charset = html_part.get_content_charset() or 'utf-8'
    try:
        decoded_html = raw_html.decode(charset, errors='replace')
    except LookupError:
        decoded_html = raw_html.decode('utf-8', errors='replace')

    # Use BeautifulSoup to parse the decoded HTML
    soup = BeautifulSoup(decoded_html, 'html.parser')

    # Get the div with the class of jsx-1611966181 flex flex-auto flex-wrap
    div = soup.find(class_="jsx-1611966181 flex flex-auto flex-wrap")
    if div is None:
        raise ValueError(f"product grid container not found in {file_path!r}")

    # get the item list from that class
    item_list = div.find_all("section",class_="plp-product-grid py-[19px] md:p-1.5 flex flex-none flex-col md:flex-row md:border-none border-b border-solid border-[#BBBBBB] w-full md:w-1/2 lg:w-1/3 xl:w-1/4")

    return item_list

In [5]:
# extract the information of item
def instru_info_guitarcenter(item_list):
    """Collect image url, title, location, price and condition per product.

    Parameters:
    - item_list: iterable of parsed product sections.

    Returns: a dict keyed by item id (the part of the image file name before
    the first "-"); each value is a dict with the keys 'image', 'title',
    'location', 'price' and 'condition'. 'location' is None when the
    section carries no store name.
    """
    item_dict = {}

    for item in item_list:
        # the second <img> inside the image container carries alt text and url
        image_box = item.find(class_ = "jsx-406435821 w-[264px] mt-5 md:mt-0")
        product_img = image_box.find_all("img")[1]
        title = product_img["alt"]
        imgurl = product_img["src"]

        # the id is the leading token of the image file name
        image_file = imgurl.rsplit("/", 1)[-1]
        item_id = image_file.partition("-")[0]

        price = item.find(class_ = "jsx-2420341498 sale-price gc-font-bold text-[#2d2d2d]").text

        # the seller location is optional
        store = item.find(class_ = "jsx-3430979785 store-name-text")
        location = store.text if store else None

        condition_text = item.find(class_ = "jsx-3430979785 gc-font-light mb-2 text-xs").text
        condition = condition_text.replace("Condition:","").strip()

        item_dict[item_id] = {
            'image': imgurl,
            'title': title,
            'location': location,
            'price': price,
            'condition': condition,
        }

    return item_dict

#### 3. Download the guitar images

In [6]:
# download the images
def image_download(info, path, timeout=30):
    """Download every image in `info` that has not been downloaded yet.

    Parameters:
    - info: dict mapping item id -> dict with at least the key 'image' (url).
      Entries that already carry the key 'image_status' are skipped.
    - path: folder the images are written to, as "<item id>.jpg".
    - timeout: seconds to wait for the server before `requests` raises;
      without a timeout a stalled connection would block forever.

    Returns: nothing. On an HTTP 200 response the image is written to disk and
    info[item id]['image_status'] is set to 'Success'. Other status codes leave
    the entry untouched so a later call tries again.

    Raises: whatever `requests.get` raises (connection errors, timeouts).
    """
    for key, value in info.items():
        # skip images that were already downloaded
        if 'image_status' in value:
            continue

        image_url = value['image']
        save_path = os.path.join(path, key + ".jpg")

        # Send a GET request to the image URL
        response = requests.get(image_url, timeout=timeout)

        # Only persist the payload when the request was successful
        if response.status_code == 200:
            with open(save_path, "wb") as file:
                file.write(response.content)
            value['image_status'] = 'Success'

        # pause between requests to avoid hammering the server
        time.sleep(1.7)

In [7]:
# folder that holds the saved listing pages
path = 'guitar_pages'

if not os.path.exists(path):
    # If it does not exist, create it
    os.makedirs(path)

# info of acoustic guitars, keyed by item id
guitarcenter_guitar_info = {}

# walk through all acoustic guitar files
# NOTE(review): every file below `path` is parsed as MHTML; any other file
# kind in that folder would presumably make the parser fail -- confirm
for root, dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(root, file)
        item_list = parse_mhtml_guitarcenter(file_path)
        item_dict = instru_info_guitarcenter(item_list)
        # merge in place; an item id seen again overwrites the earlier entry
        guitarcenter_guitar_info |= item_dict
        
# get the url of high-resolution images by swapping the size token in the url
for key,value in guitarcenter_guitar_info.items():
    guitarcenter_guitar_info[key]["image"] = guitarcenter_guitar_info[key]["image"].replace("264x264","600x600")

In [8]:
# download the high resolution acoustic guitar images
count = 0

# file path
path = 'guitar_images'

# create the folder if it does not exist yet
os.makedirs(path, exist_ok=True)

# stop once every parsed item has an image instead of assuming exactly 96 items
expected_images = len(guitarcenter_guitar_info)
# upper bound on retries so a permanently failing url cannot loop forever
max_retries = 10

while count < max_retries:
    # handle errors related to unstable internet connection; `Exception` (not a
    # bare except) keeps Ctrl-C / kernel interrupt working
    try:
        image_download(guitarcenter_guitar_info, path)
    except Exception as error:
        print(f"download pass failed: {error!r}")

    downloaded = sum(bool(value.get("image_status")) for value in guitarcenter_guitar_info.values())
    if downloaded >= expected_images:
        break

    # either an exception interrupted the pass or some urls answered non-200
    count += 1
    print(f"retry {count}")
    time.sleep(120)
else:
    print(f"giving up after {count} retries: {downloaded}/{expected_images} images downloaded")

#### 4. Save the info of guitars (id, location, condition, price...)

In [9]:
# convert the dictionary to a dataframe; the transpose turns each item id
# (outer dict key) into one row and the inner keys into columns
df = pd.DataFrame(guitarcenter_guitar_info).T
# the item id is written as the first column, labelled "index"
df.to_csv("guitar_info.csv", index=True, index_label="index")

# 2. Data Processing

In [10]:
# define all necessary functions
def widest_boundary(image,left,right,bottom):
    """Return the y-coordinate of the row with the longest opaque run.

    Every row y in [0, bottom) is scanned between the columns `left`
    (inclusive) and `right` (exclusive). For each row the longest run of
    consecutive non-transparent pixels (alpha > 0) is measured; the row with
    the longest run is taken as the widest part of the guitar. Using the
    longest *continuous* run rather than a plain pixel count reduces the
    influence of a guitar stand in the image.

    Parameters:
    - image: object exposing getpixel((x, y)) -> pixel whose index 3 is alpha.
    - left, right: column range to scan.
    - bottom: number of rows to scan, starting at row 0.

    Returns: the y-coordinate of the widest row. On ties the lowest row (the
    largest y) wins; 0 is returned when `bottom` is 0.
    """
    max_num_pixel = 0
    y_b = 0

    # iterate over each row from the top
    for y in range(bottom):
        longest_run = 0
        current_run = 0

        for x in range(left, right):
            if image.getpixel((x, y))[3] > 0:
                current_run += 1
                # track the maximum while counting so that a run touching the
                # right edge (or a fully opaque row) is not lost
                if current_run > longest_run:
                    longest_run = current_run
            else:
                current_run = 0

        # `>=` keeps the lowest row among equally wide rows
        if longest_run >= max_num_pixel:
            max_num_pixel = longest_run
            y_b = y

    return y_b

def left_boundary(image, top, bottom, pixel_count, offset):
    """Find the left edge by scanning columns from left to right.

    A column qualifies when it holds at least `pixel_count` non-transparent
    pixels (alpha > 0) between the rows `top` (inclusive) and `bottom`
    (exclusive).

    Returns: x of the first qualifying column plus `offset`, or None when no
    column qualifies.
    """
    def opaque_in_column(x):
        # number of pixels with a positive alpha channel in column x
        return sum(image.getpixel((x, y))[3] > 0 for y in range(top, bottom))

    for x in range(image.width):
        if opaque_in_column(x) >= pixel_count:
            return x + offset
    return None

def right_boundary(image, top, bottom, pixel_count, offset):
    """Find the right edge by scanning columns from right to left.

    A column qualifies when it holds at least `pixel_count` non-transparent
    pixels (alpha > 0) between the rows `top` (inclusive) and `bottom`
    (exclusive).

    Returns: x of the first qualifying column minus `offset`, or None when no
    column qualifies.
    """
    def opaque_in_column(x):
        # number of pixels with a positive alpha channel in column x
        return sum(image.getpixel((x, y))[3] > 0 for y in range(top, bottom))

    for x in reversed(range(image.width)):
        if opaque_in_column(x) >= pixel_count:
            return x - offset
    return None

def bottom_boundary(image, left, right, pixel_count, offset):
    """Find the bottom edge by scanning rows from the bottom upwards.

    A row qualifies when it holds at least `pixel_count` non-transparent
    pixels (alpha > 0) between the columns `left` (inclusive) and `right`
    (exclusive).

    Returns: y of the first qualifying row minus `offset`, or None when no row
    qualifies.
    """
    # rows are bounded by the image height (not its width), so non-square
    # images are scanned completely and never indexed out of range
    for y in range(image.height - 1, -1, -1):
        num_pixel = sum(1 for x in range(left, right) if image.getpixel((x, y))[3] > 0)

        if num_pixel >= pixel_count:
            return y - offset
    return None

def top_boundary(image, left, right, bottom, offset):
    """Find where the guitar body ends and the neck begins.

    Rows are scanned upwards starting at `bottom` (inclusive). The first row
    whose count of non-transparent pixels between `left` and `right` drops
    below 31% of the body width (right - left) is taken as the top of the
    body, provided it lies less than two body widths above `bottom`.

    Returns: y of that row plus `offset`, or None when no such row exists.
    """
    body_width = right - left
    # 31% is the widest neck/body ratio accepted; the smaller ratios that were
    # tried afterwards in the old inner loop could never match when 31% did not
    narrow_threshold = 31 / 100 * body_width
    max_distance = 2 * body_width

    for y in range(bottom, -1, -1):
        # the distance to `bottom` only grows, so nothing further up can match
        if (bottom - y) >= max_distance:
            return None

        num_pixel = sum(1 for x in range(left, right) if image.getpixel((x, y))[3] > 0)
        if num_pixel < narrow_threshold:
            return y + offset
    return None

def crop_image(image):
    """Crop a background-free guitar image down to its body.

    Steps: locate the widest row of the guitar, find the left/right edges
    above that row, find the bottom edge in a narrow strip around the
    horizontal midpoint, find the top of the body, then crop.

    Parameters:
    - image: PIL image; converted to RGBA when it is in another mode.

    Returns: the cropped RGBA image.

    Raises:
    - ValueError: if one of the four boundaries cannot be located.
    """
    # Convert to RGBA if not already in that mode
    if image.mode != 'RGBA':
        image = image.convert('RGBA')

    # search the whole image, whatever its size (previously fixed to 600x600)
    left = 0
    top = 0
    right, bottom = image.size

    # find the widest part of the guitar image
    widest_y = widest_boundary(image, left, right, bottom)
    # find the left and right boundary above the widest row
    left_b = left_boundary(image, top, widest_y, 15, 3)
    right_b = right_boundary(image, top, widest_y, 15, 3)
    if left_b is None or right_b is None:
        raise ValueError("could not locate the left/right boundary of the guitar")

    # calculate the midpoint of left and right boundary
    mid = int((left_b + right_b)/2)
    # half-width of the strip around the midpoint searched for the bottom
    # boundary; the strip is clamped so it never leaves the image
    width = 10
    strip_left = max(left, mid - width)
    strip_right = min(right, mid + width)
    # find the bottom boundary of the guitar image
    bottom_b = bottom_boundary(image, strip_left, strip_right, 5, 0)
    # find the top boundary of the guitar image
    top_b = top_boundary(image, left_b, right_b, widest_y, 0)
    if bottom_b is None or top_b is None:
        raise ValueError("could not locate the top/bottom boundary of the guitar")

    # crop the image
    cropped_image = image.crop((left_b, top_b, right_b, bottom_b))
    return cropped_image

def resize_pad(image,target_size):
    """Scale an image to `target_size` height and pad it to a white square.

    Parameters:
    - image: PIL image with an alpha channel (it is used as its own paste
      mask, so transparent pixels show the white background).
    - target_size: edge length in pixels of the returned square image.

    Returns: a new RGB image of size (target_size, target_size) with the
    resized input centred horizontally and aligned to the top edge.
    """
    # get the width and height from original image
    width, height = image.size

    # scale so that the height becomes exactly target_size
    scale_ratio = target_size / height

    # the height is set directly: flooring scale_ratio * height can yield
    # target_size - 1 because of floating-point rounding; the width is kept
    # at 1 pixel or more so that very thin images can still be resized
    new_size = (max(1, math.floor(scale_ratio * width)), target_size)

    # resize the image
    image_resized = image.resize(new_size, Image.LANCZOS)
    width_r, _ = image_resized.size

    # create a white (opaque) background image
    new_image = Image.new("RGB", (target_size, target_size), (255, 255, 255))

    # pad by pasting the resized image horizontally centred on the background
    upper_x = (target_size - width_r) // 2
    upper_y = 0
    new_image.paste(image_resized, (upper_x, upper_y), image_resized)

    return new_image

#### 1. Remove the background of guitar images

In [11]:
# remove the background of guitar images
input_path = "guitar_images"
output_path = "guitar_no_background_images"

# create the output folder if it does not exist yet
os.makedirs(output_path, exist_ok=True)

# walk through all guitar images
for root, dirs, files in os.walk(input_path):
    for file in files:
        input_file_path = os.path.join(root, file)
        # Load the input image into a numpy array; the context manager closes
        # the file handle as soon as the pixels are copied
        with Image.open(input_file_path) as input_image:
            input_array = np.array(input_image)

        # Apply background removal using rembg
        output_array = rembg.remove(input_array)

        # Create a PIL Image from the output array
        output_image = Image.fromarray(output_array)

        # swap only the extension for ".png" (a plain str.replace would also
        # rewrite "jpg" appearing elsewhere in the name)
        file = os.path.splitext(file)[0] + ".png"
        output_file_path = os.path.join(output_path, file)

        # Save the output image
        output_image.save(output_file_path)

#### 2. Manually remove invalid images (electric guitars, image missing, etc.)

In [12]:
# manually eliminate the invalid images and save the valid ones to the folder guitar_manually_filtered_images

#### 3. Crop the images to keep the top soundboard of guitars exclusively

In [13]:
input_path = "guitar_manually_filtered_images"
output_path = "guitar_cropped_images"
output_path_failed = "guitar_failed_cropped_images"

# create both output folders if they do not exist yet
os.makedirs(output_path, exist_ok=True)
os.makedirs(output_path_failed, exist_ok=True)

# crop the images to keep the top soundboard of guitars exclusively
# walk through all guitar images
for root, dirs, files in os.walk(input_path):
    for file in files:
        input_file_path = os.path.join(root, file)
        # Load the input image
        input_image = Image.open(input_file_path)
        # crop the image
        try:
            output_image = crop_image(input_image)
            output_file_path = os.path.join(output_path, file)
            # Save the output image
            output_image.save(output_file_path)
        except Exception as error:
            # if the cropping algorithm fails, report why and save the original
            # image to another path; `Exception` (not a bare except) keeps
            # Ctrl-C / kernel interrupt working
            print(f"cropping failed for {file}: {error!r}")
            output_image = input_image
            output_file_path = os.path.join(output_path_failed, file)
            # Save the output image
            output_image.save(output_file_path)

#### 4. Resize and pad the guitar images

In [14]:
input_path = "guitar_cropped_images"
output_path = "guitar_resized_images"
output_path_failed = "guitar_failed_resized_images"

# edge length of the square output images; images shorter than this are
# considered too small and are not resized
target_size = 200

# create both output folders if they do not exist yet
os.makedirs(output_path, exist_ok=True)
os.makedirs(output_path_failed, exist_ok=True)

# walk through all guitar images
for root, dirs, files in os.walk(input_path):
    for file in files:
        input_file_path = os.path.join(root, file)
        # Load the input image
        input_image = Image.open(input_file_path)
        # get the height of image
        _, height = input_image.size

        # resize the image whose height is at least the target size
        if height >= target_size:
            output_image = resize_pad(input_image,target_size)
            output_file_path = os.path.join(output_path, file)
            # Save the output image
            output_image.save(output_file_path)
        # for the images that are too small, save them to another path
        else:
            output_image = input_image
            output_file_path = os.path.join(output_path_failed, file)
            # Save the output image
            output_image.save(output_file_path)

# 3. Data Modeling

#### 1. Load the guitar info

In [15]:
# read the guitar info
df = pd.read_csv("guitar_info.csv")

# load the valid guitar image indices
train_data_id = []
file_path = "guitar_resized_images"

# walk through all guitar images; anything that is not a PNG (e.g. hidden
# files created by the OS) is ignored instead of crashing int()
for root, dirs, files in os.walk(file_path):
    for file in files:
        if file.endswith(".png"):
            train_data_id.append(int(file.replace(".png","")))

# select guitar info for valid images; .copy() makes the selection an
# independent frame so the price assignment below does not write into a slice
df = df[df['index'].isin(train_data_id)].copy()
# "$1,299.99" -> 1300: strip the currency formatting and round up to whole dollars
df['price'] = df['price'].apply(lambda x:math.ceil(float(x.replace("$","").replace(",",""))))

df

Unnamed: 0,index,image,title,location,price,condition,image_status
0,119781100,https://media.guitarcenter.com/is/image/MMGS7/...,Used Taylor 2011 410CE Acoustic Electric Guitar,"Cherry Hill, NJ",1300,Excellent,Success
1,119779356,https://media.guitarcenter.com/is/image/MMGS7/...,Used Ovation 2002 CC57 Celebrity Acoustic Elec...,"Round Rock, TX",280,Excellent,Success
3,119776250,https://media.guitarcenter.com/is/image/MMGS7/...,Used Taylor 214CE Deluxe Acoustic Electric Guitar,"Jackson, MS",1250,Excellent,Success
4,119776239,https://media.guitarcenter.com/is/image/MMGS7/...,Used Martin D28 Acoustic Guitar,"Greensboro, NC",2700,Excellent,Success
5,119776267,https://media.guitarcenter.com/is/image/MMGS7/...,Used Gretsch Guitars G5024E Rancher Acoustic E...,"Goodlettsville, TN",350,Excellent,Success
...,...,...,...,...,...,...,...
90,119749149,https://media.guitarcenter.com/is/image/MMGS7/...,Used Taylor 214CE Deluxe Koa Acoustic Electric...,"Johnson City, TN",1200,Excellent,Success
91,119757084,https://media.guitarcenter.com/is/image/MMGS7/...,Used Taylor Academy 12E Acoustic Electric Guitar,"N. Olmsted, OH",600,Excellent,Success
92,119754997,https://media.guitarcenter.com/is/image/MMGS7/...,Used Taylor 814CE V-Class Acoustic Guitar,"Florence, KY",3080,Excellent,Success
93,119755674,https://media.guitarcenter.com/is/image/MMGS7/...,Used Used Furch Red Pure D-LR Alpine Spruce Na...,"Bloomington, MN",2000,Excellent,Success


#### 2. Create training, validation, and testing set

In [16]:
# calculate the mean and std of R, G, and B channels for my guitar image data sets
path = "guitar_resized_images"

# define a tensor transformer
image_transformer = T.ToTensor()

def normal_para(path, transformer):
    """Compute per-channel mean and std over all images listed in `df`.

    Parameters:
    - path: folder holding the images, named '000000<index>.png'.
    - transformer: callable turning a PIL image into a tensor.

    Returns: (mean, std), each reduced over the batch axis and the two
    trailing axes of the stacked image tensors.

    Note: the image ids are read from the notebook-level dataframe `df`.
    """
    # one tensor per image listed in the dataframe
    tensors = [
        transformer(Image.open(os.path.join(path, '000000'+str(index) + '.png')))
        for index in df['index'].values
    ]

    # stack all tensors into a single tensor
    tensor_stack = torch.stack(tensors, dim=0)

    # reduce over everything except the channel axis
    reduce_axes = [0, 2, 3]
    return torch.mean(tensor_stack, dim=reduce_axes), torch.std(tensor_stack, dim=reduce_axes)

# calculate the mean and std of R, G, and B channels
mean,std = normal_para(path, image_transformer)

In [17]:
# define a class to generate training, validation, and testing set
class Guitar_Image_Data_Set:
    """Map-style dataset pairing each guitar image with its price.

    Parameters:
    - dataframe: frame with the columns 'index' (item id) and 'price'.
    - path: folder holding the images, named '000000<index>.png'.
    - transform: callable applied to every loaded PIL image.
    """
    def __init__(self,dataframe,path,transform):
        self.df = dataframe
        self.path = path
        self.tf = transform

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return self.df.shape[0]

    def __getitem__(self,idx):
        """Return (transformed image, price) for the row at position `idx`."""
        # read from the frame given to this instance, not from a notebook global
        row = self.df.iloc[idx]
        file_name = '000000'+ str(row['index']) + '.png'
        file_path = os.path.join(self.path,file_name)
        image = Image.open(file_path)
        image = self.tf(image)
        price = row['price']

        return image, price

In [18]:
# transform images to tensors and then normalize tensors
transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean, std)
            ])

# create an instance of the Dataset
guitar_dataset = Guitar_Image_Data_Set(dataframe = df,
                                       path = path,
                                       transform = transform)

# define the ratio for training, validation, and testing set
total = len(guitar_dataset)
train = 0.7
validation = 0.15

# split the whole dataset into training, validation, and testing set;
# the test set takes whatever remains after rounding the other two down
train_size = int(train * total)
validation_size = int(validation * total)
test_size = total - train_size - validation_size

# a dedicated, seeded generator makes the split identical on every run
split_generator = torch.Generator().manual_seed(0)
train_dataset, validation_dataset, test_dataset = random_split(
    guitar_dataset,
    [train_size, validation_size, test_size],
    generator=split_generator,
)

In [19]:
# Use DataLoader to batch and shuffle the dataset
# actual training, validation, and testing data set
# loader_train = DataLoader(train_dataset, batch_size=64,shuffle = True)
# loader_val = DataLoader(validation_dataset, batch_size=64,shuffle = True)
# loader_test = DataLoader(test_dataset, batch_size=64,shuffle = True)

# use smaller batch size for demonstration
# NOTE(review): the validation and test loaders are shuffled as well; that is
# presumably not needed for evaluation -- confirm
loader_train = DataLoader(train_dataset, batch_size=4,shuffle = True)
loader_val = DataLoader(validation_dataset, batch_size=4,shuffle = True)
loader_test = DataLoader(test_dataset, batch_size=4,shuffle = True)

#### 3. Train baseline models

In [20]:
# define the configuration of torch
dtype = torch.float

# pick one of the visible GPUs at random; fall back to the CPU when there is
# none (torch.randint(0, 0, ...) would raise on a machine without CUDA)
if torch.cuda.is_available() and torch.cuda.device_count() > 0:
    gpu_index = torch.randint(0, torch.cuda.device_count(), (1,)).item()
    device = torch.device('cuda:{}'.format(gpu_index))
    print("PyTorch is using GPU {}!".format(device))
else:
    gpu_index = None
    device = torch.device('cpu')
    print("No GPU available; PyTorch is using {}.".format(device))

# Constant to control how frequently we print train loss
print_every = 100

PyTorch is using GPU cuda:0!


In [21]:
# define a function that seeds every random number generator used in the notebook
def fix_random_seed(seed_no=0):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices).

    Parameters:
    - seed_no: the seed applied to every generator.

    Returns: nothing.
    """
    random.seed(seed_no)
    np.random.seed(seed_no)
    torch.manual_seed(seed_no)
    # safe without a GPU: the call is ignored when CUDA is unavailable
    torch.cuda.manual_seed_all(seed_no)

# define functions for adjusting learning rate
def adjust_learning_rate(optimizer, lrd, epoch, schedule):
    """
    Scale the learning rate of every parameter group at scheduled epochs.

    Inputs:
    - optimizer: object whose `param_groups` is a list of dicts with key 'lr'
    - lrd: learning rate decay; the factor multiplied into each 'lr'
    - epoch: the current epoch number
    - schedule: collection of epochs at which the decay is applied

    Returns: Nothing; prints one line per updated parameter group.
    """
    # nothing to do outside of the scheduled epochs
    if epoch not in schedule:
        return

    for group in optimizer.param_groups:
        current_lr = group['lr']
        print('lr decay from {} to {}'.format(current_lr, current_lr * lrd))
        group['lr'] *= lrd

# define functions for training the model
def train_model(model, optimizer, epochs=1, learning_rate_decay=.1, schedule=None, criterion = nn.MSELoss()):
    """
    Train a model on guitar image dataset using the PyTorch Module API.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    - learning_rate_decay: (Optional) factor applied to the learning rate at
      the epochs listed in `schedule`
    - schedule: (Optional) epochs at which the learning rate is decayed;
      None means the learning rate is never decayed
    - criterion: (Optional) loss function called as criterion(outputs, targets)

    Reads the notebook-level names `loader_train`, `device` and `dtype`.

    Returns: Nothing, but prints the training loss of every iteration.
    """
    # avoid a mutable default argument
    if schedule is None:
        schedule = []

    batches_per_epoch = len(loader_train)

    for e in range(epochs):

        adjust_learning_rate(optimizer, learning_rate_decay, e, schedule)

        model.train()  # put model to training mode

        for t, (x, y) in enumerate(loader_train):
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=dtype)
            outputs = model(x)

            # calculate the loss using the assigned criterion; the targets are
            # reshaped to a column so they line up with the model output
            loss = criterion(outputs, y.view(-1, 1))

            # Zero out all of the gradients for the variables which the optimizer will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            # global iteration counter across epochs
            tt = t + e * batches_per_epoch
            print('Epoch %d, Iteration %d, loss = %.0f' % (e, tt, loss.item()))

In [22]:
# train a toy model - a neural network with two fully connected layers
fix_random_seed(0)

# channels, height and width of one input image (200 matches the target size
# used when the images were resized and padded)
C, H, W = 3, 200, 200

# initialize the output layer with 1 node for regression tasks.
output_size = 1 

# hyper-parameters of the toy model and its optimizer
hidden_layer_size = 4096
learning_rate = 1e-6
weight_decay = 1e-3
momentum = 0.5
epochs = 5

# To give a specific name to each module, use OrderedDict.
# flatten -> fully connected -> ReLU -> fully connected (single price output)
model = nn.Sequential(OrderedDict([
  ('flatten', nn.Flatten()),
  ('fc1', nn.Linear(C * H * W, hidden_layer_size)),
  ('relu1', nn.ReLU()),
  ('fc2', nn.Linear(hidden_layer_size, output_size)),
]))

model.to(device)

# use SGD (with Nesterov momentum and weight decay) as the optimizer
optimizer = optim.SGD(model.parameters(), 
                      lr=learning_rate,
                      weight_decay=weight_decay,
                      momentum=momentum,
                      nesterov=True)

# print the Architecture of the model
print('Architecture:')
print(model)
print()

# print the loss in the training process
print('Training:')
train_model(model, optimizer,epochs = epochs)

Architecture:
Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=120000, out_features=4096, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=4096, out_features=1, bias=True)
)

Training:
Epoch 0, Iteration 0, loss = 15691427
Epoch 0, Iteration 1, loss = 13631764
Epoch 0, Iteration 2, loss = 4000830
Epoch 0, Iteration 3, loss = 21682328
Epoch 0, Iteration 4, loss = 1056676
Epoch 0, Iteration 5, loss = 7297549
Epoch 0, Iteration 6, loss = 2623719
Epoch 0, Iteration 7, loss = 1075998
Epoch 0, Iteration 8, loss = 7066142
Epoch 0, Iteration 9, loss = 1314058
Epoch 0, Iteration 10, loss = 1053513
Epoch 0, Iteration 11, loss = 3597074
Epoch 0, Iteration 12, loss = 166501
Epoch 1, Iteration 13, loss = 1352148
Epoch 1, Iteration 14, loss = 7310182
Epoch 1, Iteration 15, loss = 915801
Epoch 1, Iteration 16, loss = 265838
Epoch 1, Iteration 17, loss = 351422
Epoch 1, Iteration 18, loss = 351365
Epoch 1, Iteration 19, loss = 10167117
Epoch 1, Iteration 20

In [23]:
# train a baseline model - vgg16
fix_random_seed(0)

# NOTE: C, H, W and output_size are not used further down in this cell; the
# classifier below hard-codes its layer sizes
C, H, W = 3, 200, 200

# initialize the output layer with 1 node for regression tasks.
output_size = 1 

learning_rate = 1e-4
dropout_ratio = 0.1
epochs = 5

# load the vgg16 model
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version
model = models.vgg16(pretrained=True)

# modify the classifier to fit the guitar image dataset
# NOTE(review): 25088 is presumably the flattened size of the VGG16 feature
# output (512 x 7 x 7) -- confirm
model.classifier = nn.Sequential(
    nn.Linear(25088, 512),  
    nn.ReLU(),
    nn.Dropout(dropout_ratio),
    nn.Linear(512, 1)  # output a single value for regression
)

model.to(device)

# use adam as the optimizer; all parameters (features and classifier) are updated
optimizer = optim.Adam(model.parameters(), 
                       lr = learning_rate)

# print the Architecture of the model
print('Architecture:')
print(model)
print()

# print the loss in the training process
print('Training:')
train_model(model, optimizer, epochs = epochs)

Architecture:
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding