In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

batch_size = 8
epochs = 10
learning_rate = 1e-3

transform = transforms.Compose([transforms.Resize((2048,2048)), 
                                transforms.ToTensor()])

train_data = datasets.ImageFolder(root='/datasets/score_data/cover-data/train', transform=transform)
val_data = datasets.ImageFolder(root='/datasets/score_data/cover-data/val', transform=transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(epochs):
    model.train()

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print ('Loss: {:.4f}'.format(loss.item()))

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Validation Accuracy on epoch [{}]: {}%'.format(epoch+1, 100 * correct / total))

torch.save(model.state_dict(), 'model.pth')

In [None]:
import torch
from torchvision import transforms,models
from PIL import Image

device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model.load_state_dict(torch.load('model.pth'))
model.to(device)
model.eval()

image_path = 'IMSLP64564_8.jpg'
image = Image.open(image_path)
transform = transforms.Compose([transforms.Resize((2048, 2048)), 
                                transforms.ToTensor()])
image = transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(image)
    _, predicted = torch.max(output, 1)
    if predicted.item() == 1:
        print("This image is a score.")
    else:
        print("This image is not a score.")

In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
from concurrent.futures import ProcessPoolExecutor

device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model.load_state_dict(torch.load('2048-cover.pth', map_location=device))
model.to(device)
model.eval()

transform = transforms.Compose([transforms.Resize((2048, 2048)), 
                                transforms.ToTensor()])

def process_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)

    return image_path, predicted.item()

image_dir = '/datasets/score_data/hd_data/hd_data_jpg'
output_dir = '/datasets/score_data/cover-data/cover'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]


def move_non_score_images():
    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        for image_path, predicted in executor.map(process_image, image_files):
            if predicted == 0:  # Assuming '0' is the class for non-score images
                os.rename(image_path, os.path.join(output_dir, os.path.basename(image_path)))

move_non_score_images()

In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
import shutil

def process_image(image_path, device, model):
    transform = transforms.Compose([
        transforms.Resize((2048, 2048)),
        transforms.ToTensor()
    ])
    
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)

    return image_path, predicted.item()

def main():
    device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")
    model = models.resnet18(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, 2)
    model.load_state_dict(torch.load('2048-cover.pth', map_location=device))
    model = model.to(device)
    model.eval()
    
    # Paths setup
    image_dir = '/datasets/score_data/hd_piano/hd_data_jpg'
    output_dir = '/datasets/score_data/cover-data/cover'
    
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
    
    for image_path in image_files:
        _, predicted = process_image(image_path, device, model)
        if predicted == 0:  # Assuming '0' is the class for non-score images
            shutil.copy2(image_path, os.path.join(output_dir, os.path.basename(image_path)))

if __name__ == '__main__':
    main()

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report
import numpy as np

batch_size = 8
epochs = 10
learning_rate = 1e-3

transform = transforms.Compose([transforms.Resize((2048,2048)), 
                                transforms.ToTensor()])

train_data = datasets.ImageFolder(root='/datasets/score_data/cover-data/train', transform=transform)
val_data = datasets.ImageFolder(root='/datasets/score_data/cover-data/val', transform=transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")
model.to(device)

y_pred = []
y_true = []

for epoch in range(epochs):
    model.train()

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print ('Loss: {:.4f}'.format(loss.item()))

model.eval()
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
        y_pred.extend(predicted.view(-1).cpu().numpy())
        y_true.extend(labels.cpu().numpy())

print('完整的分类任务评测指标：')
print(classification_report(y_true, y_pred, target_names=val_data.classes))

torch.save(model.state_dict(), 'model.pth')



Loss: 0.0164
Loss: 0.0017
Loss: 0.0039
Loss: 0.0133
Loss: 0.0027
Loss: 0.0022
Loss: 0.0008
Loss: 10.3067
Loss: 0.0026
Loss: 0.0118
完整的分类任务评测指标：
              precision    recall  f1-score   support

       cover       0.95      1.00      0.98        20
       score       1.00      0.97      0.98        30

    accuracy                           0.98        50
   macro avg       0.98      0.98      0.98        50
weighted avg       0.98      0.98      0.98        50



In [2]:
print(classification_report(y_true, y_pred, target_names=val_data.classes, digits=4))

              precision    recall  f1-score   support

       cover     0.9524    1.0000    0.9756        20
       score     1.0000    0.9667    0.9831        30

    accuracy                         0.9800        50
   macro avg     0.9762    0.9833    0.9793        50
weighted avg     0.9810    0.9800    0.9801        50

