In [1]:
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from keras.preprocessing import image
from torchmetrics.classification import MultilabelF1Score
from torchvision import models
from torchvision.transforms import v2
import os
import warnings


In [2]:
# Quieten TensorFlow's native logging (level '2') and silence Python warnings
# for the rest of the session.
# NOTE(review): this runs after the import cell has already imported keras; if
# that import loads TensorFlow, the variable may be read too late to take
# effect -- consider setting it before the imports.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')
warnings.simplefilter('ignore')

# Data

In [3]:
# Extract ml1m.zip into the ml1m/ folder before running this cell.

# '::' is a multi-character separator, so the Python parsing engine is used.
users = (
    pd.read_csv(
        'ml1m/content/dataset/users.dat',
        sep='::',
        engine='python',
        names=['userid', 'gender', 'age', 'occupation', 'zip'],
    )
    .set_index('userid')
    .astype({'age': 'int', 'gender': 'category', 'occupation': 'category'})
)

ratings = pd.read_csv(
    'ml1m/content/dataset/ratings.dat',
    sep='::',
    engine='python',
    names=['userid', 'movieid', 'rating', 'timestamp'],
).astype({'movieid': 'int', 'userid': 'int'})

# Movie tables: index by movie id (axis renamed to 'ID') and turn the
# '|'-delimited genre string into a list of genre names.
movies_train = (
    pd.read_csv(
        'ml1m/content/dataset/movies_train.dat',
        sep='::',
        engine='python',
        names=['movieid', 'title', 'genre'],
        encoding='ISO-8859-1',
        index_col=False,
    )
    .set_index('movieid')
    .rename_axis('ID')
    .assign(genre=lambda frame: frame['genre'].str.split('|'))
)
movies_test = (
    pd.read_csv(
        'ml1m/content/dataset/movies_test.dat',
        sep='::',
        engine='python',
        names=['movieid', 'title', 'genre'],
        encoding='ISO-8859-1',
        index_col=False,
    )
    .set_index('movieid')
    .rename_axis('ID')
    .assign(genre=lambda frame: frame['genre'].str.split('|'))
)

In [4]:
# Read the genre vocabulary: one genre name per line of genres.txt.
with open('ml1m/content/dataset/genres.txt', 'r') as f:
    genre_all = list(f)
# Strip the trailing newline (and any surrounding whitespace) from each name.
genres = list(map(str.strip, genre_all))

In [5]:
def preprocess(df, path='ml1m/content/dataset/ml1m-images', genres=genres) -> pd.DataFrame:
    """Attach poster paths and multi-hot genre columns to a movie table.

    Parameters
    ----------
    df : pd.DataFrame
        Movie table with a ``genre`` column whose cells support ``in``
        membership tests (the notebook passes lists of genre names). The
        index values are used to build the image file names.
    path : str
        Directory holding the poster images, named ``<index value>.jpg``.
    genres : list of str
        Genre names; one 0/1 column is created for each of them.

    Returns
    -------
    pd.DataFrame
        A new frame with an ``img_path`` column and one 0/1 column per genre,
        without the ``genre`` column, restricted to rows whose image file
        exists on disk. The input frame is not modified, so the function can
        be called again on the same frame.
    """
    # Work on a copy: mutating the caller's frame (adding columns, dropping
    # 'genre') made a second call on the same frame fail.
    out = df.copy()

    # Build paths straight from the index; unlike df.apply(axis=1) this also
    # behaves on an empty frame.
    out['img_path'] = [os.path.join(path, f'{movie_id}.jpg') for movie_id in out.index]

    # multi-hot encoding
    for genre in genres:
        out[genre] = out['genre'].apply(lambda tags, g=genre: int(g in tags))
    out = out.drop(columns=['genre'])

    # Remove movies without images. The explicit bool cast keeps the mask a
    # proper boolean indexer even when the frame is empty.
    has_image = out['img_path'].map(os.path.exists).astype(bool)
    return out.loc[has_image]

In [6]:
# Preprocess the train split first, then the test split.
trainset, testset = (preprocess(split) for split in (movies_train, movies_test))

# Custom Dataset

In [7]:
class Poroset(Dataset):
    """Dataset yielding ``(image_tensor, multi_hot_label)`` pairs for movie posters.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with an ``img_path`` column and one 0/1 column per entry of the
        module-level ``genres`` list.
    transform : callable, optional
        Applied to the loaded PIL image. When omitted, the default pipeline
        resizes to 224x224, converts to a float32 tensor scaled to [0, 1] and
        normalises each channel.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.img_path = df.img_path.values
        self.labels = df[genres].values
        if transform is None:
            transform = v2.Compose([
                v2.Resize((224, 224)),
                # v2.ToTensor() is deprecated; ToImage + ToDtype(scale=True) is
                # the documented replacement (requires torchvision >= 0.16).
                v2.ToImage(),
                v2.ToDtype(torch.float32, scale=True),
                # Channel statistics commonly used for ImageNet-trained
                # torchvision models.
                v2.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the backing frame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, transform and return the ``idx``-th image with its float label vector."""
        img = image.load_img(self.img_path[idx])
        img = self.transform(img)
        # BCEWithLogitsLoss expects floating-point targets.
        label = torch.tensor(self.labels[idx]).float()
        return img, label

In [8]:
# Wrap the preprocessed frames in Dataset objects. The names are rebound from
# DataFrame to Dataset, so guard on the type: re-running this cell would
# otherwise try to wrap an already-wrapped dataset and raise.
if isinstance(trainset, pd.DataFrame):
    trainset = Poroset(trainset)
if isinstance(testset, pd.DataFrame):
    testset = Poroset(testset)

In [9]:
# Shuffle only the training batches. Evaluation does not benefit from
# shuffling, and a fixed order keeps predictions aligned with the test set
# across runs.
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

## Model

In [10]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [11]:
# ResNet-101 trained from scratch. `weights=None` is the current spelling of
# the deprecated `pretrained=False` (random initialisation, no download).
model = models.resnet101(weights=None)
# Replace the classification head with one logit per genre.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(genres))
model = model.to(device)

In [12]:
# Adam over all model parameters, paired with a sigmoid + binary
# cross-entropy loss applied to the raw logits (one independent decision per
# genre).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = torch.nn.BCEWithLogitsLoss()

In [13]:
def train(epoch):
    """Run one optimisation pass over ``trainloader``.

    Uses the module-level ``model``, ``criterion``, ``optimizer``, ``device``
    and ``trainloader``.

    Parameters
    ----------
    epoch : int
        Epoch number; only used to label the progress bar.

    Returns
    -------
    float
        Sample-weighted mean training loss over the pass (``nan`` if the
        loader yielded no batches).
    """
    model.train()
    # Accumulate on the device so the loss is synchronised to the host only
    # once, at the end of the pass.
    loss_sum = torch.zeros((), device=device)
    n_seen = 0
    for x, y in tqdm(trainloader, desc=f'epoch {epoch}'):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y.float())
        loss.backward()
        optimizer.step()

        batch_size = x.size(0)
        loss_sum += loss.detach() * batch_size
        n_seen += batch_size
    return (loss_sum / n_seen).item() if n_seen else float('nan')

In [14]:
# Number of full passes over the training set.
NUM_EPOCHS = 16

for epoch in range(NUM_EPOCHS):
    train(epoch)

100%|██████████| 82/82 [00:25<00:00,  3.27it/s]


In [15]:
# Evaluate macro-averaged F1 on the test loader.
model.eval()
y_pred = []
y_test = []
with torch.no_grad():  # inference only: skip building the autograd graph
    for x, y in tqdm(testloader):
        x = x.to(device)
        y_pred.append(model(x).cpu().numpy())
        y_test.append(y.cpu().numpy())
y_pred = np.concatenate(y_pred)  # raw logits, one column per label
y_test = np.concatenate(y_test)

# Derive the label count from the data instead of hard-coding 18.
f1 = MultilabelF1Score(num_labels=y_test.shape[1], threshold=0.5, average='macro')
# Convert logits to probabilities explicitly rather than relying on the
# metric's "values outside [0, 1] are logits" heuristic, and pass integer
# targets as the metric expects.
f1.update(torch.sigmoid(torch.from_numpy(y_pred)), torch.from_numpy(y_test).long())
f1.compute()

100%|██████████| 21/21 [00:04<00:00,  4.29it/s]


tensor(0.0544)