In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
import torch.optim as optim
from torch.optim import lr_scheduler
from PIL import Image
import natsort

import matplotlib.pyplot as plt
import time
import copy
import os
from tqdm import tqdm
import numpy as np
import pandas as pd

### Make dataset

Каждый элемент данных — это спутниковый снимок одного переписного участка (census tract) в США и соответствующее ему значение показателя физической активности.

In [2]:
class CensusTractDataset(Dataset):
    """Image-regression dataset: one image file per sample plus a scalar label.

    Image file names are collected from ``main_dir`` and ordered with
    ``natsort.natsorted``.  The label for an image is looked up in
    ``../data/<city>/data_<city>_physical_activity.csv``: rows whose column
    ``'0'`` equals the image file name are selected and their column ``'3'``
    values are returned as a tensor.

    Args:
        main_dir: Directory that is listed to discover image files.
        city: City identifier used to build the path of the labels CSV.
        transform: Callable applied to the RGB ``PIL.Image``; its result is
            returned as the first element of each sample.

    NOTE(review): the directory is *listed* via ``main_dir`` exactly as given,
    while images are *opened* from ``'../data/<main_dir>'``.  Both paths only
    refer to the same place for particular working directories -- confirm the
    intended layout before changing either.
    """

    # File-name suffixes (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, main_dir, city, transform):
        self.main_dir = main_dir
        self.city = city
        self.transform = transform
        self.all_imgs = self.rem(main_dir)
        self.total_imgs = natsort.natsorted(self.all_imgs)
        # Labels table; read lazily on first access and then reused.
        self._labels_df = None

    def _load_labels(self):
        """Return the labels DataFrame, reading the CSV only on first use."""
        labels = getattr(self, '_labels_df', None)
        if labels is None:
            y_loc = os.path.join('../data', self.city, 'data_' + self.city + '_physical_activity.csv')
            labels = pd.read_csv(y_loc)
            self._labels_df = labels
        return labels

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_tensor)`` for sample ``idx``.

        If no CSV row matches the image name the label tensor is empty; if
        several rows match it holds one value per matching row.
        """
        img_name = self.total_imgs[idx]
        img_loc = os.path.join('../data', self.main_dir, img_name)
        # Close the underlying file deterministically once pixels are decoded.
        with Image.open(img_loc) as raw_image:
            image = raw_image.convert("RGB")
        tensor_image = self.transform(image)

        df = self._load_labels()
        y = torch.tensor(df[df['0'] == img_name]['3'].values)

        return tensor_image, y

    def rem(self, main_dir):
        """List ``main_dir`` and keep only entries with an image extension.

        A new list is built instead of removing items from the list being
        iterated, which used to skip the entry following each removed one and
        therefore let some non-image files through.
        """
        return [
            name for name in os.listdir(main_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

    def __len__(self):
        """Number of image files discovered in ``main_dir``."""
        return len(self.total_imgs)

In [3]:
# Per-channel mean / std used to normalise the image tensor
# (the values torchvision documents for its pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 224x224, convert to a tensor, normalise.
trns = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

In [4]:
# City identifier; used to locate the labels CSV for that city.
city = 'san_fran'
# Directory holding the city's image files.
path = 'san_fran_images'

In [5]:
# One sample per batch; batches are loaded in the main process (no workers).
batch_size = 1
my_dataset = CensusTractDataset(main_dir=path, city=city, transform=trns)
dataloaders = DataLoader(
    dataset=my_dataset,
    batch_size=batch_size,
    num_workers=0,
)

In [6]:
# Sanity check: pull a single batch from the loader and display the shapes of
# the image tensor and the label tensor.
data_iter = iter(dataloaders)
img, label = next(data_iter)
img.shape, label.shape

(torch.Size([1, 3, 224, 224]), torch.Size([1, 1]))

In [7]:
# Number of samples in the dataset (one per discovered image file).
len(my_dataset)

6825

### Train

In [8]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [9]:
def train_model(model, criterion, optimizer, num_epochs=2, dataloader=None, device=None):
    """Train ``model`` in place for ``num_epochs`` passes over a data loader.

    Args:
        model: ``nn.Module`` to optimise; expected to already live on the
            target device.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``
            after both are cast to float32.
        optimizer: Optimiser that holds ``model``'s parameters.
        num_epochs: Number of full passes over the loader.
        dataloader: Iterable of ``(inputs, labels)`` batches that supports
            ``len()``.  Defaults to the notebook-level ``dataloaders`` so
            existing calls keep working.
        device: Device that batches are moved to.  Defaults to the device of
            the model's first parameter (CPU if the model has no parameters),
            so batches always end up where the model is.

    Returns:
        The same ``model`` object, after training.
    """
    if dataloader is None:
        # Backward-compatible fallback to the loader defined in the notebook.
        dataloader = dataloaders
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    since = time.time()

    for epoch in range(num_epochs):
        model.train()
        print('size of train loader is: ', len(dataloader))

        running_loss = 0.0
        n_batches = 0

        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs.float(), labels.float())
            # .item() yields a plain float: readable log line, and no
            # reference to the autograd graph is kept around.
            step_loss = loss.item()
            print("loss_train_step is: ", step_loss)

            loss.backward()
            optimizer.step()

            running_loss += step_loss
            n_batches += 1

        if n_batches:
            print('epoch {}/{} mean loss: {:.4f}'.format(
                epoch + 1, num_epochs, running_loss / n_batches))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model

In [10]:
# Start from torchvision's ResNet-18 with pretrained weights.
model_ft = models.resnet18(pretrained=True)

# Replace the final fully connected layer with a single-output linear layer,
# keeping the input width of the original head.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=1)

# Move the whole network to the selected device.
model_ft = model_ft.to(device)

In [11]:
# Mean-squared-error loss for the single-output regression head.
criterion = nn.MSELoss()
# Adam over all model parameters (the whole network is trained, not only the head).
optimizer_ft = optim.Adam(params=model_ft.parameters(), lr=0.01)

In [12]:
# model_ft = train_model(model_ft, criterion, optimizer_ft, num_epochs=2)
# ...