In [77]:
import time
from tqdm.notebook import tqdm as tqdm
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
from torch.optim import lr_scheduler
import torch.nn as nn
import timm
import warnings
import boto3
import json
from urllib.parse import urlparse

In [2]:
class args:
    """Static inference settings read by the prediction cells of this notebook."""

    # Backbone architecture name passed to get_model (and on to timm.create_model).
    arch = 'resnet50'

    # DataLoader settings used by predict_on.
    batch_size = 128
    num_workers = 4

In [74]:
def get_s3json(s3, url):
    """Download the object behind an ``s3://bucket/key`` URL and parse it as JSON.

    Args:
        s3: Object exposing ``get_object(Bucket=..., Key=...)`` (e.g. a boto3 S3 client).
        url: Location of the JSON document; the host part is used as the bucket
            and the path (without leading/trailing slashes) as the key.

    Returns:
        The decoded JSON value.
    """
    # allow_fragments=False keeps any '#' in the URL as part of the key.
    parsed = urlparse(url, allow_fragments=False)
    bucket, key = parsed.netloc, parsed.path.strip("/")
    body = s3.get_object(Bucket=bucket, Key=key)['Body']
    return json.loads(body.read())

# Notebook-wide S3 client; passed to get_s3json when the model spec files are loaded.
s3 = boto3.client('s3')

In [85]:
def combine_csvs(csv_list):
    """Read several CSV files and stack them into one de-duplicated table.

    Every file is expected to have a ``filename`` column. After stacking, each
    filename is prefixed with ``sentinel2_composite/transformed_data/`` and only
    the first row for each resulting filename is kept.

    Args:
        csv_list: Non-empty sequence of CSV paths/URLs readable by ``pd.read_csv``.

    Returns:
        DataFrame with the rows of all files (in input order), prefixed filenames
        and duplicates on ``filename`` removed.

    Raises:
        IndexError: If ``csv_list`` is empty.
    """
    if len(csv_list) == 0:
        raise IndexError("csv_list must contain at least one CSV path")

    # Read everything first and concatenate once: DataFrame.append was removed in
    # pandas 2.0, and appending inside a loop re-copies the accumulated frame each time.
    frames = [pd.read_csv(csv_file) for csv_file in csv_list]
    combined = pd.concat(frames, ignore_index=True)

    combined['filename'] = "sentinel2_composite/transformed_data/" + combined['filename']
    return combined.drop_duplicates(subset=['filename'])

In [112]:
class MLP(nn.Module):
    """Fully connected classification head: in_features -> 1024 -> 512 -> n_classes.

    get_model installs an instance as ``model.fc`` and then loads a saved state
    dict into the whole model, so the attribute name ``layers`` and the position
    of each module inside the ``nn.Sequential`` must not change (they determine
    the parameter names in the state dict).

    Args:
        n_classes: Number of output logits.
        in_features: Width of the flattened input. Defaults to 2048, the value
            that was previously hard-coded.
    """

    def __init__(self, n_classes, in_features=2048):
        super().__init__()
        self.layers = nn.Sequential(
                nn.Flatten(),
                nn.Linear(in_features, 1024),
                nn.ReLU(),
                nn.Linear(1024, 512),
                nn.ReLU(),
                nn.Linear(512, n_classes))

    def forward(self, x):
        '''Flatten ``x`` per sample and return the ``n_classes`` logits.'''
        return self.layers(x)

def get_model(architecture, freeze_layers, num_classes, url):
    """Build a timm backbone with an MLP head and load saved weights from S3.

    Args:
        architecture: Model name passed to ``timm.create_model``.
        freeze_layers: The string ``'yes'`` calls ``set_parameter_requires_grad``
            on the backbone before the head is replaced; any other value skips it.
        num_classes: Number of outputs of the replacement ``fc`` head.
        url: ``s3://bucket/key`` location of the state dict to load.

    Returns:
        The model with the downloaded weights loaded, moved to the first CUDA
        device when one is available and to the CPU otherwise.

    Side effects:
        Downloads the weights to ``saved_weights.pth`` in the working directory,
        overwriting any previous file of that name.
    """
    # Use cuda:0 as before when it exists; fall back to CPU instead of failing.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = timm.create_model(architecture, pretrained=True)
    if freeze_layers == 'yes':
        # NOTE(review): set_parameter_requires_grad is not defined in the visible
        # part of this notebook; confirm it exists before calling with 'yes'.
        set_parameter_requires_grad(model, True)
    model.fc = MLP(num_classes)

    s3_client = boto3.client('s3')
    url_parts = urlparse(url, allow_fragments=False)
    s3_client.download_file(url_parts.netloc, url_parts.path.strip("/"), 'saved_weights.pth')

    # map_location lets a checkpoint saved from a GPU be loaded on whatever
    # device was selected above.
    # NOTE(review): torch.load unpickles the file, so only point `url` at a
    # trusted bucket.
    state_dict = torch.load('saved_weights.pth', map_location=device)
    model.load_state_dict(state_dict)
    return model.to(device)

In [101]:
def tsttransforms():
    """Return the albumentations pipeline used for inference images.

    The pipeline normalizes each channel with the fixed mean/std below (these
    match the commonly used ImageNet statistics) and then converts the image
    to a tensor with ``ToTensorV2``.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        A.Normalize(channel_mean, channel_std),
        ToTensorV2(),
    ]
    return A.Compose(steps)

class BirdDataset(Dataset):
    """Dataset that loads one PNG per row of a DataFrame.

    Each row must provide a ``filename`` and a ``label`` column. The image for a
    row is read from ``./data/<filename>.png`` after the
    ``sentinel2_composite/transformed_data/`` prefix is removed from the filename.

    Args:
        df: DataFrame with ``filename`` and ``label`` columns.
        mode: Stored on the instance as ``self.mode``; not used by this class.
        transform: Optional callable invoked as ``transform(image=array)`` and
            returning a mapping with an ``'image'`` entry (albumentations style).
    """

    def __init__(self, df, mode, transform=None):
        self.data = df
        self.img_dir = './'
        self.transform = transform
        self.mode = mode

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label, filename)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        # Fetch the row once instead of re-indexing the frame for every field.
        row = self.data.iloc[idx]
        src_filename = row['filename']
        fname = (src_filename + ".png").replace('sentinel2_composite/transformed_data/', '')
        img_path = f'{self.img_dir}/data/{fname}'

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # rather than raising; fail here with the offending path.
            raise FileNotFoundError(f"Could not read image at {img_path!r}")

        if self.transform is not None:
            image = self.transform(image=image)['image']
        if isinstance(image, np.ndarray):
            # No tensor conversion happened (e.g. transform=None): convert here,
            # moving channels first for 3-D arrays, so .float() below works.
            image = torch.from_numpy(image)
            if image.ndim == 3:
                image = image.permute(2, 0, 1).contiguous()

        # NOTE(review): when the transform already normalizes pixel values this is
        # a second rescale; it is kept unchanged because the saved weights were
        # presumably trained with the same preprocessing. Confirm before changing.
        image = image.float() / 255.
        label = row['label']

        return image, label, src_filename

In [67]:
# S3 prefixes for the model artifacts; a run's base name plus an extension is
# appended to these to form full object URLs.
configuration = dict(
    # Prefix under which the model spec files live.
    spec_location="s3://w210-poverty-mapper/modeling/model_specs/",
    # Prefix under which the saved weights (<base_name>.pth) live.
    weights_location="s3://w210-poverty-mapper/modeling/model_artifacts/",
)

In [94]:
# All run names share one pattern; only the country slug and one numeric field
# (20 or 30) differ between runs.
_RUN_NAME_TEMPLATE = "within_country_{slug}_5k_50d_2_within_even_resnet50_no_{variant}_0.0001_0.1_10_64_4"

# Country display name -> base name of the model run for that country.
# The base name is used to locate both the spec JSON and the weights file.
countries = {
    display_name: _RUN_NAME_TEMPLATE.format(slug=slug, variant=variant)
    for display_name, slug, variant in (
        ("Bangladesh", "bangladesh", 20),
        ("Nepal", "nepal", 30),
        ("Philippines", "philippines", 30),
        ("Tajikistan", "tajikistan", 20),
        ("Timor Leste", "timor_leste", 20),
    )
}

In [113]:
def predict_on(base_name, model_inputs):
    """Predict a class for every row of ``model_inputs`` with one saved model.

    Args:
        base_name: Name of the model run; the weights are loaded from
            ``configuration["weights_location"] + base_name + ".pth"``.
        model_inputs: DataFrame with the ``filename`` and ``label`` columns
            that BirdDataset reads.

    Returns:
        1-D NumPy array with the predicted class index (argmax over the model
        outputs) for each row, in row order. Empty when ``model_inputs`` has
        no rows.
    """
    tstdataset = BirdDataset(model_inputs, 'test', tsttransforms())
    model = get_model(args.arch, "no", 2, configuration["weights_location"] + base_name + ".pth")
    # Send batches to wherever get_model placed the model instead of assuming cuda:0.
    device = next(model.parameters()).device
    loaderargs = {'num_workers' : args.num_workers, 'batch_size':args.batch_size, 'pin_memory': False, 'drop_last': False}
    # shuffle=False keeps predictions aligned with the rows of model_inputs.
    tstloader = DataLoader(tstdataset, shuffle = False, **loaderargs)
    batch_preds = []
    model.eval()
    print("Starting the predictions for " + base_name)
    with torch.no_grad():
        for step, (img, _label, _location) in enumerate(tstloader, start=1):
            inputs = img.to(device, dtype=torch.float)
            # Reduce to class indices and move off the device per batch so the
            # full set of logits is never held in device memory.
            batch_preds.append(model(inputs).argmax(1).cpu())
            if step % 100 == 1:
                print("step : ",step)
    if batch_preds:
        predicted_labels = torch.cat(batch_preds).numpy()
    else:
        # torch.cat rejects an empty list; an empty input yields no predictions.
        predicted_labels = np.empty(0, dtype=np.int64)
    print("Done with predictions for " + base_name)
    return predicted_labels

In [118]:
# Metadata table for all images; provides (among others) the `filename`,
# `countries` and `Density` columns used below.
density_file_base = pd.read_csv("s3://w210-poverty-mapper/modeling/metadata/source_data/meta_data_full_updated_density_new_full_value_LZ.csv")

# One frame per country, concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0).
per_country_outputs = []

for country, base_name in countries.items():
    # The spec JSON lists the train/val/test CSVs that carry the known labels.
    spec = get_s3json(s3, configuration["spec_location"] + base_name + ".json")
    csv_files_with_label = combine_csvs([
        spec["train"],
        spec["val"],
        spec["test"]
    ])
    # merge returns a new frame, so the base table is not modified.
    density_file = pd.merge(density_file_base, csv_files_with_label, on='filename', how='outer')

    # `countries` holds a stringified list, hence the "['<name>']" comparison.
    in_country = density_file['countries'] == "['" + country + "']"
    above_density = density_file['Density'] > 50
    # .copy() so adding the prediction column does not write into a slice of density_file.
    model_inputs = density_file.loc[in_country & above_density, ['filename', 'label']].copy()

    predictions = predict_on(base_name, model_inputs)
    model_inputs['prediction'] = predictions
    per_country_outputs.append(model_inputs)

output = pd.concat(per_country_outputs) if per_country_outputs else pd.DataFrame()


Starting the predictions for within_country_bangladesh_5k_50d_2_within_even_resnet50_no_20_0.0001_0.1_10_64_4
step :  1
step :  101
step :  201
Done with predictions for within_country_bangladesh_5k_50d_2_within_even_resnet50_no_20_0.0001_0.1_10_64_4
Starting the predictions for within_country_nepal_5k_50d_2_within_even_resnet50_no_30_0.0001_0.1_10_64_4
step :  1
step :  101
Done with predictions for within_country_nepal_5k_50d_2_within_even_resnet50_no_30_0.0001_0.1_10_64_4
Starting the predictions for within_country_philippines_5k_50d_2_within_even_resnet50_no_30_0.0001_0.1_10_64_4
step :  1
step :  101
step :  201
step :  301
step :  401
Done with predictions for within_country_philippines_5k_50d_2_within_even_resnet50_no_30_0.0001_0.1_10_64_4
Starting the predictions for within_country_tajikistan_5k_50d_2_within_even_resnet50_no_20_0.0001_0.1_10_64_4
step :  1
step :  101
Done with predictions for within_country_tajikistan_5k_50d_2_within_even_resnet50_no_20_0.0001_0.1_10_64_4
Star

In [119]:
# Total number of rows (one per predicted image) collected across all countries.
len(output)

122456

In [122]:
# Distribution of the known labels among the predicted rows (rows without a label are not counted).
output.label.value_counts()

1    22021
0     5558
Name: label, dtype: int64

In [123]:
# Start again from an unmodified copy of the full metadata table.
density_file = density_file_base.copy()

In [124]:
# Attach the `label` and `prediction` columns to the metadata by filename;
# the outer join keeps metadata rows that received no prediction.
density_file = pd.merge(density_file, output, on='filename', how='outer')

In [125]:
# Row count of the merged table.
len(density_file)

156656

In [126]:
# Preview the first rows of the merged table.
density_file.head()

Unnamed: 0,filename,zone,center,lat_lon_bounds,utm_bounds,countries,partial_updated,Density,label,prediction
0,sentinel2_composite/transformed_data/42S/341-0...,42S,"(67.78771480363916, 37.195334792066234)","[(67.77493453609193, 37.205298500604044), (67....","BoundingBox(left=391290.0, bottom=4116110.0, r...",['Tajikistan'],False,307.808161,,1.0
1,sentinel2_composite/transformed_data/42S/341-0...,42S,"(67.78803746297999, 37.175147632248574)","[(67.77526071406628, 37.18511147083712), (67.7...","BoundingBox(left=391290.0, bottom=4113870.0, r...",['Tajikistan'],False,143.624449,,1.0
2,sentinel2_composite/transformed_data/42S/341-0...,42S,"(67.7883598008825, 37.15496040109096)","[(67.77558656709091, 37.16492436967268), (67.7...","BoundingBox(left=391290.0, bottom=4111630.0, r...",['Tajikistan'],False,155.264702,,1.0
3,sentinel2_composite/transformed_data/42S/341-0...,42S,"(67.78868181768202, 37.134773098609735)","[(67.77591209550492, 37.14473719712704), (67.7...","BoundingBox(left=391290.0, bottom=4109390.0, r...",['Tajikistan'],False,154.851017,,1.0
4,sentinel2_composite/transformed_data/42S/341-0...,42S,"(67.7890035137133, 37.1145857248212)","[(67.7762372996468, 37.124549953216665), (67.7...","BoundingBox(left=391290.0, bottom=4107150.0, r...",['Tajikistan'],False,178.775927,,1.0


In [127]:
# Save the merged metadata + label + prediction table locally, without the DataFrame index.
density_file.to_csv("combined_labeled.csv", index=False)

In [128]:
# Label distribution in the merged table (rows without a label are not counted).
density_file.label.value_counts()

1    22021
0     5558
Name: label, dtype: int64

In [129]:
# Predicted-class distribution in the merged table (rows without a prediction are not counted).
density_file.prediction.value_counts()

1.0    96361
0.0    26095
Name: prediction, dtype: int64