In [1]:
import tensorflow as tf
from PIL import Image
import pandas as pd
import numpy as np
import random
import cv2
import os

import resnet

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

In [3]:
COUNTRY = "bra"
CSV_PATH = "./cci_final.csv"
IMAGERY_DIR = "../../CCI/hmbaier/"
BATCH_SIZE = 64
checkpoint_path = "training/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
checkpoint_dir

'training'

In [4]:
files = os.listdir(IMAGERY_DIR)
files = [IMAGERY_DIR + i for i in files if COUNTRY in i]

files[0:5]

['../../CCI/hmbaier/43135129_bra_2015-01-01_2015-03-30.png',
 '../../CCI/hmbaier/43157114_bra_2015-01-01_2015-03-30.png',
 '../../CCI/hmbaier/32003005_bra_2015-01-01_2015-03-30.png',
 '../../CCI/hmbaier/52026663_bra_2015-01-01_2015-03-30.png',
 '../../CCI/hmbaier/33066523_bra_2015-01-01_2015-03-30.png']

In [5]:
class Dataloader():
    
    def __init__(self, country, imagery_direc, scores_df, batch_size):
        """
        Arguments:
            country: one of ['mex', 'slv', 'peru', 'phl']
            imagery_direc: path to folder containing school imagery
            scores_df: path to CSV file with school IDs and test scroes
            split: train/test split, should be between .01 and 1, recommended is between .65 and .8
            batch_size: number of images in a batch
        """
        self.country = country
        self.imagery_direc = imagery_direc
        self.imagery = os.listdir(self.imagery_direc)
        self.imagery = [i for i in self.imagery if self.country in i]
        self.scores_df = pd.read_csv(scores_df)
        self.scores_df = self.scores_df[self.scores_df['country'] == self.country]
        self.batch_size = batch_size

        # Load the data into a list with the format [(school_image, school_test_score), ...]
        self.data = self.load_data()
        
        
    def load_data(self):
        """
        Load the imagery into a list in the format: [(imager_tensor, test_score), ...]
        """
        data = []
        for col, row in self.scores_df.iterrows():
            school_id = str(row.school_id)
            test_score = row.y
            impath = [i for i in self.imagery if school_id in i]
            if len(impath) > 0:
                image = np.array(Image.open(self.imagery_direc + impath[0]))
#                 image = self.tfs(image)
                data.append((image, test_score))
        return data

In [6]:
data = Dataloader(country = COUNTRY, 
                  imagery_direc = IMAGERY_DIR, 
                  scores_df = CSV_PATH,
                  batch_size = BATCH_SIZE)

In [7]:
all_data = data.data

In [8]:
model = tf.keras.models.load_model('saved_model/my_model')

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
    loss = tf.keras.losses.BinaryCrossentropy()
)

In [18]:
preds, trues = [], []

with tf.device('/device:GPU:0'):
    
    for i in range(0, len(all_data)):

        preds.append(np.argmax(model.predict(np.expand_dims(all_data[i][0], 0))))
        trues.append(all_data[i][1])
    

In [19]:
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(trues, preds)
cm

array([[2140,    0],
       [4601,    0]])

In [20]:
accuracy_score(trues, preds)

0.31746031746031744