In [0]:
import os
from glob import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import mectools.data as dt
import sklearn.model_selection as sk

In [0]:
import models
import tools

In [0]:
import warnings
# Blanket filter: with no category/module given, every Python warning
# (including deprecation warnings) is suppressed for the rest of the session.
warnings.filterwarnings('ignore')

In [0]:
import matplotlib as mpl
plt = plotter(backend='agg')
%matplotlib inline

In [0]:
# List every GPU TensorFlow can see, print it, and switch on memory growth
# for it (via the experimental config API).
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    print(gpu)
    tf.config.experimental.set_memory_growth(gpu, True)

In [0]:
K = 256 # image size in pixels (passed to models.gen_depth_low)
C = 1 # number of image channels: parse_function currently loads only the 'density' tile
S = 1024 # map size in sat pixels (passed to tools.load_tile)

In [0]:
# Read the firm census records and the per-firm location index, keeping only
# the columns used below.
firm_cols = ['id', 'industry', 'income', 'total_assets', 'employees']
targ_cols = ['id', 'lat_wgs84', 'lon_wgs84', 'prod_id']
firms = pd.read_csv('../firms/census_2004_geocode.csv', usecols=firm_cols)
targ = pd.read_csv('../index/census2004_mincloud2002.csv', usecols=targ_cols)

# Left-join locations onto firms, discard any row with a missing value, and
# index by firm id while keeping 'id' as a regular column too.
merged = pd.merge(firms, targ, on='id', how='left')
firms = merged.dropna().set_index('id', drop=False)

# Outcome variables: income per employee and its log (via mectools' dt.log);
# rows whose log-productivity is missing are dropped.
firms['prod'] = firms['income'] / firms['employees']
firms['lprod'] = dt.log(firms['prod'])
firms = firms.dropna(subset=['lprod'])
print(len(firms))

In [0]:
# Hold out 20% of firms for validation. A fixed random_state makes the split
# (and therefore the validation metrics reported below) reproducible across
# kernel restarts; change the seed to draw a different split.
df_train, df_valid = sk.train_test_split(firms, test_size=0.2, random_state=0)

In [0]:
def parse_function(fid, lprod):
    """Map one (firm id, label) pair to a model example.

    Loads the 'density' tile for `fid` through tools.load_tile at map size S
    and returns ((fid, image), lprod): the firm id travels alongside the image
    as part of the features, and the label is passed through untouched.

    Only the density channel is used here. To add the landsat channel as well,
    load it with tools.load_tile(fid, 'landsat', S) and tf.concat the two
    tiles along the last axis.
    """
    density = tools.load_tile(fid, 'density', S)
    features = (fid, density)
    return features, lprod

In [0]:
def make_dataset(df, buffer=10000, batch=64):
    """Build a shuffled, batched, endlessly repeating tf.data pipeline.

    Parameters
    ----------
    df : DataFrame
        Must contain an 'id' column (firm ids) and an 'lprod' column (labels).
    buffer : int
        Shuffle buffer size, in number of examples.
    batch : int
        Number of examples per batch.

    Returns
    -------
    tf.data.Dataset
        Batches of whatever `parse_function` yields for each (id, label) pair;
        labels are cast to float32 and reshaped to one column. The dataset
        repeats forever, so consumers must bound the number of steps.
    """
    fids = tf.constant(df['id'])
    labels = tf.reshape(tf.cast(tf.constant(df['lprod']), tf.float32), (-1, 1))
    data = tf.data.Dataset.from_tensor_slices((fids, labels))
    # Shuffle the lightweight (id, label) pairs *before* loading tiles.
    # Shuffling after the map would keep up to `buffer` loaded images in the
    # shuffle buffer and would have to load that many tiles before emitting
    # the first batch; the sampling order is equally random either way.
    data = data.shuffle(buffer_size=buffer)
    data = data.map(parse_function)
    data = data.batch(batch)
    data = data.repeat()
    return data

In [0]:
# Pass the batch size by keyword: the second positional parameter of
# make_dataset is `buffer`, so `make_dataset(df, 64)` shrank the shuffle
# buffer to 64 examples instead of setting the batch size.
train, valid = make_dataset(df_train, batch=64), make_dataset(df_valid, batch=64)

In [0]:
# Build the network from the project's `models` module, passing the image
# size K and channel count C, then print its layer summary.
model = models.gen_depth_low(K, C)
model.summary()

In [0]:
# train keras model
# Both datasets repeat indefinitely (make_dataset ends with .repeat()), so the
# length of an epoch and of each validation pass is given explicitly, in
# batches, via steps_per_epoch and validation_steps.
history = model.fit(train,
    epochs=10, steps_per_epoch=1000, validation_data=valid, validation_steps=20
)

In [0]:
# Validation-set evaluation: gather inputs, labels and predictions through
# tools.predict_data, then let tools.eval_model draw onto a two-panel figure.
# NOTE(review): the third argument (100) is presumably the number of batches
# to draw from the dataset — confirm in tools.predict_data.
x_test, y_test, yh_test = tools.predict_data(model, valid, 100)
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))
tools.eval_model(y_test, yh_test, ymin=2, ymax=6, axs=(ax0, ax1))

In [0]:
# Same evaluation, but on the *training* dataset (for comparison against the
# validation fit). NOTE(review): despite the `*_test` names these variables
# hold training data here, and they overwrite the validation results from the
# previous cell.
x_test, y_test, yh_test = tools.predict_data(model, train, 100)
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))
tools.eval_model(y_test, yh_test, ymin=2, ymax=6, axs=(ax0, ax1))