In [None]:
# Move the kernel's working directory up one level so the relative 'data/...'
# paths used below resolve. Not idempotent: re-running this cell moves up again.
cd ..

In [None]:
import tensorflow as tf
import matplotlib
import numpy as np
import pandas as pd
import json
import os
import itertools
import functools
import data.helpers as helpers
import seaborn as sns
import matplotlib.pyplot as plt
from data.tile_creater import discretiser, grid_translation, get_pixel_lens

In [None]:
# Seaborn cubehelix palette returned as a matplotlib colormap.
# NOTE(review): plot_slice calls sns.heatmap without cmap=, so this object is
# not used there — confirm whether it is still needed.
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
def plot_slice(tslice, label):
    """Draw one 2-D slice as a heatmap and mark ``label`` on it with a red dot.

    Args:
        tslice: 2-D array-like handed to ``sns.heatmap``; the colour scale is
            capped at 1000.
        label: indexable pair; ``label[0]`` is used as the marker's x
            coordinate and ``label[1]`` as its y coordinate.
            NOTE(review): confirm this order matches how the caller's label
            relates to the heatmap's column/row axes.
    """
    figure, axes = plt.subplots(1)
    figure.set_size_inches(8, 6)
    axes = sns.heatmap(tslice, vmax=1000, ax=axes)
    marker_x = label[0]
    marker_y = label[1]
    axes.scatter(marker_x, marker_y, s=50, color='r', marker='o')
    sns.despine()
    figure.tight_layout()

In [None]:
def get_fnames():
    """List the tile files stored under ``data/tiles_small/``.

    Returns:
        A list of paths (directory prefix + entry name) for every directory
        entry whose text after the last '.' is ``txt``. The order is whatever
        ``os.listdir`` yields.
    """
    prefix = 'data/tiles_small/'
    tile_paths = []
    for entry in os.listdir(prefix):
        extension = entry.split('.')[-1]
        if extension == 'txt':
            tile_paths.append(prefix + entry)
    return tile_paths
def parse_filename(fname):
    """Split a tile path into its event id and sequence number.

    Tile files are named ``<eventid>_<seqnum>.txt``; any directory part of
    ``fname`` is ignored.

    Args:
        fname: path or bare file name of a tile file.
    Returns:
        ``(eventid, seqnum)`` where ``eventid`` is a str and ``seqnum`` an int.
    Raises:
        ValueError: if the name has no ``_`` separator or the sequence part is
            not an integer.
    """
    # str.strip() removes a *set of characters*, not a prefix/suffix, so it
    # would eat leading characters of event ids such as 'mesa2017'. Work on
    # the path components instead.
    stem, _extension = os.path.splitext(os.path.basename(fname))
    # Split on the last underscore so the sequence number is always the tail.
    eventid, seq_id = stem.rsplit('_', 1)
    return eventid, int(seq_id)
def get_label(metadata, filename):
    """Return the metadata entry of the event that ``filename`` belongs to.

    The event id is taken from the file name via ``parse_filename`` and used
    as the key into ``metadata``; the sequence number is ignored.
    """
    event_key, _sequence = parse_filename(filename)
    return metadata[event_key]
def _format_label(eventlat,eventlon, eventdepth,
                  eventmag,eventid,topleft,
                  size_x,size_y,numx,numy, nonzeros):
    """Locate an event on the tile grid.

    Args: the fields of one metadata entry, in order, e.g.
        (53.223, 6.949, 3.0, 0.502994544, 'knmi2018etrn', topleft,
         size_x, size_y, numx, numy, nonzeros).
        Only eventlat, eventlon, topleft, size_x, size_y, numx and numy are
        read; the remaining fields are accepted so an entry can be splatted in.
    Returns:
        ``(m, n)``, the index of the event's cell in a ``(numy, numx)`` grid
        (``m`` along the first axis, ``n`` along the second), or ``None``
        after printing 'outside grid' when the event does not fall on the grid.
    """
    xy = grid_translation(topleft,eventlat,eventlon)
    plen_x, plen_y = get_pixel_lens(numx, numy, size_x, size_y)
    n,m = discretiser(plen_x,plen_y,xy)
    # numpy silently wraps negative indices, so an event above or left of the
    # grid would otherwise be accepted as a valid label.
    if m < 0 or n < 0:
        print('outside grid')
        return None
    # Probe a grid of the real size so numpy's own indexing rules decide
    # whether (m, n) is in range.
    grid = np.zeros((int(numy),int(numx)))
    try:
        grid[m,n]=1.0
    except IndexError:
        print('outside grid')
        #earthquake was outside our grid if index error
        return None
    return (m,n)
def format_frames(metadata,frames):
    """Turn one window of frames into a (tensor, label) training example.

    Args:
        metadata: mapping from event id to that event's metadata entry (the
            entry is splatted into ``_format_label``).
        frames: iterable of ``(eventid, seqnum, path)`` records belonging to
            one event; only the first 20 are used.
    Returns:
        ``(tensor_frames, label)`` where ``tensor_frames`` stacks the arrays
        loaded from each record's path along a third axis and ``label`` is
        whatever ``_format_label`` returns (``None`` when off-grid).
    """
    # Materialise once so generators work too; 20 must match the depth
    # declared in the input functions' output_shapes.
    frame_group = list(frames)[0:20]
    # All records in a window share the event id, so the first one suffices.
    label = metadata[frame_group[0][0]]
    onehot = _format_label(*label)
    tensor_frames = np.dstack([np.loadtxt(frame[2])
                               for frame in frame_group])
    return tensor_frames, onehot
def consistent_grouper(n, seq):
    """Yield only the groups from ``helpers.grouper(n, seq)`` of length ``n``.

    Groups whose ``len`` differs from ``n`` are skipped.
    """
    chunks = helpers.grouper(n, seq)
    return (chunk for chunk in chunks if len(chunk) == n)

def gen(metadatafile,frames_per_eg, datafilenames):
    """Build a generator of (tensor, label) examples from tile files.

    Args:
        metadatafile: path of the JSON file mapping event id to metadata.
        frames_per_eg: window size handed to ``helpers.grouper``.
        datafilenames: iterable of tile file paths.
    Returns:
        A generator yielding ``(tensor, label)`` pairs produced by
        ``format_frames``, skipping pairs whose label is falsy (events that
        fall outside the grid).
    """
    with open(metadatafile) as handle:
        metadata = json.load(handle)

    def event_of(record):
        return record[0]

    def seq_of(record):
        return record[1]

    # (eventid, seqnum, path) records; the arrays are loaded later, in format_frames.
    records = sorted(((*parse_filename(path), path) for path in datafilenames),
                     key=event_of)

    # One grouper result per event, with each event's frames in sequence order.
    windows_per_event = []
    for _event, members in itertools.groupby(records, key=event_of):
        ordered = sorted(members, key=seq_of)
        windows_per_event.append(helpers.grouper(frames_per_eg, ordered))

    examples = (format_frames(metadata, window)
                for window in itertools.chain(*windows_per_event))
    return ((tensor, label) for tensor, label in examples if label)
def train_input_fn(metadatafile, train_filenames, batch_size=20, repeat=1):
    """Input function for training.

    Wraps ``gen`` (window size 500) in a ``tf.data`` pipeline that shuffles
    with a buffer of 100, repeats ``repeat`` times and batches by
    ``batch_size``.

    Returns:
        ``({'image': images}, labels)`` tensors from a one-shot iterator.
    """
    def example_source():
        return gen(metadatafile, 500, train_filenames)

    element_types = (tf.int64, tf.int64)
    element_shapes = (tf.TensorShape([30,60,20]), tf.TensorShape([2]))
    dataset = tf.data.Dataset.from_generator(example_source,
                                             output_types=element_types,
                                             output_shapes=element_shapes)
    # Shuffle, then repeat, then batch — the order matters.
    dataset = dataset.shuffle(100)
    dataset = dataset.repeat(repeat)
    dataset = dataset.batch(batch_size)
    images, labels = dataset.make_one_shot_iterator().get_next()
    return {'image': images}, labels
def test_input_fn(metadatafile, test_filenames, batch_size=1):
    """An input function for evaluation and prediction.

    Wraps ``gen`` (window size 500) in a ``tf.data`` pipeline with no
    shuffling or repetition, so examples come out in generator order.

    Args:
        metadatafile: path of the JSON metadata file passed to ``gen``.
        test_filenames: tile file paths to evaluate on.
        batch_size: number of examples per batch (default 1).
    Returns:
        ``({'image': images}, labels)`` tensors from a one-shot iterator.
    """
    print('testing: ', len(test_filenames))
    dataset = tf.data.Dataset.from_generator(lambda: gen(metadatafile,500, test_filenames)
                                             ,output_types=(tf.int64, tf.int64)
                                             ,output_shapes = (tf.TensorShape([30,60,20]),
                                                               tf.TensorShape([2]))
                                            )
    # Honour the caller's batch size instead of a hard-coded 1.
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    d,l = iterator.get_next()
    return {'image':d}, l

def split_traintest(fnames, train_proportion):
    """Split tile files into train and test sets without splitting an event.

    Files are grouped by event id (sorted order); the first
    ``int(n_events * train_proportion)`` events go to train, the rest to test.

    Returns:
        ``(train_files, test_files)`` as two flat lists of paths.
    """
    def event_of(path):
        return parse_filename(path)[0]

    per_event = []
    for _event, members in itertools.groupby(sorted(fnames, key=event_of),
                                             key=event_of):
        per_event.append(list(members))

    cutoff = int(len(per_event) * train_proportion)
    train_files = [path for group in per_event[:cutoff] for path in group]
    test_files = [path for group in per_event[cutoff:] for path in group]
    return train_files, test_files
def specificevent(fnames,event):
    """Return the entries of ``fnames`` whose parsed event id equals ``event``."""
    selected = []
    for path in fnames:
        if parse_filename(path)[0] == event:
            selected.append(path)
    return selected
def justoneevent(fnames):
    """Return the files of a single event, ordered by sequence number.

    The event chosen is the one whose id sorts last. Raises ``StopIteration``
    when ``fnames`` is empty.
    """
    def event_of(path):
        return parse_filename(path)[0]

    def seq_of(path):
        return parse_filename(path)[1]

    descending = sorted(fnames, key=event_of, reverse=True)
    _event, first_group = next(itertools.groupby(descending, key=event_of))
    return sorted(first_group, key=seq_of)

In [None]:
# Split the tile files by event (70% of events to train). Neither `train` nor
# `test` is used in this cell; only the single-event list below is.
train, test = split_traintest(get_fnames(), 0.7)
# Restrict to one event's tiles for a quick visual sanity check.
justone = specificevent(get_fnames(), 'knmi2017gpqn')#justoneevent(train)
with tf.Session() as sess:
    # Build the training pipeline and evaluate one batch of it.
    n = train_input_fn('data/tiles_small/metadata.json',justone)
    i, label = sess.run(n)
    print(label[0])
    # Plot each of the 20 stacked frames of the first example in the batch,
    # with that example's label overlaid.
    for idx in range(20):
        sliced = i['image'][0,:,:,idx]
        plot_slice(sliced, label[0])

In [None]:
def main(labelmeta, train_fnames, test_fnames):
    """Train and evaluate a small DNN regressor on stacked tile frames.

    Args:
        labelmeta: path of the JSON metadata file.
        train_fnames: tile file paths used for training.
        test_fnames: tile file paths used for evaluation.
    Returns:
        The trained ``tf.estimator.DNNRegressor``.
    """
    # Each example is a 30x60 tile stacked over 20 frames, as declared by the
    # output_shapes of train_input_fn/test_input_fn; the flattened feature
    # size must match or the input layer cannot reshape the batch.
    my_feature_columns = [tf.feature_column.numeric_column('image',shape=30*60*20)]
    # Regressor with a single hidden layer of 10 units predicting the
    # 2-component grid label.
    # NOTE: checkpoints already in model_dir from a run with a different
    # feature shape will not load; clear the directory first.
    classifier = tf.estimator.DNNRegressor(
        feature_columns=my_feature_columns,
        label_dimension=2,
        hidden_units=[10],
        model_dir = 'tmp/small_tiles_debug',
        config=tf.estimator.RunConfig().replace(save_summary_steps=1)
        )
    # Train the Model.
    classifier.train(input_fn=lambda:train_input_fn(labelmeta,train_fnames,repeat=1000))

    # Evaluate the model and show which metrics were reported.
    eval_result = classifier.evaluate(
        input_fn=lambda:test_input_fn(labelmeta, test_fnames))
    print([k for k in eval_result])
    return classifier

In [None]:
# The train/test split is computed but not used here: this debug run trains
# and evaluates on the same single event.
train, test = split_traintest(get_fnames(), 0.7)
justone = specificevent(get_fnames(), 'knmi2017gpqn')
labelmeta = 'data/tiles_small/metadata.json'
# `c` is the trained estimator returned by main.
c = main(labelmeta, justone, justone)

In [None]:
# Request predictions for the single-event file list; the result is turned
# into a list in the next cell.
predictions = c.predict(
        input_fn=lambda:test_input_fn(labelmeta, justone))

In [None]:
# Materialise every prediction so they can be indexed and sliced below.
pred = list(predictions)

In [None]:
# Ground-truth labels for the same examples the predictions were made on.
# The window size must equal the one test_input_fn passes to gen (500),
# otherwise labels and predictions do not line up one-to-one when zipped.
testeg= [i[1] for i in gen(labelmeta,500, justone)]


In [None]:
# Pull the 'predictions' entry out of each prediction dict.
predlabel = [i.get('predictions') for i in pred]
# Show the first prediction next to the first ground-truth label.
predlabel[0], testeg[0]

In [None]:
# Compare predicted and true labels for the single-event run.
fig,ax = plt.subplots(1)
s,f =(0,2000)  # slice bounds: which examples to draw
# Predictions in blue.
ax.scatter([i[0] for i in predlabel[s:f]], [i[1] for i in predlabel[s:f]], color ='b')
# Faint dashed segment joining each prediction to its ground-truth label.
for one, two in list(zip(predlabel, testeg))[s:f]:
    ax.plot([one[0],two[0]], [one[1],two[1]], linestyle ='--', alpha=0.1)
# Ground truth in red, drawn larger.
ax.scatter([i[0] for i in testeg[s:f]], [i[1] for i in testeg[s:f]], color ='r', s=50)
sns.despine()
# NOTE(review): these limits assume label component 0 spans 0-60 and
# component 1 spans 0-30 — confirm against the (m, n) order returned by
# _format_label.
ax.set_xlim([0,60])
ax.set_ylim([0,30])