In [1]:
# NOTE(review): %edit opens an external editor on a temporary file (the cell
# output "IPython will make a temporary file named: ..." comes from it). It
# looks like an interactive leftover rather than part of the analysis --
# confirm and remove.
%edit
# Reload imported modules before each cell executes, so edits made to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

IPython will make a temporary file named: /tmp/ipython_edit_4ccsgL/ipython_edit_2WDWKa.py


In [4]:
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import AgglomerativeClustering

ImportError: No module named sklearn.manifold

In [5]:
'''
Developed by Gengchen Mai

gengchen.mai@gmail.com
05/08/2019
'''


from argparse import ArgumentParser

from spacegraph_codebase.utils import *
from spacegraph_codebase.Place2Vec.cur_data_utils import load_pointset
from spacegraph_codebase.data_utils import load_ng
from spacegraph_codebase.model import NeighGraphEncoderDecoder
from spacegraph_codebase.train_helper import run_train, run_eval, run_joint_train

from torch import optim
import numpy as np

In [6]:
# Command-line style option parser for the notebook. Every option has a
# default, so parser.parse_args([]) yields a complete configuration; a later
# cell overrides selected options by parsing an option string.
parser = ArgumentParser()
# dir
parser.add_argument("--data_dir", type=str, default="./Place2Vec/")
parser.add_argument("--model_dir", type=str, default="./")
parser.add_argument("--log_dir", type=str, default="./")
parser.add_argument("--num_context_sample", type=int, default=10,
    help='The number of context points we can sample, maximum is 10')

# model
parser.add_argument("--embed_dim", type=int, default=64,
    help='Point feature embedding dim')
parser.add_argument("--dropout", type=float, default=0.5,
    help='The dropout rate used in all fully connected layer')

# encoder
parser.add_argument("--enc_agg", type=str, default="mean",
    help='the type of aggregation function for feature encoder')

# model type
parser.add_argument("--model_type", type=str, default="relative",
    help='''the type of model we use,
    relative: only relative position;
    global: only global position;
    join: relative and global position;
    together: use global position of center point in context prediction''')

# space encoder
parser.add_argument("--spa_enc", type=str, default="gridcell",
    help='the type of spatial relation encoder, gridcell/naive')
parser.add_argument("--spa_embed_dim", type=int, default=64,
    help='Point Spatial relation embedding dim')
parser.add_argument("--freq", type=int, default=16,
    help='The number of frequency used in the space encoder')
# NOTE: 10e4 is 100000.0 (i.e. 1e5), not 10**4.
parser.add_argument("--max_radius", type=float, default=10e4,
    help='The maximum spatial context radius in the space encoder')
parser.add_argument("--spa_f_act", type=str, default='sigmoid',
    help='The final activation function used by spatial relation encoder')
parser.add_argument("--freq_init", type=str, default='geometric',
    help='The frequency list initialization method')

# global space/position encoder
parser.add_argument("--g_spa_enc", type=str, default="gridcell",
    help='the type of spatial relation encoder, gridcell/naive')
parser.add_argument("--g_spa_embed_dim", type=int, default=64,
    help='Point Spatial relation embedding dim')
parser.add_argument("--g_freq", type=int, default=16,
    help='The number of frequency used in the space encoder')
# NOTE: 10e4 is 100000.0 (i.e. 1e5), not 10**4.
parser.add_argument("--g_max_radius", type=float, default=10e4,
    help='The maximum spatial context radius in the space encoder')
parser.add_argument("--g_spa_f_act", type=str, default='sigmoid',
    help='The final activation function used by spatial relation encoder')
parser.add_argument("--g_freq_init", type=str, default='geometric',
    help='The frequency list initialization method')


# Boolean-like switches below are passed as the strings 'T' / other, not as
# store_true flags.
parser.add_argument("--use_dec", type=str, default='T',
    help='whether to use another decoder following the initial decoder')


# initial decoder, without query embedding
parser.add_argument("--init_decoder_atten_type", type=str, default='concat',
    help='''the type of the intersection operator attention in initial decoder
    concat: the relative model
    g_pos_concat: the together model''')
parser.add_argument("--init_decoder_atten_act", type=str, default='leakyrelu',
    help='the activation function of the intersection operator attention, see GAT paper Equ 3 in initial decoder')
parser.add_argument("--init_decoder_atten_f_act", type=str, default='sigmoid',
    help='the final activation function of the intersection operator attention, see GAT paper Equ 6 in initial decoder')
parser.add_argument("--init_decoder_atten_num", type=int, default=1,
    help='the number of the intersection operator attention in initial decoder')
parser.add_argument("--init_decoder_use_layn", type=str, default='T',
    help='whether to use layer normalization in initial decoder')
parser.add_argument("--init_decoder_use_postmat", type=str, default='T',
    help='whether to use post matrix in initial decoder')


# decoder
parser.add_argument("--decoder_atten_type", type=str, default='concat',
    help='''the type of the intersection operator attention
    concat: the relative model
    g_pos_concat: the together model''')
parser.add_argument("--decoder_atten_act", type=str, default='leakyrelu',
    help='the activation function of the intersection operator attention, see GAT paper Equ 3')
parser.add_argument("--decoder_atten_f_act", type=str, default='sigmoid',
    help='the final activation function of the intersection operator attention, see GAT paper Equ 6')
parser.add_argument("--decoder_atten_num", type=int, default=0,
    help='the number of the intersection operator attention')
parser.add_argument("--decoder_use_layn", type=str, default='T',
    help='whether to use layer normalization')
parser.add_argument("--decoder_use_postmat", type=str, default='T',
    help='whether to use post matrix')

# encoder decoder
parser.add_argument("--join_dec_type", type=str, default='max',
    help='the type of join_dec, min/max/mean/cat')
parser.add_argument("--act", type=str, default='sigmoid',
    help='the activation function for the encoder decoder')

# train
parser.add_argument("--opt", type=str, default="adam")
parser.add_argument("--lr", type=float, default=0.01,
    help='learning rate')
parser.add_argument("--max_iter", type=int, default=50000000,
    help='the maximum iterator for model converge')
parser.add_argument("--max_burn_in", type=int, default=5000,
    help='the maximum iterator for relative/global model converge')
parser.add_argument("--batch_size", type=int, default=512)
parser.add_argument("--tol", type=float, default=0.000001)


# eval
parser.add_argument("--log_every", type=int, default=50)
parser.add_argument("--val_every", type=int, default=5000)


# load old model
parser.add_argument("--load_model", action='store_true')

# cuda
# The trailing ';' keeps the notebook from echoing the returned Action object
# as the cell's output.
parser.add_argument("--cuda", action='store_true',
    help='move the feature embeddings and the model to the GPU');

_StoreTrueAction(option_strings=['--cuda'], dest='cuda', nargs=0, const=True, default=False, type=None, choices=None, help=None, metavar=None)

In [8]:
# Option overrides for this run, kept as (flag, value) pairs so that a single
# setting can be edited without touching one long string.
arg_pairs = [
    ("--data_dir", "../data_collection/Place2Vec/"),
    ("--model_dir", "./model_dir/Place2Vec/"),
    ("--log_dir", "./model_dir/Place2Vec/"),
    ("--num_context_sample", "10"),
    ("--embed_dim", "64"),
    ("--dropout", "0.5"),
    ("--enc_agg", "mean"),
    ("--model_type", "global"),
    ("--spa_enc", "theory"),
    ("--spa_embed_dim", "64"),
    ("--freq", "16"),
    ("--max_radius", "10000"),
    ("--spa_f_act", "sigmoid"),
    ("--freq_init", "geometric"),
    ("--g_spa_enc", "theory"),
    ("--g_spa_embed_dim", "64"),
    ("--g_freq", "16"),
    ("--g_max_radius", "10e5"),
    ("--g_spa_f_act", "sigmoid"),
    ("--g_freq_init", "geometric"),
    ("--use_dec", "T"),
    ("--init_decoder_atten_type", "concat"),
    ("--init_decoder_atten_act", "leakyrelu"),
    ("--init_decoder_atten_f_act", "sigmoid"),
    ("--init_decoder_atten_num", "1"),
    ("--init_decoder_use_layn", "T"),
    ("--init_decoder_use_postmat", "T"),
    ("--decoder_atten_type", "concat"),
    ("--decoder_atten_act", "leakyrelu"),
    ("--decoder_atten_f_act", "sigmoid"),
    ("--decoder_atten_num", "1"),
    ("--decoder_use_layn", "T"),
    ("--decoder_use_postmat", "T"),
    ("--join_dec_type", "max"),
    ("--act", "sigmoid"),
    ("--opt", "adam"),
    ("--lr", "0.001"),
    ("--max_iter", "2000"),
    ("--batch_size", "512"),
    ("--log_every", "50"),
    ("--val_every", "50"),
]

# One "flag value " entry per line, continuation lines indented by two spaces.
args_list = " \n  ".join(flag + " " + value for flag, value in arg_pairs) + " "

# Whitespace-split the option string and parse it as if it were sys.argv[1:].
args = parser.parse_args(args_list.split())

In [10]:
def make_args_combine(args):
    """Build the name stem that identifies one hyper-parameter setting.

    The stem is "/" followed by the dataset name and the selected option
    values joined with "-". Callers append ".log" / ".pth" to it to obtain
    the log and checkpoint file names.

    Args:
        args: object exposing the parsed options as attributes (for example
            an argparse.Namespace); every attribute listed in ``fields``
            below plus ``data_dir`` is read.

    Returns:
        str: the stem, e.g. "/Place2Vec-10-64-0.5-mean-...-0.001000-512".
    """
    # Dataset name = last non-empty component of data_dir. Stripping trailing
    # slashes first makes "a/b/Place2Vec" and "a/b/Place2Vec/" give the same
    # name (indexing [-2] only worked when the trailing slash was present).
    data = args.data_dir.strip().rstrip("/").split("/")[-1]

    # (attribute name, format spec), in the exact order used in the stem.
    fields = (
        ("num_context_sample", "d"),
        ("embed_dim", "d"),
        ("dropout", ".1f"),
        ("enc_agg", "s"),

        ("model_type", "s"),

        ("spa_enc", "s"),
        ("spa_embed_dim", "d"),
        ("freq", "d"),
        ("max_radius", ".1f"),
        ("spa_f_act", "s"),
        ("freq_init", "s"),

        ("g_spa_enc", "s"),
        ("g_spa_embed_dim", "d"),
        ("g_freq", "d"),
        ("g_max_radius", ".1f"),
        ("g_spa_f_act", "s"),
        ("g_freq_init", "s"),

        ("use_dec", "s"),

        ("init_decoder_atten_type", "s"),
        ("init_decoder_atten_act", "s"),
        ("init_decoder_atten_f_act", "s"),
        ("init_decoder_atten_num", "d"),
        ("init_decoder_use_layn", "s"),
        ("init_decoder_use_postmat", "s"),

        ("decoder_atten_type", "s"),
        ("decoder_atten_act", "s"),
        ("decoder_atten_f_act", "s"),
        ("decoder_atten_num", "d"),
        ("decoder_use_layn", "s"),
        ("decoder_use_postmat", "s"),

        ("join_dec_type", "s"),
        ("act", "s"),
        ("opt", "s"),
        ("lr", ".6f"),
        ("batch_size", "d"),
    )

    parts = [data]
    for attr_name, spec in fields:
        # format(value, spec) is what "{:spec}".format(value) applies.
        parts.append(format(getattr(args, attr_name), spec))
    return "/" + "-".join(parts)

In [11]:
print("Loading NeighGraph data..")

# The three splits are .pkl files located directly under args.data_dir.
train_ng_path = args.data_dir + "/neighborgraphs_training.pkl"
val_ng_path = args.data_dir + "/neighborgraphs_validation.pkl"
test_ng_path = args.data_dir + "/neighborgraphs_test.pkl"

print("Loading training NeighGraph data..")
train_ng_list = load_ng(train_ng_path)

print("Loading validation NeighGraph  data..")
val_ng_list = load_ng(val_ng_path)

print("Loading testing NeighGraph data..")
test_ng_list = load_ng(test_ng_path)

Loading NeighGraph data..
Loading training NeighGraph data..
Loading validation NeighGraph  data..
Loading testing NeighGraph data..


In [19]:
def make_enc_dec(args):
    """Rebuild the encoder-decoder described by ``args`` and load its weights.

    The sub-modules that get built depend on ``args.model_type``:

    * relative space encoder and context decoders: "relative", "join",
      "together"
    * global space encoder: "global", "join", "together"
    * global position decoder: "global", "join"
    * joint relative/global decoder: "join" only

    Sub-modules that do not apply are passed to ``get_enc_dec`` as None.

    The checkpoint is read from
    ``args.model_dir + make_args_combine(args) + ".pth"``.

    Args:
        args: parsed options (see the ArgumentParser cell); the directory,
            model, encoder, decoder and ``cuda`` options are read.

    Returns:
        The object returned by ``get_enc_dec`` after ``load_state_dict`` has
        been applied to it.

    NOTE(review): the model's train/eval mode is not changed here; with
    dropout configured, callers doing inference probably want to call
    ``.eval()`` on the result -- confirm.
    """
    args_combine = make_args_combine(args)

    log_file = args.log_dir + args_combine + ".log"
    model_file = args.model_dir + args_combine + ".pth"

    # The returned logger is not needed in this function; the call is kept
    # because it presumably configures logging to log_file (verify against
    # setup_logging).
    setup_logging(log_file, filemode='a')

    print("Loading PointSet data..")

    pointset, feature_embedding = load_pointset(data_dir=args.data_dir,
                                                embed_dim=args.embed_dim,
                                                do_feature_sampling=False)
    if args.cuda:
        pointset.feature_embed_lookup = cudify(feature_embedding)

    # Which parts of the architecture the selected model type needs.
    model_type = args.model_type
    has_relative = model_type in ("relative", "join", "together")
    has_global_enc = model_type in ("global", "join", "together")
    has_global_dec = model_type in ("global", "join")

    # make feature encoder
    enc = get_encoder(pointset.feature_embed_lookup, feature_embedding, pointset, args.enc_agg)

    # make relative space encoder
    spa_enc = None
    if has_relative:
        spa_enc = get_spa_encoder(spa_enc_type=args.spa_enc,
                                  spa_embed_dim=args.spa_embed_dim,
                                  coord_dim=2,
                                  frequency_num=args.freq,
                                  max_radius=args.max_radius,
                                  dropout=args.dropout,
                                  freq_init=args.freq_init)

    # make global space encoder
    g_spa_enc = None
    if has_global_enc:
        g_spa_enc = get_spa_encoder(spa_enc_type=args.g_spa_enc,
                                    spa_embed_dim=args.g_spa_embed_dim,
                                    coord_dim=2,
                                    frequency_num=args.g_freq,
                                    max_radius=args.g_max_radius,
                                    dropout=args.dropout,
                                    freq_init=args.g_freq_init)

    # make context decoders
    init_dec = None
    dec = None
    joint_dec = None
    if has_relative:
        # initial decoder, built without a query embedding
        init_dec = get_context_decoder(dec_type=args.init_decoder_atten_type,
                                       query_dim=args.embed_dim,
                                       key_dim=args.embed_dim,
                                       spa_embed_dim=args.spa_embed_dim,
                                       g_spa_embed_dim=args.g_spa_embed_dim,
                                       have_query_embed=False,
                                       num_attn=args.init_decoder_atten_num,
                                       activation=args.init_decoder_atten_act,
                                       f_activation=args.init_decoder_atten_f_act,
                                       layn=args.init_decoder_use_layn,
                                       use_postmat=args.init_decoder_use_postmat,
                                       dropout=args.dropout)

        if args.use_dec == "T":
            # second decoder, built with a query embedding
            dec = get_context_decoder(dec_type=args.decoder_atten_type,
                                      query_dim=args.embed_dim,
                                      key_dim=args.embed_dim,
                                      spa_embed_dim=args.spa_embed_dim,
                                      g_spa_embed_dim=args.g_spa_embed_dim,
                                      have_query_embed=True,
                                      num_attn=args.decoder_atten_num,
                                      activation=args.decoder_atten_act,
                                      f_activation=args.decoder_atten_f_act,
                                      layn=args.decoder_use_layn,
                                      use_postmat=args.decoder_use_postmat,
                                      dropout=args.dropout)

        if model_type == "join":
            joint_dec = JointRelativeGlobalDecoder(feature_embed_dim=args.embed_dim,
                                                   f_act=args.act,
                                                   dropout=args.dropout,
                                                   join_type=args.join_dec_type)

    # make global space decoder
    g_spa_dec = None
    if has_global_dec:
        g_spa_dec = DirectPositionEmbeddingDecoder(g_spa_embed_dim=args.g_spa_embed_dim,
                                                   feature_embed_dim=args.embed_dim,
                                                   f_act=args.act,
                                                   dropout=args.dropout)

    # make encoder-decoder
    enc_dec = get_enc_dec(model_type=model_type,
                          pointset=pointset,
                          enc=enc,
                          spa_enc=spa_enc,
                          g_spa_enc=g_spa_enc,
                          g_spa_dec=g_spa_dec,
                          init_dec=init_dec,
                          dec=dec,
                          joint_dec=joint_dec,
                          activation=args.act,
                          num_context_sample=args.num_context_sample,
                          num_neg_resample=10)

    if args.cuda:
        enc_dec.cuda()

    # No optimizer is created: this function only restores a saved model and
    # the optimizer built here previously was never used or returned.

    print("Load model from {}".format(args_combine + ".pth"))

    # Without --cuda, remap every stored tensor to CPU so that a checkpoint
    # written on a GPU machine can still be loaded; with --cuda keep
    # torch.load's default placement.
    map_location = None if args.cuda else (lambda storage, loc: storage)
    enc_dec.load_state_dict(torch.load(model_file, map_location=map_location))
    return enc_dec

In [14]:
interval = 1000

# Bounding box of the sampling grid as (x_min, x_max, y_min, y_max).
extent = (-1713000, -1670000, 1600000, 1650000)

# coords[row][col] == [x, y]: one row per y step (the "latitude" axis in the
# original notes) and one column per x step ("longitude"); adding `interval`
# to the stop value makes both upper bounds inclusive.
coords = [
    [[x, y] for x in range(extent[0], extent[1] + interval, interval)]
    for y in range(extent[2], extent[3] + interval, interval)
]


In [15]:
# Grid of relative offsets: rel_coords[row][col] == [x, y] with x and y each
# running over 0, 10, ..., 10000 (rows follow y, columns follow x).
rel_coords = [
    [[x, y] for x in range(0, 10010, 10)]
    for y in range(0, 10010, 10)
]

In [18]:
# Rebuild the model for the parsed settings and restore its saved weights
# (make_enc_dec finishes by calling load_state_dict on the checkpoint file).
# NOTE(review): this cell's execution count is lower than that of the cell
# defining make_enc_dec, so the recorded output comes from an earlier version
# of that function -- re-run in order to confirm.
g_enc_dec = make_enc_dec(args)

Loading PointSet data..


2019-05-20 07:16:13,939 - INFO - Save file at /Place2Vec-10-64-0.5-mean-global-theory-64-16-10000.0-sigmoid-geometric-theory-64-16-1000000.0-sigmoid-geometric-T-concat-leakyrelu-sigmoid-1-T-T-concat-leakyrelu-sigmoid-1-T-T-max-sigmoid-adam-0.001000-512.pth


In [20]:
# Collect x / y coordinates and the first feature of every point that lies
# strictly inside the bounding box used for the coordinate grid.
x = []
y = []
types = []
cnt = 0          # points visited
select_cnt = 0   # points that fall inside the box
# Ids once meant to be excluded; the exclusion is currently not applied.
filter_out_ids = [1575, 2342, 6883, 10577, 16755, 17172, 21961, 24320]
for pt_id in g_enc_dec.pointset.pt_dict:
    cnt += 1
    point = g_enc_dec.pointset.pt_dict[pt_id]
    pt_coord = point.coord
    inside_box = (-1713000 < pt_coord[0] < -1670000
                  and 1600000 < pt_coord[1] < 1650000)
    if not inside_box:
        continue
    select_cnt += 1
    x.append(pt_coord[0])
    y.append(pt_coord[1])
    types.append(point.features[0])
print(cnt)
print(select_cnt)

27924
27924


In [23]:
#%matplotlib inline
# Scatter plot of the selected points, coloured by each point's first feature.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'module' object has no attribute 'to_rgba'". That looks
# like a matplotlib installation/version problem rather than an issue with
# the data -- confirm in the target environment.
# NOTE(review): the backend was set to 'Agg' in the import cell, so
# plt.show() is not expected to open a window; use fig.savefig(...) or
# enable the inline backend if the figure should be displayed -- confirm.
fig, ax = plt.subplots()
ax.scatter(x, y, s=1, c=types, alpha=0.5)
ax.set_xlabel("x coordinate")
ax.set_ylabel("y coordinate")
ax.set_title("Selected points coloured by first feature")
plt.show()

AttributeError: 'module' object has no attribute 'to_rgba'