# Instalação de dependências

# Bibliotecas

In [1]:
import pandas as pd
import numpy as np
import csv
import multiprocessing
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
from time import time  # To time our operations

#import logging  # Setting up the loggings to monitor gensim
#logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

## Treinamento em Pipeline

Construindo o vocabulário, treinando e salvando um modelo Word2Vec (skip-gram) para cada tabela OSM

In [3]:
# Control and inspection settings for the training run.
#
# For every OSM table listed in `osm_tables` this cell reads a CSV of
# (center_poi, context_osm) pairs, trains a skip-gram Word2Vec model on those
# pairs and saves the model under ./model/.

cores = multiprocessing.cpu_count()   # Number of CPU cores reported by the OS

# Leave one core free for the rest of the system, but never go below one
# worker: on a single-core machine `cores - 1` is 0, which would ask Word2Vec
# to train with zero worker threads.
n_workers = max(1, cores - 1)

t = time()  # Start of the wall-clock timer for the whole run

osm_tables = ['pois_polygons_information', 'pois_roads_information', 'pois_lines_information', 'pois_points_information']
# Alternative table selections kept for quick experiments:
# osm_tables = ['pois_polygons_information']
#osm_tables = ['bins_roads_information', 'bins_lines_information']
#osm_tables = ['bins_polygons_information']

# Loop-invariant settings, defined once instead of on every iteration.
n = 400  # NOTE(review): not referenced anywhere else in this cell — confirm before removing
w = 0.7  # Only echoed in the progress message below

for osm_table in osm_tables:
    print('Training model (tuple of weight):', w)

    # Input data file for this table
    file_name = './train_files/new-york-sl-tuple-geoc2vec-μ90-' + osm_table+ '-pfp-c.csv'

    print("loading file:", file_name)
    tuples = pd.read_csv(file_name)

    # Drop damaged rows (any row with a missing value in any column), then
    # keep only the two columns that form a training pair.
    tuples = tuples.dropna()
    tuples = tuples[['center_poi', 'context_osm']]

    # Adapt to Word2Vec sentences: each row becomes a two-token sentence
    # [center_poi, context_osm].
    sentencesTuples = tuples.values.tolist()

    # Build the skip-gram model.
    # NOTE(review): `size` is the gensim 3.x keyword (gensim 4 calls it
    # `vector_size`) — confirm the installed version before upgrading.
    p2v_modeltp = Word2Vec(min_count=1,   # keep every token, even if seen once
                            window=1,     # sentences are pairs, so one neighbour is enough
                            sg=1,         # skip-gram
                            size=35,      # embedding vector length
                            sample=6e-5,
                            alpha=0.03,
                            min_alpha=0.0007,
                            negative=20,
                            workers=n_workers)

    # Build the vocabulary
    p2v_modeltp.build_vocab(sentencesTuples, progress_per=10000)

    # Train the model (a single pass over the pairs)
    p2v_modeltp.train(sentencesTuples, total_examples=p2v_modeltp.corpus_count, epochs=1, report_delay=1)

    # Save to disk. Unlike the input file name, no '-' separates the table
    # name from its neighbours here; the model-loading cell further down uses
    # these exact paths, so the name is intentionally left unchanged.
    model_name = './model/new-york-sl-tuple-geoc2vec-μ90' + osm_table+ 'pfp-c.model'

    print('saving file:', model_name)
    p2v_modeltp.save(model_name)

# After the loop `p2v_modeltp` still refers to the model of the last table.
print('Time to train the model: {} mins'.format(round((time() - t) / 60, 2)))
print('Process finish.')

Training model (tuple of weight): 0.7
loading file: ./train_files/new-york-sl-tuple-geoc2vec-μ90-pois_polygons_information-pfp-c.csv
saving file: ./model/new-york-sl-tuple-geoc2vec-μ90pois_polygons_informationpfp-c.model
Training model (tuple of weight): 0.7
loading file: ./train_files/new-york-sl-tuple-geoc2vec-μ90-pois_roads_information-pfp-c.csv
saving file: ./model/new-york-sl-tuple-geoc2vec-μ90pois_roads_informationpfp-c.model
Training model (tuple of weight): 0.7
loading file: ./train_files/new-york-sl-tuple-geoc2vec-μ90-pois_lines_information-pfp-c.csv
saving file: ./model/new-york-sl-tuple-geoc2vec-μ90pois_lines_informationpfp-c.model
Training model (tuple of weight): 0.7
loading file: ./train_files/new-york-sl-tuple-geoc2vec-μ90-pois_points_information-pfp-c.csv
saving file: ./model/new-york-sl-tuple-geoc2vec-μ90pois_points_informationpfp-c.model
Time to train the model: 0.06 mins
Process finish.


In [3]:
# Reload the four per-table models from ./model/, in the order
# lines, points, roads, polygons.
model_lines, model_points, model_roads, model_polygons = (
    Word2Vec.load(saved_path)
    for saved_path in (
        './model/new-york-sl-tuple-geoc2vec-μ90pois_lines_informationpfp-c.model',
        './model/new-york-sl-tuple-geoc2vec-μ90pois_points_informationpfp-c.model',
        './model/new-york-sl-tuple-geoc2vec-μ90pois_roads_informationpfp-c.model',
        './model/new-york-sl-tuple-geoc2vec-μ90pois_polygons_informationpfp-c.model',
    )
)

In [4]:
# Tokens whose vectors are most similar to 'Bar' in the lines model.
# NOTE(review): every recorded score is ~0.996, which may mean the vectors are
# barely differentiated after training — confirm.
model_lines.wv.most_similar(positive=['Bar'])

[('lines_barrier_hedge', 0.9970628023147583),
 ('lines_highway_service', 0.996224045753479),
 ('Sandwich Shop', 0.9961956739425659),
 ('lines_highway_steps', 0.9961392283439636),
 ('Coffee Shop', 0.9958198070526123),
 ('lines_route_road', 0.9957050085067749),
 ('lines_bicycle_no', 0.9956915974617004),
 ('lines_covered_no', 0.9956207871437073),
 ('lines_waterway_stream', 0.9955495595932007),
 ('lines_highway_residential', 0.9955477714538574)]

In [5]:
# Tokens most similar to 'Shop' in the points model.
# NOTE: the recorded run raised KeyError — 'Shop' is not a token in this
# model's vocabulary.
model_points.wv.most_similar(positive=['Shop'])

KeyError: "word 'Shop' not in vocabulary"

In [12]:
# Tokens most similar to 'Gift Shop' in the roads model.
# NOTE: the recorded run raised KeyError — 'Gift Shop' is not a token in the
# roads model's vocabulary.
model_roads.wv.most_similar(positive=['Gift Shop'])

KeyError: "word 'Gift Shop' not in vocabulary"

In [13]:
# Tokens most similar to 'Gift Shop' in the polygons model.
model_polygons.wv.most_similar(positive=['Gift Shop'])

[('polygons_amenity_townhall', 0.49467912316322327),
 ('polygons_sport_ice_skating;ice_hockey', 0.4497722089290619),
 ('polygons_amenity_arts_centre', 0.4353005886077881),
 ('Rental Car Location', 0.4101969599723816),
 ('polygons_sport_roller_hockey', 0.37085920572280884),
 ('Middle Eastern Restaurant', 0.36881059408187866),
 ('polygons_surface_fine_gravel', 0.35554057359695435),
 ('polygons_natural_reef', 0.3481622040271759),
 ('polygons_sport_squash;tennis', 0.33777955174446106),
 ('polygons_building_supermarket', 0.3320648670196533)]

In [4]:
# Tokens most similar to 'Park' in whichever model `p2v_modeltp` currently
# refers to.
# NOTE(review): the recorded neighbours carry the 'points_' prefix, so this was
# presumably the points model left over from the training loop — confirm, since
# the result depends on the cell execution order.
p2v_modeltp.wv.most_similar(positive=['Park'])

[('Bar', 0.8342798352241516),
 ('points_amenity_bicycle_parking', 0.8123246431350708),
 ('points_covered_no', 0.8061373829841614),
 ('points_barrier_bollard', 0.8020102977752686),
 ('points_highway_crossing', 0.756341278553009),
 ('Coffee Shop', 0.7543727159500122),
 ('points_amenity_charging_station', 0.7541670203208923),
 ('points_barrier_kerb', 0.7423862218856812),
 ('American Restaurant', 0.7271029949188232),
 ('Bowling Alley', 0.7263635396957397)]

In [5]:
# Tokens most similar to 'Ice Cream Shop' in the model currently bound to
# `p2v_modeltp` (which model that is depends on the cells run before this one).
p2v_modeltp.wv.most_similar(positive=['Ice Cream Shop'])

[('points_barrier_bollard', 0.7203967571258545),
 ('American Restaurant', 0.7024220824241638),
 ('Residential Building (Apartment / Condo)', 0.6553056836128235),
 ('points_highway_crossing', 0.6469929814338684),
 ('Airport', 0.6395797729492188),
 ('points_covered_no', 0.6297082901000977),
 ('Korean Restaurant', 0.6239877343177795),
 ('points_amenity_bench', 0.6144056916236877),
 ('Home (private)', 0.6076717376708984),
 ('Bank', 0.6037388443946838)]

In [6]:
# 'Ice Cream Shop' is passed as a *negative* example, so the ranking is
# against the negated vector: the results are the tokens pointing most away
# from 'Ice Cream Shop' in the model currently bound to `p2v_modeltp`.
p2v_modeltp.wv.most_similar(negative=['Ice Cream Shop'])

[('points_junction_yes', 0.461407870054245),
 ('Bridal Shop', 0.44068989157676697),
 ('points_sport_horse_racing', 0.37038713693618774),
 ('Pool Hall', 0.3459329903125763),
 ('points_sport_table_tennis;billiards', 0.31778281927108765),
 ('points_sport_gymnastics', 0.30393359065055847),
 ('points_railway_signal_box', 0.2953101098537445),
 ('points_sport_hapkido;taekwondo;jujitsu', 0.2921789586544037),
 ('points_building_house', 0.2883090674877167),
 ('points_bicycle_no', 0.2809799611568451)]

In [10]:
# Same negative-example query as for 'Ice Cream Shop', this time for 'Subway':
# returns the tokens pointing most away from it in the current `p2v_modeltp`.
p2v_modeltp.wv.most_similar(negative=['Subway'])

[('points_sport_gymnastics', 0.41582000255584717),
 ('points_building_hut', 0.3834674656391144),
 ('points_sport_weightlifting;exercise', 0.34964239597320557),
 ('points_foot_customers', 0.34006866812705994),
 ('points_historic_maritime', 0.33627849817276),
 ('points_barrier_height_restrictor', 0.3200080692768097),
 ('points_sport_horse_racing', 0.3165079355239868),
 ('Cemetery', 0.30581700801849365),
 ('points_natural_tree_stump', 0.30005496740341187),
 ('points_amenity_casino', 0.29914891719818115)]

In [7]:
# Load a previously trained GEOC2VEC model from disk.
# This rebinds both `model_name` and `p2v_modeltp`, replacing whatever model
# earlier cells left in `p2v_modeltp`.
# NOTE: the recorded run raised FileNotFoundError — this path did not exist
# relative to the notebook's working directory.
model_name = './geographic/GEOC2VEC/new-york-sl-tuple-geoc2vec5bin-wgt0.5pfp-concat-c.model'
p2v_modeltp = Word2Vec.load(model_name)

FileNotFoundError: [Errno 2] No such file or directory: './geographic/GEOC2VEC/new-york-sl-tuple-geoc2vec5bin-wgt0.5pfp-concat-c.model'

In [6]:
# Run train.py through the shell with the NYC train/validation CSVs, the graph
# adjacency/feature files and the embedding/transformer sizes given as flags.
# NOTE: the recorded run failed because train.py was not found in the
# notebook's working directory; the relative dataset/ paths presumably assume
# the same directory as train.py — confirm before re-running.
!python train.py --data-train dataset/NYC/NYC_train.csv --data-val dataset/NYC/NYC_val.csv --data-adj-mtx dataset/NYC/graph_A.csv --data-node-feats dataset/NYC/graph_X.csv --time-units 48 --time-feature norm_in_day_time --poi-embed-dim 128 --user-embed-dim 128 --time-embed-dim 32 --cat-embed-dim 280 --node-attn-nhid 128 --transformer-nhid 1024 --transformer-nlayers 2 --transformer-nhead 2 --batch 20 --use-embeddings True --epochs 1 --name expcustom

python: can't open file '/home/diogo/poi-recommendation-TCC/Code/db_parser/train.py': [Errno 2] No such file or directory
