### Tail prediction model pipeline

#### The current trained model can be found here:
/shared/Fanbo/Knowledge/pykeen/FB15_100_epoch_base

In [None]:
from pykeen.pipeline import pipeline
# Settings forwarded to the early stopper and to the dataset loader.
early_stopping_options = dict(frequency=10, patience=5, relative_delta=0.002)
fb15k237_options = dict(create_inverse_triples=True)

# Train TuckER on FB15k-237 with the LCWA training loop for up to 100 epochs,
# logging to TensorBoard.
pipeline_result = pipeline(
    dataset='FB15k-237',
    dataset_kwargs=fb15k237_options,
    model='TuckER',
    training_loop='LCWA',
    epochs=100,
    stopper='early',
    stopper_kwargs=early_stopping_options,
    result_tracker='tensorboard',
)

# Persist the trained model and its metadata relative to the working directory.
pipeline_result.save_to_directory('FB15_100_epoch_base')

#### Load and evaluate the trained model. If you ran the training cell above (it takes time), the results can be checked in TensorBoard; otherwise load the saved model and evaluate it directly.

In [None]:
import torch
# NOTE: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted location.
tail_model_checkpoint = '/shared/Fanbo/Knowledge/pykeen/FB15_100_epoch_base/trained_model.pkl'
my_pykeen_model = torch.load(tail_model_checkpoint)

In [None]:
from pykeen.evaluation import RankBasedEvaluator
from pykeen.datasets.freebase import FB15k237
# FB15k-237 splits used for the evaluation.
dataset = FB15k237()
evaluator = RankBasedEvaluator()

# Triples to rank: the held-out test split.
mapped_triples = dataset.testing.mapped_triples

# Evaluate the model that was loaded from disk for this section.
# The cell previously passed `model`, a name that is only assigned in the
# relation-prediction section further down, so on a fresh kernel it raised
# NameError (and out of order it scored a different model).
# Training and validation triples are supplied as additional filter triples.
results = evaluator.evaluate(
    model=my_pykeen_model,
    mapped_triples=mapped_triples,
    batch_size=1024,
    additional_filter_triples=[
        dataset.training.mapped_triples,
        dataset.validation.mapped_triples,
    ],
)

## Relation prediction

In [None]:
import pandas as pd
from pykeen.models import predict

#### Evaluation functions for tail prediction and relation prediction, respectively

In [20]:

def _count_top_k_known_hits(df, top_k, predict_fn, query_columns, model, training_data):
    """Shared scoring loop behind ``top_k_v2_tail`` and ``top_k_v2``.

    For every row of ``df`` the two values found under ``query_columns`` are
    passed to ``predict_fn`` together with ``model`` and ``training_data``.
    A row counts as a hit when at least one of the first ``top_k`` rows of the
    returned frame has a truthy value in column position 3.

    Rows for which anything in that process raises an ``Exception`` are
    reported on stdout and excluded from the denominator (best effort, as
    before), but ``KeyboardInterrupt``/``SystemExit`` are no longer swallowed.

    Prints the hit count, then the hit rate (``nan`` when no row could be
    scored instead of raising ``ZeroDivisionError``). Returns ``None``.
    """
    hits = 0
    scored = len(df)
    for index, row in df.iterrows():
        try:
            first_label, second_label = (row[column] for column in query_columns)
            predictions = predict_fn(
                model, first_label, second_label, triples_factory=training_data
            )
            # Column position 3 is `in_training` in the relation-prediction
            # frame shown later in this notebook; presumably the tail frame
            # has the same layout - TODO confirm.
            is_hit = predictions.iloc[:top_k, 3].sum() >= 1
        except Exception as error:
            scored -= 1
            print(f"skipped row {index}: {type(error).__name__}: {error}")
            continue
        if is_hit:
            hits += 1
    print(hits)
    print(hits / scored if scored else float("nan"))


def top_k_v2_tail(df, top_k, model, training_data):
    """Print top-k hit count and hit rate for tail prediction.

    Args:
        df: frame whose columns ``0`` and ``1`` hold the head and relation
            labels handed to ``predict.get_tail_prediction_df``.
        top_k: number of leading prediction rows inspected per query.
        model: model passed through to the prediction call.
        training_data: triples factory passed as ``triples_factory``.

    NOTE(review): the row's own tail (column ``2``) is not consulted; a hit
    means "some top-k prediction is flagged in column 3" - confirm that this
    is the intended metric.
    """
    _count_top_k_known_hits(
        df, top_k, predict.get_tail_prediction_df, (0, 1), model, training_data
    )


def top_k_v2(df, top_k, model, training_data):
    """Print top-k hit count and hit rate for relation prediction.

    Args:
        df: frame whose columns ``0`` and ``2`` hold the head and tail labels
            handed to ``predict.get_relation_prediction_df``.
        top_k: number of leading prediction rows inspected per query.
        model: model passed through to the prediction call.
        training_data: triples factory passed as ``triples_factory``.

    NOTE(review): the row's own relation (column ``1``) is not consulted; a
    hit means "some top-k prediction is flagged in column 3" - confirm that
    this is the intended metric.
    """
    _count_top_k_known_hits(
        df, top_k, predict.get_relation_prediction_df, (0, 2), model, training_data
    )

### Relation prediction nations test ground FB15k-237

In [None]:
# Imports for this cell, grouped so its dependencies are visible at a glance.
import torch
from torch.optim import Adam

from pykeen.datasets import get_dataset
from pykeen.models import TuckER
from pykeen.training import SLCWATrainingLoop

# Get a training dataset
dataset = get_dataset(dataset="FB15k-237")
training_triples_factory = dataset.training

# Pick a model. Fall back to the CPU when no GPU is visible instead of
# crashing on a hard-coded 'cuda' device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TuckER(triples_factory=training_triples_factory, loss='MarginRankingLoss')
model.to(device)

# Pick an optimizer from Torch
optimizer = Adam(params=model.get_grad_params())

# Pick a training approach (sLCWA or LCWA). The basic negative sampler is
# configured with a corruption scheme containing only 'relation'.
training_loop = SLCWATrainingLoop(
    model=model,
    triples_factory=training_triples_factory,
    optimizer=optimizer,
    negative_sampler='basic',
    negative_sampler_kwargs=dict(
        corruption_scheme=['relation'],
    ),
)

# Train for 20 epochs; the return value of train() is discarded.
_ = training_loop.train(
    triples_factory=training_triples_factory,
    num_epochs=20,
    batch_size=256,
)

#### The model is a torch model; the current trained model can be loaded:

In [None]:
# NOTE: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted location.
# This rebinds `model`, replacing any model object assigned earlier.
relation_model_checkpoint = '/shared/Fanbo/Knowledge/pykeen/FB15k_237_relation_model/FB15k_237_relation_model.pkl'
model = torch.load(relation_model_checkpoint)

In [5]:
import pandas as pd
# Tab-separated split files read without a header row, so the frames get
# integer column labels (the scoring helpers index them as 0, 1 and 2).
fb15k_split_paths = {
    'valid': '/shared/Fanbo/Knowledge/pykeen/FB15K_pykeen_data/valid.txt',
    'train': '/shared/Fanbo/Knowledge/pykeen/FB15K_pykeen_data/train.txt',
    'test': '/shared/Fanbo/Knowledge/pykeen/FB15K_pykeen_data/test.txt',
}
fb15k_split_frames = {
    split_name: pd.read_csv(split_path, sep='\t', header=None)
    for split_name, split_path in fb15k_split_paths.items()
}
validation_Fb = fb15k_split_frames['valid']
train_Fb = fb15k_split_frames['train']
test_Fb = fb15k_split_frames['test']

#### Relation prediction: training data score

In [23]:
# Score relation prediction on the first 500 training rows, inspecting the
# top 3 predictions per query.
train_sample = train_Fb.iloc[:500, :]
top_k_v2(
    df=train_sample,
    top_k=3,
    model=model,
    training_data=training_triples_factory,
)

491
0.982


#### validation score

In [68]:
# Score relation prediction on the first 500 validation rows. The validation
# triples factory is what gets passed as `training_data` here.
# NOTE(review): top_k is 2 here but 3 for the training sample, so the two
# scores are not directly comparable - confirm this is intended.
validation_sample = validation_Fb.iloc[:500, :]
top_k_v2(
    df=validation_sample,
    top_k=2,
    model=model,
    training_data=dataset.validation,
)

execpt
454
0.9098196392785571


#### Predicting directly for a single (head, tail) pair

In [56]:
# Score the relations for one (head, tail) pair. The training factory supplies
# the label mappings and the validation triples are passed as `testing`.
query_head = "/m/06wxw"
query_tail = "/m/02fqwt"
test = predict.get_relation_prediction_df(
    model,
    query_head,
    query_tail,
    triples_factory=dataset.training,
    testing=dataset.validation.mapped_triples,
)
test

Unnamed: 0,relation_id,relation_label,score,in_training,in_testing
131,131,/location/location/time_zones,8.358224,False,True
116,116,/influence/influence_node/peers./influence/pee...,3.198422,False,False
53,53,/education/educational_degree/people_with_this...,3.175734,False,False
23,23,/base/biblioness/bibs_location/state,2.387324,False,False
22,22,/base/biblioness/bibs_location/country,2.349178,False,False
...,...,...,...,...,...
69,69,/film/actor/film./film/performance/special_per...,-4.804832,False,False
169,169,/organization/non_profit_organization/register...,-4.951991,False,False
34,34,/base/popstra/location/vacationers./base/popst...,-5.496681,False,False
104,104,/film/special_film_performance_type/film_perfo...,-7.353208,False,False


### Pegasus for conditional generation:
labels (torch.LongTensor of shape (batch_size, sequence_length), optional) — Labels for computing the masked language modeling loss. Indices should either be in [0, ..., config.vocab_size] or -100 (see input_ids docstring). Tokens with indices set to -100 are ignored (masked), the loss is only computed for the tokens with labels in [0, ..., config.vocab_size].