In [1]:
import numpy as np
import os
import torch

In [2]:
# Directory holding entities.dict, relations.dict and the train/valid/test splits.
# NOTE(review): the outputs saved further down in this notebook list
# Freebase-style relation names, so they appear to come from a run with a
# different data_path -- re-run the notebook before relying on them.
data_path = '../data/wn18rr'
def read_triple(file_path, entity2id, relation2id):
    '''
    Read triples and map them into ids.

    Each non-blank line of ``file_path`` must hold three tab-separated
    fields: head entity, relation, tail entity. Blank (or whitespace-only)
    lines, such as a trailing empty line, are skipped instead of raising.

    Args:
        file_path: path of the triple file to read.
        entity2id: mapping from entity name to integer id.
        relation2id: mapping from relation name to integer id.

    Returns:
        A list of ``(head_id, relation_id, tail_id)`` tuples in file order.

    Raises:
        KeyError: if an entity or relation is missing from the mappings.
        ValueError: if a non-blank line does not have exactly three fields.
    '''
    triples = []
    with open(file_path) as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate empty lines rather than failing on tuple unpacking.
                continue
            h, r, t = line.split('\t')
            triples.append((entity2id[h], relation2id[r], entity2id[t]))
    return triples

# Both dictionary files hold one "<id>\t<name>" pair per line; invert each
# of them into a name -> integer id lookup.
with open(os.path.join(data_path, 'entities.dict')) as fin:
    entity2id = {
        entity: int(eid)
        for eid, entity in (row.strip().split('\t') for row in fin)
    }

with open(os.path.join(data_path, 'relations.dict')) as fin:
    relation2id = {
        relation: int(rid)
        for rid, relation in (row.strip().split('\t') for row in fin)
    }

# Load the three splits as lists of (head_id, relation_id, tail_id) tuples.
train_triples, valid_triples, test_triples = [
    read_triple(os.path.join(data_path, file_name), entity2id, relation2id)
    for file_name in ('train.txt', 'valid.txt', 'test.txt')
]

# Only the test split is categorized below. To categorize over every split,
# build the tensor from train_triples + valid_triples + test_triples instead.
triples = torch.LongTensor(test_triples)


# Categorize relations

In [5]:
# Classify every relation as 1-1, 1-M, M-1 or M-M from the triples in `triples`.
#
# For one relation:
#   avg_head = mean number of its triples per distinct tail entity
#   avg_tail = mean number of its triples per distinct head entity
# A side counts as "many" when its average exceeds `many_thresh`.
num_relations = len(relation2id)

# Number of relations per category (kept as floats so the printed summary
# keeps its existing format).
one_many, one_one, many_one, many_many = 0., 0., 0., 0.
# Number of triples that belong to relations of each category.
one_many_num, one_one_num, many_one_num, many_many_num = 0., 0., 0., 0.

many_thresh = 1.5

# relation name -> '1-1' / '1-M' / 'M-1' / 'M-M', or the string 'None' when
# the relation has no triple in `triples`.
relation_dict = {}

# Pair each relation name with its own id instead of relying on the dict's
# key order matching the id order; visit relations in increasing id order.
for name, i in sorted(relation2id.items(), key=lambda item: item[1]):
    pairs = triples[triples[:, 1] == i]
    n = pairs.size(0)
    if n == 0:
        relation_dict[name] = 'None'
        continue

    # Occurrence counts per distinct tail / head, computed on the integer ids.
    # This avoids a float32 round-trip of the ids and avoids materializing an
    # (n_triples x n_distinct) comparison matrix.
    _, n_heads = torch.unique(pairs[:, 2], return_counts=True)
    avg_head = torch.mean(n_heads.float())

    _, n_tails = torch.unique(pairs[:, 0], return_counts=True)
    avg_tail = torch.mean(n_tails.float())

    if avg_head > many_thresh:
        if avg_tail > many_thresh:
            cat = 'M-M'
            many_many += 1
            many_many_num += n
        else:
            cat = 'M-1'
            many_one += 1
            many_one_num += n
    else:
        if avg_tail > many_thresh:
            cat = '1-M'
            one_many += 1
            one_many_num += n
        else:
            cat = '1-1'
            one_one += 1
            one_one_num += n

    relation_dict[name] = cat
    print(i, name, cat, n, avg_head.item(), avg_tail.item())
    
        
        
    
    

1 /people/person/employment_history./business/employment_tenure/title M-1 9 2.25 1.0
3 /base/localfood/food_producing_region/seasonal_availability./base/localfood/produce_availability/produce 1-1 2 1.0 1.0
5 /education/educational_institution/students_graduates./education/education/student 1-M 312 1.068493127822876 1.6956521272659302
7 /people/person/spouse_s./people/marriage/type_of_union M-1 347 173.5 1.0146198272705078
10 /location/symbol_of_administrative_division/official_symbol_of./location/location_symbol_relationship/administrative_division 1-1 2 1.0 1.0
12 /user/szaijan/fantasy_football/player/nfl_team 1-1 1 1.0 1.0
13 /astronomy/celestial_object/category 1-1 2 1.0 1.0
14 /basketball/basketball_player/player_statistics./basketball/basketball_player_stats/team 1-1 3 1.0 1.0
15 /sports/sports_team/roster./ice_hockey/hockey_roster_position/position M-1 22 5.5 1.2222222089767456
17 /computer/operating_system_developer/operating_systems_developed 1-M 2 1.0 2.0
18 /music/performance

127 /business/board_member/organization_board_memberships./organization/organization_board_membership/organization 1-1 6 1.0 1.0
128 /film/film/starring./film/performance/actor 1-1 1123 1.401997447013855 1.4072681665420532
129 /film/film_character/portrayed_in_films./film/performance/actor 1-1 10 1.0 1.4285714626312256
130 /travel/tourist_attraction/near_travel_destination 1-1 4 1.0 1.0
131 /broadcast/content/genre 1-1 1 1.0 1.0
132 /tv/non_character_role/tv_regular_personal_appearances./tv/tv_regular_personal_appearance/person 1-M 14 1.0 2.799999952316284
134 /olympics/olympic_participating_country/athletes./olympics/olympic_athlete_affiliation/athlete 1-1 6 1.0 1.2000000476837158
135 /sports/sports_league/teams./sports/sports_league_participation/team 1-M 16 1.0 2.2857143878936768
136 /location/statistical_region/places_exported_to./location/imports_and_exports/exported_to 1-1 19 1.0555555820465088 1.2666666507720947
137 /government/political_district/representatives./government/gove

263 /royalty/royal_line/monarchs_from_this_line 1-1 1 1.0 1.0
264 /american_football/football_team/historical_roster./american_football/football_historical_roster_position/player 1-1 8 1.3333333730697632 1.1428571939468384
266 /government/governmental_jurisdiction/government_bodies 1-1 1 1.0 1.0
268 /people/person/nationality M-1 508 12.390243530273438 1.0099403858184814
269 /basketball/basketball_position/player_roster_position./basketball/basketball_roster_position/team 1-M 25 1.25 4.166666507720947
270 /base/popstra/company/shopping_choice./base/popstra/shopping_choice/shopper 1-1 3 1.0 1.0
272 /book/author/school_or_movement M-1 2 2.0 1.0
273 /location/country/second_level_divisions 1-M 67 1.0 8.375
275 /base/popstra/celebrity/supporter./base/popstra/support/supported_organization 1-1 7 1.0 1.0
276 /base/americancomedy/celebrity_impressionist/celebrities_impersonated 1-M 8 1.0 2.6666667461395264
277 /sports/sport/leagues 1-1 2 1.0 1.0
279 /location/statistical_region/religions./loc

405 /education/university/international_tuition./measurement_unit/dated_money_value/currency M-1 11 2.75 1.0
406 /media_common/netflix_genre/titles 1-M 298 1.0681003332138062 4.8852458000183105
407 /film/film_festival/individual_festivals 1-1 1 1.0 1.0
409 /military/military_commander/military_commands./military/military_command/military_combatant 1-1 2 1.0 1.0
411 /olympics/olympic_games/athletes./olympics/olympic_athlete_affiliation/sport M-M 53 1.514285683631897 2.3043477535247803
412 /education/school_category/schools_of_this_kind 1-M 85 1.0493826866149902 6.538461685180664
413 /business/business_operation/industry M-1 41 2.7333333492279053 1.0789474248886108
414 /tv/tv_actor/starring_roles./tv/regular_tv_appearance/character 1-1 5 1.25 1.0
415 /education/educational_institution/parent_institution 1-1 12 1.5 1.0
416 /education/educational_institution_campus/educational_institution 1-1 68 1.0 1.0
417 /base/popstra/product/sold_to./base/popstra/product_choice/consumer 1-1 1 1.0 1.0
4

549 /base/biblioness/bibs_location/country M-1 86 3.909090995788574 1.0
550 /sports/sports_team/roster./basketball/basketball_roster_position/player 1-1 1 1.0 1.0
551 /organization/role/leaders./organization/leadership/person 1-M 4 1.0 2.0
552 /sports/sports_team/roster./american_football/football_historical_roster_position/player 1-1 7 1.1666666269302368 1.1666666269302368
553 /film/film/produced_by 1-1 160 1.2598425149917603 1.0389610528945923
554 /business/business_operation/assets./measurement_unit/dated_money_value/currency M-1 26 26.0 1.0
556 /sports/sports_position/players./american_football/football_historical_roster_position/team M-M 88 1.6603773832321167 4.6315789222717285
557 /fictional_universe/fictional_character/appears_in_these_fictional_universes M-1 2 2.0 1.0
558 /government/governmental_jurisdiction/governing_officials./government/government_position_held/appointed_by 1-1 6 1.0 1.5
559 /tv/tv_series_season/regular_cast./tv/regular_tv_appearance/seasons 1-1 1 1.0 1.0
5

715 /base/nobelprizes/nobel_prize_winner/nobel_honor./base/nobelprizes/nobel_honor/subject_area M-1 4 2.0 1.0
716 /people/marriage_union_type/unions_of_this_type./people/marriage/location_of_ceremony 1-M 53 1.0 53.0
717 /american_football/football_team/historical_roster./sports/sports_team_roster/position M-M 84 4.4210524559021 1.75
719 /government/governmental_body/sessions 1-M 4 1.0 2.0
720 /medicine/disease/includes_diseases 1-1 2 1.0 1.0
723 /baseball/baseball_team/league 1-1 1 1.0 1.0
726 /basketball/basketball_player/team./sports/sports_team_roster/position 1-1 1 1.0 1.0
727 /baseball/baseball_team/team_stats./baseball/baseball_team_stats/season M-1 27 2.700000047683716 1.2272727489471436
730 /sports/sports_team/sport M-1 53 7.5714287757873535 1.0
731 /base/activism/activist/area_of_activism M-1 15 1.875 1.0
733 /government/government_office_category/offices 1-1 2 1.0 1.0
734 /computer/computer_manufacturer_brand/computer_models 1-1 2 1.0 1.0
735 /basketball/basketball_player/for

884 /base/americancomedy/comedy_group_member/belongs_to./base/americancomedy/comedy_group_membership/group M-1 4 2.0 1.0
885 /book/magazine_genre/magazines_in_this_genre 1-1 1 1.0 1.0
886 /film/film_character/portrayed_in_films_dubbed./film/dubbing_performance/film 1-1 1 1.0 1.0
887 /film/film/runtime./film/film_cut/film_release_region M-1 18 2.0 1.0
888 /travel/travel_destination/local_transportation 1-1 1 1.0 1.0
889 /time/event/included_in_event 1-1 6 1.5 1.0
890 /film/production_company/films 1-M 187 1.0446927547454834 3.6666667461395264
892 /olympics/olympic_medal/medal_winners./olympics/olympic_medal_honor/medalist 1-M 6 1.0 2.0
894 /location/administrative_division/country M-1 47 2.238095283508301 1.0
895 /government/government_office_category/officeholders./government/government_position_held/governmental_body 1-M 5 1.0 1.6666666269302368
896 /education/field_of_study/students_majoring./education/education/minor 1-1 10 1.0 1.25
898 /user/hangy/default_domain/sports_team_gender/

1038 /sports/sports_position/players./sports/sports_team_roster/team 1-M 429 1.2580645084381104 10.725000381469727
1040 /olympics/olympic_sport/athletes./olympics/olympic_athlete_affiliation/athlete 1-M 8 1.0 2.6666667461395264
1044 /location/hud_foreclosure_area/total_90_day_vacant_residential_addresses./measurement_unit/dated_integer/source M-1 95 95.0 1.0
1045 /venture_capital/venture_funded_company/venture_investors./venture_capital/venture_investment/investor 1-1 1 1.0 1.0
1046 /organization/endowed_organization/endowment./measurement_unit/dated_money_value/currency M-1 45 11.25 1.0
1047 /american_football/football_player/former_teams./american_football/football_historical_roster_position/position_s 1-1 3 1.5 1.0
1048 /film/film_location/featured_in_films 1-M 112 1.0566037893295288 1.9310344457626343
1050 /music/artist/origin 1-1 61 1.2708333730697632 1.0
1051 /tv/tv_program/regular_cast./tv/regular_tv_appearance/actor 1-1 151 1.0202702283859253 1.451923131942749
1052 /military/mi

1205 /organization/organization/phone_number./common/phone_number/service_location M-1 12 2.4000000953674316 1.2000000476837158
1207 /government/governmental_jurisdiction/agencies 1-1 3 1.0 1.5
1209 /finance/currency/countries_used 1-M 8 1.0 2.0
1213 /base/fight/crime_type/people_convicted_of_this_crime./base/crime/criminal_conviction/guilty_of 1-1 3 1.5 1.0
1214 /military/military_conflict/combatants./military/military_combatant_group/combatants 1-M 48 1.1428571939468384 1.548387050628662
1219 /computer/programming_language/influenced_by 1-1 5 1.25 1.25
1220 /soccer/football_player/current_team./sports/sports_team_roster/position M-1 7 2.3333332538604736 1.0
1221 /award/hall_of_fame_inductee/hall_of_fame_inductions./award/hall_of_fame_induction/hall_of_fame M-1 14 3.5 1.0
1222 /american_football/football_player/position_s 1-1 5 1.0 1.25
1223 /people/person/place_of_birth M-1 299 1.8121211528778076 1.0
1224 /sports/drafted_athlete/drafted./sports/sports_league_draft_pick/draft 1-1 1 1.

In [8]:
# Summary of the categorization. (A bare `relation_dict` expression that is
# not the last statement of a cell displays nothing, so it is not kept here.)
# Relations per category, in the order 1-M, 1-1, M-1, M-M.
print(one_many, one_one, many_one, many_many)
# Triples per category, same order.
print(one_many_num, one_one_num, many_one_num, many_many_num)
# Sanity check: the per-category triple counts should add up to the number of
# triples that were categorized, i.e. the rows of `triples`.
print(one_many_num + one_one_num + many_one_num + many_many_num)
print(len(triples))

185.0 489.0 249.0 38.0
16369.0 10206.0 21228.0 11268.0
59071.0
59071


In [11]:
# Persist the mapping next to the dataset, one "<relation>\t<category>" row
# per relation, in the order the entries were added to relation_dict.
with open(os.path.join(data_path, 'relation_category.txt'), 'w') as f:
    for relation_name, category in relation_dict.items():
        f.write('{}\t{}\n'.format(relation_name, category))