# Imports

In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import ColumnDataSource, figure, output_file, show
from bokeh.io import output_notebook

In [2]:
# Configure Bokeh so that show() renders figures inline in this notebook.
output_notebook()

# Load Data

In [3]:
# Base table of sentence pairs, plus one classifier-result table per language.
# Each CSV's first column becomes the row index (presumably the same labels
# as df's index -- verify, since later column assignments align on it).
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = pd.read_pickle('first_two_sentences_EN_and_ES_POB.pkl')
df_ES, df_EN = (
    pd.read_csv(result_csv, index_col=[0])
    for result_csv in ('ES_result.csv', 'EN_result.csv')
)

In [4]:
# Pull the three per-class probability columns ('0_prob', '1_prob', '2_prob')
# out of each language's result table as separate Series.
ES_0_prob, ES_1_prob, ES_2_prob = (df_ES[name] for name in ('0_prob', '1_prob', '2_prob'))
EN_0_prob, EN_1_prob, EN_2_prob = (df_EN[name] for name in ('0_prob', '1_prob', '2_prob'))

In [5]:
# The 'wikidata' column of each result table, kept as a Series per language.
Wikidata_ES, Wikidata_EN = df_ES['wikidata'], df_EN['wikidata']

In [6]:
# Attach the Spanish class probabilities to df as new columns.
# Assigning a Series aligns on index labels: any df row whose label is
# missing from the Series ends up as NaN instead of raising.
df['ES_0_prob'], df['ES_1_prob'], df['ES_2_prob'] = ES_0_prob, ES_1_prob, ES_2_prob

In [7]:
# Attach the English class probabilities to df as new columns.
# Assigning a Series aligns on index labels: any df row whose label is
# missing from the Series ends up as NaN instead of raising.
df['EN_0_prob'], df['EN_1_prob'], df['EN_2_prob'] = EN_0_prob, EN_1_prob, EN_2_prob

In [8]:
# Attach each language's Wikidata statement text to df (index-aligned,
# so unmatched rows become NaN).
df['Wikidata_ES'], df['Wikidata_EN'] = Wikidata_ES, Wikidata_EN

In [9]:
# Sanity check: (rows, columns) of df after the new columns were attached.
print(df.shape)

(31543, 11)


In [10]:
# Preview the first rows of the combined table.
df.head()

Unnamed: 0,Qid,sentence_EN,sentence_ES,ES_0_prob,ES_1_prob,ES_2_prob,EN_0_prob,EN_1_prob,EN_2_prob,Wikidata_ES,Wikidata_EN
0,Q1015531,ludmila manicler born 6 july 1987 is an argent...,ludmila maniclerperfil futbolista afasan pedr...,0.061765,0.313477,0.624759,0.961972,0.008174,0.029854,lugar de nacimiento El Verde Más Cercano,place of birth San Pedro
1,Q1050388,was a japanese general who served during the s...,"mitsuru ushijima 牛島 満, ushijima mitsuru, 31 d...",0.016616,0.010163,0.973222,0.00072,0.000418,0.998862,lugar de nacimiento Kagoshima,place of birth Kagoshima
2,Q10526787,is a japanese football player currently playin...,"wataru endo en japonés 遠藤 航; yokohama, jap...",0.958571,0.00253,0.038899,0.000745,0.000572,0.998683,lugar de nacimiento Yokohama,place of birth Yokohama
3,Q105695,"annasophia robb born december 8, 1993 is an am...","annasophia robb denver, colorado; 8 de diciem...",0.904332,0.002148,0.09352,0.907669,0.017064,0.075267,lugar de nacimiento Denver,place of birth Denver
4,Q1087146,"christos dimitriou papakyriakopoulos , commonl...","christos papakyriakopoulos, en griego χρήστος...",0.536401,0.229368,0.234231,0.965165,0.007697,0.027139,lugar de nacimiento Atenas,place of birth Athens


# Plot

In [11]:
# Scatter plot of the class-1 ("inconsistent") probability:
# Spanish on the x-axis against English on the y-axis, one point per row of df.
output_file("EN_ES_inconsistent_Prob_Plot.html")

source = ColumnDataSource(data={
    'x': df['ES_1_prob'],
    'y': df['EN_1_prob'],
    'desc': df['Qid'],  # shown in the hover tooltip via "@desc"
})

# Hover fields: row index, cursor coordinates, and the Qid stored in 'desc'.
TOOLTIPS = [("index", "$index"), ("(x,y)", "($x, $y)"), ("desc", "@desc")]

# NOTE(review): plot_width/plot_height were removed in Bokeh 3.0 in favor of
# width/height -- confirm the installed Bokeh version before upgrading.
p = figure(
    plot_width=2000,
    plot_height=2000,
    tooltips=TOOLTIPS,
    title="EN_ES_inconsistent_Prob_Plot",
    x_axis_label='ES_inconsistent_prob',
    y_axis_label='EN_inconsistent_prob',
)

p.circle(x='x', y='y', size=3, source=source)

In [12]:
# Display the inconsistent-probability scatter plot.
show(p)

In [13]:
# Scatter plot of the class-0 ("consistent") probability:
# Spanish on the x-axis against English on the y-axis, one point per row of df.
output_file("EN_ES_consistent_Prob_Plot.html")

source = ColumnDataSource(data={
    'x': df['ES_0_prob'],
    'y': df['EN_0_prob'],
    'desc': df['Qid'],  # shown in the hover tooltip via "@desc"
})

# Hover fields: row index, cursor coordinates, and the Qid stored in 'desc'.
TOOLTIPS = [("index", "$index"), ("(x,y)", "($x, $y)"), ("desc", "@desc")]

# NOTE(review): plot_width/plot_height were removed in Bokeh 3.0 in favor of
# width/height -- confirm the installed Bokeh version before upgrading.
p0 = figure(
    plot_width=2000,
    plot_height=2000,
    tooltips=TOOLTIPS,
    title="EN_ES_consistent_Prob_Plot",
    x_axis_label='ES_consistent_prob',
    y_axis_label='EN_consistent_prob',
)

p0.circle(x='x', y='y', size=3, source=source)

In [14]:
# Display the consistent-probability scatter plot.
show(p0)

In [15]:
# Scatter plot of the class-2 ("irrelevant") probability:
# Spanish on the x-axis against English on the y-axis, one point per row of df.
output_file("EN_ES_irrelevant_Prob_Plot.html")

source = ColumnDataSource(data=dict(
    x=df['ES_2_prob'],
    y=df['EN_2_prob'],
    desc=df['Qid'],  # shown in the hover tooltip via "@desc"
))

# Hover fields: row index, cursor coordinates, and the Qid stored in 'desc'.
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("desc", "@desc"),
]

# The title follows the same "<name>_Prob_Plot" pattern as the output file
# name above (it previously read "EN_ES_irrelevant_Plot").
# NOTE(review): plot_width/plot_height were removed in Bokeh 3.0 in favor of
# width/height -- confirm the installed Bokeh version before upgrading.
p2 = figure(plot_width=2000, plot_height=2000, tooltips=TOOLTIPS, title="EN_ES_irrelevant_Prob_Plot")
p2.xaxis.axis_label = 'ES_irrelevant_prob'
p2.yaxis.axis_label = 'EN_irrelevant_prob'

p2.circle('x', 'y', size=3, source=source)

In [16]:
# Display the irrelevant-probability scatter plot.
show(p2)

# Predicted Label Frequencies

In [17]:
# One (n_rows, 3) NumPy array per language; column position == class id (0, 1, 2).
df_ES_prob = df.loc[:, ['ES_0_prob', 'ES_1_prob', 'ES_2_prob']].to_numpy()
df_EN_prob = df.loc[:, ['EN_0_prob', 'EN_1_prob', 'EN_2_prob']].to_numpy()

In [18]:
# Predicted class per row = position of the largest probability in that row.
# NOTE(review): argmax returns the position of a NaN if a row contains one,
# so rows with missing probabilities would still get a label -- confirm none exist.
pred_ES = df_ES_prob.argmax(axis=1)
pred_EN = df_EN_prob.argmax(axis=1)

In [19]:
# Count how often each class is predicted for Spanish and print a 2-row table:
# first row = class labels, second row = number of rows with that label.
labels_ES, counts_ES = np.unique(pred_ES, return_counts=True)
print("Frequency of label of ES:")
print(np.vstack((labels_ES, counts_ES)))

Frequency of label of ES:
[[    0     1     2]
 [21305  3409  6829]]


In [20]:
# Count how often each class is predicted for English and print a 2-row table:
# first row = class labels, second row = number of rows with that label.
labels_EN, counts_EN = np.unique(pred_EN, return_counts=True)
print("Frequency of label of EN:")
print(np.vstack((labels_EN, counts_EN)))

Frequency of label of EN:
[[    0     1     2]
 [19636   104 11803]]


In [21]:
# Persist the combined table (sentences, per-language probabilities, Wikidata text).
# NOTE(review): pred_ES / pred_EN are separate arrays and were never added to df,
# so the predicted labels are not part of this file -- confirm that is intended.
df.to_pickle('Final_result_EN_ES.pkl')

# Check Top K

In [22]:
# Rank rows by the Spanish class-1 ("inconsistent") probability, highest first.
# Returns a sorted copy for display; df itself is not reordered.
df.sort_values('ES_1_prob', ascending=False)

Unnamed: 0,Qid,sentence_EN,sentence_ES,ES_0_prob,ES_1_prob,ES_2_prob,EN_0_prob,EN_1_prob,EN_2_prob,Wikidata_ES,Wikidata_EN
8475,Q1139198,"matthew polinsky born february 24, 1984 is an ...",matt polinsky nacido el 24 de febrero de 1984...,0.016511,0.963244,0.020246,0.960774,0.008074,0.031152,lugar de nacimiento Pittsburgh,place of birth Pittsburgh
24019,Q595599,cameron drew neru howieson born 22 december 19...,cameron howieson 22 de diciembre de 1994 en d...,0.016360,0.963240,0.020400,0.962460,0.007947,0.029593,lugar de nacimiento Blenheim,place of birth Blenheim
6880,Q9126923,meng hongwei ; born november 1953 is a former ...,"meng hongwei ; harbin, provincia de heilongji...",0.016246,0.963231,0.020523,0.961945,0.007728,0.030327,lugar de nacimiento Pokai,place of birth Harbin
5431,Q2252,buzz aldrin ; born edwin eugene aldrin jr.; ja...,"buzz aldrin glen ridge, nueva jersey; 20 de e...",0.016530,0.963169,0.020302,0.933176,0.012430,0.054393,lugar de nacimiento Montclair,place of birth Glen Ridge
23052,Q3566067,he is known as a leader of the new art movemen...,"wang guangyi en chino 王广义, harbin, provincia ...",0.016246,0.963163,0.020591,0.966835,0.007976,0.025189,lugar de nacimiento Pokai,place of birth Harbin
...,...,...,...,...,...,...,...,...,...,...,...
11630,Q299179,"was the 121st emperor of japan, according to t...","fue el emperador de japón, según el orden t...",0.005394,0.000971,0.993635,0.000834,0.000518,0.998647,lugar de nacimiento Quioto,place of birth Kyoto
3985,Q3454038,he has refereed many matches in the afc champi...,es un árbitro japonés de fútbol es miembro d...,0.007642,0.000963,0.991395,0.000728,0.000590,0.998682,lugar de nacimiento Nagoya,place of birth Nagoya
4135,Q297628,"duke was a japanese politician, statesman and ...","el , fue un político y estadista japonés y fu...",0.003198,0.000943,0.995859,0.000797,0.000528,0.998675,lugar de nacimiento Quioto,place of birth Kyoto
13268,Q93745,was a japanese politician and leader of the ja...,"fue un político japonés, líder del partido s...",0.011268,0.000937,0.987795,0.000715,0.000359,0.998926,lugar de nacimiento prefectura de Tokio,place of birth Tokyo
