# Manually evaluating the rhyme model

In [1]:
from siamese_rhyme import siamese_rhyme
import pandas as pd
import torch.nn.functional as F


# Instantiate the siamese rhyme model used by every helper below.
# `load=True` is forwarded to the constructor (presumably it restores
# previously trained weights -- confirm in siamese_rhyme).
model = siamese_rhyme(load=True)

In [4]:
def flip_words(df, columns=['word1', 'word2']):
    """Return a copy of ``df`` in which the strings in ``columns`` are reversed.

    The input frame is left untouched, so calling this twice on the same
    frame (e.g. when re-running a cell) always produces the same result
    instead of flipping the words back.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the word columns.
    columns : list of str
        Names of the columns whose values are reversed with ``x[::-1]``.

    Returns
    -------
    pandas.DataFrame
        A new frame; all columns not listed in ``columns`` are unchanged.
    """
    flipped = df.copy()
    for column in columns:
        flipped[column] = flipped[column].apply(lambda word: word[::-1])
    return flipped

def get_word_vecs(df, columns=['word1', 'word2']):
    """Return a copy of ``df`` with the model's vector for each word added.

    ``vec1`` holds ``model.get_word_vec(word)`` for every value of
    ``columns[0]`` and ``vec2`` the same for ``columns[1]``. The module-level
    ``model`` is used; the input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the two word columns.
    columns : list of str
        The first two entries name the word columns; further entries are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``vec1`` and ``vec2`` columns.
    """
    first_col, second_col = columns[0], columns[1]
    with_vecs = df.copy()
    with_vecs['vec1'] = with_vecs[first_col].apply(model.get_word_vec)
    with_vecs['vec2'] = with_vecs[second_col].apply(model.get_word_vec)
    return with_vecs
    
def get_distances(df):
    """Return a copy of ``df`` with a ``dist`` column of cosine distances.

    For every row, ``dist`` is ``1 - cosine_similarity(vec1, vec2)``, computed
    with ``torch.nn.functional.cosine_similarity`` at its default ``dim``.
    Only the first entry of the similarity result is kept.
    (assumes each vector is a 2-D tensor with a single row, e.g. shape
    ``(1, d)`` -- TODO confirm against ``model.get_word_vec``).

    The input frame is not modified, and an empty frame yields an empty
    ``dist`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with tensor-valued ``vec1`` and ``vec2`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``dist`` column.
    """
    with_dist = df.copy()
    # Walk the two columns in lockstep instead of building a Series per row.
    with_dist['dist'] = [
        1 - F.cosine_similarity(vec1, vec2).detach().cpu().numpy()[0]
        for vec1, vec2 in zip(with_dist['vec1'], with_dist['vec2'])
    ]
    return with_dist

def get_predictions(df, value):
    """Return a copy of ``df`` with a binary ``pred`` column.

    ``pred`` is 1 where ``dist`` is strictly below ``value`` and 0 otherwise
    (a distance equal to ``value`` gives 0). The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``dist`` column.
    value : float
        Decision threshold applied to ``dist``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra integer ``pred`` column.
    """
    with_pred = df.copy()
    with_pred['pred'] = (with_pred['dist'] < value).astype(int)
    return with_pred

In [4]:
def flip_words(df, columns=['word1', 'word2']):
    """Return a copy of ``df`` in which the strings in ``columns`` are reversed.

    NOTE: this cell repeats the ``flip_words`` definition from the previous
    cell. Once both cells have run, this later definition is the one in
    effect; consider deleting one of the two.

    The input frame is left untouched, so calling this twice on the same
    frame (e.g. when re-running a cell) always produces the same result
    instead of flipping the words back.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the word columns.
    columns : list of str
        Names of the columns whose values are reversed with ``x[::-1]``.

    Returns
    -------
    pandas.DataFrame
        A new frame; all columns not listed in ``columns`` are unchanged.
    """
    flipped = df.copy()
    for column in columns:
        flipped[column] = flipped[column].apply(lambda word: word[::-1])
    return flipped

In [5]:
# Read the training pairs and run them through flip_words, which reverses
# the strings in the word1/word2 columns.
df_train = flip_words(pd.read_csv('data/train.csv'))

df_train

Unnamed: 0,word1,word2,rhyme
0,eigenthum,mir,0
1,schwül,üplfh,0
2,meise,heunenreise,1
3,gitterstäben,hinunterweenb,0
4,laternenschein,koth,0
...,...,...,...
42739,sinnen,bgneenni,0
42740,finsterni,sonnenlicht,0
42741,schreit,speit,1
42742,zeigt,seelen,0


In [6]:
# Add the word vectors (vec1/vec2) and their cosine distance (dist) for the training pairs.
df_train_pred = get_distances(get_word_vecs(df_train))

In [7]:
# Threshold the distances: pred = 1 where dist < 0.45, otherwise 0.
df_train_pred = get_predictions(df_train_pred, 0.45)

In [8]:
# Training rows where the `pred` column disagrees with the `rhyme` column.
df_false = df_train_pred[df_train_pred['rhyme'].ne(df_train_pred['pred'])]

df_false

Unnamed: 0,word1,word2,rhyme,vec1,vec2,dist,pred
70,fült,vergilt,0,"[[tensor(-0.0874, device='cuda:0', grad_fn=<Un...","[[tensor(1.2055, device='cuda:0', grad_fn=<Unb...",0.225376,1
83,beut,braut,0,"[[tensor(0.6734, device='cuda:0', grad_fn=<Unb...","[[tensor(-0.4566, device='cuda:0', grad_fn=<Un...",0.293041,1
101,nennt,trennt,0,"[[tensor(1.7195, device='cuda:0', grad_fn=<Unb...","[[tensor(1.7264, device='cuda:0', grad_fn=<Unb...",0.026631,1
150,kinn,hin,0,"[[tensor(-0.5106, device='cuda:0', grad_fn=<Un...","[[tensor(-0.3997, device='cuda:0', grad_fn=<Un...",0.219374,1
166,scharf,warf,0,"[[tensor(-0.5596, device='cuda:0', grad_fn=<Un...","[[tensor(-0.0294, device='cuda:0', grad_fn=<Un...",0.069962,1
...,...,...,...,...,...,...,...
42670,seltenheit,dankbarkeit,0,"[[tensor(-0.0609, device='cuda:0', grad_fn=<Un...","[[tensor(0.4352, device='cuda:0', grad_fn=<Unb...",0.300585,1
42685,unterla,ba,1,"[[tensor(0.4569, device='cuda:0', grad_fn=<Unb...","[[tensor(-1.0586, device='cuda:0', grad_fn=<Un...",0.568108,0
42691,paar,war,0,"[[tensor(-0.3231, device='cuda:0', grad_fn=<Un...","[[tensor(0.8569, device='cuda:0', grad_fn=<Unb...",0.193058,1
42702,kauf,lauf,0,"[[tensor(-1.5053, device='cuda:0', grad_fn=<Un...","[[tensor(-1.6217, device='cuda:0', grad_fn=<Un...",0.067199,1


## It was found that a decision boundary of around 0.45 works best on the train set

In [5]:
# Read the validation pairs and run them through flip_words, which reverses
# the strings in the word1/word2 columns.
df_val = flip_words(pd.read_csv('data/val.csv'))

df_val

Unnamed: 0,word1,word2,rhyme
0,ring,ging,1
1,reg,weg,1
2,wein,freun,1
3,bahre,kleid,0
4,lauf,blut,0
...,...,...,...
10682,brausen,nicht,0
10683,jahren,bewahren,1
10684,flehn,beten,0
10685,lieben,mehr,0


In [11]:
# Vectors -> cosine distances -> predictions for the validation pairs,
# using the same 0.45 threshold as for the train set.
df_val_pred = get_predictions(get_distances(get_word_vecs(df_val)), 0.45)

# Validation rows where the `pred` column disagrees with the `rhyme` column.
df_false = df_val_pred[df_val_pred['rhyme'].ne(df_val_pred['pred'])]

df_false

Unnamed: 0,word1,word2,rhyme,vec1,vec2,dist,pred
17,quell,trautgesell,0,"[[tensor(-0.5122, device='cuda:0', grad_fn=<Un...","[[tensor(-2.0960, device='cuda:0', grad_fn=<Un...",0.219652,1
18,triebe,elibe,0,"[[tensor(-1.9753, device='cuda:0', grad_fn=<Un...","[[tensor(-2.5342, device='cuda:0', grad_fn=<Un...",0.439913,1
19,zinnen,städterinnen,0,"[[tensor(0.4635, device='cuda:0', grad_fn=<Unb...","[[tensor(0.6619, device='cuda:0', grad_fn=<Unb...",0.196575,1
24,fischerkahn,an,1,"[[tensor(0.7715, device='cuda:0', grad_fn=<Unb...","[[tensor(0.0998, device='cuda:0', grad_fn=<Unb...",0.541865,0
39,erhielt,fühlt,0,"[[tensor(0.1579, device='cuda:0', grad_fn=<Unb...","[[tensor(1.2007, device='cuda:0', grad_fn=<Unb...",0.198892,1
...,...,...,...,...,...,...,...
10541,schalle,halle,0,"[[tensor(-0.5467, device='cuda:0', grad_fn=<Un...","[[tensor(-0.1072, device='cuda:0', grad_fn=<Un...",0.063353,1
10560,gefällt,fällt,0,"[[tensor(0.5290, device='cuda:0', grad_fn=<Unb...","[[tensor(0.9425, device='cuda:0', grad_fn=<Unb...",0.021355,1
10567,ma,a,0,"[[tensor(-1.1135, device='cuda:0', grad_fn=<Un...","[[tensor(-0.0675, device='cuda:0', grad_fn=<Un...",0.239272,1
10595,nit,dermit,0,"[[tensor(1.2828, device='cuda:0', grad_fn=<Unb...","[[tensor(1.5895, device='cuda:0', grad_fn=<Unb...",0.268855,1


In this small sample, it looks as if the validation set contains quite a few false negatives
(pairs labelled as non-rhyming that actually rhyme, e.g. schalle/halle), which is partially
due to the synthetic data generation.