# **Run the cells below to prepare the data (REQUIRED)**

In [1]:
import warnings
# Filters are installed before the remaining imports so warnings raised at import time are silenced too.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import io
import time

import ipywidgets as widgets
import numpy as np
import pandas as pd

import dat

In [2]:
# Build the scoring model used by every cell below. Both file names are relative,
# so they are resolved against the notebook's working directory.
# NOTE(review): presumably the first file holds word embeddings and the second the
# accepted vocabulary — confirm against dat.Model's signature.
model = dat.Model("glove_100_3_polish.txt", "words.txt")

In [3]:
# Single-file upload widget restricted to .xlsx workbooks; it is rendered right away
# so a file can be chosen before the next cell is run.
uploader = widgets.FileUpload(accept='.xlsx', multiple=False)
display(uploader)

FileUpload(value={}, accept='.xlsx', description='Upload')

In [5]:
# Parse the uploaded workbook into `data`: one list of cell values per spreadsheet row.
# If nothing has been uploaded yet, a hint is printed and `data` is left untouched.
try:
    uploads = uploader.value
    # ipywidgets 7 exposes a dict keyed by file name; ipywidgets 8 exposes a tuple of
    # file records. Normalise both layouts to a list of records.
    uploaded_files = list(uploads.values()) if isinstance(uploads, dict) else list(uploads)
    input_file = uploaded_files[0]  # raises IndexError when no file was uploaded

    # The payload is bytes (v7) or a memoryview (v8); normalise it to bytes.
    content = bytes(input_file['content'])
    # pandas expects a path or file-like object, so the raw bytes are wrapped in BytesIO.
    df_upload = pd.read_excel(io.BytesIO(content), header=None)
    # Empty cells become a single space so every row has the same number of entries.
    df_upload = df_upload.fillna(value=" ")
    data = df_upload.values.tolist()
    print("upload successful")

except IndexError:
    print("upload a file first")


upload successful


**Run the cell below to obtain the DAT score for each row**

In [6]:
# Print one line per uploaded row: the DAT score rounded to two decimals, or
# "None" when model.dat returns no score for that row.
for row in data:
    dat_score = model.dat(row)
    # Identity check is the idiomatic (and array-safe) way to test for None.
    if dat_score is not None:
        print(dat_score.round(2))
    else:
        print(dat_score)

66.87


**Run the cell below to list the words not found in the model**

In [25]:
# Gather, row by row, the words that model.invalid_words reports for the upload.
# Each row with at least one such word is printed on its own comma-separated line.
inv_words_list = []

for row in data:
    rejected = model.invalid_words(row)
    if len(rejected) >= 1:
        print(', '.join(rejected))
        inv_words_list.append(rejected)

# Nothing was collected for any row.
if len(inv_words_list) == 0:
    print("no invalid words")

chusteczki
ksiażka
ochraniacz-na-zęby
zamek-drzewo-kość-lakier-lilijka-znak-poduszka-piernik-kamień-kosz
rośliny
zasłony, leki
chmury, książki, wiśnie
netflix
woda-ogień, sąsiad-przyjaciel, książka-podręcznik
buty
szkicowanie
kalosze, widom, gwiazda-śmierci
pieniądze
pieniądze
problemy, rozwiązania
myśli
perfum
chusteczki
słup-graniczny
psy, rośliny, flamastry
narkotyki
marzenia, najlepszy
pierogi
szuflady
oczy
pieniądze, przyjażń
instagram, gry, netflix
zęby
miksolog
facebook
rękawice
leki
ostrzeszów
przemyślenia
obrazy, zwierciadła
odtrącenie
paznokiec
swiatlo
tory
mamdarynka
łózko


**Run the cell below to save an HTML file with the word-distance matrices**

In [39]:
# Render one colour-coded, lower-triangular table per scored row and write them all,
# each followed by its DAT score, into a timestamped HTML file in the working directory.
date = time.strftime("%Y-%b-%d__%H_%M_%S", time.localtime())

word_pairs = []
dat_list = []

for row in data:
    # NOTE(review): with the extra argument model.dat appears to return
    # (word, word, value) triples instead of a single score — confirm against dat.Model.
    pairs = model.dat(row, 1)
    dat_score = model.dat(row)
    # Keep the two lists index-aligned: a row is reported only when it produced both
    # a pair list and a score, so a table can never be labelled with another row's score.
    if pairs is None or dat_score is None:
        continue
    word_pairs.append(pairs)
    dat_list.append(dat_score.round(2))

# Write as UTF-8 so the bytes match the charset declared in the <meta> tag (the
# platform default encoding may differ); `with` closes the file even if rendering fails.
with open("dat_matrix" + date + ".html", "w", encoding="utf-8") as f:
    f.write('<meta charset="UTF-8">')

    for pairs, dat_score in zip(word_pairs, dat_list):
        # Sorted unique words become both the row and the column labels.
        words = sorted({word for t1, t2, _ in pairs for word in (t1, t2)})
        position = {word: k for k, word in enumerate(words)}

        # Start from the identity matrix and fill both symmetric cells for every pair.
        correlation = np.identity(len(words))
        for t1, t2, corr in pairs:
            correlation[position[t1], position[t2]] = corr
            correlation[position[t2], position[t1]] = corr

        df = pd.DataFrame(correlation, index=words, columns=words)

        # Blank the diagonal and the upper triangle so each pair is shown exactly once.
        mask = np.zeros_like(df, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
        df[mask] = np.nan

        # NOTE(review): null_color=, set_precision() and render() belong to the older
        # pandas Styler API; newer pandas spells them color=, format(precision=2) and
        # to_html(). Confirm the pinned pandas version before upgrading.
        df_style = (
            df.style
            .background_gradient(cmap='RdYlGn', axis=None, vmin=0, vmax=1)
            .highlight_null(null_color='#f1f1f1')
            .set_precision(2)
        )

        f.write(df_style.render())
        f.write("DAT: " + str(dat_score))
