# **Run the cells below to prepare the data (REQUIRED)**

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
import time
import dat
import ipywidgets as widgets
import pandas as pd
import numpy as np
%pip install openpyxl
import openpyxl

In [None]:
# Build the model object that every scoring cell below calls into.
# NOTE(review): presumably the first file holds the Polish GloVe word vectors
# and the second the list of accepted words - confirm against dat.Model.
model = dat.Model("glove_100_3_polish.txt", "words.txt")

**Place the prepared .xlsx file in the project folder and change the filename in the cell below accordingly**

In [None]:
# Load the prepared spreadsheet; each row becomes one list of cell values in
# `data`, with empty cells replaced by a single space.
try:
    # Only the read can fail because the file is absent, so keep the guarded
    # region to this one call.
    df_upload = pd.read_excel('dat-data.xlsx', header=None)
except FileNotFoundError:
    # A missing spreadsheet surfaces as FileNotFoundError; the IndexError that
    # used to be caught here belonged to the old upload-widget code path.
    print('upload a file first')
else:
    df_upload = df_upload.fillna(value=' ')
    data = df_upload.values.tolist()
    print('upload successful')

**Run cell below to obtain the DAT score**

In [None]:
# Print one DAT score per row, rounded to two decimals; rows for which the
# model returns None print "None", and rows that raise TypeError are echoed
# back unchanged so they can be inspected.
for row in data:
    try:
        dat_score = model.dat(row)
        # Identity test: `!= None` is evaluated elementwise on array-like
        # results and is not a reliable None check.
        if dat_score is not None:
            print(dat_score.round(2))
        else:
            print(dat_score)
    except TypeError:
        print(row)

**Run cell below to obtain words not found in model**

In [None]:
# Gather, row by row, the words the model reports as invalid and print each
# non-empty group on its own line.
inv_words_list = []

for row in data:
    missing = model.invalid_words(row)
    if len(missing) >= 1:
        print(', '.join(missing))
        inv_words_list.append(missing)

# Nothing was collected, so every word in every row was accepted.
if not inv_words_list:
    print("no invalid words")

**Run cell below to obtain an HTML file with distances by pairs**

In [None]:
# Write a timestamped HTML report: for every row that yields word pairs, a
# colour-coded lower-triangular matrix of pairwise values followed by the
# row's DAT score.
date = time.strftime("%Y-%b-%d__%H_%M_%S", time.localtime())

word_pairs = []
dat_list = []

for row in data:
    pairs = model.dat(row, 1)
    dat_score = model.dat(row)
    if pairs is None:
        continue
    # Append to both lists together so that word_pairs[k] and dat_list[k]
    # always describe the same row.
    word_pairs.append(pairs)
    dat_list.append(None if dat_score is None else dat_score.round(2))

# The page declares UTF-8, so the file must be written as UTF-8 rather than
# in the platform default encoding; `with` closes it even if rendering fails.
with open("dat_matrix" + date + ".html", "w", encoding="utf-8") as f:
    f.write('<meta charset="UTF-8">')

    for pairs, dat_score in zip(word_pairs, dat_list):
        # Alphabetically sorted unique words label both axes.
        index = columns = sorted({word for t1, t2, _ in pairs for word in (t1, t2)})
        tags = {word: pos for pos, word in enumerate(index)}
        correlation = np.identity(len(tags))

        # Fill the matrix symmetrically from the (word, word, value) triples.
        for t1, t2, corr in pairs:
            correlation[tags[t1], tags[t2]] = corr
            correlation[tags[t2], tags[t1]] = corr

        df = pd.DataFrame(correlation, index=index, columns=columns)

        # Blank the diagonal and the upper triangle so each pair shows once.
        mask = np.triu(np.ones(df.shape, dtype=bool))
        df = df.mask(mask)

        # format(precision=...) and to_html() replace set_precision() and
        # render(), which were removed in pandas 2.0. The colour is passed
        # positionally because the keyword was renamed between versions.
        df_style = (
            df.style
            .background_gradient(cmap='RdYlGn', axis=None, vmin=0, vmax=1)
            .highlight_null('#f1f1f1')
            .format(precision=2)
        )

        f.write(df_style.to_html())
        f.write("DAT: " + str(dat_score))


**Run cells below to obtain the DAT score and matrix for all given words (if at least 7 were given)**

In [None]:
# Print, for each row, the score returned by model.full_dat rounded to two
# decimals; rows for which it returns None print "None".
for row in data:
    full_score = model.full_dat(row)
    # Identity test: `!= None` is evaluated elementwise on array-like results
    # and is not a reliable None check.
    if full_score is not None:
        print(full_score.round(2))
    else:
        print(full_score)

In [None]:
# Write a timestamped HTML report and a companion CSV: for every row for
# which model.full_dat yields word pairs, a colour-coded lower-triangular
# matrix of pairwise values followed by the row's score.
date = time.strftime("%Y-%b-%d__%H_%M_%S", time.localtime())

full_word_pairs = []
full_dat_list = []

for row in data:
    pairs = model.full_dat(row, 1)
    full_score = model.full_dat(row)
    if pairs is None:
        continue
    # Append to both lists together so that full_word_pairs[k] and
    # full_dat_list[k] always describe the same row.
    full_word_pairs.append(pairs)
    full_dat_list.append(None if full_score is None else full_score.round(2))

dat_csv_full = ("dat_csv" + date + ".csv")

# The page declares UTF-8, so the file must be written as UTF-8 rather than
# in the platform default encoding; `with` closes it even if rendering fails.
with open("dat_matrix_full" + date + ".html", "w", encoding="utf-8") as f_full:
    f_full.write('<meta charset="UTF-8">')

    for pairs, full_score in zip(full_word_pairs, full_dat_list):
        full_tags = []
        for t1, t2, _ in pairs:
            full_tags += [t1, t2]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # axes follow the order in which words appear in the pairs.
        index = columns = list(dict.fromkeys(full_tags))
        full_tags = {word: pos for pos, word in enumerate(index)}

        # Fill the matrix symmetrically from the (word, word, value) triples.
        full_correlation = np.identity(len(full_tags))
        for t1, t2, full_corr in pairs:
            full_correlation[full_tags[t1], full_tags[t2]] = full_corr
            full_correlation[full_tags[t2], full_tags[t1]] = full_corr

        full_df = pd.DataFrame(full_correlation, index=index, columns=columns)

        # Blank the diagonal and the upper triangle so each pair shows once.
        full_mask = np.triu(np.ones(full_df.shape, dtype=bool))
        full_df = full_df.mask(full_mask)

        # format(precision=...) and to_html() replace set_precision() and
        # render(), which were removed in pandas 2.0. The colour is passed
        # positionally because the keyword was renamed between versions.
        df_style = (
            full_df.style
            .background_gradient(cmap='RdYlGn', axis=None, vmin=0, vmax=1)
            .highlight_null('#f1f1f1')
            .format(precision=2)
        )

        f_full.write(df_style.to_html())
        f_full.write("DAT: " + str(full_score))
        # Every matrix is appended to the same per-run CSV file.
        full_df.to_csv(dat_csv_full, mode='a')


In [None]:
#uploader = widgets.FileUpload(
#    accept='.xlsx',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
#    multiple=False  # True to accept multiple files upload else False
#)

#display(uploader)



In [None]:
#try:
    
#    input_file = list(uploader.value.values())[0]
#    input_file = uploader.value.content
#    content = input_file['content']
#    df_upload = pd.read_excel(content, header=None)
#    df_upload = df_upload.fillna(value=" ")
#    data = df_upload.values.tolist()
#    print("upload successful")
    
#except IndexError:
#    print("upload a file first")
