In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
%matplotlib inline
from six.moves.urllib.request import urlretrieve
import zipfile
import numpy as np
import pandas as pd
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
import time
import random
import tensorflow as tf
from matplotlib import pylab
from scipy.sparse import lil_matrix

# 1. Downloading the data

In [None]:
url = 'http://mlg.ucd.ie/files/datasets/bbc-fulltext.zip'


def download_data(url, data_dir):
    """Download a file if not present, and make sure it's the right size."""

    # Create the data directory if it does not exist
    os.makedirs(data_dir, exist_ok=True)

    file_path = os.path.join(data_dir, 'bbc-fulltext.zip')

    # If file doesnt exist, download
    if not os.path.exists(file_path):
        print('Downloading file...')
        filename, _ = urlretrieve(url, file_path)
    else:
        print("File already exists")

    extract_path = os.path.join(data_dir, 'bbc')

    # If data has not been extracted already, extract data
    if not os.path.exists(extract_path):
        with zipfile.ZipFile(os.path.join(data_dir, 'bbc-fulltext.zip'), 'r') as zipf:
            zipf.extractall(data_dir)
    else:
        print("bbc-fulltext.zip has already been extracted")

download_data(url, 'data')

# 2. Reading data without preprocessing

In [2]:
def read_data(data_dir):

    # This will contain the full list of stories
    news_stories = []

    print("Reading files")

    i = 0 # Just used for printing progress
    for root, dirs, files in os.walk(data_dir):

        for fi, f in enumerate(files):

            # We don't read the README file
            if 'README' in f:
                continue

            # Printing progress
            i += 1
            print("."*i, f, end='\r')

            # Open the file
            with open(os.path.join(root, f), encoding='latin-1') as f:

                story = []
                # Read all the lines
                for row in f:

                    story.append(row.strip())

                # Create a single string with all the rows in the doc
                story = ' '.join(story)
                # Add that to the list
                news_stories.append(story)

        print('', end='\r')

    print(f"\nDetected {len(news_stories)} stories")
    return news_stories


news_stories = read_data(os.path.join('data', 'bbc'))

# Printing some stats and sample data
print(f"{sum([len(story.split(' ')) for story in news_stories])} words found in the total news set")
print('Example words (start): ',news_stories[0][:50])
print('Example words (end): ',news_stories[-1][-50:])

Reading files
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

# 3. Building a tokenizer

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer

n_vocab = 15000 + 1
tokenizer = Tokenizer(
    num_words=n_vocab - 1,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True, split=' ', oov_token=''
)

tokenizer.fit_on_texts(news_stories)
print("Data fitted on the tokenizer")

Data fitted on the tokenizer


# 4. Generate the cooc matrix

In [4]:
from scipy.sparse import save_npz, load_npz

def generate_cooc_matrix(text, tokenizer, window_size, n_vocab, use_weighting=True):

    # Convert list of text to list of list of word IDs
    sequences = tokenizer.texts_to_sequences(text)

    # A sparse matrix to retain co-occurrences of words
    cooc_mat = lil_matrix((n_vocab, n_vocab), dtype=np.float32)

    # Go through each sequence one by one
    for si, sequence in enumerate(sequences):

        # Printing the progress
        if (si+1)%100==0:
            print('.'*((si+1)//100), f"{si+1}/{len(sequences)}", end='\r')

        # For each target word,
        for i, wi in zip(np.arange(window_size, len(sequence)-window_size), sequence[window_size:-window_size]):

            # Get the context window word IDs
            context_window = sequence[i-window_size: i+window_size+1]

            # The weight for the words in the context window (except target word) will be 1
            window_weights = np.ones(shape=(window_size*2 + 1,), dtype=np.float32)
            window_weights[window_size] = 0.0

            if use_weighting:
                # If weighting is used, penalize context words based on distance to target word
                distances = np.abs(np.arange(-window_size, window_size+1))
                distances[window_size] = 1.0
                # Update the sparse matrix
                cooc_mat[wi, context_window] += window_weights/distances
            else:
                # Update the sparse matrix
                cooc_mat[wi, context_window] += window_weights

    print("\n")

    return cooc_mat

# ----------------------------------------- IMPORTANT ---------------------------------------------- #
#                                                                                                    #
# Set this true or false, depending on whether you want to generate the matrix or reuse the existing #
#                                                                                                    #
# ---------------------------------------------------------------------------------------------------#
generate_cooc = True

# Generate the matrix
if generate_cooc:
    t1 = time.time()
    cooc_mat = generate_cooc_matrix(news_stories, tokenizer, 1, n_vocab, True)
    t2 = time.time()
    print(f"It took {t2-t1} seconds to generate the co-occurrence matrix")

    save_npz(os.path.join('data','cooc_mat.npz'), cooc_mat.tocsr())
# Load the matrix from disk
else:
    try:
        cooc_mat = load_npz(os.path.join('data','cooc_mat.npz')).tolil()
        print(f"Cooc matrix of type {type(cooc_mat).__name__} was loaded from disk")
    except FileNotFoundError as ex:
        raise FileNotFoundError(
            "Could not find the co-occurrence matrix on the disk. Did you generate the matrix by setting generate_cooc=True?"
        )

...................... 2200/2225

It took 283.9533791542053 seconds to generate the co-occurrence matrix


# 5. Defining the hyperparameters

In [5]:
batch_size = 4096 # Data points in a single batch

embedding_size = 128 # Dimension of the embedding vector.

window_size=1 # We use a window size of 1 on either side of target word

epochs = 5 # Number of epochs to train for

# We pick a random validation set to sample nearest neighbors
valid_size = 16 # Random set of words to evaluate similarity on.
# We sample valid data points randomly from a large window without always being deterministic
valid_window = 250

# When selecting valid examples, we select some of the most frequent words as well as
# some moderately rare words
np.random.seed(54321)
random.seed(54321)

valid_term_ids = np.array(random.sample(range(valid_window), valid_size))
valid_term_ids = np.append(
    valid_term_ids, random.sample(range(1000, 1000+valid_window), valid_size),
    axis=0
)

# 6. Defining the model

In [6]:
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Embedding, Dot, Add
from tensorflow.keras.models import Model

K.clear_session()

# Define two input layers for context and target words
word_i = Input(shape=())
word_j = Input(shape=())

# Each context and target has their own embeddings (weights and biases)

# Embedding weights
embeddings_i = Embedding(n_vocab, embedding_size, name='target_embedding')(word_i)
embeddings_j = Embedding(n_vocab, embedding_size, name='context_embedding')(word_j)

# Embedding biases
b_i = Embedding(n_vocab, 1, name='target_embedding_bias')(word_i)
b_j = Embedding(n_vocab, 1, name='context_embedding_bias')(word_j)

# Compute the dot product between embedding vectors (i.e., w_i.w_j)
ij_dot = Dot(axes=-1)([embeddings_i,embeddings_j])

# Add the biases (i.e., w_i.w_j + b_i + b_j )
pred = Add()([ij_dot, b_i, b_j])

# The final model
glove_model = Model(inputs=[word_i, word_j],outputs=pred, name='glove_model')

# Glove has a specific loss function with a sound mathematical underpinning
# It is a form of mean squared error
glove_model.compile(loss="mse", optimizer = 'adam')

glove_model.summary()




In [7]:
news_sequences = tokenizer.texts_to_sequences(news_stories)

# 7. Defining the Data Generator function

In [8]:
def glove_data_generator(
    sequences, window_size, batch_size, vocab_size, cooccurrence_matrix, x_max=100.0, alpha=0.75, seed=None
):
    """ Generate batches of inputs and targets for GloVe """

    # Shuffle the data so that in every epoch, the order of data is different.
    rand_sequence_ids = np.arange(len(sequences))
    np.random.shuffle(rand_sequence_ids)

    # We will use a sampling table to make sure we don't oversample stop words
    sampling_table = tf.keras.preprocessing.sequence.make_sampling_table(vocab_size)

    # For each story/article
    for si in rand_sequence_ids:

        # Generate positive skip-grams while using subsampling
        positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
            sequences[si],
            vocabulary_size=vocab_size,
            window_size=window_size,
            negative_samples=0.0,
            shuffle=False,
            sampling_table=sampling_table,
            seed=seed
        )

        # Take targets and context words separately
        targets, context = zip(*positive_skip_grams)
        targets, context = np.array(targets).ravel(), np.array(context).ravel()


        x_ij = np.array(cooccurrence_matrix[targets, context].toarray()).ravel()

        # Compute log - Introducing an additive shift to make sure we don't compute log(0)
        log_x_ij = np.log(x_ij + 1)

        # Sample weights
        # if x < x_max => (x/x_max)**alpha / else => 1
        sample_weights = np.where(x_ij < x_max, (x_ij/x_max)**alpha, 1)

        # If seed is not provided, generate a random one
        if not seed:
            seed = random.randint(0, 10e6)

        # Shuffle data
        np.random.seed(seed)
        np.random.shuffle(context)
        np.random.seed(seed)
        np.random.shuffle(targets)
        np.random.seed(seed)
        np.random.shuffle(log_x_ij)
        np.random.seed(seed)
        np.random.shuffle(sample_weights)

        # Generate a batch or data in the format
        # ((target words, context words), log(X_ij) <- true targets, f(X_ij) <- sample weights)
        for eg_id_start in range(0, context.shape[0], batch_size):
            yield (
                targets[eg_id_start: min(eg_id_start+batch_size, targets.shape[0])],
                context[eg_id_start: min(eg_id_start+batch_size, context.shape[0])]
            ), log_x_ij[eg_id_start: min(eg_id_start+batch_size, x_ij.shape[0])], \
            sample_weights[eg_id_start: min(eg_id_start+batch_size, sample_weights.shape[0])]


# Generate some data
news_glove_data_gen = glove_data_generator(
    news_sequences, 2, 10, n_vocab, cooc_mat
)

for x, y, z in news_glove_data_gen:
    print(x)
    print(y)
    print(z)
    break


(array([11701,  1792,  3188,  3520, 11382,  1326,  2541,   217,  2214,
        5193]), array([    1,  1814,   403,   138,  3434, 13637,     7,   137,     2,
         828]))
[0.6931472 0.6931472 0.6931472 0.6931472 0.        0.6931472 1.0986123
 0.6931472 2.3978953 0.6931472]
[0.03162277 0.03162277 0.03162277 0.03162277 0.         0.03162277
 0.05318296 0.03162277 0.17782794 0.03162277]


# 8. Training the model

In [9]:
class ValidationCallback(tf.keras.callbacks.Callback):

    def __init__(self, valid_term_ids, model_with_embeddings, tokenizer):

        self.valid_term_ids = valid_term_ids
        self.model_with_embeddings = model_with_embeddings
        self.tokenizer = tokenizer

        super().__init__()

    def on_epoch_end(self, epoch, logs=None):
        """ Validation logic """

        # We will use context embeddings to get the most similar words
        # Other strategies include: using target embeddings, mean embeddings after avaraging context/target
        embedding_weights = self.model_with_embeddings.get_layer("context_embedding").get_weights()[0]
        normalized_embeddings = embedding_weights / np.sqrt(np.sum(embedding_weights**2, axis=1, keepdims=True))

        # Get the embeddings corresponding to valid_term_ids
        valid_embeddings = normalized_embeddings[self.valid_term_ids, :]

        # Compute the similarity between valid_term_ids and all the embeddings
        # V x d (d x D) => V x D
        top_k = 5 # Top k items will be displayed
        similarity = np.dot(valid_embeddings, normalized_embeddings.T)

        # Invert similarity matrix to negative
        # Ignore the first one because that would be the same word as the probe word
        similarity_top_k = np.argsort(-similarity, axis=1)[:, 1: top_k+1]

        # Print the output
        for i, term_id in enumerate(valid_term_ids):

            similar_word_str = ', '.join([self.tokenizer.index_word[j] for j in similarity_top_k[i, :] if j > 1])
            print(f"{self.tokenizer.index_word[term_id]}: {similar_word_str}")

        print('\n')

# 9. Running the GloVe algorithm

In [10]:
glove_validation_callback = ValidationCallback(valid_term_ids, glove_model, tokenizer)

# Train the model for several epochs
for ei in range(epochs):

    print(f"Epoch: {ei+1}/{epochs} started")

    news_glove_data_gen = glove_data_generator(
        news_sequences, window_size, batch_size, n_vocab, cooc_mat
    )

    glove_model.fit(
        news_glove_data_gen, epochs=1,
        callbacks=glove_validation_callback,
    )

Epoch: 1/5 started
   2224/Unknown [1m39s[0m 17ms/step - loss: 1.0483election: attorney, forthcoming, coming, studio, manager
me: him, come, broadband, likely, japan
with: formal, held, press, between, through
you: we, they, also, still, doing
were: are, was, but, because, wanted
win: time, based, its, charge, level
those: time, charge, governments, its, place
music: cameras, growing, leaders, areas, our
also: it, now, there, like, they
third: first, own, take, according, club
best: actor, growing, won, former, result
him: come, me, charge, another, working
too: so, how, better, stronger, any
some: its, her, broadband, came, their
through: them, into, set, door, could
mr: tony, gordon, jack, article, closest
file: illegally, systems, licensed, star, administration
pair: them, come, working, broadband, looking
ceremony: time, playing, go, use, his
believed: took, said, being, wanted, were
post: case, came, around, chelsea, time
indian: children, kings, board, universities, kingfisher


  self.gen.throw(typ, value, traceback)


   2224/Unknown [1m38s[0m 17ms/step - loss: 0.0415election: forthcoming, attorney, upcoming, manager, motors
me: likely, part, come, aimed, him
with: formal, tally, electro, uphill, held
you: we, they, still, afford, detained
were: are, because, saying, was, when
win: based, least, brit, largest, victory
those: driven, backed, took, important, showed
music: cameras, subscriber, assistants, divide, revolution
also: give, now, made, which, going
third: fourth, take, seventh, statement, first
best: supporting, category, actor, superhero, result
him: come, part, place, me, working
too: pretty, how, so, better, stronger
some: compared, end, kind, way, came
through: able, door, mci, outstanding, them
mr: article, bernie, resignation, gordon, 63
file: illegally, systems, catchy, abilities, solana
pair: come, them, broadband, won, offered
ceremony: driven, politics, rise, baby, least
believed: wanted, because, fact, ensure, says
post: grew, immediate, forced, podcasts, career
indian: childre

In [22]:
def save_embeddings(model, tokenizer, vocab_size, save_dir):

    # Create the directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    # Get the words sorted according to their ID from the tokenizer
    _, words_sorted = zip(*sorted(list(tokenizer.index_word.items()), key=lambda x: x[0])[:vocab_size-1])
    # Add one word in front to represent the reserved ID (0)
    words_sorted = [None] + list(words_sorted)

    # Create a new array by concatenating embeddings and bias

    context_embedding_weights = model.get_layer("context_embedding").get_weights()[0]
    context_embedding_bias = model.get_layer("context_embedding_bias").get_weights()[0]
    context_embedding = np.concatenate([context_embedding_weights, context_embedding_bias], axis=1)

    target_embedding_weights = model.get_layer("target_embedding").get_weights()[0]
    target_embedding_bias = model.get_layer("target_embedding_bias").get_weights()[0]
    target_embedding = np.concatenate([target_embedding_weights, target_embedding_bias], axis=1)

    # Save the array as pandas DataFrames
    pd.DataFrame(
        context_embedding,
        index = words_sorted
    ).to_pickle(os.path.join(save_dir, "context_embedding_and_bias.pkl"))

    pd.DataFrame(
        target_embedding,
        index = words_sorted
    ).to_pickle(os.path.join(save_dir, "target_embedding_and_bias.pkl"))


save_embeddings(glove_model, tokenizer, n_vocab, save_dir='glove_embeddings')

# 11. Usecase1 : Visualize word embedding for some words

In [23]:
# Load the context and target embeddings
context_embeddings = pd.read_pickle("C:/Users/ariji/Exploratory Data Analysis/glove_embeddings/context_embedding_and_bias.pkl")
target_embeddings = pd.read_pickle("C:/Users/ariji/Exploratory Data Analysis/glove_embeddings/target_embedding_and_bias.pkl")

In [35]:
context_embeddings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,128
,-0.036330,-0.034749,-0.016596,-0.036723,-0.047355,-0.034517,0.021469,0.036908,-0.013375,-0.046320,...,0.040131,0.038787,0.020259,0.045287,0.008850,0.001571,-0.034098,-0.046850,-0.016506,-0.007898
,0.263443,0.165927,0.205571,0.245334,-0.397638,-0.034381,-0.247318,-0.215806,0.216790,-0.133485,...,-0.248698,-0.095389,0.088773,-0.139337,0.505093,0.181932,0.186754,-0.393379,0.024291,0.353624
the,-0.266497,0.331774,-0.245536,0.067307,0.151394,-0.387824,-0.284053,-0.222052,0.188199,-0.048570,...,-0.145824,0.317727,0.081592,-0.191424,0.373281,0.142110,0.445748,-0.421791,-0.146392,1.005131
to,-0.070858,0.400328,-0.060839,-0.175254,0.384202,-0.404565,-0.314036,-0.329025,-0.186250,0.061983,...,0.101050,-0.113064,-0.141717,-0.262100,0.047011,0.345855,0.331112,-0.451555,-0.208570,0.862355
of,0.354768,0.398066,0.206499,0.069180,0.434604,-0.195982,0.039904,-0.102387,0.203462,-0.230983,...,-0.131796,0.202652,-0.239801,-0.086405,0.326264,0.361356,0.202365,-0.389407,0.335596,0.711045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
claxton's,0.108052,0.047125,0.100965,0.120811,-0.134853,-0.032541,-0.072874,-0.105455,0.072872,-0.006496,...,-0.063904,0.072837,0.139502,-0.081198,0.043937,0.111830,0.050387,-0.051679,0.100905,0.089221
gears,0.008147,0.044746,0.018390,0.012532,-0.040089,-0.025723,-0.029935,0.066408,-0.021493,0.028001,...,0.031134,-0.039890,0.020140,0.026129,-0.007554,-0.005120,0.012199,-0.055415,-0.030244,0.038155
garland,0.065258,0.116206,0.033323,0.112492,-0.059957,0.084381,-0.093605,-0.023280,0.069636,-0.039847,...,-0.106903,0.011817,-0.033269,-0.032361,0.056454,0.092133,0.040090,-0.132536,0.005027,0.113452
pentathlon,0.056899,-0.004680,-0.060077,0.029075,0.000193,-0.057718,-0.048092,-0.023860,0.012735,-0.076836,...,-0.086595,-0.028878,-0.049910,0.077126,0.022727,0.002971,-0.031748,-0.038566,-0.029229,0.084709


In [25]:
tokenizer.word_index['dog']

6031

In [31]:
context_embeddings.iloc[6031]

0      0.065112
1      0.152522
2     -0.004471
3     -0.192605
4      0.054866
         ...   
124    0.035825
125    0.171777
126   -0.100865
127    0.001464
128    0.200592
Name: dog, Length: 129, dtype: float32

In [29]:
print(context_embeddings.index)

Index([        None,           '',        'the',         'to',         'of',
              'and',          'a',         'in',        'for',         'is',
       ...
       'meticulous',   'olympian', 'decathlete',    'findlay',   'stumbled',
        'claxton's',      'gears',    'garland', 'pentathlon',   'jeanette'],
      dtype='object', length=15001)


In [32]:
tokenizer.word_index['dog']

6031

In [33]:
# example 1 : word vector for "dog"
#GloVe_dog = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["dog"]]
GloVe_dog_embedding = context_embeddings.iloc[tokenizer.word_index['dog']]

print(len(GloVe_dog_embedding))
print(GloVe_dog_embedding)

129
0      0.065112
1      0.152522
2     -0.004471
3     -0.192605
4      0.054866
         ...   
124    0.035825
125    0.171777
126   -0.100865
127    0.001464
128    0.200592
Name: dog, Length: 129, dtype: float32


In [34]:
# example 2 : word vector for "cat"
#GloVe_cat = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["cat"]]
GloVe_cat_embedding = context_embeddings.iloc[tokenizer.word_index['cat']]

print(len(GloVe_cat_embedding))
print(GloVe_cat_embedding)

129
0      0.034035
1      0.093885
2      0.043728
3      0.101932
4      0.065371
         ...   
124   -0.027784
125    0.054978
126    0.029087
127    0.124382
128    0.196347
Name: cat, Length: 129, dtype: float32


In [36]:
# example 3 : word vector for "man"
#GloVe_man = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["man"]]
GloVe_man_embedding = context_embeddings.iloc[tokenizer.word_index['man']]

print(len(GloVe_man_embedding))
print(GloVe_man_embedding)

129
0      0.079416
1     -0.076226
2      0.000903
3      0.184338
4      0.111428
         ...   
124    0.233430
125    0.035179
126    0.170419
127   -0.130050
128    0.287434
Name: man, Length: 129, dtype: float32


In [37]:
# example 4 : word vector for "woman"
#GloVe_woman = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["woman"]]
GloVe_woman_embedding = context_embeddings.iloc[tokenizer.word_index['woman']]

print(len(GloVe_woman_embedding))
print(GloVe_woman_embedding)

129
0     -0.046141
1      0.131601
2     -0.055070
3     -0.157930
4      0.164094
         ...   
124   -0.042518
125   -0.024443
126   -0.074304
127   -0.271653
128    0.252643
Name: woman, Length: 129, dtype: float32


# 12. Usecase2 : Similarity of the word embeddings

In [39]:
# example 1 : similarity score between dog and cat
similarity = np.dot(GloVe_dog_embedding, GloVe_cat_embedding) / (np.linalg.norm(GloVe_dog_embedding) * np.linalg.norm(GloVe_cat_embedding))

print(similarity)

0.40173376


In [40]:
# example 2 : similarity score between dog and man
similarity = np.dot(GloVe_dog_embedding, GloVe_man_embedding) / (np.linalg.norm(GloVe_dog_embedding) * np.linalg.norm(GloVe_man_embedding))

print(similarity)

0.20789835


In [41]:
# example 3 : similarity score between woman and man
similarity = np.dot(GloVe_man_embedding, GloVe_woman_embedding) / (np.linalg.norm(GloVe_man_embedding) * np.linalg.norm(GloVe_woman_embedding))

print(similarity)

0.13812399


# 13. Usecase3 : Analogy task

In [19]:
king_vector = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["king"]]
man_vector = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["man"]]
woman_vector = glove_model.get_layer("context_embedding").get_weights()[0][tokenizer.word_index["woman"]]

In [20]:
tokenizer.word_index["Queen"] ## doesnn't exist in the word index

KeyError: 'Queen'

In [21]:
# application of analogy to calculate Queen vector

queen_vector = king_vector - man_vector + woman_vector
print(len(queen_vector))
print(queen_vector)

128
[-0.1072585   0.25532705 -0.16392715 -0.59597886 -0.10661711  0.09065117
  0.1273671   0.08389243  0.40643725 -0.11784977 -0.19390135  0.46458665
  0.23958902 -0.29843414  0.13671944  0.33693662 -0.1761643   0.11873163
  0.0778905   0.11633837 -0.42010018 -0.23044534 -0.481146   -0.3766451
  0.12563775  0.5352857   0.05852748  0.16316634  0.33384562  0.4587633
  0.11799078 -0.18976457  0.19446024  0.19190978  0.13035901  0.18002236
 -0.26145184 -0.02780539  0.21991275 -0.3134792   0.13610429 -0.2726212
 -0.06337574 -0.01761575  0.04404603 -0.11970913  0.33556253  0.31643936
  0.10766982  0.28283274  0.2290895  -0.34123135 -0.11084142 -0.38804632
 -0.12532598  0.4942302  -0.17592773 -0.0310415  -0.4120087   0.5502757
  0.09279455  0.33266354 -0.06980092 -0.01942305 -0.357976    0.46552268
  0.10120413  0.0862118   0.35329393 -0.1342226   0.11715373  0.05425069
  0.19657649 -0.11257687  0.23719537 -0.30711657  0.21943654  0.32002527
 -0.43137053  0.23663983 -0.2469813   0.2050604  -0