In [485]:
import os
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
import string
import re
from gensim.models import Word2Vec
from gensim.models.phrases import Phrases, Phraser
import logging
logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)
from nltk import bigrams
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN
from keras.layers import LSTM, GlobalMaxPooling1D
from keras.initializers import Constant
import keras.metrics
from numpy import newaxis
from keras.preprocessing.text import Tokenizer
from sklearn import metrics

## Reading in the Data:

We have used all the tweets from 2018 (train and test) to create the embeddings. However, balanced datasets have been used to train and test the model. The following code reads in all the data. Note, we are only considering earthquakes and floods in the following sections and we will build separate models for each earthquakes and floods. 

In [2]:
os.chdir('../10_Data/30_Balanced Tweets (Crit = High = Medium = Low)/10_2018 Train')

In [3]:
df_e1 = pd.read_csv('earthquake_TREC_2018_train_BALANCED.csv')
df_f1 = pd.read_csv('flood_TREC_2018_train_BALANCED.csv')

In [4]:
os.chdir('../15_2018 Test')

In [5]:
df_e2 = pd.read_csv('earthquake_TREC_2018_test_BALANCED.csv')
df_f2 = pd.read_csv('flood_TREC_2018_test_BALANCED.csv')

In [6]:
os.chdir('../../20_Extracted Tweets/10_2018 Train')

In [7]:
df_e1_embed = pd.read_csv('Earthquake_TREC_2018_train.csv')
df_f1_embed = pd.read_csv('flood_TREC_2018_train.csv')

In [8]:
os.chdir('../15_2018 Test')

In [9]:
df_e2_embed = pd.read_csv('Earthquake_TREC_2018_test.csv')
df_f2_embed = pd.read_csv('Floods_TREC_2018_test.csv')

## Combining dataframes
We are now combine earthquake tweets into one df and flood tweets into another df. We have subsetting on the tweets and priority columns. We do this for both the embedding dataframes and our balanced datasets, although we will not use the balanced datasets until later.

In [520]:
df_quake_e = pd.DataFrame()
df_quake_e['Tweet'] = pd.concat([df_e1_embed['Tweet'] , df_e2_embed['Tweet']])
df_quake_e['Priority'] = pd.concat([df_e1_embed['Priority'] , df_e2_embed['Priority']])

In [521]:
df_flood_e = pd.DataFrame()
df_flood_e['Tweet'] = pd.concat([df_f1_embed['Tweet'] , df_f2_embed['Tweet']])
df_flood_e['Priority'] = pd.concat([df_f1_embed['Priority'] , df_f2_embed['Priority']])

In [522]:
df_quake = pd.DataFrame()
df_quake['Tweet'] = pd.concat([df_e1['Tweet'] , df_e2['Tweet']])
df_quake['Priority'] = pd.concat([df_e1['Priority'] , df_e2['Priority']])

In [523]:
df_flood = pd.DataFrame()
df_flood['Tweet'] = pd.concat([df_f1['Tweet'] , df_f2['Tweet']])
df_flood['Priority'] = pd.concat([df_f1['Priority'] , df_f2['Priority']])

Cross checking the shapes to make sure they match. **They do match**

In [524]:
df_quake_e.shape, df_flood_e.shape, df_quake.shape, df_flood.shape

((5140, 2), (2518, 2), (100, 2), (120, 2))

## Converting to categorical (0 & 1)

We will now define a function to convert the priority to a categorical 0 & 1. This will be necessary when we train a model.

In [525]:
'''
Creating a categorical variable to keep label critical tweets as 1 and 0 otherwise
'''
def to_categorical(array):
    t = []
    for element in array:
        if element =='Critical':
            t.append(1)
        else:
            t.append(0)
        
    return (t)


## Pre-Processing

We shall now perform a series of pre processing to our tweets. This includes tokenizing them, removing stop words and lemmatising them.  

**I also wrote a line of code to remove the links in the tweets as it was throwing in a lot of gibberish into the emebddings.** The link is almost always at the end of the tweet so its relatively easy to remove. 

I left the punctuation marks in. We have to take a call on this later.

In [526]:
def preProcess(df):
    df['Tweet'] = df['Tweet'].astype('str')
    
    df['Tweet'] = df['Tweet'].apply(lambda x: re.split('http?s *: *\/\/.*', str(x))[0])
    
    token_array = []
    #for tweet in df['Tweet']:
    #    translator = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    #    tweet = tweet.translate(translator)
    #    token_tweet = word_tokenize(tweet)
    #    token_array.append(token_tweet)
    
    for tweet in df['Tweet']:
        token_tweet = word_tokenize(tweet)
        token_array.append(token_tweet)
        
    stop_words=set(stopwords.words("english"))
    filtered_token_array=[]
    for tweet in token_array:
        filtered_tweet = []
        for word in tweet:
                if word not in stop_words:
                    filtered_tweet.append(word)
        filtered_token_array.append(filtered_tweet)
        
    lem = WordNetLemmatizer()

    lemmatized_array=[]
    for tweet in filtered_token_array:
        lemmatized_tweet = []
        for word in tweet:
            lemmatized_tweet.append(lem.lemmatize(word,'v'))
        lemmatized_array.append(lemmatized_tweet)
    
    lemmatized_array_join = []
    for element in lemmatized_array:
        lemmatized_array_join.append(' '.join(element))
        
    return (lemmatized_array_join)


The tweets have been pre processed and we now have a collection of tweets. Word2Vec takes in a list of words together contained in a bigger list (Corpus -> List of Tweet -> List of Words).

In [527]:
l_quake_e = preProcess(df_quake_e)
l_flood_e = preProcess(df_flood_e)

list_of_words_quake = []
list_of_words_flood = []

for tweet in l_quake_e:
        token_tweet = word_tokenize(tweet)
        list_of_words_quake.append(token_tweet)
        
for tweet in l_flood_e:
        token_tweet = word_tokenize(tweet)
        list_of_words_flood.append(token_tweet)
        

## Creating a Word2Vec model

We will now initialise a word2vec model. This takes in a paramter 'min_count' which is the number of minimum occurences of a word required for it be included in the embeddings. **For now we set min_count = 1**. I did this because we are creating our own embeddings (not picking from wikipedia embeddings etc) and our data is not that huge. 

In [528]:
'''
min_count = int - Ignores all words with total absolute frequency lower than this - (2, 100)

window = int - The maximum distance between the current and predicted word within a sentence. 
        E.g. window words on the left and window words on the left of our target - (2, 10)

size = int - Dimensionality of the feature vectors. - (50, 300)

sample = float - The threshold for configuring which higher-frequency words are randomly downsampled. 
        Highly influencial. - (0, 1e-5)

alpha = float - The initial learning rate - (0.01, 0.05)

min_alpha = float - Learning rate will linearly drop to min_alpha as training progresses. 
        To set it: alpha - (min_alpha * epochs) ~ 0.00

negative = int - If > 0, negative sampling will be used, the int for negative specifies how many "noise words" 
        should be drown. If set to 0, no negative sampling is used. - (5, 20)
        
workers = int - Use these many worker threads to train the model (=faster training with multicore machines)
'''
w2v_quake = Word2Vec(min_count=1,
                     window=4,
                     size=100,
                     sample=6e-5, 
                     alpha=0.03, 
                     min_alpha=0.0007, 
                     negative=20)
w2v_flood = Word2Vec(min_count=1,
                     window=4,
                     size=100,
                     sample=6e-5, 
                     alpha=0.03, 
                     min_alpha=0.0007, 
                     negative=20)

We will now build our vocabulary. Do not run the following code multiple times before initialising the w2v instance again from above.

In [529]:
w2v_quake.build_vocab(list_of_words_quake)
w2v_flood.build_vocab(list_of_words_flood)

INFO - 14:08:40: collecting all words and their counts
INFO - 14:08:40: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 14:08:40: collected 13732 word types from a corpus of 70267 raw words and 5140 sentences
INFO - 14:08:40: Loading a fresh vocabulary
INFO - 14:08:40: effective_min_count=1 retains 13732 unique words (100% of original 13732, drops 0)
INFO - 14:08:40: effective_min_count=1 leaves 70267 word corpus (100% of original 70267, drops 0)
INFO - 14:08:40: deleting the raw counts dictionary of 13732 items
INFO - 14:08:40: sample=6e-05 downsamples 669 most-common words
INFO - 14:08:40: downsampling leaves estimated 33474 word corpus (47.6% of prior 70267)
INFO - 14:08:40: estimated required memory for 13732 words and 100 dimensions: 17851600 bytes
INFO - 14:08:40: resetting layer weights
INFO - 14:08:42: collecting all words and their counts
INFO - 14:08:42: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 14:08:42: collected 9614 

The following code is to view the vocabulary that we created

In [530]:
vocabulary_quake = w2v_quake.wv.vocab
vocabulary_flood = w2v_flood.wv.vocab
vocabulary_quake

{'ã€�': <gensim.models.keyedvectors.Vocab at 0x1d27abb1080>,
 '#': <gensim.models.keyedvectors.Vocab at 0x1d201fafba8>,
 'USGS': <gensim.models.keyedvectors.Vocab at 0x1d201fafb38>,
 'Breakingã€': <gensim.models.keyedvectors.Vocab at 0x1d201faf978>,
 '‘': <gensim.models.keyedvectors.Vocab at 0x1d201faf940>,
 'M': <gensim.models.keyedvectors.Vocab at 0x1d201faf780>,
 '1.1': <gensim.models.keyedvectors.Vocab at 0x1d201fafd68>,
 ',': <gensim.models.keyedvectors.Vocab at 0x1d201fafb70>,
 '28km': <gensim.models.keyedvectors.Vocab at 0x1d201fafe10>,
 'SSW': <gensim.models.keyedvectors.Vocab at 0x1d201fafb00>,
 'Fairbanks': <gensim.models.keyedvectors.Vocab at 0x1d201fafdd8>,
 'Alaska': <gensim.models.keyedvectors.Vocab at 0x1d201faff28>,
 'http': <gensim.models.keyedvectors.Vocab at 0x1d201fafa20>,
 ':': <gensim.models.keyedvectors.Vocab at 0x1d27bbe5e80>,
 '//t.co/hSyciQFM': <gensim.models.keyedvectors.Vocab at 0x1d203657b00>,
 'PastHour': <gensim.models.keyedvectors.Vocab at 0x1d2036578d0>

In [531]:
w2v_quake.train(list_of_words_quake, total_examples=w2v_quake.corpus_count, epochs=30, report_delay=1)
w2v_flood.train(list_of_words_flood, total_examples=w2v_quake.corpus_count, epochs=30, report_delay=1)

INFO - 14:08:46: training model with 3 workers on 13732 vocabulary and 100 features, using sg=0 hs=0 sample=6e-05 negative=20 window=4
INFO - 14:08:46: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:46: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:46: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:46: EPOCH - 1 : training on 70267 raw words (33444 effective words) took 0.2s, 209666 effective words/s
INFO - 14:08:46: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:46: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:46: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:46: EPOCH - 2 : training on 70267 raw words (33457 effective words) took 0.1s, 307183 effective words/s
INFO - 14:08:47: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:47: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:47: worker thread 

INFO - 14:08:49: EPOCH - 24 : training on 70267 raw words (33361 effective words) took 0.1s, 415091 effective words/s
INFO - 14:08:49: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:49: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:49: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:49: EPOCH - 25 : training on 70267 raw words (33405 effective words) took 0.1s, 366596 effective words/s
INFO - 14:08:49: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:49: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:49: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:49: EPOCH - 26 : training on 70267 raw words (33369 effective words) took 0.1s, 335774 effective words/s
INFO - 14:08:49: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:49: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:49: worker thread finished; await

INFO - 14:08:51: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:51: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:51: EPOCH - 14 : training on 48396 raw words (22207 effective words) took 0.1s, 429111 effective words/s
INFO - 14:08:51: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:51: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:51: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:51: EPOCH - 15 : training on 48396 raw words (22364 effective words) took 0.1s, 430682 effective words/s
INFO - 14:08:51: worker thread finished; awaiting finish of 2 more threads
INFO - 14:08:51: worker thread finished; awaiting finish of 1 more threads
INFO - 14:08:51: worker thread finished; awaiting finish of 0 more threads
INFO - 14:08:51: EPOCH - 16 : training on 48396 raw words (22348 effective words) took 0.1s, 394243 effective words/s
INFO - 14:08:51: worker thread finished; await

(669166, 1451880)

The following code allows us to see words closely related (positively or negatively) with any other word. In this case we put in the word 'help'.

In [532]:
w2v_quake.wv.most_similar(positive=["dead"])

INFO - 14:08:52: precomputing L2-norms of word weight vectors


[('Powerful', 0.9994490146636963),
 ('rock', 0.9993658661842346),
 ('powerful', 0.9993584156036377),
 ('Italy', 0.9993373155593872),
 ('6', 0.9993027448654175),
 ('A', 0.9993012547492981),
 ('northern', 0.9992755651473999),
 ('USGS', 0.9991604089736938),
 ('report', 0.9991366863250732),
 ('BreakingNews', 0.9991248846054077)]

We will now convert these word2vec embeddings into a pandas dataframe. This can be converted into a csv for use later in maybe R. We will also explore ways to use this in an SVM model to classify our tweets. 

In [583]:
word_list_quake = []
vector_list_quake = []
for word in w2v_quake.wv.vocab:
    word_list_quake.append(word)
    vector_list_quake.append(w2v_quake.wv.get_vector(word))
    
word_list_flood = []
vector_list_flood = []
for word in w2v_flood.wv.vocab:
    word_list_flood.append(word)
    vector_list_flood.append(w2v_flood.wv.get_vector(word))

In [584]:
quake_embeddings = pd.DataFrame()
quake_embeddings['Word'] = word_list_quake
quake_embeddings['Vector'] = vector_list_quake

In [585]:
flood_embeddings = pd.DataFrame()
flood_embeddings['Word'] = word_list_flood
flood_embeddings['Vector'] = vector_list_flood

In [586]:
quake_embeddings.to_csv('Word2Vec_Earthquake.csv')
flood_embeddings.to_csv('Word2Vec_Flood.csv')

## RNN LSTM Classifier

### Creating Word2Vec Vectors for Train Data:

Before we can implement the classifier, we need to convert our sentences (from balanced df) to a word2vec representation using our word2vec model that we trained above. This was tricky. All vectors corresponding to the tweets have to be the same length as the longest one. However, this does not mean we simply pad a 0 to the end of all the tweets but we need to append an array size (100,) beacuse each word in our word2vec is in 100 dimensions.

In [537]:
def word2vec_convert(all_tweets, model):
    
    max_tweet_length = 0
    for tweet in all_tweets:
        if len(word_tokenize(tweet))>max_tweet_length:
            max_tweet_length = len(word_tokenize(tweet))
            
    whole_vec = np.zeros((len(all_tweets), max_tweet_length, 100))
    num = 0
    for tweet in all_tweets:
        tweet_vec = np.zeros(shape = (max_tweet_length, 100))
        vec = np.zeros(100)
        for i in range(len(word_tokenize(tweet))):
            try:
                vec = np.add(vec, model[word_tokenize(tweet)[i]])
                tweet_vec[i] = vec 
                num = num + 1
            except:
                pass
        whole_vec[i] = tweet_vec
    return(whole_vec, num)

Breaking the quake df into 3 different dfs. Each df with have equal number of (citical, high), (critical, med) and (critical, low} tweets. We will analyse how the model does w.r.t to each of these categories. We are then getting the respective embeddings for each df. The embeddings will be in the shape of (50, 34, 100) aka (50 tweets, max padded length 34 and 100 dims)

In [538]:
#change made here...
df_quake_low = df_quake[(df_quake['Priority'] == 'Critical') | (df_quake['Priority'] == 'Low')]
df_quake_med = df_quake[(df_quake['Priority'] == 'Critical') | (df_quake['Priority'] == 'Medium')]
df_quake_high = df_quake[(df_quake['Priority'] == 'Critical') | (df_quake['Priority'] == 'High')]

l_quake_l = preProcess(df_quake_low)
word2vec_convert_quake_l, num_q_l = word2vec_convert(l_quake_l, w2v_quake)

l_quake_m = preProcess(df_quake_med)
word2vec_convert_quake_m, num_q_m = word2vec_convert(l_quake_m, w2v_quake)

l_quake_h = preProcess(df_quake_high)
word2vec_convert_quake_h, num_q_h = word2vec_convert(l_quake_h, w2v_quake)


t_quake_low = to_categorical(df_quake_low['Priority'])
t_quake_med = to_categorical(df_quake_med['Priority'])
t_quake_high = to_categorical(df_quake_high['Priority'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
  from ipykernel import kernelapp as app


In [539]:
word2vec_convert_quake_l_svm = word2vec_convert_quake_l.transpose(0,1,2).reshape(word2vec_convert_quake_l.shape[0],word2vec_convert_quake_l.shape[1]*word2vec_convert_quake_l.shape[2])
word2vec_convert_quake_l_svm = word2vec_convert_quake_l_svm[:, :, newaxis]

In [540]:
model = keras.Sequential()                    
model.add(LSTM(32, input_shape = (word2vec_convert_quake_l_svm.shape[1], word2vec_convert_quake_l_svm.shape[2]),
              return_sequences = False))
model.add(Dense(16,activation='relu'))
model.add(Dense(1,activation='sigmoid')) 

In [541]:
model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc', keras.metrics.Precision(), keras.metrics.Recall(), keras.metrics.AUC()])

In [542]:
history = model.fit(word2vec_convert_quake_l_svm , t_quake_low , batch_size=10, epochs=20, validation_split=0.3, shuffle=True)

Train on 35 samples, validate on 15 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [544]:
y_pred = model.predict(history.validation_data[0])

In [545]:
y_pred

array([[0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086],
       [0.7286086]], dtype=float32)

In [546]:
word2vec_convert_quake_l.shape

(50, 34, 100)

In [339]:
word2vec_convert_quake_l[1][0].shape

(100,)

In [355]:
df_quake_low[df_quake_low['Priority'] == 'Critical'].shape

(25, 2)

In [358]:
history.on_train_batch_begin

<bound method Callback.on_train_batch_begin of <keras.callbacks.callbacks.History object at 0x000001D269253BA8>>

In [508]:
word2vec_convert_quake_l_svm = word2vec_convert_quake_l.transpose(0,1,2).reshape(word2vec_convert_quake_l.shape[0],word2vec_convert_quake_l.shape[1]*word2vec_convert_quake_l.shape[2])
word2vec_convert_quake_l_svm = word2vec_convert_quake_l_svm[:, :, newaxis]

In [497]:
X_train, X_val, Y_train, Y_val = train_test_split(word2vec_convert_quake_l_svm , t_quake_low, test_size=0.2, random_state=100)

clf = SGDClassifier(loss = 'hinge', alpha = 0.00001, max_iter=10000, tol=1e-6
                                                       , shuffle = True, learning_rate = 'optimal', penalty='l1')
clf.fit(X_train, Y_train)
y_pred = clf.predict(X_val)
'''
    Calibrating above model to yield probabilities. SGD with hinge loss does not spit out prob. It gives the value of 
    the decision function aka the value of the function across th hyperplane but that does not help us in this case. 
    Using loss as log or something else will give prob but we want to do SVM and not logistic regression. 
'''
model = CalibratedClassifierCV(clf)
model.fit(X_train, Y_train)
proba_cl = model.predict_proba(X_val)[:, 1]

  E = np.exp(AB[0] * F + AB[1])
  TEP_minus_T1P = P * (T * E - T1)
  E = np.exp(AB[0] * F + AB[1])
  TEP_minus_T1P = P * (T * E - T1)
  E = np.exp(AB[0] * F + AB[1])
  TEP_minus_T1P = P * (T * E - T1)


In [498]:
metrics.recall_score(Y_val, y_pred)

1.0

In [499]:
metrics.precision_score(Y_val, y_pred)

1.0

In [500]:
y_pred

array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0])

In [501]:
proba_cl

array([0.54305556, 0.54305556, 0.54305556, 0.54305556, 0.54305556,
       0.54305556, 0.54305556, 0.54305556, 0.54305556, 0.54305556])

In [502]:
Y_val

[1, 0, 0, 0, 0, 0, 1, 0, 1, 0]

In [505]:
from sklearn.metrics import roc_auc_score
roc_auc_score(Y_val, proba_cl)

0.5

In [550]:
os.getcwd()

'C:\\Users\\Vishaal\\Documents\\GitHub\\TREC_Distributed_Machine_Learning\\TREC'

In [554]:
os.chdir('30_Models/Embeddings')

In [555]:
df = pd.read_csv('Word2Vec_Earthquake.csv')

In [587]:
len(df['Vector'][0].split(' '))

170

In [568]:
len(df['Vector'])

13732

In [573]:
len(quake_embeddings['Vector'][0])

100

In [588]:
df['Vector'][0]

'[-0.42930934  0.30210933  0.21056513  0.03452663  0.42126256 -0.10403582\n  0.27250922  0.63893014 -0.02373877 -0.21439472  0.00800843 -0.19286107\n  0.34826067  0.1199688   0.27746996 -0.45279133 -0.531329    0.08670937\n -0.05606615  0.35599726  0.256648    0.45442337  0.01842661 -0.2624241\n -0.2438972  -0.07291157 -0.13600281 -0.21213311 -0.51666    -0.25144532\n -0.15743652  0.05947116 -0.2677446  -0.04593701 -0.11057325  0.17864665\n -0.13757832 -0.1789104  -0.28575233 -0.23100701  0.21304555  0.13309859\n  0.01255217  0.21739659 -0.22655548 -0.21831153  0.13235144 -0.31877106\n  0.22851521 -0.20508546  0.01989687  0.28824607 -0.29692942  0.11683045\n  0.0806662   0.10847174 -0.3374259   0.00975608  0.05762445 -0.19405909\n  0.0714239  -0.2795089   0.015348   -0.3778789  -0.04553798  0.31654674\n -0.12942529 -0.07994153  0.17521359 -0.08295401  0.15600537 -0.35787982\n -0.05917545 -0.11063447 -0.5011865  -0.0458384  -0.17793064  0.05883675\n  0.3478436  -0.01042942 -0.1623629   