# Install and import nltk

In [1]:
import nltk
#nltk.download()

In [2]:
#to see what attributes and functions we have with nltk
dir(nltk)

['AbstractLazySequence',
 'AffixTagger',
 'AlignedSent',
 'Alignment',
 'AnnotationTask',
 'ApplicationExpression',
 'Assignment',
 'BigramAssocMeasures',
 'BigramCollocationFinder',
 'BigramTagger',
 'BinaryMaxentFeatureEncoding',
 'BlanklineTokenizer',
 'BllipParser',
 'BottomUpChartParser',
 'BottomUpLeftCornerChartParser',
 'BottomUpProbabilisticChartParser',
 'Boxer',
 'BrillTagger',
 'BrillTaggerTrainer',
 'CFG',
 'CRFTagger',
 'CfgReadingCommand',
 'ChartParser',
 'ChunkParserI',
 'ChunkScore',
 'Cistem',
 'ClassifierBasedPOSTagger',
 'ClassifierBasedTagger',
 'ClassifierI',
 'ConcordanceIndex',
 'ConditionalExponentialClassifier',
 'ConditionalFreqDist',
 'ConditionalProbDist',
 'ConditionalProbDistI',
 'ConfusionMatrix',
 'ContextIndex',
 'ContextTagger',
 'ContingencyMeasures',
 'CoreNLPDependencyParser',
 'CoreNLPParser',
 'Counter',
 'CrossValidationProbDist',
 'DRS',
 'DecisionTreeClassifier',
 'DefaultTagger',
 'DependencyEvaluator',
 'DependencyGrammar',
 'DependencyGrap

In [3]:
#stopwords are words that does not have a meaning seperate from the sentence 
from nltk.corpus import stopwords

stopwords.words('english')[0:5]

['i', 'me', 'my', 'myself', 'we']

In [4]:
# to see the first 500 stopwords with the interval of 25 (1. then 26. then 51...)
stopwords.words('english')[0:500:25]

['i', 'herself', 'been', 'with', 'here', 'very', 'doesn', 'won']

# Reading text data

In [9]:
import pandas as pd

messages = pd.read_csv('spam.csv', encoding ='latin-1')
messages.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [10]:
messages = messages.drop(labels = ['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis = 1)
messages.columns = ["label","text"]
messages.head()

Unnamed: 0,label,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [11]:
messages.shape

(5572, 2)

In [12]:
#to see the amount of data we have for each category
messages['label'].value_counts()

ham     4825
spam     747
Name: label, dtype: int64

In [13]:
#to see if we have any missing data
print('number of nulls in labels : {}'.format(messages['label'].isnull().sum()))
print('number of nulls in text : {}'.format(messages['text'].isnull().sum()))

number of nulls in labels : 0
number of nulls in text : 0


# Cleaning text data
there are 3 steps to clean text data:
1. removing punctuation
2. Tokenization
3. removing stopwords

In [14]:
import string

string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [15]:
#defining a function that will remove punctuation
def remove_punct(text): 
    text= ''.join([char for char in text if char not in string.punctuation])
    return text

In [16]:
#now we will apply it to our data with lambda function
messages['text_clean'] = messages['text'].apply(lambda x : remove_punct(x))
messages.head()

Unnamed: 0,label,text,text_clean
0,ham,"Go until jurong point, crazy.. Available only ...",Go until jurong point crazy Available only in ...
1,ham,Ok lar... Joking wif u oni...,Ok lar Joking wif u oni
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...,U dun say so early hor U c already then say
4,ham,"Nah I don't think he goes to usf, he lives aro...",Nah I dont think he goes to usf he lives aroun...


In [17]:
#defining a function that will tokenize our cleaned text
import re

def tokenize(text):
    tokens =  re.split('\W+',text)
    return tokens

messages['text_tokenized'] = messages['text_clean'].apply(lambda x: tokenize(x.lower()))
messages.head()

Unnamed: 0,label,text,text_clean,text_tokenized
0,ham,"Go until jurong point, crazy.. Available only ...",Go until jurong point crazy Available only in ...,"[go, until, jurong, point, crazy, available, o..."
1,ham,Ok lar... Joking wif u oni...,Ok lar Joking wif u oni,"[ok, lar, joking, wif, u, oni]"
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,Free entry in 2 a wkly comp to win FA Cup fina...,"[free, entry, in, 2, a, wkly, comp, to, win, f..."
3,ham,U dun say so early hor... U c already then say...,U dun say so early hor U c already then say,"[u, dun, say, so, early, hor, u, c, already, t..."
4,ham,"Nah I don't think he goes to usf, he lives aro...",Nah I dont think he goes to usf he lives aroun...,"[nah, i, dont, think, he, goes, to, usf, he, l..."


In [18]:
#to avoid confusion you need to add this code since stopwords is the variable that's pointing to the CorpusReader object in nltk
#The actual stopwords (i.e. a list of stopwords) you're looking for can be adressed as :
stop_words = set(stopwords.words("english"))

In [19]:
#now we will define a function to eliminate stopwords so our model will have a better understanding from prepared text

def remove_stopwords(tokenized_text):
    text = [word for word in tokenized_text if word not in stop_words]
    return text

messages['text_nostop'] = messages['text_tokenized'].apply(lambda x: remove_stopwords(x))
messages.head()

Unnamed: 0,label,text,text_clean,text_tokenized,text_nostop
0,ham,"Go until jurong point, crazy.. Available only ...",Go until jurong point crazy Available only in ...,"[go, until, jurong, point, crazy, available, o...","[go, jurong, point, crazy, available, bugis, n..."
1,ham,Ok lar... Joking wif u oni...,Ok lar Joking wif u oni,"[ok, lar, joking, wif, u, oni]","[ok, lar, joking, wif, u, oni]"
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,Free entry in 2 a wkly comp to win FA Cup fina...,"[free, entry, in, 2, a, wkly, comp, to, win, f...","[free, entry, 2, wkly, comp, win, fa, cup, fin..."
3,ham,U dun say so early hor... U c already then say...,U dun say so early hor U c already then say,"[u, dun, say, so, early, hor, u, c, already, t...","[u, dun, say, early, hor, u, c, already, say]"
4,ham,"Nah I don't think he goes to usf, he lives aro...",Nah I dont think he goes to usf he lives aroun...,"[nah, i, dont, think, he, goes, to, usf, he, l...","[nah, dont, think, goes, usf, lives, around, t..."


In [20]:
#so we learned how to make this steps part by part. Now we will make it one function to move on with TF-IDF process
def clean_text(text):
    text= ''.join([word.lower() for word in text if word not in string.punctuation])
    tokens =  re.split('\W+',text)
    text = [word for word in tokens if word not in stop_words]
    return text

In [21]:
# now we will use TF-IDF to make our text data numerical.The way that it is stored with this method is: sparse matrix
#use analyzer as preprocessing function and then fit transform the data. The first element (number of text) is same (5572) while
#the second part is not 2 since it represents all the unique word our TF-IDF detects
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vect = TfidfVectorizer(analyzer=clean_text)
X_tfidf = tfidf_vect.fit_transform(messages['text'])
print(X_tfidf.shape)
print(tfidf_vect.get_feature_names())

(5572, 9395)


In [22]:
# now to train a model we need to make our sparse matrix a pandas dataframe 

X_features = pd.DataFrame(X_tfidf.toarray())
X_features.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9385,9386,9387,9388,9389,9390,9391,9392,9393,9394
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
from sklearn.ensemble import RandomForestClassifier
print(RandomForestClassifier)

<class 'sklearn.ensemble._forest.RandomForestClassifier'>


In [24]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_features, messages['label'],test_size=0.2)

In [25]:
rf = RandomForestClassifier()
rf_model = rf.fit(X_train, y_train)

In [26]:
y_pred = rf_model.predict(X_test)
from sklearn.metrics import precision_score, recall_score

precision= precision_score(y_test, y_pred, pos_label = 'spam')
recall = recall_score(y_test, y_pred, pos_label = 'spam')
print('Precision : {} / Recall : {}'.format(round(precision,3),round(recall,3)))

Precision : 0.992 / Recall : 0.831


# word2vec

In [27]:
#to get better results we can use pretrained embaddings from known websites, they will give better results
!pip install -U gensim



In [28]:
import gensim.downloader as api

wiki_embeddings = api.load('glove-wiki-gigaword-100')



In [29]:
#let's see a vector embedding for a spesific word
wiki_embeddings['king']

array([-0.32307 , -0.87616 ,  0.21977 ,  0.25268 ,  0.22976 ,  0.7388  ,
       -0.37954 , -0.35307 , -0.84369 , -1.1113  , -0.30266 ,  0.33178 ,
       -0.25113 ,  0.30448 , -0.077491, -0.89815 ,  0.092496, -1.1407  ,
       -0.58324 ,  0.66869 , -0.23122 , -0.95855 ,  0.28262 , -0.078848,
        0.75315 ,  0.26584 ,  0.3422  , -0.33949 ,  0.95608 ,  0.065641,
        0.45747 ,  0.39835 ,  0.57965 ,  0.39267 , -0.21851 ,  0.58795 ,
       -0.55999 ,  0.63368 , -0.043983, -0.68731 , -0.37841 ,  0.38026 ,
        0.61641 , -0.88269 , -0.12346 , -0.37928 , -0.38318 ,  0.23868 ,
        0.6685  , -0.43321 , -0.11065 ,  0.081723,  1.1569  ,  0.78958 ,
       -0.21223 , -2.3211  , -0.67806 ,  0.44561 ,  0.65707 ,  0.1045  ,
        0.46217 ,  0.19912 ,  0.25802 ,  0.057194,  0.53443 , -0.43133 ,
       -0.34311 ,  0.59789 , -0.58417 ,  0.068995,  0.23944 , -0.85181 ,
        0.30379 , -0.34177 , -0.25746 , -0.031101, -0.16285 ,  0.45169 ,
       -0.91627 ,  0.64521 ,  0.73281 , -0.22752 , 

In [30]:
#to find to words that are most similar to king based on trained vectors
wiki_embeddings.most_similar('king')

[('prince', 0.7682328820228577),
 ('queen', 0.7507690787315369),
 ('son', 0.7020888328552246),
 ('brother', 0.6985775232315063),
 ('monarch', 0.6977890729904175),
 ('throne', 0.6919989585876465),
 ('kingdom', 0.6811409592628479),
 ('father', 0.6802029013633728),
 ('emperor', 0.6712858080863953),
 ('ii', 0.6676074266433716)]

In [31]:
#now let's see the same example with word2vec
import pandas as pd
messages = pd.read_csv('spam.csv', encoding ='latin-1')
messages = messages.drop(labels = ['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis = 1)
messages.columns = ["label","text"]
messages.head()

Unnamed: 0,label,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [32]:
import gensim

In [33]:
messages['text_clean'] = messages['text'].apply(lambda x : gensim.utils.simple_preprocess(x))
messages.head(3)

Unnamed: 0,label,text,text_clean
0,ham,"Go until jurong point, crazy.. Available only ...","[go, until, jurong, point, crazy, available, o..."
1,ham,Ok lar... Joking wif u oni...,"[ok, lar, joking, wif, oni]"
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,"[free, entry, in, wkly, comp, to, win, fa, cup..."


In [34]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(messages['text_clean'], messages['label'],test_size=0.2)

In [35]:
#now we will train the word2vec model which comes with gensim package
w2v_model = gensim.models.Word2Vec(X_train,
                                   vector_size=100,
                                   window=5,
                                   min_count=2)

In [36]:
w2v_model.wv['king']

array([-0.02081725,  0.07449958,  0.01447177, -0.00847738,  0.00161875,
       -0.10837304,  0.04307782,  0.14232516, -0.07051765, -0.01112613,
       -0.01442211, -0.12432382, -0.01226864,  0.03202547,  0.02590023,
       -0.0443382 ,  0.02689186, -0.06018126, -0.0022497 , -0.13999197,
        0.04621976,  0.01644645,  0.04424754, -0.054825  ,  0.00960807,
        0.00436651, -0.06250426, -0.04075088, -0.06527624,  0.00978912,
        0.08871414,  0.03629505,  0.02396675, -0.05548836, -0.02967536,
        0.09529832,  0.02416985, -0.05422799, -0.05215971, -0.10815073,
        0.01894129, -0.0567477 , -0.02595316, -0.0013059 ,  0.06460064,
       -0.00926084, -0.05352699, -0.00282671,  0.0303377 ,  0.0338051 ,
        0.05092454, -0.05619075, -0.03278986,  0.01520036, -0.05309113,
        0.04642779,  0.03700001, -0.00691838, -0.08515224,  0.00495209,
        0.03693974, -0.00403794,  0.02377302, -0.00907095, -0.09080054,
        0.0552916 ,  0.03557732,  0.05203705, -0.10172395,  0.07

In [37]:
w2v_model.wv.most_similar('king')

[('girl', 0.9942992329597473),
 ('finish', 0.994294285774231),
 ('does', 0.9942395687103271),
 ('his', 0.9942319989204407),
 ('gift', 0.9942036867141724),
 ('goin', 0.9941839575767517),
 ('called', 0.9941750168800354),
 ('dunno', 0.9941504597663879),
 ('thing', 0.9941375851631165),
 ('baby', 0.994105875492096)]

In [38]:
#we can observe that when we used wikipedia embedding similar words made more sense

In [39]:
#this represents aLL od the words our model created a vector for. ( all words that appears training data more than 2)
w2v_model.wv.index_to_key

['to',
 'you',
 'the',
 'and',
 'in',
 'is',
 'me',
 'it',
 'my',
 'for',
 'your',
 'of',
 'call',
 'that',
 'have',
 'on',
 'now',
 'are',
 'can',
 'but',
 'so',
 'or',
 'not',
 'at',
 'do',
 'we',
 'get',
 'if',
 'ur',
 'be',
 'no',
 'with',
 'just',
 'will',
 'this',
 'gt',
 'lt',
 'how',
 'free',
 'up',
 'ok',
 'what',
 'from',
 'go',
 'out',
 'when',
 'know',
 'all',
 'll',
 'like',
 'got',
 'good',
 'am',
 'then',
 'day',
 'there',
 'time',
 'come',
 'only',
 'its',
 'was',
 'he',
 'send',
 'love',
 'want',
 'text',
 'as',
 'by',
 'txt',
 'stop',
 'one',
 'need',
 'going',
 'she',
 'back',
 'about',
 'da',
 'see',
 'home',
 'still',
 'today',
 'don',
 'sorry',
 'our',
 'tell',
 'lor',
 'any',
 'dont',
 'new',
 'pls',
 'mobile',
 'phone',
 'hi',
 'take',
 'reply',
 'been',
 'think',
 'please',
 'did',
 'some',
 'week',
 'dear',
 'has',
 'her',
 'much',
 'where',
 'night',
 'later',
 'they',
 'great',
 'well',
 'ì_',
 're',
 'him',
 'more',
 'oh',
 'hope',
 'wat',
 'who',
 'claim',

In [40]:
#now we take the every element in the x_test and turn to an array that will include learned words only
import numpy as np
w2v_vect = np.array([np.array([w2v_model.wv[i] for i in ls if i in w2v_model.wv.index_to_key]) for ls in X_test], dtype=object)

In [41]:
#let's see examples of how many words are learned in a sentence.
for i, v in enumerate(w2v_vect):
    print(len(X_test.iloc[i]), len(v))

5 4
7 7
11 10
14 14
11 10
8 8
16 15
9 9
18 16
19 14
12 10
11 11
1 1
7 7
4 4
30 30
23 23
6 6
8 7
31 27
13 12
17 16
21 19
20 18
6 6
10 9
6 3
26 26
8 8
5 5
5 5
10 10
29 27
8 6
9 9
19 19
7 7
27 25
7 7
36 36
13 13
27 22
7 7
3 3
8 7
12 9
8 7
6 6
9 8
9 9
17 16
24 20
5 5
7 7
11 11
4 1
11 8
14 11
11 11
8 8
17 14
4 4
6 5
3 3
9 9
10 10
13 13
29 27
16 12
25 22
9 9
19 19
5 5
1 1
23 21
31 27
8 7
7 7
5 4
13 12
6 5
5 5
13 13
7 7
25 23
12 10
25 24
21 20
8 4
18 16
46 40
7 5
7 5
24 23
9 9
9 7
21 21
18 18
6 5
8 6
23 22
14 12
7 6
15 12
6 6
24 16
8 7
23 20
18 14
4 2
19 17
9 8
9 7
18 16
9 6
8 8
9 5
18 16
18 16
21 21
9 9
6 6
13 12
10 8
16 14
6 6
22 21
18 17
24 24
26 25
7 7
23 20
6 6
4 4
9 6
19 16
25 21
11 11
36 33
6 6
1 1
8 3
5 4
14 14
6 6
26 23
11 11
10 9
9 9
6 6
8 8
12 10
5 5
18 11
10 9
4 4
13 10
6 6
14 13
5 5
2 2
29 27
4 4
6 6
9 7
8 7
3 3
20 20
1 1
8 7
9 8
27 23
10 9
28 27
7 7
15 15
9 7
3 3
16 14
26 20
5 4
24 24
29 24
8 8
6 6
9 8
4 3
18 17
12 4
10 10
19 17
18 18
23 23
5 5
26 25
27 25
6 6
45 44
23 22
7 6
16

In [42]:
#the vector length must be equal and equal to size of 100 as we gave in model so we will apply: 
w2v_vect_avg = []
for vect in w2v_vect:
    if len(vect)!= 0:
        w2v_vect_avg.append(vect.mean(axis=0))
    else:
        w2v_vect_avg.append(np.zeros(100))

In [43]:
for i, v in enumerate(w2v_vect_avg):
    print(len(X_test.iloc[i]), len(v))

5 100
7 100
11 100
14 100
11 100
8 100
16 100
9 100
18 100
19 100
12 100
11 100
1 100
7 100
4 100
30 100
23 100
6 100
8 100
31 100
13 100
17 100
21 100
20 100
6 100
10 100
6 100
26 100
8 100
5 100
5 100
10 100
29 100
8 100
9 100
19 100
7 100
27 100
7 100
36 100
13 100
27 100
7 100
3 100
8 100
12 100
8 100
6 100
9 100
9 100
17 100
24 100
5 100
7 100
11 100
4 100
11 100
14 100
11 100
8 100
17 100
4 100
6 100
3 100
9 100
10 100
13 100
29 100
16 100
25 100
9 100
19 100
5 100
1 100
23 100
31 100
8 100
7 100
5 100
13 100
6 100
5 100
13 100
7 100
25 100
12 100
25 100
21 100
8 100
18 100
46 100
7 100
7 100
24 100
9 100
9 100
21 100
18 100
6 100
8 100
23 100
14 100
7 100
15 100
6 100
24 100
8 100
23 100
18 100
4 100
19 100
9 100
9 100
18 100
9 100
8 100
9 100
18 100
18 100
21 100
9 100
6 100
13 100
10 100
16 100
6 100
22 100
18 100
24 100
26 100
7 100
23 100
6 100
4 100
9 100
19 100
25 100
11 100
36 100
6 100
1 100
8 100
5 100
14 100
6 100
26 100
11 100
10 100
9 100
6 100
8 100
12 100
5 100
18 

# doc2vec

  doc2vec is a shallow, two layer neural network that accepts a text corpus as an input, and it returns a set of vectors 
(also known as embeddings); each vector is a numeric representation of a given sentence, paragraph or document
instead of creating a vector for each word doc2vec creates a vector for a document(group of words)

In [44]:
#for doc2vec we need to create tagged document objects to train the model
tagged_docs = [gensim.models.doc2vec.TaggedDocument(v, [i]) for i, v in enumerate(X_train)]

In [45]:
#let's look what a tagged document looks like
tagged_docs[0]

TaggedDocument(words=['if', 'india', 'win', 'or', 'level', 'series', 'means', 'this', 'is', 'record'], tags=[0])

In [46]:
#train a basic doc2vec model
d2v_model = gensim.models.Doc2Vec(tagged_docs,
                                 vector_size=100,
                                 window=5,
                                 min_count=2)

In [47]:
#if you try to pass a single word from this model you will get an error so you need to pass a list of words
d2v_model.infer_vector(['i','am','learning','nlp'])

array([ 0.00116184,  0.01872219,  0.01250427, -0.00253981, -0.00983546,
       -0.02537304,  0.01144622,  0.04412089, -0.02573338, -0.01153862,
       -0.00568537, -0.02766524,  0.01213088,  0.00525322, -0.00078333,
       -0.0104916 ,  0.00124987, -0.0119576 ,  0.00568148, -0.0363178 ,
        0.00692236,  0.00304747,  0.00837241, -0.01320306,  0.00366963,
        0.00946252, -0.01272587, -0.00501335, -0.00587354, -0.00536061,
        0.01265274,  0.01118108,  0.01560461, -0.0110861 , -0.00264401,
        0.03086138,  0.005571  , -0.02558787, -0.01181752, -0.03088747,
        0.00241405, -0.00449934, -0.00473376, -0.00561253,  0.00936182,
       -0.01248234, -0.01709388, -0.00763143,  0.01239722,  0.01392219,
        0.01566806, -0.01548217, -0.00331772, -0.0048712 , -0.01581808,
       -0.00148879,  0.00151061,  0.00447852, -0.02207777, -0.00287107,
        0.01130604,  0.01098957,  0.00155177, -0.00144607, -0.02129331,
        0.01262007,  0.01010385,  0.02071502, -0.02062859,  0.02

In [48]:
#how do we prepare these vectors to be used in a machine learning model?
vectors = [[d2v_model.infer_vector(words)] for words in X_test]

In [49]:
vectors[0]

[array([ 1.44406790e-02,  1.09525146e-02,  7.81874172e-03,  8.21193564e-04,
        -2.20896211e-02, -5.43833105e-03,  1.32116461e-02,  1.33500127e-02,
        -1.31168519e-03, -4.40072361e-03,  1.37424050e-02, -1.06445169e-02,
         5.85403899e-03,  6.17255457e-03,  5.36809489e-03, -2.19828868e-03,
        -1.59659795e-02,  9.59840196e-04, -3.63967940e-03, -2.38582701e-03,
         1.97415706e-02, -5.54365152e-03,  1.53111957e-03, -7.44413259e-03,
        -1.20938318e-02,  7.55588105e-03,  4.05377010e-03, -2.17045331e-03,
         5.84712368e-04, -1.49087850e-02, -2.29842542e-03,  9.17390455e-03,
         1.57357287e-02, -1.14900097e-02,  5.49157523e-03,  2.36775056e-02,
         1.53986178e-02, -4.56920685e-03, -2.69234180e-02, -1.71192782e-03,
        -1.70670282e-02,  1.48681095e-02, -3.25408345e-03, -1.53493676e-02,
         2.40477901e-02, -1.36858188e-02,  3.66096920e-03,  1.27181653e-02,
         3.12059186e-02,  1.25802187e-02,  7.24474946e-03,  1.59256738e-02,
        -2.4