# Embeddings 100/300 dims

In [0]:
import os
from google.colab import files


def writeScores(scores, fn):
    """Write every item of `scores` to the text file `fn`, one per line.

    The file is opened in write mode, so any previous content is replaced.
    Each item is rendered with an f-string and terminated by a newline.
    """
    rendered_lines = (f"{score}\n" for score in scores)
    with open(fn, 'w') as output_file:
        output_file.writelines(rendered_lines)

In [0]:
from os.path import exists

# Download and unpack the EN-ZH data only when the archive is not already in
# the working directory, so re-running this cell does not fetch it again.
if not exists('enzh_data.zip'):
    !wget -O enzh_data.zip https://competitions.codalab.org/my/datasets/download/03e23bd7-8084-4542-997b-6a1ca6dd8a5f
    !unzip enzh_data.zip

In [3]:
# English-Chinese
# Sanity check: print the first line of the source, machine-translation and
# score files of the training split to confirm the data was unpacked.
print("---EN-ZH---")
print()

# readline() keeps the trailing newline, hence the blank line after each value.
with open("./train.enzh.src", "r") as enzh_src:
  print("Source: ",enzh_src.readline())
with open("./train.enzh.mt", "r") as enzh_mt:
  print("Translation: ",enzh_mt.readline())
with open("./train.enzh.scores", "r") as enzh_scores:
  print("Score: ",enzh_scores.readline())

---EN-ZH---

Source:  The last conquistador then rides on with his sword drawn.

Translation:  最后的征服者骑着他的剑继续前进.

Score:  -1.5284005772625449



In [4]:
# DON'T RUN IF YOU ALREADY RAN IT IN THE ENGLISH-GERMAN SECTION
# Downloading spacy models for english
# The second command registers the shortcut name 'en300' for the downloaded
# model; it reports an error if that link already exists (pass --force to
# overwrite it).

!spacy download en_core_web_md
!spacy link en_core_web_md en300

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_md')

[38;5;1m✘ Link 'en300' already exists[0m
To overwrite an existing link, use the --force flag



In [0]:
import torchtext
import spacy

# English word embeddings: GloVe '6B' vectors with 100 dimensions.
# Code that assumes a fixed embedding width (for example the input_dim of the
# FFNN model) has to match the dimension chosen here.
#dim=100 Embeddings
glove = torchtext.vocab.GloVe(name='6B', dim=100)

# Alternative: 300-dimensional GloVe vectors (uncomment to switch).
#dim=300 Embeddings
#glove = torchtext.vocab.GloVe(name='6B', dim=300)

# spaCy pipeline loaded through the 'en300' link; only its tokenizer is used
# by preprocess().
#tokenizer model
nlp_en =spacy.load('en300')



In [6]:
#ENGLISH EMBEDDINGS methods from the section GERMAN-ENGLISH
# Unlike the previous section, we use the GloVe embeddings directly, because we are using a smaller model that spaCy does not provide.
# We add a method to look up a word embedding and a method to compute the sentence embedding by averaging the word vectors.

import numpy as np
import torch
from nltk import download
from nltk.corpus import stopwords

#downloading stopwords from the nltk package
# Fetch the NLTK stop-word corpus (a no-op message is printed when it is
# already up to date) and keep the English entries as a set.
download('stopwords') #stopwords dictionary, run once
stop_words_en = set(stopwords.words('english'))


def preprocess(sentence,nlp):
    """Lower-case, tokenize and lemmatize `sentence`, keeping alphabetic tokens.

    Args:
        sentence: raw text of one sentence.
        nlp: object exposing a `tokenizer` callable whose tokens carry a
            `lemma_` attribute (a spaCy pipeline in this notebook).

    Returns:
        List of the `lemma_` strings for which `str.isalpha()` is true.
    """
    lemmas = (token.lemma_ for token in nlp.tokenizer(sentence.lower()))
    # Stop-word removal is not applied here; only non-alphabetic tokens
    # (numbers, punctuation, mixed strings) are dropped.
    return [lemma for lemma in lemmas if lemma.isalpha()]

def get_word_vector(embeddings, word):
    """Look up the embedding of `word`.

    Args:
        embeddings: object with a `stoi` mapping (word -> row index) and a
            `vectors` container indexed by that row.
        word: token to look up.

    Returns:
        `embeddings.vectors[embeddings.stoi[word]]`, or None when the lookup
        raises KeyError (out-of-vocabulary word).
    """
    try:
        row = embeddings.stoi[word]
        return embeddings.vectors[row]
    except KeyError:
        # Unknown word: report it as None so callers can skip it.
        return None

def get_sentence_mean_vector(embeddings,line):
  """Average the word vectors of a tokenized sentence into one sentence vector.

  Args:
    embeddings: embedding table handed to `get_word_vector`.
    line: iterable of tokens.

  Returns:
    A 1-D tensor holding the element-wise mean of the vectors of all
    in-vocabulary tokens (one value per embedding dimension).

  Raises:
    Whatever `torch.stack` raises for an empty list (a RuntimeError in current
    PyTorch) when no token of `line` is in the vocabulary.
  """
  vectors = []
  for w in line:
    emb = get_word_vector(embeddings,w)
    #do not add if the word is out of vocabulary
    if emb is not None:
      vectors.append(emb)

  # Reduce over the word axis only. Without `dim=0` torch.mean collapses the
  # whole (words, dim) matrix into a single scalar, which is not a sentence
  # embedding.
  return torch.mean(torch.stack(vectors), dim=0)


def get_sentence_vector(embeddings,line):
  """Stack the embeddings of the in-vocabulary tokens of `line`.

  Tokens for which `get_word_vector` returns None are skipped. The remaining
  vectors are combined with `torch.stack`, one row per kept token, in the
  order in which they appear in `line`. `torch.stack` raises when no token is
  kept.
  """
  looked_up = (get_word_vector(embeddings, token) for token in line)
  known = [vector for vector in looked_up if vector is not None]
  return torch.stack(known)


def get_embeddings(f,embeddings,lang):
  """Embed every line of the text file `f`.

  Args:
    f: path of a text file with one sentence per line (read as UTF-8).
    embeddings: embedding table handed to `get_sentence_vector`.
    lang: tokenizer/pipeline object handed to `preprocess`.

  Returns:
    A list with one entry per line of the file, in file order: the result of
    `get_sentence_vector` for that line, or the integer 0 when embedding the
    line failed (for example because no token was in the vocabulary).
    NOTE(review): the 0 placeholder is not a tensor, so tensor operations
    applied to every entry downstream will fail on such lines - confirm how
    callers are expected to treat it.
  """
  sentences_vectors = []

  # The context manager closes the file even if preprocessing raises.
  with open(f, encoding="utf-8") as file:
    for l in file:
      sentence = preprocess(l,lang)
      try:
        sentences_vectors.append(get_sentence_vector(embeddings,sentence))
      except Exception:
        # Best effort: keep one entry per line so the result stays aligned
        # with the score file. `Exception` (instead of a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        sentences_vectors.append(0)

  return sentences_vectors


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:

!wget -c https://github.com/Tony607/Chinese_sentiment_analysis/blob/master/data/chinese_stop_words.txt

!wget -O zh.zip http://vectors.nlpl.eu/repository/20/35.zip

!unzip zh.zip 


--2020-02-28 14:30:34--  https://github.com/Tony607/Chinese_sentiment_analysis/blob/master/data/chinese_stop_words.txt
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘chinese_stop_words.txt’

chinese_stop_words.     [  <=>               ] 417.16K  1.22MB/s    in 0.3s    

2020-02-28 14:30:35 (1.22 MB/s) - ‘chinese_stop_words.txt’ saved [427175]

--2020-02-28 14:30:36--  http://vectors.nlpl.eu/repository/20/35.zip
Resolving vectors.nlpl.eu (vectors.nlpl.eu)... 129.240.189.225
Connecting to vectors.nlpl.eu (vectors.nlpl.eu)|129.240.189.225|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1458485917 (1.4G) [application/zip]
Saving to: ‘zh.zip’


2020-02-28 14:32:06 (15.5 MB/s) - ‘zh.zip’ saved [1458485917/1458485917]

Archive:  zh.zip
  inflating: LIST                    
  inflating: meta.json           

In [7]:
from google.colab import files, drive
%matplotlib inline

# Mount Google Drive under /content/gdrive so files stored there can be read.
# The call asks for an authorization code interactively.
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# dim=300: unpack the Chinese vectors archive kept on the mounted Google Drive
# (only needed for the 300-dimensional setup).
!unzip '/content/gdrive/My Drive/Colab Notebooks/zh.zip'

Archive:  /content/gdrive/My Drive/Colab Notebooks/zh.zip
  inflating: zh.bin                  
  inflating: zh.tsv                  
  inflating: zh.bin.syn1neg.npy      
  inflating: zh.bin.syn0.npy         


In [8]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

# dim=100: load the Chinese vectors from a binary word2vec-format file.
# NOTE(review): "model.bin" is presumably extracted from the downloaded
# zh.zip - confirm against the archive contents.
wv_from_bin = KeyedVectors.load_word2vec_format("model.bin", binary=True)

# dim=300 alternative: load a full gensim Word2Vec model instead (uncomment to
# switch).
#wv_from_bin = Word2Vec.load('zh.bin')

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [0]:
import string
import jieba
import gensim 
import spacy

import numpy as np

# Chinese stop words, one per line, with trailing whitespace removed.
# The context manager closes the file once the list has been built (the
# previous inline open() left the handle open).
with open('./chinese_stop_words.txt', "r", encoding="utf-8") as stop_words_file:
  stop_words = [line.rstrip() for line in stop_words_file]


def get_sentence_vector_mean_zh(line):
  """Average the Chinese word vectors of a tokenized sentence.

  Args:
    line: iterable of tokens looked up in the global `wv_from_bin`.

  Returns:
    A 1-D numpy array with the element-wise mean of the vectors of all
    in-vocabulary tokens, or the integer 0 when no token is in the vocabulary.
  """
  vectors = []
  for w in line:
    try:
      vectors.append(wv_from_bin[w])
    except KeyError:
      # Out-of-vocabulary token: leave it out of the average.
      continue

  if not vectors:
    return 0

  # axis=0 averages over the words; without it np.mean collapses the whole
  # matrix into a single scalar.
  return np.mean(np.array(vectors), axis=0)


def get_sentence_vector_zh(line):
  """Stack the Chinese word vectors of a tokenized sentence.

  Args:
    line: iterable of tokens looked up in the global `wv_from_bin`.

  Returns:
    A 2-D tensor with one row per token. Out-of-vocabulary tokens contribute
    an all-zero row. When `line` has no tokens, a single all-zero row is
    returned so every sentence still yields a tensor.
  """
  # Size of the zero placeholder: taken from the loaded model when it exposes
  # `vector_size`, otherwise the previously hard-coded 100. This keeps the
  # function working when the 300-dimensional model is loaded.
  dim = getattr(wv_from_bin, "vector_size", 100)

  vectors = []
  for w in line:
    try:
      vectors.append(torch.from_numpy(wv_from_bin[w]))
    except KeyError:
      # Out-of-vocabulary token.
      vectors.append(torch.zeros(dim))

  if not vectors:
    # torch.stack rejects an empty list; an empty sentence is represented by
    # one zero row instead of aborting the whole data load.
    return torch.zeros((1, dim))

  return torch.stack(vectors)

    

def processing_zh(sentence):
  """Segment a Chinese sentence and keep only the alphanumeric segments.

  The text is cut with `jieba.lcut(..., cut_all=True)`; segments for which
  `isalnum()` is false (punctuation, whitespace, empty strings) are dropped.
  No stop-word filtering is applied.
  """
  return [
      segment
      for segment in jieba.lcut(sentence, cut_all=True)
      if segment.isalnum()
  ]


def get_sentence_embeddings_zh(f):
  """Embed every line of the Chinese text file `f`.

  Args:
    f: path of a text file with one sentence per line (read as UTF-8).

  Returns:
    List with the result of `get_sentence_vector_zh` for each line, in file
    order. A line whose result is None is printed and left out of the list.
  """
  sentences_vectors = []

  # Explicit UTF-8 so the Chinese text is decoded the same way on every
  # platform; the context manager closes the file when reading is done.
  with open(f, encoding="utf-8") as file:
    for l in file:
      vec = get_sentence_vector_zh(processing_zh(l))
      if vec is not None:
        sentences_vectors.append(vec)
      else:
        print(l)

  return sentences_vectors




In [10]:
import spacy
import torchtext
from torchtext import data


# Training split: per-sentence embeddings of the Chinese translation (mt) and
# of the English source (src), plus the raw score lines.
zh_train_mt = get_sentence_embeddings_zh("./train.enzh.mt")
zh_train_src = get_embeddings("./train.enzh.src",glove,nlp_en)
# The score files are read inside `with` blocks so the handles get closed.
with open("./train.enzh.scores",'r') as f_train_scores:
  zh_train_scores = f_train_scores.readlines()


# Validation (dev) split.
zh_val_mt = get_sentence_embeddings_zh("./dev.enzh.mt")
zh_val_src = get_embeddings("./dev.enzh.src",glove,nlp_en)
with open("./dev.enzh.scores",'r') as f_val_scores:
  zh_val_scores = f_val_scores.readlines()

# Training and validation concatenated (training entries first).
zh_train_val_mt = zh_train_mt + zh_val_mt
zh_train_val_src = zh_train_src + zh_val_src
zh_train_val_scores = zh_train_scores + zh_val_scores

# Test split: no score file is read for it.
zh_test_mt = get_sentence_embeddings_zh("./test.enzh.mt")
zh_test_src = get_embeddings("./test.enzh.src", glove, nlp_en)



Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.960 seconds.
Prefix dict has been built successfully.


In [11]:
# Sanity check: number of train+validation sentences and the shape of the
# first translation tensor.
print(len(zh_train_val_mt))
print(zh_train_val_mt[0].shape)

8000
torch.Size([12, 100])


## Simple Feed-forward Neural Network model

In [12]:
def _mean_pool(sentences):
  """Reduce every sentence tensor to one vector by averaging over dim 0."""
  return [torch.mean(sentence, 0) for sentence in sentences]


def _join_pairs(src_vectors, mt_vectors):
  """Concatenate each source vector with the translation vector paired to it."""
  return [torch.cat(tensor_pair) for tensor_pair in zip(src_vectors, mt_vectors)]


# Training split
mean_train_src = _mean_pool(zh_train_src)
mean_train_mt = _mean_pool(zh_train_mt)
mean_train = _join_pairs(mean_train_src, mean_train_mt)
train_scores = np.array(zh_train_scores).astype(float)

# Validation split
mean_val_src = _mean_pool(zh_val_src)
mean_val_mt = _mean_pool(zh_val_mt)
mean_val = _join_pairs(mean_val_src, mean_val_mt)
val_scores = np.array(zh_val_scores).astype(float)

# Training + validation
mean_train_val_src = _mean_pool(zh_train_val_src)
mean_train_val_mt = _mean_pool(zh_train_val_mt)
mean_train_val = _join_pairs(mean_train_val_src, mean_train_val_mt)
train_val_scores = np.array(zh_train_val_scores).astype(float)

# Test split (no scores)
mean_test_src = _mean_pool(zh_test_src)
mean_test_mt = _mean_pool(zh_test_mt)
mean_test = _join_pairs(mean_test_src, mean_test_mt)

print(mean_train[0])
print(mean_train[0].shape)
print(train_scores)

tensor([-0.0971, -0.1589,  0.2974, -0.1212,  0.1301,  0.3532, -0.0863,  0.3983,
        -0.4788,  0.0355,  0.1081,  0.0569,  0.1406, -0.0605,  0.1998, -0.0047,
         0.2876, -0.1321, -0.3366, -0.0536,  0.4954, -0.3619,  0.0776,  0.0929,
         0.5494, -0.0104, -0.3992, -0.2867,  0.1072, -0.3325, -0.2420,  0.3146,
        -0.1398,  0.1598, -0.0720,  0.2631, -0.2362,  0.2532, -0.0221, -0.0299,
        -0.2158, -0.2325,  0.3343, -0.3234, -0.0130,  0.0079,  0.1050, -0.2598,
         0.1370, -0.4183, -0.2272,  0.1312,  0.2082,  1.1499, -0.1149, -2.3472,
        -0.1731, -0.0631,  1.2857,  0.4795, -0.0464,  0.8244, -0.1479,  0.1270,
         0.3285, -0.0874,  0.0932,  0.3004, -0.0557, -0.0512, -0.1607, -0.0959,
         0.0359, -0.3896, -0.0448,  0.1520, -0.2362, -0.0675, -0.6403,  0.0127,
         0.6615,  0.0997, -0.4177,  0.1539, -0.7382, -0.3118, -0.1255, -0.1064,
        -0.1151, -0.4154, -0.2467, -0.2316, -0.1042,  0.3197, -0.3741, -0.2296,
        -0.3258, -0.0690,  0.4570,  0.03

In [0]:
# Range of the gold scores: maximum then minimum, one value per line,
# for the training split first and the validation split second.
print(np.max(train_scores), np.min(train_scores), sep="\n")

print(np.max(val_scores), np.min(val_scores), sep="\n")

1.5053460474460045
-4.704407511656437
1.3631834314516038
-4.494291459817248


In [0]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import scipy

class FFNN(nn.Module):
    """Feed-forward regressor: Linear -> ReLU -> Linear -> Tanh.

    Args:
        hidden_dim: width of the single hidden layer.
        num_classes: number of output units (1 for a single score).
        input_dim: size of each input feature vector.

    NOTE(review): the final Tanh keeps every prediction inside (-1, 1), while
    the score ranges printed earlier in this notebook go below -4; confirm the
    bounded output is intended.
    """

    def __init__(self, hidden_dim, num_classes=1, input_dim=200):
        super().__init__()

        # Registration order fixes both the order shown by print(model) and
        # the order in which the layer weights are initialised.
        self.fc1 = nn.Linear(input_dim, hidden_dim)   # hidden layer
        self.act1 = nn.ReLU()                         # hidden activation
        self.out = nn.Linear(hidden_dim, num_classes) # output layer
        self.tanh = nn.Tanh()                         # output squashing

    def forward(self, x):
        """Apply the layers to `x`, whose last dimension must be `input_dim`."""
        hidden = self.act1(self.fc1(x))
        return self.tanh(self.out(hidden))

In [13]:
from torch.utils.data import TensorDataset, DataLoader

# One 2-D feature tensor per split (one row per sentence pair).
mean_train_t, mean_val_t, mean_train_val_t, mean_test_t = (
    torch.stack(vectors)
    for vectors in (mean_train, mean_val, mean_train_val, mean_test)
)

# Gold scores as tensors (the test split has none).
train_scores_t, val_scores_t, train_val_scores_t = map(
    torch.Tensor, (train_scores, val_scores, train_val_scores)
)

# Pair features with labels and serve them in mini-batches of 32.
# No `shuffle` argument is passed, so batches follow the dataset order.
train_dataset = TensorDataset(mean_train_t, train_scores_t)
train_loader = DataLoader(train_dataset, batch_size=32)

print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f4d7e87e2b0>


In [0]:
# we will train for N epochs (The model will see the corpus N times)
EPOCHS = 100

# Learning rate used by the SGD optimizer
LRATE = 0.01

# width of the model's single hidden layer (passed to FFNN as hidden_dim)
HIDDEN_DIM = 100

# Construct the model
model = FFNN(HIDDEN_DIM)

# Print the model
print(model)

# we use the stochastic gradient descent (SGD) optimizer
optimizer = optim.SGD(model.parameters(), lr=LRATE)

# Regression objective: mean squared error between the predicted and the
# gold scores.
loss_fn = nn.MSELoss()

################
# Start training
################
print(f'Will train for {EPOCHS} epochs')
for epoch in range(1, EPOCHS + 1):
  for batch_idx, (feature, target) in enumerate(train_loader):
    # to ensure the dropout (explained later) is "turned on" while training
    # good practice to include even if do not use here
    model.train()
  
    # we zero the gradients as they are not removed automatically
    optimizer.zero_grad()
  
    # squeeze is needed as the predictions will have the shape (batch size, 1)
    # and we need to remove the dimension of size 1
    predictions = model(feature).squeeze(1)

    # Compute the loss
    loss = loss_fn(predictions, target)
    # train_loss is overwritten on every batch, so the value printed per epoch
    # is the loss of the last mini-batch only, not an epoch average
    train_loss = loss.item()

    # calculate the gradient of each parameter
    loss.backward()

    # update the parameters using the gradients and optimizer algorithm 
    optimizer.step()
  
  # this puts the model in "evaluation mode" (turns off dropout and batch normalization)
  # good practise to include even if we do not use them right now
  model.eval()

  # we do not compute gradients within this block, i.e. no training
  with torch.no_grad():
    # loss and Pearson correlation on the full validation split
    predictions_valid = model(mean_val_t).squeeze(1)
    valid_loss = loss_fn(predictions_valid, val_scores_t).item()
    valid_pearson = scipy.stats.pearsonr(predictions_valid, val_scores_t)[0]

    # Pearson correlation on the full training split
    predictions_train = model(mean_train_t).squeeze(1)
    train_pearson = scipy.stats.pearsonr(predictions_train, train_scores_t)[0]
  
  print(f'| Epoch: {epoch:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f} | Train Pearson: {train_pearson:.3f} | Val. Pearson: {valid_pearson:.3f}')

FFNN(
  (fc1): Linear(in_features=200, out_features=100, bias=True)
  (act1): ReLU()
  (out): Linear(in_features=100, out_features=1, bias=True)
  (tanh): Tanh()
)
Will train for 100 epochs
| Epoch: 01 | Train Loss: 1.340 | Val. Loss: 0.814 | Train Pearson: 0.225 | Val. Pearson: 0.168
| Epoch: 02 | Train Loss: 1.300 | Val. Loss: 0.802 | Train Pearson: 0.248 | Val. Pearson: 0.200
| Epoch: 03 | Train Loss: 1.267 | Val. Loss: 0.793 | Train Pearson: 0.257 | Val. Pearson: 0.216
| Epoch: 04 | Train Loss: 1.243 | Val. Loss: 0.787 | Train Pearson: 0.266 | Val. Pearson: 0.227
| Epoch: 05 | Train Loss: 1.228 | Val. Loss: 0.783 | Train Pearson: 0.275 | Val. Pearson: 0.238
| Epoch: 06 | Train Loss: 1.219 | Val. Loss: 0.780 | Train Pearson: 0.284 | Val. Pearson: 0.248
| Epoch: 07 | Train Loss: 1.213 | Val. Loss: 0.776 | Train Pearson: 0.292 | Val. Pearson: 0.258
| Epoch: 08 | Train Loss: 1.208 | Val. Loss: 0.774 | Train Pearson: 0.299 | Val. Pearson: 0.266
| Epoch: 09 | Train Loss: 1.203 | Val. Los

In [0]:
import scipy

# Score the validation split with the trained network in evaluation mode.
model.eval()

with torch.no_grad():
  # Only the forward pass needs to run with gradient tracking switched off.
  predictions = model(mean_val_t).squeeze(1)

# Pearson correlation (statistic and p-value) against the gold scores.
pearson_corr = scipy.stats.pearsonr(predictions, val_scores_t)

print(predictions)
print(pearson_corr)

tensor([-5.7613e-01, -4.2422e-01, -3.2663e-01, -3.4373e-01, -2.7768e-01,
         3.1056e-01, -8.0722e-01,  5.3888e-01,  3.9542e-01,  5.0608e-01,
        -5.3578e-01, -4.7029e-01, -5.0387e-01,  3.9389e-01, -3.6459e-01,
        -5.0361e-01,  4.5471e-01, -1.0749e-01, -4.4565e-01, -1.8392e-01,
        -1.2759e-01, -2.0696e-02, -2.3228e-01, -2.6273e-01, -2.0555e-01,
        -5.6314e-01, -1.2995e-01,  1.8809e-01, -6.7057e-02, -4.0258e-01,
        -3.2406e-02, -6.8032e-01, -4.4420e-01,  2.1091e-01,  1.2197e-01,
         1.8178e-01, -4.2727e-01, -1.6787e-01, -4.0592e-01, -3.8352e-01,
        -1.4399e-01, -3.0573e-01, -7.6351e-02,  1.9145e-01, -5.1621e-01,
        -3.6085e-01, -8.0244e-01,  8.2469e-02, -1.0620e-01, -3.6718e-01,
         3.1615e-01,  9.8524e-02,  2.1264e-01, -3.6903e-01,  8.5483e-02,
         9.5246e-02, -1.6452e-01, -7.9122e-01, -6.3585e-01, -7.0090e-02,
        -3.8040e-01, -2.7578e-02,  2.8637e-01, -4.2717e-01,  2.2785e-01,
        -3.6628e-01,  3.6359e-01,  4.0958e-01, -6.5

## Support Vector Regression (SVR)

In [0]:
import numpy as np
from scipy.stats.stats import pearsonr


def rmse(predictions, targets):
    """Root-mean-square error between `predictions` and `targets`.

    The inputs are subtracted element-wise, squared, averaged with the
    `.mean()` method of the result, and the square root is taken with numpy.
    """
    residuals = predictions - targets
    mean_squared_error = (residuals ** 2).mean()
    return np.sqrt(mean_squared_error)

In [18]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error

# Fit one SVR per kernel on the training features and report RMSE, Pearson
# correlation and mean absolute error on the validation split.
for k in ['linear','poly','rbf','sigmoid']:
    clf_t = SVR(kernel=k)
    clf_t.fit(mean_train_t, train_scores_t)
    print(k)
    # The predictions are wrapped in a tensor before being compared with
    # val_scores_t.
    predictions = torch.tensor(clf_t.predict(mean_val_t))
    pearson = pearsonr(val_scores_t, predictions)
    mae = mean_absolute_error(val_scores_t, predictions)
    print(f'RMSE: {rmse(predictions, val_scores_t)} Pearson {pearson[0]}, MAE {mae}')
    print()


linear
RMSE: 0.9104197100505785 Pearson 0.28177236206307227, MAE 0.6657526608822311

poly
RMSE: 0.9028564482657653 Pearson 0.30126486089786997, MAE 0.6569537994633171

rbf
RMSE: 0.8973870196668569 Pearson 0.3316393762543504, MAE 0.6494591305143613

sigmoid
RMSE: 7.815960344582831 Pearson 0.023161468380610863, MAE 4.750423923801584



In [0]:
# Final SVR model: RBF kernel, fitted on training + validation data.
clf_t = SVR(kernel='rbf')
clf_t.fit(mean_train_val_t, train_val_scores_t)

# Predict the test split and show the raw predictions.
predictions = clf_t.predict(mean_test_t)
print(predictions)

# Write one prediction per line and trigger a browser download (Colab).
writeScores(predictions, 'SVR_rbf.txt')
files.download('SVR_rbf.txt')

[ 2.72158199e-01  1.22621336e-01  6.68095998e-01  3.27555127e-01
 -6.72099777e-02 -3.55866291e-02  2.75672045e-04  7.42447047e-01
  1.95610696e-01  2.98002333e-01  1.05141316e+00 -1.66610520e-01
  1.24349182e-01 -6.17666255e-02  1.22057635e-02  2.56122084e-01
 -5.87100306e-02 -3.72150821e-02 -2.83545694e-03  5.41129318e-01
  3.84357799e-02  4.69772411e-01  4.29898773e-01  9.03572528e-01
  1.05517682e-01  3.41356069e-02  3.67741669e-01  4.53923685e-01
  1.89974599e-01  1.13849746e-01  2.32724313e-02  2.35024696e-01
  1.53019173e-02  2.53089639e-01  3.58803260e-01 -1.46519124e-03
 -3.18689888e-02  1.30915163e-01  5.01894074e-01  5.29350147e-01
  5.52228013e-01  3.58913565e-01  3.07463223e-01  1.02418140e-01
  2.62418447e-01  5.23656582e-01 -2.42017784e-01  1.05273707e-01
 -8.14570844e-02  2.17838297e-01  3.80969380e-01  4.39290413e-01
  1.14859235e-01 -5.92210921e-02  4.51709435e-02  1.73901061e-01
 -3.38875271e-01  6.40323937e-01  2.81913238e-01  6.28312159e-01
  1.68475373e-01  5.50289

## Random Forest


In [19]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 500 trees; random_state is fixed so the fit is repeatable.
rf = RandomForestRegressor(n_estimators = 500, random_state = 777)
# The trailing semicolon suppresses the notebook echo of the fitted estimator.
rf.fit(mean_train_t, train_scores_t);
predictions = torch.Tensor(rf.predict(mean_val_t))

# Validation metrics: RMSE, Pearson correlation and mean absolute error.
pearson = pearsonr(val_scores_t, predictions)
mae  = mean_absolute_error(val_scores_t, predictions)
print(f'RMSE: {rmse(predictions, val_scores_t)} Pearson {pearson[0]} MAE {mae}')
print()

RMSE: 0.8802624344825745 Pearson 0.2523580204246022 MAE 0.7092980742454529



## Simple FFNN with sklearn

In [20]:
from sklearn.neural_network import MLPRegressor
from scipy.stats.stats import pearsonr


# scikit-learn MLP with one hidden layer of 50 units, trained with SGD
# (mini-batches of 32, adaptive learning rate starting at 0.01, early stopping).
# No random_state is passed, so results can differ between runs.
mlpr_model = MLPRegressor(hidden_layer_sizes=(50,), batch_size=32, solver='sgd', learning_rate='adaptive', learning_rate_init=0.01, early_stopping=True)
mlpr_model.fit(mean_train_t, train_scores_t)

# Validation predictions, their variance/mean, and the evaluation metrics.
predictions = torch.tensor(mlpr_model.predict(mean_val_t))
print(predictions)
print(torch.var_mean(predictions))
pearson = pearsonr(val_scores_t, predictions)
mae  = mean_absolute_error(val_scores_t, predictions)
print(f'RMSE: {rmse(predictions, val_scores_t)} Pearson {pearson[0]} MAE {mae}')

tensor([-3.0276e-01, -9.4897e-02,  2.8105e-01, -2.9537e-01, -3.7779e-01,
         7.4241e-01, -5.3593e-01,  2.1731e-01,  5.8119e-01,  4.8507e-01,
        -2.7196e-01, -7.2199e-02, -3.7083e-01,  4.5651e-01, -4.4499e-01,
        -4.8511e-01,  9.3002e-01,  5.4222e-01,  6.6252e-03, -2.2299e-01,
         6.8237e-02,  4.5647e-02,  4.1723e-02, -1.5052e-01, -5.8786e-02,
         2.2627e-01,  2.7944e-01,  3.6873e-01,  2.1625e-01, -1.9149e-01,
         1.0953e-01, -7.5491e-01, -6.7379e-01,  4.2473e-01,  6.9424e-01,
         6.5938e-01, -3.1465e-01,  5.0444e-03, -1.5383e-01, -2.0142e-01,
        -1.3576e-01,  1.9079e-01, -2.1217e-02,  4.0349e-01,  4.6571e-02,
        -2.5732e-02, -9.2815e-01, -1.0448e-01,  5.2452e-02,  1.5903e-01,
         7.9887e-01,  4.9013e-01,  2.5842e-01, -2.7954e-01,  2.0021e-01,
         3.5936e-02,  1.2197e-01, -3.4765e-01, -3.2995e-01,  1.1234e-01,
        -5.3899e-04,  2.3027e-01,  5.1485e-01, -4.5353e-01,  3.4784e-01,
         1.3395e-01,  4.1402e-01,  2.1461e-01, -6.9

In [0]:
# Final MLP: same hyper-parameters as the validation run, fitted on
# training + validation data.
mlpr_model = MLPRegressor(hidden_layer_sizes=(50,), batch_size=32, solver='sgd', learning_rate='adaptive', learning_rate_init=0.01, early_stopping=True)
mlpr_model.fit(mean_train_val_t, train_val_scores_t)

# Predict the test split and show the raw predictions.
predictions = mlpr_model.predict(mean_test_t)
print(predictions)

# Write one prediction per line and trigger a browser download (Colab).
writeScores(predictions, 'FFNN.txt')
files.download('FFNN.txt')

[ 2.10889232e-02 -7.60931838e-02  7.43149609e-01 -1.38666407e-02
 -4.23213237e-01 -2.63102570e-01 -6.89221848e-01  2.59093571e-01
 -1.40938215e-01 -2.91391971e-02  9.02422338e-01 -6.17110392e-01
 -3.00688367e-01 -4.98844642e-01 -1.03486218e-01  1.15930911e-01
 -2.85600752e-02 -3.78749727e-01 -1.91574493e-01  2.09574557e-01
 -4.78055960e-01  2.66993229e-01 -1.22469746e-01  7.23622502e-01
 -1.22150925e-01 -3.85848283e-01  2.31523218e-01  6.38018315e-01
 -3.94990415e-02 -7.59014040e-02 -2.18929507e-01 -4.83231935e-02
 -4.76917700e-01 -1.02907972e-01  1.24083786e-01 -3.02937976e-01
 -3.06348548e-01 -2.12300965e-01  7.14254602e-02  3.01063101e-01
  1.71962415e-01 -7.07028578e-02  2.41537699e-01 -3.02225065e-02
 -7.27219379e-02  4.03575073e-01 -5.08590199e-01  9.58694378e-02
 -4.84927063e-01 -1.78342424e-01  1.89810587e-01  1.89827423e-01
 -3.64909352e-01 -5.69975245e-01 -3.34035953e-01 -8.32102674e-02
 -1.13434782e+00 -2.42537174e-01  3.27560205e-02  2.72445215e-01
 -2.15320975e-01  6.53788