In [None]:
import numpy as np
# Seed NumPy's global RNG right away, before the Keras imports further down.
np.random.seed(1)
from tensorflow import set_random_seed
# Seed TensorFlow as well (this import path is the TF 1.x API).
set_random_seed(1)

import os
import gc 

# Enumerate GPUs in PCI bus order and expose only device "0" to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"]="0";  

import glob
import datetime
import zipfile

import pandas as pd
import re
import spacy
import math
from collections import Counter
from ftfy import fix_text

from gensim.models.phrases import Phrases, Phraser
from gensim.models import KeyedVectors
from gensim.models.wrappers import FastText 

from keras.preprocessing.sequence import make_sampling_table, skipgrams
from keras.layers import Input, Dense, Embedding, Activation, Dot, Flatten, GRU, Dropout, LSTM, Bidirectional, GlobalMaxPooling1D, Concatenate, Lambda, CuDNNGRU, CuDNNLSTM, GaussianNoise, GaussianDropout, Conv1D, BatchNormalization
from keras.models import Model
from keras.optimizers import RMSprop
from keras.models import load_model

from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

from sklearn.metrics import f1_score

In [None]:
# Large English spaCy pipeline; it is passed to sub_entities() when the
# datasets are cleaned.
nlp = spacy.load("en_core_web_lg")
# Pre-trained fastText model read from "wiki.en.bin" in the working directory;
# tokens are looked up in it through try_pass().
# NOTE(review): the padding code uses np.zeros(300), so the vectors are
# presumably 300-dimensional - confirm.
fasttext = FastText.load_fasttext_format("wiki.en.bin")

In [None]:
# Matches one markup tag: "<", then any run of non-">" characters, then ">".
TAG_RE = re.compile(r'<[^>]+>')

# Maps entity label strings to a plain lowercase replacement word.
# NOTE(review): only the commented-out code inside sub_entities() refers to
# this mapping, so it is currently unused in the visible cells.
entity_map = {"PERSON":"person",
              "NORP":"nationality",
              "FAC":"facility",
              "ORG":"organization",
              "GPE":"government",
              "LOC":"location",
              "PRODUCT":"product",
              "EVENT":"event",
              "WORK_OF_ART":"artwork",
              "LAW":"law",
              "LANGUAGE":"language",
              "DATE":"date",
              "TIME":"time",
              "PERCENT":"percent",
              "MONEY":"money",
              "QUANTITY":"amount",
              "ORDINAL":"first",
              "CARDINAL":"number"}

def remove_tags(text):
    """Return ``text`` with every ``<...>`` tag replaced by a single space."""
    without_tags = TAG_RE.sub(' ', text)
    return without_tags

def strip_whitespace(text):
    """Collapse each run of whitespace to one space and trim both ends."""
    pieces = text.split()
    return " ".join(pieces)

def clean_text(text):
    """Normalise raw text into lowercase ASCII letters, digits and spaces.

    Steps, in order: strip markup tags, collapse whitespace, repair the text
    with ftfy's ``fix_text``, drop every remaining disallowed character, and
    lowercase the result.
    """
    without_markup = strip_whitespace(remove_tags(text))
    repaired = fix_text(without_markup)
    return remove_punctuation(repaired).lower()

def remove_punctuation(text):
    """Delete every character that is not an ASCII letter, digit, or space."""
    disallowed = r"[^A-Za-z0-9 ]"
    return re.sub(disallowed, "", text)

def sub_entities(text, spacy_model):
    """Return ``text`` unchanged.

    Entity substitution is switched off: ``spacy_model`` is accepted so the
    call sites keep working, but it is never used. The disabled implementation
    is kept below for reference.
    """
    # Disabled implementation:
    #     replacements = [(e.text,e.label_) for e in spacy_model(text).ents]
    #     replacements = sorted(replacements, key=lambda x: len(x[0]), reverse=True)
    #     replacements = [(a,entity_map[b]) for a,b in replacements]
    #     for k,v in replacements:
    #         text = re.sub(r"\b"+re.escape(k)+r"\b", v, text)
    #     # text = re.sub(r"[^a-zA-Z0-9 ]","",text).lower()
    return text

def df_to_dic(df):
    """Convert a DataFrame into ``{id: {"label", "text", "url"}}``.

    One entry is built per row, keyed by the row's ``id`` value; a repeated
    id overwrites the earlier entry. The frame must provide the ``id``,
    ``label``, ``text`` and ``url`` columns.
    """
    records = {}
    for _, row in df.iterrows():
        records[row["id"]] = {
            "label": row["label"],
            "text": row["text"],
            "url": row["url"],
        }
    return records

In [None]:
# NOTE(review): none of these four values is referenced by any other cell
# visible in this notebook; the names suggest a leftover skip-gram embedding
# setup - confirm and remove if unused. In particular `dim` (100) is not the
# vector width used by the padding code, which hard-codes 300.
min_count = 5
window_size = 5
negative_samples = 10
dim = 100


In [None]:
## News Data

# Task 1 files live under data/Document, task 2 files under data/Sentence.
file_name_train_task1 = "data/Document/train_filled.json"
file_name_dev_task1 = "data/Document/dev_filled.json"
file_name_test_task1 = "data/Document/test_filled.json"
file_name_china_task1 = "data/Document/test_china_filled.json"

file_name_train_task2 = "data/Sentence/train_filled.json"
file_name_dev_task2 = "data/Sentence/dev_filled.json"
file_name_test_task2 = "data/Sentence/test_filled.json"
file_name_china_task2 = "data/Sentence/test_china_filled.json"

## JSON - to - DF

def _load_split(path, text_column, labelled):
    """Read one JSON-lines split and write its cleaned text to a ``text`` column.

    ``text_column`` names the column holding the raw text ("text" for task 1,
    "sentence" for task 2). Splits loaded with ``labelled=False`` get a
    placeholder ``label`` column filled with -1.
    """
    frame = pd.read_json(path, orient="records", lines=True)
    frame["text"] = [sub_entities(clean_text(raw), nlp) for raw in frame[text_column]]
    if not labelled:
        frame["label"] = -1
    return frame

df_train_task1 = _load_split(file_name_train_task1, "text", labelled=True)
df_dev_task1 = _load_split(file_name_dev_task1, "text", labelled=True)
df_test_task1 = _load_split(file_name_test_task1, "text", labelled=False)

df_train_task2 = _load_split(file_name_train_task2, "sentence", labelled=True)
df_dev_task2 = _load_split(file_name_dev_task2, "sentence", labelled=True)
df_test_task2 = _load_split(file_name_test_task2, "sentence", labelled=False)

df_china_task1 = _load_split(file_name_china_task1, "text", labelled=False)
df_china_task2 = _load_split(file_name_china_task2, "sentence", labelled=False)

In [None]:
# Preview the first rows of both China test frames.
for china_frame in (df_china_task1, df_china_task2):
    print(china_frame.head())

In [None]:
# Token-count summary of the cleaned training text (whitespace tokenisation).
# The counts are computed once per task instead of once per printed line.
lens_train_task1 = [len(doc.split()) for doc in df_train_task1.text.tolist()]
lens_train_task2 = [len(sent.split()) for sent in df_train_task2.text.tolist()]

print(f"Max len task1: {max(lens_train_task1)}")
print(f"Min len task1: {min(lens_train_task1)}")
# Label fixed: this line reports the mean, it used to be printed as "Min len".
print(f"Mean len task1: {sum(lens_train_task1)/len(lens_train_task1)}")

print(f"Max len task2: {max(lens_train_task2)}")
# Empty sentences are excluded from the task 2 minimum only (as before).
print(f"Min len task2: {min([n for n in lens_train_task2 if n>0])}")
# Label fixed here as well: mean, not minimum.
print(f"Mean len task2: {sum(lens_train_task2)/len(lens_train_task2)}")

# Display the task 2 training rows whose raw sentence is under 15 characters.
df_train_task2.loc[df_train_task2.sentence.apply(len)<15]


In [None]:
def try_pass(text, model):
    """Look ``text`` up in ``model`` and return the result, or ``None`` if absent.

    Parameters
    ----------
    text : key to look up (a token string at the call sites in this notebook).
    model : any object supporting ``model[text]``.

    Returns
    -------
    ``model[text]``, or ``None`` when the lookup raises ``KeyError``.

    Only ``KeyError`` is treated as "not found". The previous bare ``except``
    also swallowed unrelated failures (including ``KeyboardInterrupt``), which
    now propagate to the caller.
    """
    try:
        return model[text]
    except KeyError:
        # Unknown key: callers filter these None placeholders out afterwards.
        return None

In [None]:
## Dataframe to Dictionary

dict_train_task1 = df_to_dic(df_train_task1)
dict_test_task1 = df_to_dic(df_test_task1)
dict_dev_task1 = df_to_dic(df_dev_task1)
dict_china_task1 = df_to_dic(df_china_task1)

dict_train_task2 = df_to_dic(df_train_task2)
dict_test_task2 = df_to_dic(df_test_task2)
dict_dev_task2 = df_to_dic(df_dev_task2)
dict_china_task2 = df_to_dic(df_china_task2)

## Sub in Word Vectors

# Each record gains a "tokenized" list: one fastText lookup per whitespace
# token of its text, with failed lookups (None from try_pass) dropped.
for split_dict in (dict_train_task1, dict_test_task1, dict_dev_task1, dict_china_task1,
                   dict_train_task2, dict_test_task2, dict_dev_task2, dict_china_task2):
    for record in split_dict.values():
        looked_up = (try_pass(token, fasttext) for token in record["text"].split())
        record["tokenized"] = [vector for vector in looked_up if vector is not None]
                                  

In [None]:
def _longest(sequences):
    """Return the length of the longest sequence."""
    return max([len(seq) for seq in sequences])


def _left_pad(sequences, target_len):
    """Left-pad every sequence with 300-wide zero vectors and stack the result."""
    return np.stack([[np.zeros(300)]*(target_len-len(seq))+seq for seq in sequences])


# ---- Task 1: gather vectors and labels per split ----
seq_x_train_task1 = [rec["tokenized"] for rec in dict_train_task1.values()]
seq_y_train_task1 = np.array([rec["label"] for rec in dict_train_task1.values()])
max_len_train_task1 = _longest(seq_x_train_task1)

seq_x_dev_task1 = [rec["tokenized"] for rec in dict_dev_task1.values()]
seq_y_dev_task1 = np.array([rec["label"] for rec in dict_dev_task1.values()])
max_len_dev_task1 = _longest(seq_x_dev_task1)

seq_x_test_task1 = [rec["tokenized"] for rec in dict_test_task1.values()]
max_len_test_task1 = _longest(seq_x_test_task1)

seq_x_china_task1 = [rec["tokenized"] for rec in dict_china_task1.values()]
max_len_china_task1 = _longest(seq_x_china_task1)

# All task 1 splits are padded to the longest sequence found in any of them.
max_len_task1 = max(max_len_dev_task1, max_len_train_task1, max_len_test_task1, max_len_china_task1)
seq_x_train_task1 = _left_pad(seq_x_train_task1, max_len_task1)
seq_x_dev_task1 = _left_pad(seq_x_dev_task1, max_len_task1)
seq_x_test_task1 = _left_pad(seq_x_test_task1, max_len_task1)
seq_x_china_task1 = _left_pad(seq_x_china_task1, max_len_task1)

print(max_len_task1)

# ---- Task 2: same procedure on the sentence-level splits ----
seq_x_train_task2 = [rec["tokenized"] for rec in dict_train_task2.values()]
seq_y_train_task2 = np.array([rec["label"] for rec in dict_train_task2.values()])
max_len_train_task2 = _longest(seq_x_train_task2)

seq_x_dev_task2 = [rec["tokenized"] for rec in dict_dev_task2.values()]
seq_y_dev_task2 = np.array([rec["label"] for rec in dict_dev_task2.values()])
max_len_dev_task2 = _longest(seq_x_dev_task2)

seq_x_test_task2 = [rec["tokenized"] for rec in dict_test_task2.values()]
max_len_test_task2 = _longest(seq_x_test_task2)

seq_x_china_task2 = [rec["tokenized"] for rec in dict_china_task2.values()]
max_len_china_task2 = _longest(seq_x_china_task2)

max_len_task2 = max(max_len_dev_task2, max_len_train_task2, max_len_test_task2, max_len_china_task2)
seq_x_train_task2 = _left_pad(seq_x_train_task2, max_len_task2)
seq_x_dev_task2 = _left_pad(seq_x_dev_task2, max_len_task2)
seq_x_test_task2 = _left_pad(seq_x_test_task2, max_len_task2)
seq_x_china_task2 = _left_pad(seq_x_china_task2, max_len_task2)


In [None]:
# Drop the fastText model and the train/dev/test frames, then run the garbage
# collector. Re-running this cell without re-creating those names raises
# NameError.
del fasttext
del (df_train_task2, df_dev_task2, df_test_task2,
     df_train_task1, df_dev_task1, df_test_task1)

gc.collect()

In [None]:
# Number of training examples per task (size of the first axis of each array).
# NOTE(review): these two counts are not referenced again in the visible cells.
n_train_task1 = seq_x_train_task1.shape[0]
n_train_task2 = seq_x_train_task2.shape[0]
# Mini-batch size passed to generate_data() when the training generators are built.
batch_size = 128

def generate_data(x_doc, y_doc, batch_size):
    """Yield ``(x, y)`` mini-batches forever, cycling through the data in order.

    Batches are consecutive slices along the first axis. The last batch of a
    pass may be shorter than ``batch_size``; after it the generator restarts
    from the first row. No shuffling is performed.
    """
    total = x_doc.shape[0]
    start = 0
    while True:
        stop = min(start + batch_size, total)
        batch = (x_doc[start:stop, :], y_doc[start:stop])
        # Wrap around once the end of the data has been reached.
        start = 0 if stop == total else stop
        yield batch

In [None]:
import math

# ---- Model definition ----
# Both heads read the same input tensor and share ONE bidirectional LSTM
# encoder (`lstm_layer` is applied twice), so training either model updates
# the encoder weights used by the other. Each head has its own dropout and
# its own sigmoid output layer.
seq_in = Input(shape=(None,300))

lstm_layer = Bidirectional(CuDNNLSTM(10, return_sequences=False))
doc_out = Dense(1, activation="sigmoid")(GaussianDropout(0.4)(lstm_layer(GaussianDropout(0.5)(seq_in))))
sent_out = Dense(1, activation="sigmoid")(GaussianDropout(0.6)(lstm_layer(GaussianDropout(0.5)(seq_in))))

# One optimizer instance per model.
rmsprop_1 = RMSprop(lr=0.005, rho=0.9, epsilon=None, decay=0.0)
rmsprop_2 = RMSprop(lr=0.005, rho=0.9, epsilon=None, decay=0.0)

doc_model = Model(inputs=seq_in,outputs=doc_out)
doc_model.compile(rmsprop_1, loss="binary_crossentropy", metrics=["accuracy"])
# summary() prints the table itself and returns None, so it is not wrapped in print().
doc_model.summary()

sent_model = Model(inputs=seq_in,outputs=sent_out)
sent_model.compile(rmsprop_2, loss="binary_crossentropy", metrics=["accuracy"])
sent_model.summary()


def _balanced_class_weights(labels):
    """Return ``{class: n_samples / (n_classes * class_count)}`` for ``labels``."""
    classes, counts = np.unique(labels, return_counts=True)
    return {int(cls): len(labels) / (len(classes) * count)
            for cls, count in zip(classes, counts)}


# Keras only applies `class_weight` when it is a dict; the string "auto" that
# was passed before is silently ignored, so training ran unweighted. Compute
# explicit balanced weights from the full training labels instead.
# NOTE(review): this changes training behaviour compared with earlier runs.
doc_class_weight = _balanced_class_weights(seq_y_train_task1)
sent_class_weight = _balanced_class_weights(seq_y_train_task2)

# ---- Training schedule ----
# One "epoch" here is a fixed number of mini-batches per model, not a full
# pass over the training data.
N_EPOCHS = 100
BATCHES_PER_EPOCH = 20

gen_doc = generate_data(seq_x_train_task1, seq_y_train_task1, batch_size)
gen_sen = generate_data(seq_x_train_task2, seq_y_train_task2, batch_size)

# Per-epoch (loss, accuracy) tuples, recorded BEFORE that epoch's updates.
d_train_metrics = []
d_dev_metrics = []
s_train_metrics = []
s_dev_metrics = []

for ii in range(N_EPOCHS):
    print(f"========== EPOCH {ii} ==========")

    # The inputs are already stacked arrays, so they are passed directly
    # instead of being copied with np.stack()/np.array() on every iteration.
    (loss_d_train, acc_d_train) = doc_model.evaluate(seq_x_train_task1, seq_y_train_task1, batch_size=256, verbose=0)
    (loss_d_dev, acc_d_dev) = doc_model.evaluate(seq_x_dev_task1, seq_y_dev_task1, batch_size=256, verbose=0)
    (loss_s_train, acc_s_train) = sent_model.evaluate(seq_x_train_task2, seq_y_train_task2, batch_size=256, verbose=0)
    (loss_s_dev, acc_s_dev) = sent_model.evaluate(seq_x_dev_task2, seq_y_dev_task2, batch_size=256, verbose=0)
    print(f"[{loss_d_train:.3f}\t{loss_d_dev:.3f}]\t[{acc_d_train:.3f}\t{acc_d_dev:.3f}]\t[{loss_s_train:.3f}\t{loss_s_dev:.3f}]\t[{acc_s_train:.3f}\t{acc_s_dev:.3f}]")

    d_train_metrics.append((loss_d_train, acc_d_train))
    d_dev_metrics.append((loss_d_dev, acc_d_dev))
    s_train_metrics.append((loss_s_train, acc_s_train))
    s_dev_metrics.append((loss_s_dev, acc_s_dev))

    # Alternate one document batch and one sentence batch.
    for ee in range(BATCHES_PER_EPOCH):

        x_batch, y_batch = next(gen_doc)
        doc_model.train_on_batch(x_batch, y_batch, class_weight=doc_class_weight)
        x_batch, y_batch = next(gen_sen)
        sent_model.train_on_batch(x_batch, y_batch, class_weight=sent_class_weight)


In [None]:
def _report_dev(header, model, x_dev, y_dev):
    """Print ``header``, the model's evaluate() output, and its F1 on the dev split."""
    print(header)
    inputs = [np.stack(x_dev)]
    targets = np.array(y_dev)
    print(model.evaluate(inputs, targets))
    print(f1_score(targets, np.round(model.predict(inputs))))


# Model 1 is the document model and Model 2 the sentence model; each is
# scored on both tasks' dev splits.
for _header, _model, _x_dev, _y_dev in (
        ("Task 1 Dev results on Model 1:", doc_model, seq_x_dev_task1, seq_y_dev_task1),
        ("Task 1 Dev results on Model 2:", sent_model, seq_x_dev_task1, seq_y_dev_task1),
        ("Task 2 Dev results on Model 2:", sent_model, seq_x_dev_task2, seq_y_dev_task2),
        ("Task 2 Dev results on Model 1:", doc_model, seq_x_dev_task2, seq_y_dev_task2)):
    _report_dev(_header, _model, _x_dev, _y_dev)

In [None]:
def _rounded_predictions(model, features):
    """Return the model's predictions for ``features``, rounded with np.round."""
    return np.round(model.predict(np.stack(features)))


# Each model on its own task.
task1_train_preds = _rounded_predictions(doc_model, seq_x_train_task1)
task1_dev_preds = _rounded_predictions(doc_model, seq_x_dev_task1)
task2_train_preds = _rounded_predictions(sent_model, seq_x_train_task2)
task2_dev_preds = _rounded_predictions(sent_model, seq_x_dev_task2)
# Document model applied to the sentence-level (task 2) data.
task2_train_preds_mod1 = _rounded_predictions(doc_model, seq_x_train_task2)
task2_dev_preds_mod1 = _rounded_predictions(doc_model, seq_x_dev_task2)

In [None]:
import matplotlib.pyplot as plt

# Colour palette shared by the metric plots.
pal = ["#FF0000", "#00A08A", "#F2AD00", "#F98400", "#5BBCD6"]

# Loss curves: solid = train, dashed = dev; one colour per task.
# The x-axis is taken from the length of each recorded history rather than a
# hard-coded range(100), so the plot still works after a shorter or
# interrupted training run.
plt.figure(figsize=(3.5,3.5))
for history, colour, line_style, curve_label in (
        (d_train_metrics, pal[0], '-', "Task 1 train"),
        (d_dev_metrics, pal[0], '--', "Task 1 dev"),
        (s_train_metrics, pal[2], '-', "Task 2 train"),
        (s_dev_metrics, pal[2], '--', "Task 2 dev")):
    losses = np.array([entry[0] for entry in history])  # entry = (loss, accuracy)
    plt.plot(range(len(losses)), losses, c=colour, ls=line_style, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Log Loss')
plt.legend()
# plt.savefig("final_task_12/task12_loss.pdf", transparent=True, bbox_inches="tight")
plt.show()

In [None]:
# Accuracy curves: solid = train, dashed = dev; one colour per task.
# The x-axis is taken from the length of each recorded history rather than a
# hard-coded range(100), so the plot still works after a shorter or
# interrupted training run.
plt.figure(figsize=(3.5,3.5))
for history, colour, line_style, curve_label in (
        (d_train_metrics, pal[0], '-', "Task 1 train"),
        (d_dev_metrics, pal[0], '--', "Task 1 dev"),
        (s_train_metrics, pal[2], '-', "Task 2 train"),
        (s_dev_metrics, pal[2], '--', "Task 2 dev")):
    accuracies = np.array([entry[1] for entry in history])  # entry = (loss, accuracy)
    plt.plot(range(len(accuracies)), accuracies, c=colour, ls=line_style, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Classification Accuracy')
plt.legend()
# plt.savefig("final_task_12/task12_accuracy.pdf", transparent=True, bbox_inches="tight")
plt.show()

In [None]:
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true, y_pred : label arrays passed straight to sklearn's ``confusion_matrix``.
    classes : tick labels used for both axes, in matrix order.
    normalize : if True, divide every row by its total so each row with at
        least one true sample sums to 1.
    title : plot title; a default depending on ``normalize`` is used when falsy.
    cmap : matplotlib colormap for the image.

    Returns
    -------
    The matplotlib Axes holding the plot. The function always opens a new
    figure via ``plt.subplots()``.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class that only occurs in y_pred has an all-zero row; dividing by
        # its total would yield NaN cells, so such rows are left at 0 instead.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; text turns white on cells darker than half the maximum.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax


np.set_printoptions(precision=2)

# plot_confusion_matrix() opens its own figure through plt.subplots(), so a
# preceding plt.figure(figsize=...) call only left an empty extra figure
# behind and the requested 3.5 x 3.5 inch size never reached the plot.
# The figure of the returned axes is resized instead.

# Task 1 dev split, document model.
cm_ax = plot_confusion_matrix(np.array(seq_y_dev_task1), task1_dev_preds, classes=["No Protest","Protest"], normalize=True,
                      title='Normalized confusion matrix')
cm_ax.figure.set_size_inches(3.5, 3.5)
cm_ax.figure.tight_layout()
# plt.savefig("final_task_12/task1_dev_confusion.pdf",transparent=True, bbox_inches="tight")
plt.show()

# Task 2 dev split, sentence model.
cm_ax = plot_confusion_matrix(np.array(seq_y_dev_task2), task2_dev_preds, classes=["No Protest","Protest"], normalize=True,
                      title='Normalized confusion matrix')
cm_ax.figure.set_size_inches(3.5, 3.5)
cm_ax.figure.tight_layout()
# plt.savefig("final_task_12/task2_dev_confusion.pdf",transparent=True, bbox_inches="tight")
plt.show()

# Task 2 dev split, document model.
cm_ax = plot_confusion_matrix(np.array(seq_y_dev_task2), task2_dev_preds_mod1, classes=["No Protest","Protest"], normalize=True,
                      title='Normalized confusion matrix')
cm_ax.figure.set_size_inches(3.5, 3.5)
cm_ax.figure.tight_layout()
# plt.savefig("final_task_12/task2_model1_dev_confusion.pdf",transparent=True, bbox_inches="tight")
plt.show()

In [None]:
def _read_id_file(path):
    """Return the lines of ``path`` as a list of strings, without line endings."""
    with open(path) as f:
        return f.read().splitlines()


def _format_results(ids, preds):
    """Pair each id with the int of its prediction and render "<id>\\t<label>" lines.

    ``preds`` is indexed as ``preds[:, 0]``. zip() stops at the shorter input,
    so a length mismatch between ids and predictions is truncated silently.
    """
    return [str(id_) + "\t" + str(int(pred)) for id_, pred in zip(ids, list(preds[:,0]))]


task1_test_ids = _read_id_file("public_data/phase2/task1_test.data")
task2_test_ids = _read_id_file("public_data/phase2/task2_test.data")
task1_china_ids = _read_id_file("public_data/phase2/china_test_task1.data")
task2_china_ids = _read_id_file("public_data/phase2/china_test_task2.data")
task1_train_ids = _read_id_file("solutions/task1_train.data")
task2_train_ids = _read_id_file("solutions/task2_train.data")
task1_dev_ids = _read_id_file("solutions/task1_dev.data")
task2_dev_ids = _read_id_file("solutions/task2_dev.data")

# Build each lookup set once; previously set(...) was rebuilt for every id
# inside the comprehension, which made the checks quadratic.
# Task 2 ids are compared with their underscores removed.
task1_test_id_set = set(task1_test_ids)
task2_test_id_set = {a.replace("_","") for a in task2_test_ids}
task1_china_id_set = set(task1_china_ids)
task2_china_id_set = {a.replace("_","") for a in task2_china_ids}

print("All Task1 IDs accounted for: "+str(all(str(id_) in task1_test_id_set for id_ in dict_test_task1.keys())))
print("All Task2 IDs accounted for: "+str(all(str(id_) in task2_test_id_set for id_ in dict_test_task2.keys())))
print("All China1 IDs accounted for: "+str(all(str(id_) in task1_china_id_set for id_ in dict_china_task1.keys())))
print("All China2 IDs accounted for: "+str(all(str(id_) in task2_china_id_set for id_ in dict_china_task2.keys())))

# NOTE(review): ids and predictions are paired purely by position - this
# assumes the id files list examples in the same order as the prediction
# arrays; verify before submitting.
task1_train_results = _format_results(task1_train_ids, task1_train_preds)
task2_train_results = _format_results(task2_train_ids, task2_train_preds)
task1_dev_results = _format_results(task1_dev_ids, task1_dev_preds)
task2_dev_results = _format_results(task2_dev_ids, task2_dev_preds)

In [None]:
# with open("final_task_12/task1_train.predict","w") as f:
#     for ll in task1_train_results:
#         f.write("{}\n".format(ll))
        
# with open("final_task_12/task1_dev.predict","w") as f:
#     for ll in task1_dev_results:
#         f.write("{}\n".format(ll))
        
# with open("final_task_12/task2_train.predict","w") as f:
#     for ll in task2_train_results:
#         f.write("{}\n".format(ll))
        
# with open("final_task_12/task2_dev.predict","w") as f:
#     for ll in task2_dev_results:
#         f.write("{}\n".format(ll))
        
# doc_model.save('final_task_12/model_doc.h5') 
# sent_model.save('final_task_12/model_sent.h5')

