**NLP**

*CC6205-1 - Otoño 2022*

Autor: Felipe Urrutia Vargas

In [1]:
# !pip install --upgrade pandas

import pandas as pd
import numpy as np
# pd.set_option("max_rows", None)
import pickle
from string import punctuation
import re

from sklearn.metrics import cohen_kappa_score
import matplotlib.pyplot as plt
import seaborn as sns
# from astropy.visualization import hist
sns.set_theme(style="whitegrid")
plt.rc('axes', titlesize=14)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=12)
plt.rc('ytick', labelsize=12)
plt.rcParams.update({'font.size': 16})
plt.rcParams['axes.titlesize'] = 16
plt.rcParams["figure.figsize"] = (10, 6)
plt.rcParams.update({'lines.markeredgewidth': 1})
plt.rcParams.update({'errorbar.capsize': 2})
import random

import plotly.express as px

In [2]:
sentiments = "anger fear joy sadness".split()
intensities = "low medium high".split()

In [6]:
df_train = pickle.load(open("df_train.pickle", "rb"))

In [185]:
#import spacy library
import spacy

!python -m spacy download en_core_web_sm

nlp = spacy.load("en_core_web_sm")

Collecting en-core-web-sm==3.2.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl (13.9 MB)
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [186]:
import collections

In [187]:
!pip install unidecode
from unidecode import unidecode

!pip install Levenshtein
import Levenshtein as lev



In [188]:
import emojilib

In [189]:
vowel = "aeiou"
digit = "0123456789"

def replace_multiple(string, list_replace, replace_ch):
    for ch in list_replace:
        if ch in string:
            string = string.replace(ch, replace_ch)
    return string

def sim_lev(a, b):
    return 1 - lev.distance(a, b) / max(len(a), len(b)) if len(a) != 0 else 0

In [190]:
def get_retro_attrib(tweet):
    o = {}
    o["retro<&>num_tokens"] = len(tweet.split())
    o["retro<&>lenght"] = len(" ".join(tweet.split()))
    o["retro<&>num_numbs"] = len(re.findall(r"\d+",tweet))
    o["retro<&>num_alpha"] = len(re.findall(r"\w+", tweet))
    o["retro<&>num_with_uppercase"] = len(re.findall(r"\S*[A-Z]+\S*", tweet))
    o["retro<&>num_tokens_upper"] = sum(int(t.isupper()) for t in tweet.split())
    
    def prop_vowels(w):
        N = len(w.replace(" ", ""))
        if N>0:
            return len(re.findall(r"[aeiou]", tweet)) / N
        else:
            return 0
    
    def len_max_rep_char(w):
        w=w+" "
        c0 = w[0]
        lens = [0]
        clen = 1
        for c in w[1:]:
            if c == c0:
                clen += 1
            else:
                if c0.isalpha():
                    lens.append(clen)
                c0 = c
                clen = 1
        return max(lens)  
    
    o["retro<&>prop_vowels"] = prop_vowels(tweet.lower())
    o["retro<&>len_max_rep_char"] = len_max_rep_char(tweet.lower())
    
    def max_char_fre_per_token(w, c="k"):
        tw = w.split()
        fmax = 0
        for t in tw:
            f = sum(int(ch==c) for ch in t)
            if f>fmax:
                fmax = f
        return fmax
    
    o["retro<&>max_char_fre_per_token(o)"] = max_char_fre_per_token(tweet.lower(), c="o")
    o["retro<&>max_char_fre_per_token(s)"] = max_char_fre_per_token(tweet.lower(), c="s")
    o["retro<&>max_char_fre_per_token(g)"] = max_char_fre_per_token(tweet.lower(), c="g")    
    o["retro<&>max_char_fre_per_token(l)"] = max_char_fre_per_token(tweet.lower(), c="l")    
    
    def max_type_rep_char_per_token(w, t="vowel"):
        w=unidecode(w+" ")
        c0 = w[0]
        lens = [0]
        clen = 1
        for c in w[1:]:
            if (c0.isalpha() and c.isalpha()) and ((c in "aeiou" and c0 in "aeiou") or (c not in "aeiou" and c0 not in "aeiou")):
                clen += 1
            else:
                if t=="vowel":
                    if c0 in "aeiou":
                        lens.append(clen)
                else:
                    if c0 not in "aeiou":
                        lens.append(clen) 
                c0 = c
                clen = 1
        return max(lens) 
    
    o["retro<&>max_type_rep_char_per_token(vowel)"] = max_type_rep_char_per_token(tweet.lower(), t="vowel")
    
    return o

In [191]:
def get_punct_attrib(tweet):
    o = {}
    o["punct<&>[\.]{3}"] = len(re.findall(r"[\.]{3}", tweet))
    o["punct<&>[!]"] = len(re.findall(r"[!]", tweet))
    o["punct<&>[#]"] = len(re.findall(r"[#]", tweet))
    o["punct<&>[#]{1}\S+"] = len(re.findall(r"[#]{1}\S+", tweet))
    o["punct<&>[\*]"] = len(re.findall(r"[\*]", tweet))
    o["punct<&>[@]{1}\S+"] = len(re.findall(r"[@]{1}\S+", tweet))
    o["punct<&>\S*[?]{1}\S*"] = len(re.findall(r"\S*[?]{1}\S*", tweet))
    return o

In [192]:
def get_emojilib_attrib(tweet):
    emo_list = emojilib.emoji_list(tweet)
    emo_names = list([d['name'] for d in emo_list if 'name' in d])
    o = {}
    for emo in emo_names:
        if emo not in o.keys():
            o["emoji<&>"+emo] = 0
        o["emoji<&>"+emo] += 1
    return o

In [193]:
def get_linguistics_attrib(tweet):
    o = {}
    nlp_tweet = nlp(tweet)
    for token in nlp_tweet:
        label = "text lemma pos tag dep shape is_alpha is_stop".split()
        vals = [token.text, token.lemma_.lower(), token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
        dict_vals = dict(zip(label[1:], vals[1:]))
        for k, v in dict_vals.items():
            l = f"linguistics<&>{k}<&>{v}"
            if l not in o.keys():
                o[l] = 0
            o[l] += 1
    return o    

In [384]:
from nltk import bigrams
from nltk import trigrams

In [388]:
def get_n_grams_attrib(tweet):
    o = {}
    nlp_tweet = nlp(tweet)
    
    def not_stop(tup: tuple) -> bool:
        for element in tup:
            if element.is_stop:
                return False
        return True
    
    bi_tokens = [(w[0].lemma_.lower(), w[1].lemma_.lower()) for w in bigrams(nlp_tweet) if not_stop(w)]
    for bigram in bi_tokens:
        ling = f"linguistics<&>bigram<&>{bigram}"
        if ling not in o.keys():
            o[ling] = 0
        o[ling] += 1

    tri_tokens = [(w[0].lemma_.lower(), w[1].lemma_.lower(), w[2].lemma_.lower()) for w in trigrams(nlp_tweet) if not_stop(w)]
    for trigram in tri_tokens:
        ling = f"linguistics<&>trigram<&>{trigram}"
        if ling not in o.keys():
            o[ling] = 0
        o[ling] += 1
    return o 

In [389]:
get_retro_attrib(tweet), get_punct_attrib(tweet), get_emojilib_attrib(tweet), get_linguistics_attrib(tweet), get_n_grams_attrib(tweet)

({'retro<&>num_tokens': 22,
  'retro<&>lenght': 126,
  'retro<&>num_numbs': 0,
  'retro<&>num_alpha': 22,
  'retro<&>num_with_uppercase': 5,
  'retro<&>num_tokens_upper': 1,
  'retro<&>prop_vowels': 0.29523809523809524,
  'retro<&>len_max_rep_char': 2,
  'retro<&>max_char_fre_per_token(o)': 1,
  'retro<&>max_char_fre_per_token(s)': 3,
  'retro<&>max_char_fre_per_token(g)': 1,
  'retro<&>max_char_fre_per_token(l)': 1,
  'retro<&>max_type_rep_char_per_token(vowel)': 2},
 {'punct<&>[\\.]{3}': 0,
  'punct<&>[!]': 1,
  'punct<&>[#]': 0,
  'punct<&>[#]{1}\\S+': 0,
  'punct<&>[\\*]': 0,
  'punct<&>[@]{1}\\S+': 1,
  'punct<&>\\S*[?]{1}\\S*': 1},
 {},
 {'linguistics<&>lemma<&>@sainsburys': 1,
  'linguistics<&>pos<&>PROPN': 2,
  'linguistics<&>tag<&>NNP': 2,
  'linguistics<&>dep<&>dep': 1,
  'linguistics<&>shape<&>@xxxx': 1,
  'linguistics<&>is_alpha<&>False': 4,
  'linguistics<&>is_stop<&>False': 16,
  'linguistics<&>lemma<&>could': 1,
  'linguistics<&>pos<&>AUX': 1,
  'linguistics<&>tag<&>MD':

In [195]:
import time
import datetime

In [390]:
data_all_attributes = []
times = []
for k, ix in enumerate(df_train.index):
    start_time = time.time()
    tweet = df_train.loc[ix]["text"]
    o = {"id": ix}
    o = {**o, **get_retro_attrib(tweet)}
    o = {**o, **get_punct_attrib(tweet)}
    o = {**o, **get_emojilib_attrib(tweet)}
    o = {**o, **get_linguistics_attrib(tweet)}
    o = {**o, **get_n_grams_attrib(tweet)}
    data_all_attributes.append(o)
    dt = time.time()-start_time
    times.append(dt)
    print(f"""
    row: {k+1}, total_rows: {df_train.shape[0]}
    progress: {np.round(100*(k+1)/df_train.shape[0], 3)}%
    wait time: {datetime.timedelta(seconds = np.mean(times)*(df_train.shape[0]-k-1))}s
    """)


    row: 1, total_rows: 3960
    progress: 0.025%
    wait time: 0:03:22.095416s
    

    row: 2, total_rows: 3960
    progress: 0.051%
    wait time: 0:02:28.561925s
    

    row: 3, total_rows: 3960
    progress: 0.076%
    wait time: 0:02:00.138228s
    

    row: 4, total_rows: 3960
    progress: 0.101%
    wait time: 0:01:52.849124s
    

    row: 5, total_rows: 3960
    progress: 0.126%
    wait time: 0:01:42.131368s
    

    row: 6, total_rows: 3960
    progress: 0.152%
    wait time: 0:01:29.705333s
    

    row: 7, total_rows: 3960
    progress: 0.177%
    wait time: 0:01:27.044923s
    

    row: 8, total_rows: 3960
    progress: 0.202%
    wait time: 0:01:23.067370s
    

    row: 9, total_rows: 3960
    progress: 0.227%
    wait time: 0:01:19.970708s
    

    row: 10, total_rows: 3960
    progress: 0.253%
    wait time: 0:01:16.304250s
    

    row: 11, total_rows: 3960
    progress: 0.278%
    wait time: 0:01:14.021137s
    

    row: 12, total_rows: 3960
    progre


    row: 105, total_rows: 3960
    progress: 2.652%
    wait time: 0:00:45.273119s
    

    row: 106, total_rows: 3960
    progress: 2.677%
    wait time: 0:00:45.343857s
    

    row: 107, total_rows: 3960
    progress: 2.702%
    wait time: 0:00:45.160697s
    

    row: 108, total_rows: 3960
    progress: 2.727%
    wait time: 0:00:45.195065s
    

    row: 109, total_rows: 3960
    progress: 2.753%
    wait time: 0:00:45.157765s
    

    row: 110, total_rows: 3960
    progress: 2.778%
    wait time: 0:00:45.085953s
    

    row: 111, total_rows: 3960
    progress: 2.803%
    wait time: 0:00:45.188746s
    

    row: 112, total_rows: 3960
    progress: 2.828%
    wait time: 0:00:45.323870s
    

    row: 113, total_rows: 3960
    progress: 2.854%
    wait time: 0:00:45.251859s
    

    row: 114, total_rows: 3960
    progress: 2.879%
    wait time: 0:00:45.316041s
    

    row: 115, total_rows: 3960
    progress: 2.904%
    wait time: 0:00:45.412224s
    

    row: 116, total_


    row: 204, total_rows: 3960
    progress: 5.152%
    wait time: 0:00:44.099372s
    

    row: 205, total_rows: 3960
    progress: 5.177%
    wait time: 0:00:44.037577s
    

    row: 206, total_rows: 3960
    progress: 5.202%
    wait time: 0:00:44.031021s
    

    row: 207, total_rows: 3960
    progress: 5.227%
    wait time: 0:00:44.096981s
    

    row: 208, total_rows: 3960
    progress: 5.253%
    wait time: 0:00:44.071881s
    

    row: 209, total_rows: 3960
    progress: 5.278%
    wait time: 0:00:43.957117s
    

    row: 210, total_rows: 3960
    progress: 5.303%
    wait time: 0:00:43.968473s
    

    row: 211, total_rows: 3960
    progress: 5.328%
    wait time: 0:00:43.961832s
    

    row: 212, total_rows: 3960
    progress: 5.354%
    wait time: 0:00:43.990526s
    

    row: 213, total_rows: 3960
    progress: 5.379%
    wait time: 0:00:43.983608s
    

    row: 214, total_rows: 3960
    progress: 5.404%
    wait time: 0:00:43.994155s
    

    row: 215, total_


    row: 306, total_rows: 3960
    progress: 7.727%
    wait time: 0:00:42.680684s
    

    row: 307, total_rows: 3960
    progress: 7.753%
    wait time: 0:00:42.672937s
    

    row: 308, total_rows: 3960
    progress: 7.778%
    wait time: 0:00:42.629550s
    

    row: 309, total_rows: 3960
    progress: 7.803%
    wait time: 0:00:42.598217s
    

    row: 310, total_rows: 3960
    progress: 7.828%
    wait time: 0:00:42.590586s
    

    row: 311, total_rows: 3960
    progress: 7.854%
    wait time: 0:00:42.606424s
    

    row: 312, total_rows: 3960
    progress: 7.879%
    wait time: 0:00:42.610360s
    

    row: 313, total_rows: 3960
    progress: 7.904%
    wait time: 0:00:42.544230s
    

    row: 314, total_rows: 3960
    progress: 7.929%
    wait time: 0:00:42.548187s
    

    row: 315, total_rows: 3960
    progress: 7.955%
    wait time: 0:00:42.517308s
    

    row: 316, total_rows: 3960
    progress: 7.98%
    wait time: 0:00:42.463472s
    

    row: 317, total_r


    row: 419, total_rows: 3960
    progress: 10.581%
    wait time: 0:00:40.060660s
    

    row: 420, total_rows: 3960
    progress: 10.606%
    wait time: 0:00:40.055225s
    

    row: 421, total_rows: 3960
    progress: 10.631%
    wait time: 0:00:40.058172s
    

    row: 422, total_rows: 3960
    progress: 10.657%
    wait time: 0:00:40.019088s
    

    row: 423, total_rows: 3960
    progress: 10.682%
    wait time: 0:00:39.980152s
    

    row: 424, total_rows: 3960
    progress: 10.707%
    wait time: 0:00:39.966397s
    

    row: 425, total_rows: 3960
    progress: 10.732%
    wait time: 0:00:39.911026s
    

    row: 426, total_rows: 3960
    progress: 10.758%
    wait time: 0:00:39.889111s
    

    row: 427, total_rows: 3960
    progress: 10.783%
    wait time: 0:00:39.842403s
    

    row: 428, total_rows: 3960
    progress: 10.808%
    wait time: 0:00:39.787620s
    

    row: 429, total_rows: 3960
    progress: 10.833%
    wait time: 0:00:39.782502s
    

    row: 


    row: 514, total_rows: 3960
    progress: 12.98%
    wait time: 0:00:38.235679s
    

    row: 515, total_rows: 3960
    progress: 13.005%
    wait time: 0:00:38.237400s
    

    row: 516, total_rows: 3960
    progress: 13.03%
    wait time: 0:00:38.205659s
    

    row: 517, total_rows: 3960
    progress: 13.056%
    wait time: 0:00:38.194012s
    

    row: 518, total_rows: 3960
    progress: 13.081%
    wait time: 0:00:38.182363s
    

    row: 519, total_rows: 3960
    progress: 13.106%
    wait time: 0:00:38.157449s
    

    row: 520, total_rows: 3960
    progress: 13.131%
    wait time: 0:00:38.112729s
    

    row: 521, total_rows: 3960
    progress: 13.157%
    wait time: 0:00:38.087978s
    

    row: 522, total_rows: 3960
    progress: 13.182%
    wait time: 0:00:38.076473s
    

    row: 523, total_rows: 3960
    progress: 13.207%
    wait time: 0:00:38.045236s
    

    row: 524, total_rows: 3960
    progress: 13.232%
    wait time: 0:00:38.000962s
    

    row: 52


    row: 611, total_rows: 3960
    progress: 15.429%
    wait time: 0:00:36.537787s
    

    row: 612, total_rows: 3960
    progress: 15.455%
    wait time: 0:00:36.538377s
    

    row: 613, total_rows: 3960
    progress: 15.48%
    wait time: 0:00:36.534773s
    

    row: 614, total_rows: 3960
    progress: 15.505%
    wait time: 0:00:36.528514s
    

    row: 615, total_rows: 3960
    progress: 15.53%
    wait time: 0:00:36.518101s
    

    row: 616, total_rows: 3960
    progress: 15.556%
    wait time: 0:00:36.502253s
    

    row: 617, total_rows: 3960
    progress: 15.581%
    wait time: 0:00:36.481000s
    

    row: 618, total_rows: 3960
    progress: 15.606%
    wait time: 0:00:36.476031s
    

    row: 619, total_rows: 3960
    progress: 15.631%
    wait time: 0:00:36.454825s
    

    row: 620, total_rows: 3960
    progress: 15.657%
    wait time: 0:00:36.422876s
    

    row: 621, total_rows: 3960
    progress: 15.682%
    wait time: 0:00:36.401776s
    

    row: 62


    row: 705, total_rows: 3960
    progress: 17.803%
    wait time: 0:00:35.259850s
    

    row: 706, total_rows: 3960
    progress: 17.828%
    wait time: 0:00:35.235995s
    

    row: 707, total_rows: 3960
    progress: 17.854%
    wait time: 0:00:35.235206s
    

    row: 708, total_rows: 3960
    progress: 17.879%
    wait time: 0:00:35.215998s
    

    row: 709, total_rows: 3960
    progress: 17.904%
    wait time: 0:00:35.206000s
    

    row: 710, total_rows: 3960
    progress: 17.929%
    wait time: 0:00:35.200579s
    

    row: 711, total_rows: 3960
    progress: 17.955%
    wait time: 0:00:35.176845s
    

    row: 712, total_rows: 3960
    progress: 17.98%
    wait time: 0:00:35.166855s
    

    row: 713, total_rows: 3960
    progress: 18.005%
    wait time: 0:00:35.152300s
    

    row: 714, total_rows: 3960
    progress: 18.03%
    wait time: 0:00:35.142310s
    

    row: 715, total_rows: 3960
    progress: 18.056%
    wait time: 0:00:35.118690s
    

    row: 71


    row: 803, total_rows: 3960
    progress: 20.278%
    wait time: 0:00:33.851526s
    

    row: 804, total_rows: 3960
    progress: 20.303%
    wait time: 0:00:33.841934s
    

    row: 805, total_rows: 3960
    progress: 20.328%
    wait time: 0:00:33.828411s
    

    row: 806, total_rows: 3960
    progress: 20.354%
    wait time: 0:00:33.830567s
    

    row: 807, total_rows: 3960
    progress: 20.379%
    wait time: 0:00:33.809216s
    

    row: 808, total_rows: 3960
    progress: 20.404%
    wait time: 0:00:33.791803s
    

    row: 809, total_rows: 3960
    progress: 20.429%
    wait time: 0:00:33.770514s
    

    row: 810, total_rows: 3960
    progress: 20.455%
    wait time: 0:00:33.760933s
    

    row: 811, total_rows: 3960
    progress: 20.48%
    wait time: 0:00:33.759123s
    

    row: 812, total_rows: 3960
    progress: 20.505%
    wait time: 0:00:33.749524s
    

    row: 813, total_rows: 3960
    progress: 20.53%
    wait time: 0:00:33.728300s
    

    row: 81


    row: 900, total_rows: 3960
    progress: 22.727%
    wait time: 0:00:32.508025s
    

    row: 901, total_rows: 3960
    progress: 22.753%
    wait time: 0:00:32.515705s
    

    row: 902, total_rows: 3960
    progress: 22.778%
    wait time: 0:00:32.509759s
    

    row: 903, total_rows: 3960
    progress: 22.803%
    wait time: 0:00:32.510575s
    

    row: 904, total_rows: 3960
    progress: 22.828%
    wait time: 0:00:32.491058s
    

    row: 905, total_rows: 3960
    progress: 22.854%
    wait time: 0:00:32.468188s
    

    row: 906, total_rows: 3960
    progress: 22.879%
    wait time: 0:00:32.458847s
    

    row: 907, total_rows: 3960
    progress: 22.904%
    wait time: 0:00:32.439398s
    

    row: 908, total_rows: 3960
    progress: 22.929%
    wait time: 0:00:32.416609s
    

    row: 909, total_rows: 3960
    progress: 22.955%
    wait time: 0:00:32.417370s
    

    row: 910, total_rows: 3960
    progress: 22.98%
    wait time: 0:00:32.397970s
    

    row: 9


    row: 994, total_rows: 3960
    progress: 25.101%
    wait time: 0:00:31.434144s
    

    row: 995, total_rows: 3960
    progress: 25.126%
    wait time: 0:00:31.415827s
    

    row: 996, total_rows: 3960
    progress: 25.152%
    wait time: 0:00:31.407965s
    

    row: 997, total_rows: 3960
    progress: 25.177%
    wait time: 0:00:31.389673s
    

    row: 998, total_rows: 3960
    progress: 25.202%
    wait time: 0:00:31.380314s
    

    row: 999, total_rows: 3960
    progress: 25.227%
    wait time: 0:00:31.373919s
    

    row: 1000, total_rows: 3960
    progress: 25.253%
    wait time: 0:00:31.352699s
    

    row: 1001, total_rows: 3960
    progress: 25.278%
    wait time: 0:00:31.340383s
    

    row: 1002, total_rows: 3960
    progress: 25.303%
    wait time: 0:00:31.333984s
    

    row: 1003, total_rows: 3960
    progress: 25.328%
    wait time: 0:00:31.324627s
    

    row: 1004, total_rows: 3960
    progress: 25.354%
    wait time: 0:00:31.324096s
    

    


    row: 1087, total_rows: 3960
    progress: 27.449%
    wait time: 0:00:30.379124s
    

    row: 1088, total_rows: 3960
    progress: 27.475%
    wait time: 0:00:30.359131s
    

    row: 1089, total_rows: 3960
    progress: 27.5%
    wait time: 0:00:30.341804s
    

    row: 1090, total_rows: 3960
    progress: 27.525%
    wait time: 0:00:30.324490s
    

    row: 1091, total_rows: 3960
    progress: 27.551%
    wait time: 0:00:30.317725s
    

    row: 1092, total_rows: 3960
    progress: 27.576%
    wait time: 0:00:30.318836s
    

    row: 1093, total_rows: 3960
    progress: 27.601%
    wait time: 0:00:30.306788s
    

    row: 1094, total_rows: 3960
    progress: 27.626%
    wait time: 0:00:30.299990s
    

    row: 1095, total_rows: 3960
    progress: 27.652%
    wait time: 0:00:30.293183s
    

    row: 1096, total_rows: 3960
    progress: 27.677%
    wait time: 0:00:30.291594s
    

    row: 1097, total_rows: 3960
    progress: 27.702%
    wait time: 0:00:30.282148s
    




    row: 1179, total_rows: 3960
    progress: 29.773%
    wait time: 0:00:29.412401s
    

    row: 1180, total_rows: 3960
    progress: 29.798%
    wait time: 0:00:29.400486s
    

    row: 1181, total_rows: 3960
    progress: 29.823%
    wait time: 0:00:29.390934s
    

    row: 1182, total_rows: 3960
    progress: 29.848%
    wait time: 0:00:29.376673s
    

    row: 1183, total_rows: 3960
    progress: 29.874%
    wait time: 0:00:29.364770s
    

    row: 1184, total_rows: 3960
    progress: 29.899%
    wait time: 0:00:29.352871s
    

    row: 1185, total_rows: 3960
    progress: 29.924%
    wait time: 0:00:29.343318s
    

    row: 1186, total_rows: 3960
    progress: 29.949%
    wait time: 0:00:29.333764s
    

    row: 1187, total_rows: 3960
    progress: 29.975%
    wait time: 0:00:29.326545s
    

    row: 1188, total_rows: 3960
    progress: 30.0%
    wait time: 0:00:29.316982s
    

    row: 1189, total_rows: 3960
    progress: 30.025%
    wait time: 0:00:29.300418s
    




    row: 1272, total_rows: 3960
    progress: 32.121%
    wait time: 0:00:28.338511s
    

    row: 1273, total_rows: 3960
    progress: 32.146%
    wait time: 0:00:28.333181s
    

    row: 1274, total_rows: 3960
    progress: 32.172%
    wait time: 0:00:28.321508s
    

    row: 1275, total_rows: 3960
    progress: 32.197%
    wait time: 0:00:28.311945s
    

    row: 1276, total_rows: 3960
    progress: 32.222%
    wait time: 0:00:28.296064s
    

    row: 1277, total_rows: 3960
    progress: 32.247%
    wait time: 0:00:28.284401s
    

    row: 1278, total_rows: 3960
    progress: 32.273%
    wait time: 0:00:28.276941s
    

    row: 1279, total_rows: 3960
    progress: 32.298%
    wait time: 0:00:28.258985s
    

    row: 1280, total_rows: 3960
    progress: 32.323%
    wait time: 0:00:28.245236s
    

    row: 1281, total_rows: 3960
    progress: 32.348%
    wait time: 0:00:28.233587s
    

    row: 1282, total_rows: 3960
    progress: 32.374%
    wait time: 0:00:28.226124s
    


    row: 1366, total_rows: 3960
    progress: 34.495%
    wait time: 0:00:27.354177s
    

    row: 1367, total_rows: 3960
    progress: 34.52%
    wait time: 0:00:27.344514s
    

    row: 1368, total_rows: 3960
    progress: 34.545%
    wait time: 0:00:27.336745s
    

    row: 1369, total_rows: 3960
    progress: 34.571%
    wait time: 0:00:27.332757s
    

    row: 1370, total_rows: 3960
    progress: 34.596%
    wait time: 0:00:27.324972s
    

    row: 1371, total_rows: 3960
    progress: 34.621%
    wait time: 0:00:27.315290s
    

    row: 1372, total_rows: 3960
    progress: 34.646%
    wait time: 0:00:27.305606s
    

    row: 1373, total_rows: 3960
    progress: 34.672%
    wait time: 0:00:27.299693s
    

    row: 1374, total_rows: 3960
    progress: 34.697%
    wait time: 0:00:27.293769s
    

    row: 1375, total_rows: 3960
    progress: 34.722%
    wait time: 0:00:27.284070s
    

    row: 1376, total_rows: 3960
    progress: 34.747%
    wait time: 0:00:27.281889s
    



    row: 1457, total_rows: 3960
    progress: 36.793%
    wait time: 0:00:26.410491s
    

    row: 1458, total_rows: 3960
    progress: 36.818%
    wait time: 0:00:26.404162s
    

    row: 1459, total_rows: 3960
    progress: 36.843%
    wait time: 0:00:26.397823s
    

    row: 1460, total_rows: 3960
    progress: 36.869%
    wait time: 0:00:26.384620s
    

    row: 1461, total_rows: 3960
    progress: 36.894%
    wait time: 0:00:26.371422s
    

    row: 1462, total_rows: 3960
    progress: 36.919%
    wait time: 0:00:26.361651s
    

    row: 1463, total_rows: 3960
    progress: 36.944%
    wait time: 0:00:26.346753s
    

    row: 1464, total_rows: 3960
    progress: 36.97%
    wait time: 0:00:26.336983s
    

    row: 1465, total_rows: 3960
    progress: 36.995%
    wait time: 0:00:26.325508s
    

    row: 1466, total_rows: 3960
    progress: 37.02%
    wait time: 0:00:26.317439s
    

    row: 1467, total_rows: 3960
    progress: 37.045%
    wait time: 0:00:26.300860s
    




    row: 1551, total_rows: 3960
    progress: 39.167%
    wait time: 0:00:25.407847s
    

    row: 1552, total_rows: 3960
    progress: 39.192%
    wait time: 0:00:25.401124s
    

    row: 1553, total_rows: 3960
    progress: 39.217%
    wait time: 0:00:25.392842s
    

    row: 1554, total_rows: 3960
    progress: 39.242%
    wait time: 0:00:25.383006s
    

    row: 1555, total_rows: 3960
    progress: 39.268%
    wait time: 0:00:25.368523s
    

    row: 1556, total_rows: 3960
    progress: 39.293%
    wait time: 0:00:25.358690s
    

    row: 1557, total_rows: 3960
    progress: 39.318%
    wait time: 0:00:25.348851s
    

    row: 1558, total_rows: 3960
    progress: 39.343%
    wait time: 0:00:25.337470s
    

    row: 1559, total_rows: 3960
    progress: 39.369%
    wait time: 0:00:25.327633s
    

    row: 1560, total_rows: 3960
    progress: 39.394%
    wait time: 0:00:25.316255s
    

    row: 1561, total_rows: 3960
    progress: 39.419%
    wait time: 0:00:25.301800s
    


    row: 1644, total_rows: 3960
    progress: 41.515%
    wait time: 0:00:24.422756s
    

    row: 1645, total_rows: 3960
    progress: 41.54%
    wait time: 0:00:24.415682s
    

    row: 1646, total_rows: 3960
    progress: 41.566%
    wait time: 0:00:24.408600s
    

    row: 1647, total_rows: 3960
    progress: 41.591%
    wait time: 0:00:24.398701s
    

    row: 1648, total_rows: 3960
    progress: 41.616%
    wait time: 0:00:24.388800s
    

    row: 1649, total_rows: 3960
    progress: 41.641%
    wait time: 0:00:24.377495s
    

    row: 1650, total_rows: 3960
    progress: 41.667%
    wait time: 0:00:24.364790s
    

    row: 1651, total_rows: 3960
    progress: 41.692%
    wait time: 0:00:24.356289s
    

    row: 1652, total_rows: 3960
    progress: 41.717%
    wait time: 0:00:24.349183s
    

    row: 1653, total_rows: 3960
    progress: 41.742%
    wait time: 0:00:24.340671s
    

    row: 1654, total_rows: 3960
    progress: 41.768%
    wait time: 0:00:24.330761s
    



    row: 1747, total_rows: 3960
    progress: 44.116%
    wait time: 0:00:23.428940s
    

    row: 1748, total_rows: 3960
    progress: 44.141%
    wait time: 0:00:23.420155s
    

    row: 1749, total_rows: 3960
    progress: 44.167%
    wait time: 0:00:23.410193s
    

    row: 1750, total_rows: 3960
    progress: 44.192%
    wait time: 0:00:23.396345s
    

    row: 1751, total_rows: 3960
    progress: 44.217%
    wait time: 0:00:23.383768s
    

    row: 1752, total_rows: 3960
    progress: 44.242%
    wait time: 0:00:23.380023s
    

    row: 1753, total_rows: 3960
    progress: 44.268%
    wait time: 0:00:23.378786s
    

    row: 1754, total_rows: 3960
    progress: 44.293%
    wait time: 0:00:23.368717s
    

    row: 1755, total_rows: 3960
    progress: 44.318%
    wait time: 0:00:23.358647s
    

    row: 1756, total_rows: 3960
    progress: 44.343%
    wait time: 0:00:23.348577s
    

    row: 1757, total_rows: 3960
    progress: 44.369%
    wait time: 0:00:23.338505s
    


    row: 1847, total_rows: 3960
    progress: 46.641%
    wait time: 0:00:22.475407s
    

    row: 1848, total_rows: 3960
    progress: 46.667%
    wait time: 0:00:22.461765s
    

    row: 1849, total_rows: 3960
    progress: 46.692%
    wait time: 0:00:22.454986s
    

    row: 1850, total_rows: 3960
    progress: 46.717%
    wait time: 0:00:22.444774s
    

    row: 1851, total_rows: 3960
    progress: 46.742%
    wait time: 0:00:22.434561s
    

    row: 1852, total_rows: 3960
    progress: 46.768%
    wait time: 0:00:22.427766s
    

    row: 1853, total_rows: 3960
    progress: 46.793%
    wait time: 0:00:22.419825s
    

    row: 1854, total_rows: 3960
    progress: 46.818%
    wait time: 0:00:22.411877s
    

    row: 1855, total_rows: 3960
    progress: 46.843%
    wait time: 0:00:22.398246s
    

    row: 1856, total_rows: 3960
    progress: 46.869%
    wait time: 0:00:22.385754s
    

    row: 1857, total_rows: 3960
    progress: 46.894%
    wait time: 0:00:22.374401s
    


    row: 1940, total_rows: 3960
    progress: 48.99%
    wait time: 0:00:21.418122s
    

    row: 1941, total_rows: 3960
    progress: 49.015%
    wait time: 0:00:21.405861s
    

    row: 1942, total_rows: 3960
    progress: 49.04%
    wait time: 0:00:21.395682s
    

    row: 1943, total_rows: 3960
    progress: 49.066%
    wait time: 0:00:21.386542s
    

    row: 1944, total_rows: 3960
    progress: 49.091%
    wait time: 0:00:21.372208s
    

    row: 1945, total_rows: 3960
    progress: 49.116%
    wait time: 0:00:21.364104s
    

    row: 1946, total_rows: 3960
    progress: 49.141%
    wait time: 0:00:21.353924s
    

    row: 1947, total_rows: 3960
    progress: 49.167%
    wait time: 0:00:21.342707s
    

    row: 1948, total_rows: 3960
    progress: 49.192%
    wait time: 0:00:21.332526s
    

    row: 1949, total_rows: 3960
    progress: 49.217%
    wait time: 0:00:21.321311s
    

    row: 1950, total_rows: 3960
    progress: 49.242%
    wait time: 0:00:21.314224s
    




    row: 2034, total_rows: 3960
    progress: 51.364%
    wait time: 0:00:20.380889s
    

    row: 2035, total_rows: 3960
    progress: 51.389%
    wait time: 0:00:20.370712s
    

    row: 2036, total_rows: 3960
    progress: 51.414%
    wait time: 0:00:20.363371s
    

    row: 2037, total_rows: 3960
    progress: 51.439%
    wait time: 0:00:20.350355s
    

    row: 2038, total_rows: 3960
    progress: 51.465%
    wait time: 0:00:20.338288s
    

    row: 2039, total_rows: 3960
    progress: 51.49%
    wait time: 0:00:20.322451s
    

    row: 2040, total_rows: 3960
    progress: 51.515%
    wait time: 0:00:20.309452s
    

    row: 2041, total_rows: 3960
    progress: 51.54%
    wait time: 0:00:20.300222s
    

    row: 2042, total_rows: 3960
    progress: 51.566%
    wait time: 0:00:20.289109s
    

    row: 2043, total_rows: 3960
    progress: 51.591%
    wait time: 0:00:20.274239s
    

    row: 2044, total_rows: 3960
    progress: 51.616%
    wait time: 0:00:20.262193s
    




    row: 2130, total_rows: 3960
    progress: 53.788%
    wait time: 0:00:19.297990s
    

    row: 2131, total_rows: 3960
    progress: 53.813%
    wait time: 0:00:19.285266s
    

    row: 2132, total_rows: 3960
    progress: 53.838%
    wait time: 0:00:19.274262s
    

    row: 2133, total_rows: 3960
    progress: 53.864%
    wait time: 0:00:19.263260s
    

    row: 2134, total_rows: 3960
    progress: 53.889%
    wait time: 0:00:19.253115s
    

    row: 2135, total_rows: 3960
    progress: 53.914%
    wait time: 0:00:19.244681s
    

    row: 2136, total_rows: 3960
    progress: 53.939%
    wait time: 0:00:19.233678s
    

    row: 2137, total_rows: 3960
    progress: 53.965%
    wait time: 0:00:19.223531s
    

    row: 2138, total_rows: 3960
    progress: 53.99%
    wait time: 0:00:19.215088s
    

    row: 2139, total_rows: 3960
    progress: 54.015%
    wait time: 0:00:19.205790s
    

    row: 2140, total_rows: 3960
    progress: 54.04%
    wait time: 0:00:19.197339s
    




    row: 2223, total_rows: 3960
    progress: 56.136%
    wait time: 0:00:18.286906s
    

    row: 2224, total_rows: 3960
    progress: 56.162%
    wait time: 0:00:18.275972s
    

    row: 2225, total_rows: 3960
    progress: 56.187%
    wait time: 0:00:18.264656s
    

    row: 2226, total_rows: 3960
    progress: 56.212%
    wait time: 0:00:18.253725s
    

    row: 2227, total_rows: 3960
    progress: 56.237%
    wait time: 0:00:18.242795s
    

    row: 2228, total_rows: 3960
    progress: 56.263%
    wait time: 0:00:18.234200s
    

    row: 2229, total_rows: 3960
    progress: 56.288%
    wait time: 0:00:18.224047s
    

    row: 2230, total_rows: 3960
    progress: 56.313%
    wait time: 0:00:18.215445s
    

    row: 2231, total_rows: 3960
    progress: 56.338%
    wait time: 0:00:18.202186s
    

    row: 2232, total_rows: 3960
    progress: 56.364%
    wait time: 0:00:18.193582s
    

    row: 2233, total_rows: 3960
    progress: 56.389%
    wait time: 0:00:18.181103s
    


    row: 2316, total_rows: 3960
    progress: 58.485%
    wait time: 0:00:17.303767s
    

    row: 2317, total_rows: 3960
    progress: 58.51%
    wait time: 0:00:17.292166s
    

    row: 2318, total_rows: 3960
    progress: 58.535%
    wait time: 0:00:17.281984s
    

    row: 2319, total_rows: 3960
    progress: 58.561%
    wait time: 0:00:17.268970s
    

    row: 2320, total_rows: 3960
    progress: 58.586%
    wait time: 0:00:17.258083s
    

    row: 2321, total_rows: 3960
    progress: 58.611%
    wait time: 0:00:17.245783s
    

    row: 2322, total_rows: 3960
    progress: 58.636%
    wait time: 0:00:17.232780s
    

    row: 2323, total_rows: 3960
    progress: 58.662%
    wait time: 0:00:17.221194s
    

    row: 2324, total_rows: 3960
    progress: 58.687%
    wait time: 0:00:17.211724s
    

    row: 2325, total_rows: 3960
    progress: 58.712%
    wait time: 0:00:17.200139s
    

    row: 2326, total_rows: 3960
    progress: 58.737%
    wait time: 0:00:17.189261s
    



    row: 2411, total_rows: 3960
    progress: 60.884%
    wait time: 0:00:16.255620s
    

    row: 2412, total_rows: 3960
    progress: 60.909%
    wait time: 0:00:16.246742s
    

    row: 2413, total_rows: 3960
    progress: 60.934%
    wait time: 0:00:16.235293s
    

    row: 2414, total_rows: 3960
    progress: 60.96%
    wait time: 0:00:16.225128s
    

    row: 2415, total_rows: 3960
    progress: 60.985%
    wait time: 0:00:16.214962s
    

    row: 2416, total_rows: 3960
    progress: 61.01%
    wait time: 0:00:16.202238s
    

    row: 2417, total_rows: 3960
    progress: 61.035%
    wait time: 0:00:16.190796s
    

    row: 2418, total_rows: 3960
    progress: 61.061%
    wait time: 0:00:16.180632s
    

    row: 2419, total_rows: 3960
    progress: 61.086%
    wait time: 0:00:16.168555s
    

    row: 2420, total_rows: 3960
    progress: 61.111%
    wait time: 0:00:16.158392s
    

    row: 2421, total_rows: 3960
    progress: 61.136%
    wait time: 0:00:16.149501s
    




    row: 2505, total_rows: 3960
    progress: 63.258%
    wait time: 0:00:15.239477s
    

    row: 2506, total_rows: 3960
    progress: 63.283%
    wait time: 0:00:15.229023s
    

    row: 2507, total_rows: 3960
    progress: 63.308%
    wait time: 0:00:15.218279s
    

    row: 2508, total_rows: 3960
    progress: 63.333%
    wait time: 0:00:15.208695s
    

    row: 2509, total_rows: 3960
    progress: 63.359%
    wait time: 0:00:15.197373s
    

    row: 2510, total_rows: 3960
    progress: 63.384%
    wait time: 0:00:15.185474s
    

    row: 2511, total_rows: 3960
    progress: 63.409%
    wait time: 0:00:15.173001s
    

    row: 2512, total_rows: 3960
    progress: 63.434%
    wait time: 0:00:15.162840s
    

    row: 2513, total_rows: 3960
    progress: 63.46%
    wait time: 0:00:15.153255s
    

    row: 2514, total_rows: 3960
    progress: 63.485%
    wait time: 0:00:15.141366s
    

    row: 2515, total_rows: 3960
    progress: 63.51%
    wait time: 0:00:15.129479s
    




    row: 2596, total_rows: 3960
    progress: 65.556%
    wait time: 0:00:14.288587s
    

    row: 2597, total_rows: 3960
    progress: 65.581%
    wait time: 0:00:14.277342s
    

    row: 2598, total_rows: 3960
    progress: 65.606%
    wait time: 0:00:14.267672s
    

    row: 2599, total_rows: 3960
    progress: 65.631%
    wait time: 0:00:14.259049s
    

    row: 2600, total_rows: 3960
    progress: 65.657%
    wait time: 0:00:14.248327s
    

    row: 2601, total_rows: 3960
    progress: 65.682%
    wait time: 0:00:14.236560s
    

    row: 2602, total_rows: 3960
    progress: 65.707%
    wait time: 0:00:14.226363s
    

    row: 2603, total_rows: 3960
    progress: 65.732%
    wait time: 0:00:14.218253s
    

    row: 2604, total_rows: 3960
    progress: 65.758%
    wait time: 0:00:14.209095s
    

    row: 2605, total_rows: 3960
    progress: 65.783%
    wait time: 0:00:14.197851s
    

    row: 2606, total_rows: 3960
    progress: 65.808%
    wait time: 0:00:14.186609s
    


    row: 2693, total_rows: 3960
    progress: 68.005%
    wait time: 0:00:13.236471s
    

    row: 2694, total_rows: 3960
    progress: 68.03%
    wait time: 0:00:13.227700s
    

    row: 2695, total_rows: 3960
    progress: 68.056%
    wait time: 0:00:13.217985s
    

    row: 2696, total_rows: 3960
    progress: 68.081%
    wait time: 0:00:13.208268s
    

    row: 2697, total_rows: 3960
    progress: 68.106%
    wait time: 0:00:13.199956s
    

    row: 2698, total_rows: 3960
    progress: 68.131%
    wait time: 0:00:13.191170s
    

    row: 2699, total_rows: 3960
    progress: 68.157%
    wait time: 0:00:13.181913s
    

    row: 2700, total_rows: 3960
    progress: 68.182%
    wait time: 0:00:13.173588s
    

    row: 2701, total_rows: 3960
    progress: 68.207%
    wait time: 0:00:13.163857s
    

    row: 2702, total_rows: 3960
    progress: 68.232%
    wait time: 0:00:13.155057s
    

    row: 2703, total_rows: 3960
    progress: 68.258%
    wait time: 0:00:13.146253s
    



    row: 2791, total_rows: 3960
    progress: 70.48%
    wait time: 0:00:12.260496s
    

    row: 2792, total_rows: 3960
    progress: 70.505%
    wait time: 0:00:12.250226s
    

    row: 2793, total_rows: 3960
    progress: 70.53%
    wait time: 0:00:12.238701s
    

    row: 2794, total_rows: 3960
    progress: 70.556%
    wait time: 0:00:12.226343s
    

    row: 2795, total_rows: 3960
    progress: 70.581%
    wait time: 0:00:12.216494s
    

    row: 2796, total_rows: 3960
    progress: 70.606%
    wait time: 0:00:12.205809s
    

    row: 2797, total_rows: 3960
    progress: 70.631%
    wait time: 0:00:12.196373s
    

    row: 2798, total_rows: 3960
    progress: 70.657%
    wait time: 0:00:12.185687s
    

    row: 2799, total_rows: 3960
    progress: 70.682%
    wait time: 0:00:12.174587s
    

    row: 2800, total_rows: 3960
    progress: 70.707%
    wait time: 0:00:12.165147s
    

    row: 2801, total_rows: 3960
    progress: 70.732%
    wait time: 0:00:12.154462s
    




    row: 2886, total_rows: 3960
    progress: 72.879%
    wait time: 0:00:11.243848s
    

    row: 2887, total_rows: 3960
    progress: 72.904%
    wait time: 0:00:11.232836s
    

    row: 2888, total_rows: 3960
    progress: 72.929%
    wait time: 0:00:11.222196s
    

    row: 2889, total_rows: 3960
    progress: 72.955%
    wait time: 0:00:11.211929s
    

    row: 2890, total_rows: 3960
    progress: 72.98%
    wait time: 0:00:11.202031s
    

    row: 2891, total_rows: 3960
    progress: 73.005%
    wait time: 0:00:11.192132s
    

    row: 2892, total_rows: 3960
    progress: 73.03%
    wait time: 0:00:11.181862s
    

    row: 2893, total_rows: 3960
    progress: 73.056%
    wait time: 0:00:11.171222s
    

    row: 2894, total_rows: 3960
    progress: 73.081%
    wait time: 0:00:11.159845s
    

    row: 2895, total_rows: 3960
    progress: 73.106%
    wait time: 0:00:11.149207s
    

    row: 2896, total_rows: 3960
    progress: 73.131%
    wait time: 0:00:11.138937s
    




    row: 2979, total_rows: 3960
    progress: 75.227%
    wait time: 0:00:10.271621s
    

    row: 2980, total_rows: 3960
    progress: 75.253%
    wait time: 0:00:10.262644s
    

    row: 2981, total_rows: 3960
    progress: 75.278%
    wait time: 0:00:10.252349s
    

    row: 2982, total_rows: 3960
    progress: 75.303%
    wait time: 0:00:10.242053s
    

    row: 2983, total_rows: 3960
    progress: 75.328%
    wait time: 0:00:10.231429s
    

    row: 2984, total_rows: 3960
    progress: 75.354%
    wait time: 0:00:10.219495s
    

    row: 2985, total_rows: 3960
    progress: 75.379%
    wait time: 0:00:10.208220s
    

    row: 2986, total_rows: 3960
    progress: 75.404%
    wait time: 0:00:10.197273s
    

    row: 2987, total_rows: 3960
    progress: 75.429%
    wait time: 0:00:10.186327s
    

    row: 2988, total_rows: 3960
    progress: 75.455%
    wait time: 0:00:10.174732s
    

    row: 2989, total_rows: 3960
    progress: 75.48%
    wait time: 0:00:10.164766s
    



    row: 3072, total_rows: 3960
    progress: 77.576%
    wait time: 0:00:09.288925s
    

    row: 3073, total_rows: 3960
    progress: 77.601%
    wait time: 0:00:09.277468s
    

    row: 3074, total_rows: 3960
    progress: 77.626%
    wait time: 0:00:09.265725s
    

    row: 3075, total_rows: 3960
    progress: 77.652%
    wait time: 0:00:09.256290s
    

    row: 3076, total_rows: 3960
    progress: 77.677%
    wait time: 0:00:09.245989s
    

    row: 3077, total_rows: 3960
    progress: 77.702%
    wait time: 0:00:09.235114s
    

    row: 3078, total_rows: 3960
    progress: 77.727%
    wait time: 0:00:09.224526s
    

    row: 3079, total_rows: 3960
    progress: 77.753%
    wait time: 0:00:09.213366s
    

    row: 3080, total_rows: 3960
    progress: 77.778%
    wait time: 0:00:09.203352s
    

    row: 3081, total_rows: 3960
    progress: 77.803%
    wait time: 0:00:09.192766s
    

    row: 3082, total_rows: 3960
    progress: 77.828%
    wait time: 0:00:09.181324s
    


    row: 3164, total_rows: 3960
    progress: 79.899%
    wait time: 0:00:08.319386s
    

    row: 3165, total_rows: 3960
    progress: 79.924%
    wait time: 0:00:08.308321s
    

    row: 3166, total_rows: 3960
    progress: 79.949%
    wait time: 0:00:08.298261s
    

    row: 3167, total_rows: 3960
    progress: 79.975%
    wait time: 0:00:08.288702s
    

    row: 3168, total_rows: 3960
    progress: 80.0%
    wait time: 0:00:08.278389s
    

    row: 3169, total_rows: 3960
    progress: 80.025%
    wait time: 0:00:08.268575s
    

    row: 3170, total_rows: 3960
    progress: 80.051%
    wait time: 0:00:08.257512s
    

    row: 3171, total_rows: 3960
    progress: 80.076%
    wait time: 0:00:08.247697s
    

    row: 3172, total_rows: 3960
    progress: 80.101%
    wait time: 0:00:08.236884s
    

    row: 3173, total_rows: 3960
    progress: 80.126%
    wait time: 0:00:08.226321s
    

    row: 3174, total_rows: 3960
    progress: 80.152%
    wait time: 0:00:08.215511s
    




    row: 3257, total_rows: 3960
    progress: 82.247%
    wait time: 0:00:07.342623s
    

    row: 3258, total_rows: 3960
    progress: 82.273%
    wait time: 0:00:07.331869s
    

    row: 3259, total_rows: 3960
    progress: 82.298%
    wait time: 0:00:07.321762s
    

    row: 3260, total_rows: 3960
    progress: 82.323%
    wait time: 0:00:07.312298s
    

    row: 3261, total_rows: 3960
    progress: 82.348%
    wait time: 0:00:07.302402s
    

    row: 3262, total_rows: 3960
    progress: 82.374%
    wait time: 0:00:07.292075s
    

    row: 3263, total_rows: 3960
    progress: 82.399%
    wait time: 0:00:07.282176s
    

    row: 3264, total_rows: 3960
    progress: 82.424%
    wait time: 0:00:07.271848s
    

    row: 3265, total_rows: 3960
    progress: 82.449%
    wait time: 0:00:07.262159s
    

    row: 3266, total_rows: 3960
    progress: 82.475%
    wait time: 0:00:07.252042s
    

    row: 3267, total_rows: 3960
    progress: 82.5%
    wait time: 0:00:07.241286s
    




    row: 3350, total_rows: 3960
    progress: 84.596%
    wait time: 0:00:06.369721s
    

    row: 3351, total_rows: 3960
    progress: 84.621%
    wait time: 0:00:06.359746s
    

    row: 3352, total_rows: 3960
    progress: 84.646%
    wait time: 0:00:06.349588s
    

    row: 3353, total_rows: 3960
    progress: 84.672%
    wait time: 0:00:06.339428s
    

    row: 3354, total_rows: 3960
    progress: 84.697%
    wait time: 0:00:06.329086s
    

    row: 3355, total_rows: 3960
    progress: 84.722%
    wait time: 0:00:06.318745s
    

    row: 3356, total_rows: 3960
    progress: 84.747%
    wait time: 0:00:06.308402s
    

    row: 3357, total_rows: 3960
    progress: 84.773%
    wait time: 0:00:06.298419s
    

    row: 3358, total_rows: 3960
    progress: 84.798%
    wait time: 0:00:06.288614s
    

    row: 3359, total_rows: 3960
    progress: 84.823%
    wait time: 0:00:06.279701s
    

    row: 3360, total_rows: 3960
    progress: 84.848%
    wait time: 0:00:06.270425s
    


    row: 3445, total_rows: 3960
    progress: 86.995%
    wait time: 0:00:05.396852s
    

    row: 3446, total_rows: 3960
    progress: 87.02%
    wait time: 0:00:05.386899s
    

    row: 3447, total_rows: 3960
    progress: 87.045%
    wait time: 0:00:05.376647s
    

    row: 3448, total_rows: 3960
    progress: 87.071%
    wait time: 0:00:05.366096s
    

    row: 3449, total_rows: 3960
    progress: 87.096%
    wait time: 0:00:05.355694s
    

    row: 3450, total_rows: 3960
    progress: 87.121%
    wait time: 0:00:05.345439s
    

    row: 3451, total_rows: 3960
    progress: 87.146%
    wait time: 0:00:05.335331s
    

    row: 3452, total_rows: 3960
    progress: 87.172%
    wait time: 0:00:05.324338s
    

    row: 3453, total_rows: 3960
    progress: 87.197%
    wait time: 0:00:05.313787s
    

    row: 3454, total_rows: 3960
    progress: 87.222%
    wait time: 0:00:05.303091s
    

    row: 3455, total_rows: 3960
    progress: 87.247%
    wait time: 0:00:05.292103s
    



    row: 3545, total_rows: 3960
    progress: 89.52%
    wait time: 0:00:04.360170s
    

    row: 3546, total_rows: 3960
    progress: 89.545%
    wait time: 0:00:04.349371s
    

    row: 3547, total_rows: 3960
    progress: 89.571%
    wait time: 0:00:04.338342s
    

    row: 3548, total_rows: 3960
    progress: 89.596%
    wait time: 0:00:04.327896s
    

    row: 3549, total_rows: 3960
    progress: 89.621%
    wait time: 0:00:04.317450s
    

    row: 3550, total_rows: 3960
    progress: 89.646%
    wait time: 0:00:04.306541s
    

    row: 3551, total_rows: 3960
    progress: 89.672%
    wait time: 0:00:04.296096s
    

    row: 3552, total_rows: 3960
    progress: 89.697%
    wait time: 0:00:04.285650s
    

    row: 3553, total_rows: 3960
    progress: 89.722%
    wait time: 0:00:04.275204s
    

    row: 3554, total_rows: 3960
    progress: 89.747%
    wait time: 0:00:04.264186s
    

    row: 3555, total_rows: 3960
    progress: 89.773%
    wait time: 0:00:04.253969s
    



    row: 3644, total_rows: 3960
    progress: 92.02%
    wait time: 0:00:03.326612s
    

    row: 3645, total_rows: 3960
    progress: 92.045%
    wait time: 0:00:03.315867s
    

    row: 3646, total_rows: 3960
    progress: 92.071%
    wait time: 0:00:03.305210s
    

    row: 3647, total_rows: 3960
    progress: 92.096%
    wait time: 0:00:03.294553s
    

    row: 3648, total_rows: 3960
    progress: 92.121%
    wait time: 0:00:03.284240s
    

    row: 3649, total_rows: 3960
    progress: 92.146%
    wait time: 0:00:03.273584s
    

    row: 3650, total_rows: 3960
    progress: 92.172%
    wait time: 0:00:03.263270s
    

    row: 3651, total_rows: 3960
    progress: 92.197%
    wait time: 0:00:03.253038s
    

    row: 3652, total_rows: 3960
    progress: 92.222%
    wait time: 0:00:03.242636s
    

    row: 3653, total_rows: 3960
    progress: 92.247%
    wait time: 0:00:03.232148s
    

    row: 3654, total_rows: 3960
    progress: 92.273%
    wait time: 0:00:03.221325s
    



    row: 3747, total_rows: 3960
    progress: 94.621%
    wait time: 0:00:02.244448s
    

    row: 3748, total_rows: 3960
    progress: 94.646%
    wait time: 0:00:02.234108s
    

    row: 3749, total_rows: 3960
    progress: 94.672%
    wait time: 0:00:02.223596s
    

    row: 3750, total_rows: 3960
    progress: 94.697%
    wait time: 0:00:02.213084s
    

    row: 3751, total_rows: 3960
    progress: 94.722%
    wait time: 0:00:02.202739s
    

    row: 3752, total_rows: 3960
    progress: 94.747%
    wait time: 0:00:02.192392s
    

    row: 3753, total_rows: 3960
    progress: 94.773%
    wait time: 0:00:02.182099s
    

    row: 3754, total_rows: 3960
    progress: 94.798%
    wait time: 0:00:02.171528s
    

    row: 3755, total_rows: 3960
    progress: 94.823%
    wait time: 0:00:02.160793s
    

    row: 3756, total_rows: 3960
    progress: 94.848%
    wait time: 0:00:02.150333s
    

    row: 3757, total_rows: 3960
    progress: 94.874%
    wait time: 0:00:02.139871s
    


    row: 3844, total_rows: 3960
    progress: 97.071%
    wait time: 0:00:01.224890s
    

    row: 3845, total_rows: 3960
    progress: 97.096%
    wait time: 0:00:01.214285s
    

    row: 3846, total_rows: 3960
    progress: 97.121%
    wait time: 0:00:01.203798s
    

    row: 3847, total_rows: 3960
    progress: 97.146%
    wait time: 0:00:01.193252s
    

    row: 3848, total_rows: 3960
    progress: 97.172%
    wait time: 0:00:01.182676s
    

    row: 3849, total_rows: 3960
    progress: 97.197%
    wait time: 0:00:01.171985s
    

    row: 3850, total_rows: 3960
    progress: 97.222%
    wait time: 0:00:01.161583s
    

    row: 3851, total_rows: 3960
    progress: 97.247%
    wait time: 0:00:01.151064s
    

    row: 3852, total_rows: 3960
    progress: 97.273%
    wait time: 0:00:01.140432s
    

    row: 3853, total_rows: 3960
    progress: 97.298%
    wait time: 0:00:01.129913s
    

    row: 3854, total_rows: 3960
    progress: 97.323%
    wait time: 0:00:01.119393s
    


    row: 3935, total_rows: 3960
    progress: 99.369%
    wait time: 0:00:00.263832s
    

    row: 3936, total_rows: 3960
    progress: 99.394%
    wait time: 0:00:00.253276s
    

    row: 3937, total_rows: 3960
    progress: 99.419%
    wait time: 0:00:00.242731s
    

    row: 3938, total_rows: 3960
    progress: 99.444%
    wait time: 0:00:00.232180s
    

    row: 3939, total_rows: 3960
    progress: 99.47%
    wait time: 0:00:00.221634s
    

    row: 3940, total_rows: 3960
    progress: 99.495%
    wait time: 0:00:00.211087s
    

    row: 3941, total_rows: 3960
    progress: 99.52%
    wait time: 0:00:00.200540s
    

    row: 3942, total_rows: 3960
    progress: 99.545%
    wait time: 0:00:00.189997s
    

    row: 3943, total_rows: 3960
    progress: 99.571%
    wait time: 0:00:00.179452s
    

    row: 3944, total_rows: 3960
    progress: 99.596%
    wait time: 0:00:00.168886s
    

    row: 3945, total_rows: 3960
    progress: 99.621%
    wait time: 0:00:00.158332s
    



In [391]:
df_representation_v1_1 = pd.DataFrame(data_all_attributes).set_index("id").fillna(0)
df_representation_v1_1

Unnamed: 0_level_0,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,retro<&>num_tokens_upper,retro<&>prop_vowels,retro<&>len_max_rep_char,retro<&>max_char_fre_per_token(o),retro<&>max_char_fre_per_token(s),...,"linguistics<&>bigram<&>(',', 'evil')","linguistics<&>bigram<&>('evil', 'immoral')","linguistics<&>bigram<&>('immoral', 'disaster')","linguistics<&>trigram<&>('despicable', 'trump', ',')","linguistics<&>trigram<&>('policy', ',', 'campaign')","linguistics<&>trigram<&>('bigotry', '&', 'amp')","linguistics<&>trigram<&>('amp', ';', 'rancour')","linguistics<&>trigram<&>('close', ',', 'evil')","linguistics<&>trigram<&>(',', 'evil', 'immoral')","linguistics<&>trigram<&>('evil', 'immoral', 'disaster')"
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000,18,96,0,18,3,1,0.253165,2,2,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10001,23,119,0,25,7,2,0.329897,2,2,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10002,19,108,0,19,2,2,0.333333,2,3,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10003,24,134,0,23,1,1,0.315315,2,1,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10004,24,125,0,25,3,0,0.352941,2,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40855,24,112,0,23,2,1,0.325843,2,2,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40856,20,121,0,21,1,0,0.352941,2,1,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40857,23,139,0,23,2,1,0.316239,2,2,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40858,14,83,2,15,5,1,0.285714,2,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [392]:
pickle.dump(df_representation_v1_1, open("df_representation_v1_1.pickle", "wb"))
# df_representation = pickle.load(open("df_representation_v1_1.pickle", "rb"))

In [203]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, chi2

In [None]:
indexs = df_train[df_train["sen"] == sentiments[3]].index
indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin(["low", "high"]))].index

X = df_representation.loc[indexsLH]#[rel_cols]
y = df_train.loc[X.index]["int"]

dic_label_count = y.value_counts().to_dict()
min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
index_label_1 = y[y==min_label].index
oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
X_res, y_res = X.copy(), y.copy()

for step in range(oversampling_steps):
    new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
    copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
    copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
    X_res = pd.concat([X_res, copied_sub_X], axis=0)
    y_res = pd.concat([y_res, copied_sub_y], axis=0)

X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
selector = SelectKBest(chi2, k=X.shape[1])
X_res_ = X_res - X_res.min()
selector.fit(X_res_, y_res)
scores_selector = {col: selector.scores_[i] if str(selector.scores_[i]) != "nan" else 0 for i, col in enumerate(X.columns.tolist())}
ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: y[1], reverse=True)]
ranked_cols


In [20]:
indexs = df_train[df_train["sen"] == sentiments[3]].index
scores_selector = {col: [] for col in df_representation.columns.tolist()}
comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
for l1 in intensities:
    for l2 in intensities:
        if l1 < l2:
            indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

            X = df_representation.loc[indexsLH]#[rel_cols]
            y = df_train.loc[X.index]["int"]

            dic_label_count = y.value_counts().to_dict()
            min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
            max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
            index_label_1 = y[y==min_label].index
            oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
            X_res, y_res = X.copy(), y.copy()

            for step in range(oversampling_steps):
                new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                X_res = pd.concat([X_res, copied_sub_X], axis=0)
                y_res = pd.concat([y_res, copied_sub_y], axis=0)

            X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
            selector = SelectKBest(chi2, k=X.shape[1])
            X_res_ = X_res - X_res.min()
            selector.fit(X_res_, y_res)
            for i, col in enumerate(X.columns.tolist()):
                scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]
comp_l, ranked_cols

In [28]:
ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]
comp_l, ranked_cols

([('low', 'medium'), ('high', 'low'), ('high', 'medium')],
 ['linguistics<&>lemma<&>depress',
  'linguistics<&>lemma<&>depressing',
  'linguistics<&>lemma<&>grim',
  'linguistics<&>lemma<&>depression',
  'linguistics<&>lemma<&>sadness',
  'linguistics<&>lemma<&>honestly',
  'linguistics<&>lemma<&>️',
  'linguistics<&>shape<&>️',
  'linguistics<&>tag<&>-RRB-',
  'linguistics<&>lemma<&>frown',
  'linguistics<&>lemma<&>)',
  'linguistics<&>shape<&>)',
  'linguistics<&>tag<&>-LRB-',
  'linguistics<&>lemma<&>either',
  'linguistics<&>lemma<&>sadly',
  'linguistics<&>lemma<&>true',
  'linguistics<&>lemma<&>(',
  'linguistics<&>shape<&>(',
  'linguistics<&>lemma<&>something',
  'linguistics<&>lemma<&>sad',
  'linguistics<&>lemma<&>blue',
  'emoji<&>_face_with_tears_of_joy_',
  'linguistics<&>lemma<&>soul',
  'linguistics<&>lemma<&>fucking',
  'linguistics<&>lemma<&>tired',
  'emoji<&>_disappointed_face_',
  'linguistics<&>lemma<&>😞',
  'linguistics<&>shape<&>😞',
  'linguistics<&>lemma<&>body'

In [204]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import precision_recall_fscore_support

In [50]:
sen = sentiments[0]

# indexs = df_train[df_train["sen"] == sen].index
# indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin(["low", "high"]))].index

# X = df_representation.loc[indexsLH]#[rel_cols]
# y = df_train.loc[X.index]["int"]

# dic_label_count = y.value_counts().to_dict()
# min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
# max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
# index_label_1 = y[y==min_label].index
# oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
# X_res, y_res = X.copy(), y.copy()

# for step in range(oversampling_steps):
#     new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
#     copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
#     copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
#     X_res = pd.concat([X_res, copied_sub_X], axis=0)
#     y_res = pd.concat([y_res, copied_sub_y], axis=0)

# X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
# selector = SelectKBest(chi2, k=X.shape[1])
# X_res_ = X_res - X_res.min()
# selector.fit(X_res_, y_res)
# scores_selector = {col: selector.scores_[i] if str(selector.scores_[i]) != "nan" else 0 for i, col in enumerate(X.columns.tolist())}
# ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda x: x[1], reverse=True)]
# ranked_cols

indexs = df_train[df_train["sen"] == sen].index
scores_selector = {col: [] for col in df_representation.columns.tolist()}
comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
for l1 in intensities:
    for l2 in intensities:
        if l1 < l2:
            indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

            X = df_representation.loc[indexsLH]#[rel_cols]
            y = df_train.loc[X.index]["int"]

            dic_label_count = y.value_counts().to_dict()
            min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
            max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
            index_label_1 = y[y==min_label].index
            oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
            X_res, y_res = X.copy(), y.copy()

            for step in range(oversampling_steps):
                new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                X_res = pd.concat([X_res, copied_sub_X], axis=0)
                y_res = pd.concat([y_res, copied_sub_y], axis=0)

            X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
            selector = SelectKBest(chi2, k=X.shape[1])
            X_res_ = X_res - X_res.min()
            selector.fit(X_res_, y_res)
            for i, col in enumerate(X.columns.tolist()):
                scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]
# comp_l, ranked_cols

f1_weight = []

for num_cols in range(1, len(ranked_cols), 10):
    X = df_representation.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(f1_weight[-1])

[1, 0.10424886540056362, 0.019309878014991615]
[11, 0.6411688350633156, 0.029228674134960844]
[21, 0.6609119066959994, 0.02203836650268399]
[31, 0.6588559737273805, 0.012008818616047677]
[41, 0.66215257900806, 0.004897267596503262]
[51, 0.6819780207011394, 0.018291375701466977]
[61, 0.6891776153090093, 0.018405652049383794]
[71, 0.6948351734809612, 0.026925106785193546]
[81, 0.698515744491627, 0.030620007364956892]
[91, 0.6995012251275626, 0.029302400431879483]
[101, 0.7001325185391131, 0.029258873024053687]
[111, 0.7115093975305629, 0.02715503116164831]
[121, 0.710112986921863, 0.02801785329972967]
[131, 0.7000463757225261, 0.03418087393899213]
[141, 0.7062307214260791, 0.03812506665278833]
[151, 0.7079706264171842, 0.03664070574426643]
[161, 0.7036688956016317, 0.038892980889282364]
[171, 0.7010408157744935, 0.04568406926917083]
[181, 0.6933352645072454, 0.032473129834692624]
[191, 0.6942896187745584, 0.04011906590259415]
[201, 0.6928814435108482, 0.043088935779638876]
[211, 0.686060

KeyboardInterrupt: 

In [51]:
best_f1 = sorted(f1_weight, key=lambda x: x[1], reverse=True)[0][0]
best_f1

721

In [52]:
fine_f1_weight = []

for num_cols in range(best_f1-10, best_f1+10, 1):
    X = df_representation.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    fine_f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(fine_f1_weight[-1])

[711, 0.7119090707998, 0.027855017461954637]
[712, 0.7136188549235528, 0.025519407164058552]
[713, 0.7136188549235528, 0.025519407164058552]
[714, 0.7136188549235528, 0.025519407164058552]
[715, 0.7147670371058765, 0.02471609889494386]
[716, 0.7152799717726143, 0.02237807015865809]
[717, 0.7152205637645912, 0.02236798949000108]
[718, 0.7152205637645912, 0.02236798949000108]
[719, 0.7152205637645912, 0.02236798949000108]
[720, 0.7139066277577931, 0.023421575299425103]
[721, 0.7139066277577931, 0.023421575299425103]
[722, 0.7130921410103506, 0.02330435003286564]
[723, 0.7130204092882748, 0.02329945517365635]
[724, 0.7130204092882748, 0.02329945517365635]
[725, 0.7130204092882748, 0.02329945517365635]
[726, 0.7138116315671187, 0.022539456984139863]
[727, 0.7138116315671187, 0.022539456984139863]
[728, 0.7138116315671187, 0.022539456984139863]
[729, 0.7138116315671187, 0.022539456984139863]
[730, 0.7138116315671187, 0.022539456984139863]


In [53]:
fine_best_f1 = sorted(fine_f1_weight, key=lambda x: x[1], reverse=True)[0][0]
fine_best_f1#0.577

716

In [54]:
pickle.dump(ranked_cols[:fine_best_f1+1], open(f"cols_selected_{sen}_v1.pickle", "wb"))

In [378]:
sen = sentiments[0]
print(sen)
cols_selected_sen = pickle.load(open(f"cols_selected_{sen}_v4.pickle", "rb"))
print(len(cols_selected_sen))
cols_selected_sen

anger
718


['linguistics<&>lemma<&>fume',
 'linguistics<&>lemma<&>fucking',
 'linguistics<&>lemma<&>piss',
 'linguistics<&>lemma<&>furious',
 'linguistics<&>lemma<&>17',
 'linguistics<&>lemma<&>kik',
 'linguistics<&>lemma<&>kikme',
 'linguistics<&>lemma<&>destroy',
 'linguistics<&>lemma<&>wtf',
 'linguistics<&>lemma<&>fuck',
 'linguistics<&>lemma<&>incense',
 'linguistics<&>lemma<&>because',
 'linguistics<&>lemma<&>happiness',
 'linguistics<&>lemma<&>cheap',
 'linguistics<&>lemma<&>disgusted',
 'linguistics<&>lemma<&>natalie',
 'linguistics<&>lemma<&>bitch',
 'linguistics<&>tag<&>JJR',
 'linguistics<&>lemma<&>look',
 'linguistics<&>lemma<&>snapchat',
 'linguistics<&>lemma<&>smile',
 'linguistics<&>lemma<&>song',
 'linguistics<&>lemma<&>fun',
 'linguistics<&>lemma<&>whole',
 'linguistics<&>lemma<&>then',
 'linguistics<&>lemma<&>outrage',
 'linguistics<&>lemma<&>dragrace',
 'linguistics<&>lemma<&>allstars2',
 'linguistics<&>shape<&>XxxXxxxxd',
 'linguistics<&>lemma<&>episode',
 'linguistics<&>lemma

In [205]:
!pip install torchvision



In [206]:
!pip install transformers



In [207]:
import torch
from transformers import AutoModel, AutoTokenizer

bertweet = AutoModel.from_pretrained("vinai/bertweet-base")

# For transformers v4.x+:
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False, normalization=True)

# For transformers v3.x:
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")


Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
emoji is not installed, thus not converting emoticons or emojis into text. Please install emoji: pip3 install emoji
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [30]:
data_bertweet = []
col_names = [f"BERTweet_{i}" for i in range(768)]
for k, ix in enumerate(df_train.index):
    print(k+1, df_train.shape[0], 100*(k+1)/df_train.shape[0])
    tweet = df_train.loc[ix]["text"]
    input_ids = torch.tensor([tokenizer.encode(tweet)])
    with torch.no_grad():
        outputs = bertweet(input_ids)
        hidden_states = outputs[0]

    token_embeddings = np.array([ll.numpy() for ll in hidden_states[0]])
    sentence_embedding = np.mean(token_embeddings, axis=0)
    o = {"id": ix}
    o = {**o, **dict(zip(col_names, sentence_embedding))}
    data_bertweet.append(o)

1 3960 0.025252525252525252
2 3960 0.050505050505050504
3 3960 0.07575757575757576
4 3960 0.10101010101010101
5 3960 0.12626262626262627
6 3960 0.15151515151515152
7 3960 0.17676767676767677
8 3960 0.20202020202020202
9 3960 0.22727272727272727
10 3960 0.25252525252525254
11 3960 0.2777777777777778
12 3960 0.30303030303030304
13 3960 0.3282828282828283
14 3960 0.35353535353535354
15 3960 0.3787878787878788
16 3960 0.40404040404040403
17 3960 0.4292929292929293
18 3960 0.45454545454545453
19 3960 0.4797979797979798
20 3960 0.5050505050505051
21 3960 0.5303030303030303
22 3960 0.5555555555555556
23 3960 0.5808080808080808
24 3960 0.6060606060606061
25 3960 0.6313131313131313
26 3960 0.6565656565656566
27 3960 0.6818181818181818
28 3960 0.7070707070707071
29 3960 0.7323232323232324
30 3960 0.7575757575757576
31 3960 0.7828282828282829
32 3960 0.8080808080808081
33 3960 0.8333333333333334
34 3960 0.8585858585858586
35 3960 0.8838383838383839
36 3960 0.9090909090909091
37 3960 0.93434343434

307 3960 7.752525252525253
308 3960 7.777777777777778
309 3960 7.803030303030303
310 3960 7.828282828282828
311 3960 7.853535353535354
312 3960 7.878787878787879
313 3960 7.904040404040404
314 3960 7.929292929292929
315 3960 7.954545454545454
316 3960 7.97979797979798
317 3960 8.005050505050505
318 3960 8.030303030303031
319 3960 8.055555555555555
320 3960 8.080808080808081
321 3960 8.106060606060606
322 3960 8.131313131313131
323 3960 8.156565656565656
324 3960 8.181818181818182
325 3960 8.207070707070708
326 3960 8.232323232323232
327 3960 8.257575757575758
328 3960 8.282828282828282
329 3960 8.308080808080808
330 3960 8.333333333333334
331 3960 8.358585858585858
332 3960 8.383838383838384
333 3960 8.409090909090908
334 3960 8.434343434343434
335 3960 8.45959595959596
336 3960 8.484848484848484
337 3960 8.51010101010101
338 3960 8.535353535353535
339 3960 8.56060606060606
340 3960 8.585858585858587
341 3960 8.61111111111111
342 3960 8.636363636363637
343 3960 8.66161616161616
344 396

609 3960 15.378787878787879
610 3960 15.404040404040405
611 3960 15.429292929292929
612 3960 15.454545454545455
613 3960 15.47979797979798
614 3960 15.505050505050505
615 3960 15.530303030303031
616 3960 15.555555555555555
617 3960 15.580808080808081
618 3960 15.606060606060606
619 3960 15.631313131313131
620 3960 15.656565656565656
621 3960 15.681818181818182
622 3960 15.707070707070708
623 3960 15.732323232323232
624 3960 15.757575757575758
625 3960 15.782828282828282
626 3960 15.808080808080808
627 3960 15.833333333333334
628 3960 15.858585858585858
629 3960 15.883838383838384
630 3960 15.909090909090908
631 3960 15.934343434343434
632 3960 15.95959595959596
633 3960 15.984848484848484
634 3960 16.01010101010101
635 3960 16.035353535353536
636 3960 16.060606060606062
637 3960 16.085858585858585
638 3960 16.11111111111111
639 3960 16.136363636363637
640 3960 16.161616161616163
641 3960 16.18686868686869
642 3960 16.21212121212121
643 3960 16.237373737373737
644 3960 16.26262626262626

907 3960 22.904040404040405
908 3960 22.92929292929293
909 3960 22.954545454545453
910 3960 22.97979797979798
911 3960 23.005050505050505
912 3960 23.03030303030303
913 3960 23.055555555555557
914 3960 23.08080808080808
915 3960 23.106060606060606
916 3960 23.13131313131313
917 3960 23.156565656565657
918 3960 23.181818181818183
919 3960 23.207070707070706
920 3960 23.232323232323232
921 3960 23.257575757575758
922 3960 23.282828282828284
923 3960 23.30808080808081
924 3960 23.333333333333332
925 3960 23.358585858585858
926 3960 23.383838383838384
927 3960 23.40909090909091
928 3960 23.434343434343436
929 3960 23.45959595959596
930 3960 23.484848484848484
931 3960 23.51010101010101
932 3960 23.535353535353536
933 3960 23.560606060606062
934 3960 23.585858585858585
935 3960 23.61111111111111
936 3960 23.636363636363637
937 3960 23.661616161616163
938 3960 23.68686868686869
939 3960 23.71212121212121
940 3960 23.737373737373737
941 3960 23.762626262626263
942 3960 23.78787878787879
943 3

1199 3960 30.27777777777778
1200 3960 30.303030303030305
1201 3960 30.328282828282827
1202 3960 30.353535353535353
1203 3960 30.37878787878788
1204 3960 30.404040404040405
1205 3960 30.42929292929293
1206 3960 30.454545454545453
1207 3960 30.47979797979798
1208 3960 30.505050505050505
1209 3960 30.53030303030303
1210 3960 30.555555555555557
1211 3960 30.58080808080808
1212 3960 30.606060606060606
1213 3960 30.63131313131313
1214 3960 30.656565656565657
1215 3960 30.681818181818183
1216 3960 30.707070707070706
1217 3960 30.732323232323232
1218 3960 30.757575757575758
1219 3960 30.782828282828284
1220 3960 30.80808080808081
1221 3960 30.833333333333332
1222 3960 30.858585858585858
1223 3960 30.883838383838384
1224 3960 30.90909090909091
1225 3960 30.934343434343436
1226 3960 30.95959595959596
1227 3960 30.984848484848484
1228 3960 31.01010101010101
1229 3960 31.035353535353536
1230 3960 31.060606060606062
1231 3960 31.085858585858585
1232 3960 31.11111111111111
1233 3960 31.1363636363636

1492 3960 37.676767676767675
1493 3960 37.7020202020202
1494 3960 37.72727272727273
1495 3960 37.75252525252525
1496 3960 37.77777777777778
1497 3960 37.803030303030305
1498 3960 37.82828282828283
1499 3960 37.85353535353536
1500 3960 37.878787878787875
1501 3960 37.9040404040404
1502 3960 37.92929292929293
1503 3960 37.95454545454545
1504 3960 37.97979797979798
1505 3960 38.005050505050505
1506 3960 38.03030303030303
1507 3960 38.05555555555556
1508 3960 38.08080808080808
1509 3960 38.10606060606061
1510 3960 38.13131313131313
1511 3960 38.156565656565654
1512 3960 38.18181818181818
1513 3960 38.207070707070706
1514 3960 38.23232323232323
1515 3960 38.25757575757576
1516 3960 38.282828282828284
1517 3960 38.30808080808081
1518 3960 38.333333333333336
1519 3960 38.35858585858586
1520 3960 38.38383838383838
1521 3960 38.40909090909091
1522 3960 38.43434343434343
1523 3960 38.45959595959596
1524 3960 38.484848484848484
1525 3960 38.51010101010101
1526 3960 38.535353535353536
1527 3960 38

1786 3960 45.101010101010104
1787 3960 45.12626262626262
1788 3960 45.15151515151515
1789 3960 45.176767676767675
1790 3960 45.2020202020202
1791 3960 45.22727272727273
1792 3960 45.25252525252525
1793 3960 45.27777777777778
1794 3960 45.303030303030305
1795 3960 45.32828282828283
1796 3960 45.35353535353536
1797 3960 45.378787878787875
1798 3960 45.4040404040404
1799 3960 45.42929292929293
1800 3960 45.45454545454545
1801 3960 45.47979797979798
1802 3960 45.505050505050505
1803 3960 45.53030303030303
1804 3960 45.55555555555556
1805 3960 45.58080808080808
1806 3960 45.60606060606061
1807 3960 45.63131313131313
1808 3960 45.656565656565654
1809 3960 45.68181818181818
1810 3960 45.707070707070706
1811 3960 45.73232323232323
1812 3960 45.75757575757576
1813 3960 45.782828282828284
1814 3960 45.80808080808081
1815 3960 45.833333333333336
1816 3960 45.85858585858586
1817 3960 45.88383838383838
1818 3960 45.90909090909091
1819 3960 45.93434343434343
1820 3960 45.95959595959596
1821 3960 45.

2078 3960 52.474747474747474
2079 3960 52.5
2080 3960 52.525252525252526
2081 3960 52.55050505050505
2082 3960 52.57575757575758
2083 3960 52.601010101010104
2084 3960 52.62626262626262
2085 3960 52.65151515151515
2086 3960 52.676767676767675
2087 3960 52.7020202020202
2088 3960 52.72727272727273
2089 3960 52.75252525252525
2090 3960 52.77777777777778
2091 3960 52.803030303030305
2092 3960 52.82828282828283
2093 3960 52.85353535353536
2094 3960 52.878787878787875
2095 3960 52.9040404040404
2096 3960 52.92929292929293
2097 3960 52.95454545454545
2098 3960 52.97979797979798
2099 3960 53.005050505050505
2100 3960 53.03030303030303
2101 3960 53.05555555555556
2102 3960 53.08080808080808
2103 3960 53.10606060606061
2104 3960 53.13131313131313
2105 3960 53.156565656565654
2106 3960 53.18181818181818
2107 3960 53.207070707070706
2108 3960 53.23232323232323
2109 3960 53.25757575757576
2110 3960 53.282828282828284
2111 3960 53.30808080808081
2112 3960 53.333333333333336
2113 3960 53.35858585858

2373 3960 59.92424242424242
2374 3960 59.94949494949495
2375 3960 59.974747474747474
2376 3960 60.0
2377 3960 60.025252525252526
2378 3960 60.05050505050505
2379 3960 60.07575757575758
2380 3960 60.101010101010104
2381 3960 60.12626262626262
2382 3960 60.15151515151515
2383 3960 60.176767676767675
2384 3960 60.2020202020202
2385 3960 60.22727272727273
2386 3960 60.25252525252525
2387 3960 60.27777777777778
2388 3960 60.303030303030305
2389 3960 60.32828282828283
2390 3960 60.35353535353536
2391 3960 60.378787878787875
2392 3960 60.4040404040404
2393 3960 60.42929292929293
2394 3960 60.45454545454545
2395 3960 60.47979797979798
2396 3960 60.505050505050505
2397 3960 60.53030303030303
2398 3960 60.55555555555556
2399 3960 60.58080808080808
2400 3960 60.60606060606061
2401 3960 60.63131313131313
2402 3960 60.656565656565654
2403 3960 60.68181818181818
2404 3960 60.707070707070706
2405 3960 60.73232323232323
2406 3960 60.75757575757576
2407 3960 60.782828282828284
2408 3960 60.808080808080

2670 3960 67.42424242424242
2671 3960 67.44949494949495
2672 3960 67.47474747474747
2673 3960 67.5
2674 3960 67.52525252525253
2675 3960 67.55050505050505
2676 3960 67.57575757575758
2677 3960 67.6010101010101
2678 3960 67.62626262626263
2679 3960 67.65151515151516
2680 3960 67.67676767676768
2681 3960 67.70202020202021
2682 3960 67.72727272727273
2683 3960 67.75252525252525
2684 3960 67.77777777777777
2685 3960 67.8030303030303
2686 3960 67.82828282828282
2687 3960 67.85353535353535
2688 3960 67.87878787878788
2689 3960 67.9040404040404
2690 3960 67.92929292929293
2691 3960 67.95454545454545
2692 3960 67.97979797979798
2693 3960 68.0050505050505
2694 3960 68.03030303030303
2695 3960 68.05555555555556
2696 3960 68.08080808080808
2697 3960 68.10606060606061
2698 3960 68.13131313131314
2699 3960 68.15656565656566
2700 3960 68.18181818181819
2701 3960 68.20707070707071
2702 3960 68.23232323232324
2703 3960 68.25757575757575
2704 3960 68.28282828282828
2705 3960 68.3080808080808
2706 3960 

2967 3960 74.92424242424242
2968 3960 74.94949494949495
2969 3960 74.97474747474747
2970 3960 75.0
2971 3960 75.02525252525253
2972 3960 75.05050505050505
2973 3960 75.07575757575758
2974 3960 75.1010101010101
2975 3960 75.12626262626263
2976 3960 75.15151515151516
2977 3960 75.17676767676768
2978 3960 75.20202020202021
2979 3960 75.22727272727273
2980 3960 75.25252525252525
2981 3960 75.27777777777777
2982 3960 75.3030303030303
2983 3960 75.32828282828282
2984 3960 75.35353535353535
2985 3960 75.37878787878788
2986 3960 75.4040404040404
2987 3960 75.42929292929293
2988 3960 75.45454545454545
2989 3960 75.47979797979798
2990 3960 75.5050505050505
2991 3960 75.53030303030303
2992 3960 75.55555555555556
2993 3960 75.58080808080808
2994 3960 75.60606060606061
2995 3960 75.63131313131314
2996 3960 75.65656565656566
2997 3960 75.68181818181819
2998 3960 75.70707070707071
2999 3960 75.73232323232324
3000 3960 75.75757575757575
3001 3960 75.78282828282828
3002 3960 75.8080808080808
3003 3960 

3264 3960 82.42424242424242
3265 3960 82.44949494949495
3266 3960 82.47474747474747
3267 3960 82.5
3268 3960 82.52525252525253
3269 3960 82.55050505050505
3270 3960 82.57575757575758
3271 3960 82.6010101010101
3272 3960 82.62626262626263
3273 3960 82.65151515151516
3274 3960 82.67676767676768
3275 3960 82.70202020202021
3276 3960 82.72727272727273
3277 3960 82.75252525252525
3278 3960 82.77777777777777
3279 3960 82.8030303030303
3280 3960 82.82828282828282
3281 3960 82.85353535353535
3282 3960 82.87878787878788
3283 3960 82.9040404040404
3284 3960 82.92929292929293
3285 3960 82.95454545454545
3286 3960 82.97979797979798
3287 3960 83.0050505050505
3288 3960 83.03030303030303
3289 3960 83.05555555555556
3290 3960 83.08080808080808
3291 3960 83.10606060606061
3292 3960 83.13131313131314
3293 3960 83.15656565656566
3294 3960 83.18181818181819
3295 3960 83.20707070707071
3296 3960 83.23232323232324
3297 3960 83.25757575757575
3298 3960 83.28282828282828
3299 3960 83.3080808080808
3300 3960 

3560 3960 89.8989898989899
3561 3960 89.92424242424242
3562 3960 89.94949494949495
3563 3960 89.97474747474747
3564 3960 90.0
3565 3960 90.02525252525253
3566 3960 90.05050505050505
3567 3960 90.07575757575758
3568 3960 90.1010101010101
3569 3960 90.12626262626263
3570 3960 90.15151515151516
3571 3960 90.17676767676768
3572 3960 90.20202020202021
3573 3960 90.22727272727273
3574 3960 90.25252525252525
3575 3960 90.27777777777777
3576 3960 90.3030303030303
3577 3960 90.32828282828282
3578 3960 90.35353535353535
3579 3960 90.37878787878788
3580 3960 90.4040404040404
3581 3960 90.42929292929293
3582 3960 90.45454545454545
3583 3960 90.47979797979798
3584 3960 90.5050505050505
3585 3960 90.53030303030303
3586 3960 90.55555555555556
3587 3960 90.58080808080808
3588 3960 90.60606060606061
3589 3960 90.63131313131314
3590 3960 90.65656565656566
3591 3960 90.68181818181819
3592 3960 90.70707070707071
3593 3960 90.73232323232324
3594 3960 90.75757575757575
3595 3960 90.78282828282828
3596 3960 

3856 3960 97.37373737373737
3857 3960 97.3989898989899
3858 3960 97.42424242424242
3859 3960 97.44949494949495
3860 3960 97.47474747474747
3861 3960 97.5
3862 3960 97.52525252525253
3863 3960 97.55050505050505
3864 3960 97.57575757575758
3865 3960 97.6010101010101
3866 3960 97.62626262626263
3867 3960 97.65151515151516
3868 3960 97.67676767676768
3869 3960 97.70202020202021
3870 3960 97.72727272727273
3871 3960 97.75252525252525
3872 3960 97.77777777777777
3873 3960 97.8030303030303
3874 3960 97.82828282828282
3875 3960 97.85353535353535
3876 3960 97.87878787878788
3877 3960 97.9040404040404
3878 3960 97.92929292929293
3879 3960 97.95454545454545
3880 3960 97.97979797979798
3881 3960 98.0050505050505
3882 3960 98.03030303030303
3883 3960 98.05555555555556
3884 3960 98.08080808080808
3885 3960 98.10606060606061
3886 3960 98.13131313131314
3887 3960 98.15656565656566
3888 3960 98.18181818181819
3889 3960 98.20707070707071
3890 3960 98.23232323232324
3891 3960 98.25757575757575
3892 3960 

In [31]:
# pickle.dump(pd.DataFrame(data_bertweet).set_index("id"), open("df_representation_v2.pickle", "wb"))

In [57]:
df_representation_v2 = pickle.load(open("df_representation_v2.pickle", "rb"))

In [413]:
df_representation_v2

Unnamed: 0_level_0,BERTweet_0,BERTweet_1,BERTweet_2,BERTweet_3,BERTweet_4,BERTweet_5,BERTweet_6,BERTweet_7,BERTweet_8,BERTweet_9,...,BERTweet_758,BERTweet_759,BERTweet_760,BERTweet_761,BERTweet_762,BERTweet_763,BERTweet_764,BERTweet_765,BERTweet_766,BERTweet_767
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000,0.011935,-0.122075,0.029030,0.120095,0.093651,0.022820,0.006946,0.168774,0.282526,-0.009636,...,0.091612,0.013699,0.048113,0.180316,0.025295,-0.094723,0.094302,-0.039285,0.234481,-0.249347
10001,0.019873,-0.154840,0.128666,-0.013688,0.124787,-0.060633,0.029985,0.192368,0.358541,-0.000735,...,0.061139,0.057788,0.016066,0.114631,-0.075937,0.161129,0.088782,0.099523,0.195573,-0.094360
10002,0.089303,0.005199,0.056339,0.029113,0.114146,0.105520,0.041722,0.180096,0.075384,0.047837,...,0.058741,-0.221301,0.011673,0.277279,0.015945,0.010225,0.090561,0.177221,0.102671,-0.183728
10003,-0.174818,-0.169233,0.108006,0.089053,0.113685,-0.024810,0.045306,0.109823,0.190615,-0.004136,...,0.041260,0.074323,0.122148,0.129413,0.010133,-0.035607,0.095457,0.020883,0.222822,-0.049975
10004,0.016567,-0.013723,0.086389,-0.020837,0.122210,0.105154,0.083129,0.222264,0.327119,0.121932,...,0.152741,-0.128246,0.041543,0.219807,0.010124,0.019276,0.062688,0.044342,0.058079,-0.121317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40855,0.025625,-0.161436,0.085601,0.067647,0.092613,0.041396,0.085973,0.192643,0.252361,-0.007928,...,0.094263,0.087653,-0.054003,-0.014100,-0.015075,0.043808,0.258219,-0.013177,0.014293,0.006944
40856,-0.107447,0.002433,0.046188,0.105640,0.028643,0.141425,-0.002773,0.256521,0.372382,0.066727,...,0.090047,-0.136675,-0.018746,-0.072087,0.148523,-0.059273,0.200686,0.004628,0.182843,-0.168076
40857,-0.043438,0.011180,0.206375,0.071783,0.163780,-0.062001,-0.111886,0.200515,0.215893,0.117610,...,0.214575,-0.152968,-0.036713,0.054679,-0.097778,-0.037324,0.141307,0.059210,0.080930,-0.205299
40858,-0.153162,-0.090465,0.118767,0.049426,0.058023,0.186721,-0.056635,0.212744,0.232755,0.131198,...,-0.018443,-0.159911,0.110586,0.067621,-0.096703,0.073693,0.085268,-0.123595,0.264524,0.105249


In [85]:
sen = sentiments[3]

# indexs = df_train[df_train["sen"] == sen].index
# indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin(["low", "high"]))].index

# X = df_representation_v2.loc[indexsLH]#[rel_cols]
# y = df_train.loc[X.index]["int"]

# dic_label_count = y.value_counts().to_dict()
# min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
# max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
# index_label_1 = y[y==min_label].index
# oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1

# X_res, y_res = X.copy(), y.copy()
# for step in range(oversampling_steps):
#     new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
#     copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
#     copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
#     X_res = pd.concat([X_res, copied_sub_X], axis=0)
#     y_res = pd.concat([y_res, copied_sub_y], axis=0)

# X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
# selector = SelectKBest(chi2, k=X.shape[1])
# X_res_ = X_res - X_res.min()
# selector.fit(X_res_, y_res)
# scores_selector = {col: selector.scores_[i] if str(selector.scores_[i]) != "nan" else 0 for i, col in enumerate(X.columns.tolist())}
# ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda x: x[1], reverse=True)]
# print(ranked_cols)

indexs = df_train[df_train["sen"] == sen].index
scores_selector = {col: [] for col in df_representation_v2.columns.tolist()}
comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
for l1 in intensities:
    for l2 in intensities:
        if l1 < l2:
            indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

            X = df_representation_v2.loc[indexsLH]#[rel_cols]
            y = df_train.loc[X.index]["int"]

            dic_label_count = y.value_counts().to_dict()
            min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
            max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
            index_label_1 = y[y==min_label].index
            oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
            X_res, y_res = X.copy(), y.copy()

            for step in range(oversampling_steps):
                new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                X_res = pd.concat([X_res, copied_sub_X], axis=0)
                y_res = pd.concat([y_res, copied_sub_y], axis=0)

            X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
            selector = SelectKBest(chi2, k=X.shape[1])
            X_res_ = X_res - X_res.min()
            selector.fit(X_res_, y_res)
            for i, col in enumerate(X.columns.tolist()):
                scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]

f1_weight = []

for num_cols in range(1, len(ranked_cols)+1, 10):
    X = df_representation_v2.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(f1_weight[-1])

[1, 0.4266742580862261, 0.029000652362772134]
[11, 0.5117476531712855, 0.019080276048811858]
[21, 0.5691244234970079, 0.04990272605701973]
[31, 0.595363860354104, 0.06558170336976499]
[41, 0.6146930984747271, 0.0577397523311529]
[51, 0.6061797724110066, 0.051271561695346364]
[61, 0.6217853427952491, 0.05838768871150769]
[71, 0.6232044194188641, 0.05301549697307111]
[81, 0.6214658103794599, 0.05771478690843295]
[91, 0.624423847601709, 0.044536946833991856]
[101, 0.6262091752425574, 0.028067527409609536]
[111, 0.6185092326001929, 0.036389237557653296]
[121, 0.6235964982309857, 0.03725058865965338]
[131, 0.6284264570252145, 0.03253518489236862]
[141, 0.6195900873245852, 0.0296541695143281]
[151, 0.617291930346149, 0.03715468479851581]
[161, 0.6260251365223966, 0.03725010209419756]
[171, 0.6226468402527421, 0.045070654836315664]
[181, 0.608469030245583, 0.053909784873922874]
[191, 0.6115024692531071, 0.053040255651115885]
[201, 0.6024820168684965, 0.053295301481507115]
[211, 0.610737709918

In [86]:
best_f1 = sorted(f1_weight, key=lambda x: x[1], reverse=True)[0][0]
best_f1

231

In [87]:
fine_f1_weight = []

for num_cols in range(best_f1-10, best_f1+10, 1):
    X = df_representation_v2.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    fine_f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(fine_f1_weight[-1])

[221, 0.6161255963718444, 0.03959949641766339]
[222, 0.6186198803002875, 0.036407568095489805]
[223, 0.6138463310886813, 0.043263347293339326]
[224, 0.6143611228959699, 0.03579301381324165]
[225, 0.6167498721827913, 0.03468169668907546]
[226, 0.622951465955635, 0.026664902471686684]
[227, 0.6229226948672073, 0.031095527637208215]
[228, 0.6239504164751969, 0.03321562044884642]
[229, 0.6255463082581119, 0.03356620439571124]
[230, 0.6327171768763874, 0.03932064083309087]
[231, 0.6339039135605046, 0.04073463436585979]
[232, 0.6327304831336775, 0.039429697311889514]
[233, 0.6327725983950642, 0.03841928757799965]
[234, 0.6269059225585256, 0.03663145007951668]
[235, 0.6259100504572533, 0.04273009908116228]
[236, 0.6268939376593157, 0.04169930241562819]
[237, 0.6262862090433599, 0.043669043514568004]
[238, 0.6265687269378318, 0.04438889611066611]
[239, 0.6304088107130051, 0.03911324370210424]
[240, 0.6292573854649839, 0.03802969201016805]


In [88]:
fine_best_f1 = sorted(fine_f1_weight, key=lambda x: x[1], reverse=True)[0][0]
fine_best_f1

231

In [84]:
pickle.dump(ranked_cols[:fine_best_f1+1], open(f"cols_selected_{sen}_v2.pickle", "wb"))

In [416]:
df_representation_v3_1 = df_representation_v1_1
df_representation_v3_1 = pd.concat([df_representation_v3_1, df_representation_v2], axis=1)
df_representation_v3_1

Unnamed: 0_level_0,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,retro<&>num_tokens_upper,retro<&>prop_vowels,retro<&>len_max_rep_char,retro<&>max_char_fre_per_token(o),retro<&>max_char_fre_per_token(s),...,BERTweet_758,BERTweet_759,BERTweet_760,BERTweet_761,BERTweet_762,BERTweet_763,BERTweet_764,BERTweet_765,BERTweet_766,BERTweet_767
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000,18,96,0,18,3,1,0.253165,2,2,1,...,0.091612,0.013699,0.048113,0.180316,0.025295,-0.094723,0.094302,-0.039285,0.234481,-0.249347
10001,23,119,0,25,7,2,0.329897,2,2,2,...,0.061139,0.057788,0.016066,0.114631,-0.075937,0.161129,0.088782,0.099523,0.195573,-0.094360
10002,19,108,0,19,2,2,0.333333,2,3,2,...,0.058741,-0.221301,0.011673,0.277279,0.015945,0.010225,0.090561,0.177221,0.102671,-0.183728
10003,24,134,0,23,1,1,0.315315,2,1,2,...,0.041260,0.074323,0.122148,0.129413,0.010133,-0.035607,0.095457,0.020883,0.222822,-0.049975
10004,24,125,0,25,3,0,0.352941,2,1,0,...,0.152741,-0.128246,0.041543,0.219807,0.010124,0.019276,0.062688,0.044342,0.058079,-0.121317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40855,24,112,0,23,2,1,0.325843,2,2,2,...,0.094263,0.087653,-0.054003,-0.014100,-0.015075,0.043808,0.258219,-0.013177,0.014293,0.006944
40856,20,121,0,21,1,0,0.352941,2,1,3,...,0.090047,-0.136675,-0.018746,-0.072087,0.148523,-0.059273,0.200686,0.004628,0.182843,-0.168076
40857,23,139,0,23,2,1,0.316239,2,2,4,...,0.214575,-0.152968,-0.036713,0.054679,-0.097778,-0.037324,0.141307,0.059210,0.080930,-0.205299
40858,14,83,2,15,5,1,0.285714,2,1,1,...,-0.018443,-0.159911,0.110586,0.067621,-0.096703,0.073693,0.085268,-0.123595,0.264524,0.105249


In [437]:
# pickle.dump(df_representation_v3_1, open("df_representation_v3_1.pickle", "wb"))

In [109]:
sen = sentiments[0]

# indexs = df_train[df_train["sen"] == sen].index
# indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin(["low", "high"]))].index

# X = df_representation_v3.loc[indexsLH]#[rel_cols]
# y = df_train.loc[X.index]["int"]

# dic_label_count = y.value_counts().to_dict()
# min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
# max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
# index_label_1 = y[y==min_label].index
# oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1

# X_res, y_res = X.copy(), y.copy()
# for step in range(oversampling_steps):
#     new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
#     copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
#     copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
#     X_res = pd.concat([X_res, copied_sub_X], axis=0)
#     y_res = pd.concat([y_res, copied_sub_y], axis=0)

# X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
# selector = SelectKBest(chi2, k=X.shape[1])
# X_res_ = X_res - X_res.min()
# selector.fit(X_res_, y_res)
# scores_selector = {col: selector.scores_[i] if str(selector.scores_[i]) != "nan" else 0 for i, col in enumerate(X.columns.tolist())}
# ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda x: x[1], reverse=True)]
# print(ranked_cols)

indexs = df_train[df_train["sen"] == sen].index
scores_selector = {col: [] for col in df_representation_v3.columns.tolist()}
comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
for l1 in intensities:
    for l2 in intensities:
        if l1 < l2:
            indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

            X = df_representation_v3.loc[indexsLH]#[rel_cols]
            y = df_train.loc[X.index]["int"]

            dic_label_count = y.value_counts().to_dict()
            min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
            max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
            index_label_1 = y[y==min_label].index
            oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
            X_res, y_res = X.copy(), y.copy()

            for step in range(oversampling_steps):
                new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                X_res = pd.concat([X_res, copied_sub_X], axis=0)
                y_res = pd.concat([y_res, copied_sub_y], axis=0)

            X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
            selector = SelectKBest(chi2, k=X.shape[1])
            X_res_ = X_res - X_res.min()
            selector.fit(X_res_, y_res)
            for i, col in enumerate(X.columns.tolist()):
                scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]

f1_weight = []

for num_cols in range(1, len(ranked_cols)+1, 10):
    X = df_representation_v3.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(f1_weight[-1])

[1, 0.10424886540056362, 0.019309878014991615]
[11, 0.6411688350633156, 0.029228674134960844]
[21, 0.6609119066959994, 0.02203836650268399]
[31, 0.6588559737273805, 0.012008818616047677]
[41, 0.66215257900806, 0.004897267596503262]
[51, 0.6819780207011394, 0.018291375701466977]
[61, 0.6891776153090093, 0.018405652049383794]
[71, 0.6948351734809612, 0.026925106785193546]
[81, 0.698515744491627, 0.030620007364956892]
[91, 0.6995012251275626, 0.029302400431879483]
[101, 0.7001325185391131, 0.029258873024053687]
[111, 0.7115093975305629, 0.02715503116164831]
[121, 0.710112986921863, 0.02801785329972967]
[131, 0.7000463757225261, 0.03418087393899213]
[141, 0.7062307214260791, 0.03812506665278833]
[151, 0.7079706264171842, 0.03664070574426643]
[161, 0.7036688956016317, 0.038892980889282364]
[171, 0.7010408157744935, 0.04568406926917083]
[181, 0.6933352645072454, 0.032473129834692624]
[191, 0.6942896187745584, 0.04011906590259415]
[201, 0.6928814435108482, 0.043088935779638876]
[211, 0.686060

KeyboardInterrupt: 

In [110]:
best_f1 = sorted(f1_weight, key=lambda x: x[1], reverse=True)[0][0]
best_f1

721

In [111]:
fine_f1_weight = []

for num_cols in range(best_f1-10, best_f1+10, 1):
    X = df_representation_v3.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    fine_f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(fine_f1_weight[-1])

[711, 0.7119090707998, 0.027855017461954637]
[712, 0.7136188549235528, 0.025519407164058552]
[713, 0.7136188549235528, 0.025519407164058552]
[714, 0.7136188549235528, 0.025519407164058552]
[715, 0.7147670371058765, 0.02471609889494386]
[716, 0.7152799717726143, 0.02237807015865809]
[717, 0.7152205637645912, 0.02236798949000108]
[718, 0.7152205637645912, 0.02236798949000108]
[719, 0.7152205637645912, 0.02236798949000108]
[720, 0.7139066277577931, 0.023421575299425103]
[721, 0.7139066277577931, 0.023421575299425103]
[722, 0.7130921410103506, 0.02330435003286564]
[723, 0.7130204092882748, 0.02329945517365635]
[724, 0.7130204092882748, 0.02329945517365635]
[725, 0.7130204092882748, 0.02329945517365635]
[726, 0.7138116315671187, 0.022539456984139863]
[727, 0.7138116315671187, 0.022539456984139863]
[728, 0.7138116315671187, 0.022539456984139863]
[729, 0.7138116315671187, 0.022539456984139863]
[730, 0.7138116315671187, 0.022539456984139863]


In [112]:
fine_best_f1 = sorted(fine_f1_weight, key=lambda x: x[1], reverse=True)[0][0]
fine_best_f1

716

In [113]:
pickle.dump(ranked_cols[:fine_best_f1+1], open(f"cols_selected_{sen}_v3.pickle", "wb"))

In [218]:
import nltk
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('opinion_lexicon')
# nltk.download("sentiwordnet")
# nltk.download('omw-1.4')

from nltk.corpus import opinion_lexicon
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [219]:
def get_sentiwordnet_sent(tweet):
    def penn_to_wn(tag):
        if tag.startswith('J'):
            return wn.ADJ
        elif tag.startswith('N'):
            return wn.NOUN
        elif tag.startswith('R'):
            return wn.ADV
        elif tag.startswith('V'):
            return wn.VERB
        return None

    def get_sentiment(word,tag):
        wn_tag = penn_to_wn(tag)
        if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
            return [0,0,0]

        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            return [0,0,0]

        synsets = wn.synsets(word, pos=wn_tag)
        if not synsets:
            return [0,0,0]

        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())

        return [swn_synset.pos_score(),swn_synset.neg_score(),swn_synset.obj_score()]

    words_data = tweet.lower().strip().split()

    pos_val = nltk.pos_tag(words_data)
    senti_val = [get_sentiment(x,y) for (x,y) in pos_val]
    return dict(zip("+ - o".split(), np.sum(senti_val, axis=0)))

In [220]:
print(f"Cantidad de palabras positivas: {len(opinion_lexicon.positive())}")
print(f"Cantidad de palabras negativas: {len(opinion_lexicon.negative())}")

Cantidad de palabras positivas: 2006
Cantidad de palabras negativas: 4783


In [221]:
def get_lexicon_attrib(tweet):
    o = {}
    o["lexicon<&>LiuHu<&>+"] = sum(int(t.lower() in opinion_lexicon.positive()) for t in tweet.split())
    o["lexicon<&>LiuHu<&>-"] = sum(int(t.lower() in opinion_lexicon.negative()) for t in tweet.split())
    o_sentiwordnet = get_sentiwordnet_sent(tweet)
    o["lexicon<&>sentiwordnet<&>+"] = o_sentiwordnet["+"]
    o["lexicon<&>sentiwordnet<&>-"] = o_sentiwordnet["-"]
    o["lexicon<&>sentiwordnet<&>o"] = o_sentiwordnet["o"]
    return o

In [222]:
get_lexicon_attrib(tweet)

{'lexicon<&>LiuHu<&>+': 0,
 'lexicon<&>LiuHu<&>-': 0,
 'lexicon<&>sentiwordnet<&>+': 0.75,
 'lexicon<&>sentiwordnet<&>-': 0.25,
 'lexicon<&>sentiwordnet<&>o': 6.0}

In [41]:
data_lexicon_attributes = []
times = []
for k, ix in enumerate(df_train.index):
    start_time = time.time()
    tweet = df_train.loc[ix]["text"]
    o = {"id": ix}
    o = {**o, **get_lexicon_attrib(tweet)}
    data_lexicon_attributes.append(o)
    dt = time.time()-start_time
    times.append(dt)
    print(f"""
    row: {k+1}, total_rows: {df_train.shape[0]}
    progress: {np.round(100*(k+1)/df_train.shape[0], 3)}%
    wait time: {datetime.timedelta(seconds = np.median(times)*(df_train.shape[0]-k-1))}s
    """)


    row: 1, total_rows: 3960
    progress: 0.025%
    wait time: 0:14:39.696078s
    

    row: 2, total_rows: 3960
    progress: 0.051%
    wait time: 0:16:38.320869s
    

    row: 3, total_rows: 3960
    progress: 0.076%
    wait time: 0:15:34.698491s
    

    row: 4, total_rows: 3960
    progress: 0.101%
    wait time: 0:17:05.532814s
    

    row: 5, total_rows: 3960
    progress: 0.126%
    wait time: 0:18:36.321094s
    

    row: 6, total_rows: 3960
    progress: 0.152%
    wait time: 0:17:05.014344s
    

    row: 7, total_rows: 3960
    progress: 0.177%
    wait time: 0:18:35.756583s
    

    row: 8, total_rows: 3960
    progress: 0.202%
    wait time: 0:17:55.920444s
    

    row: 9, total_rows: 3960
    progress: 0.227%
    wait time: 0:17:16.104323s
    

    row: 10, total_rows: 3960
    progress: 0.253%
    wait time: 0:16:24.443539s
    

    row: 11, total_rows: 3960
    progress: 0.278%
    wait time: 0:15:32.808780s
    

    row: 12, total_rows: 3960
    progre


    row: 96, total_rows: 3960
    progress: 2.424%
    wait time: 0:13:47.645845s
    

    row: 97, total_rows: 3960
    progress: 2.449%
    wait time: 0:13:51.298055s
    

    row: 98, total_rows: 3960
    progress: 2.475%
    wait time: 0:14:04.612692s
    

    row: 99, total_rows: 3960
    progress: 2.5%
    wait time: 0:14:17.920323s
    

    row: 100, total_rows: 3960
    progress: 2.525%
    wait time: 0:14:04.175296s
    

    row: 101, total_rows: 3960
    progress: 2.551%
    wait time: 0:13:50.437276s
    

    row: 102, total_rows: 3960
    progress: 2.576%
    wait time: 0:13:46.360681s
    

    row: 103, total_rows: 3960
    progress: 2.601%
    wait time: 0:13:50.006886s
    

    row: 104, total_rows: 3960
    progress: 2.626%
    wait time: 0:13:45.932293s
    

    row: 105, total_rows: 3960
    progress: 2.652%
    wait time: 0:13:41.859702s
    

    row: 106, total_rows: 3960
    progress: 2.677%
    wait time: 0:13:45.503905s
    

    row: 107, total_rows: 


    row: 189, total_rows: 3960
    progress: 4.773%
    wait time: 0:12:46.206447s
    

    row: 190, total_rows: 3960
    progress: 4.798%
    wait time: 0:12:44.118849s
    

    row: 191, total_rows: 3960
    progress: 4.823%
    wait time: 0:12:45.800079s
    

    row: 192, total_rows: 3960
    progress: 4.848%
    wait time: 0:12:49.367766s
    

    row: 193, total_rows: 3960
    progress: 4.874%
    wait time: 0:12:45.393711s
    

    row: 194, total_rows: 3960
    progress: 4.899%
    wait time: 0:12:43.308113s
    

    row: 195, total_rows: 3960
    progress: 4.924%
    wait time: 0:12:41.223514s
    

    row: 196, total_rows: 3960
    progress: 4.949%
    wait time: 0:12:42.902745s
    

    row: 197, total_rows: 3960
    progress: 4.975%
    wait time: 0:12:44.580976s
    

    row: 198, total_rows: 3960
    progress: 5.0%
    wait time: 0:12:48.142658s
    

    row: 199, total_rows: 3960
    progress: 5.025%
    wait time: 0:12:51.702340s
    

    row: 200, total_ro


    row: 283, total_rows: 3960
    progress: 7.146%
    wait time: 0:12:49.191232s
    

    row: 284, total_rows: 3960
    progress: 7.172%
    wait time: 0:12:50.822100s
    

    row: 285, total_rows: 3960
    progress: 7.197%
    wait time: 0:12:48.772852s
    

    row: 286, total_rows: 3960
    progress: 7.222%
    wait time: 0:12:46.725044s
    

    row: 287, total_rows: 3960
    progress: 7.247%
    wait time: 0:12:48.354473s
    

    row: 288, total_rows: 3960
    progress: 7.273%
    wait time: 0:12:46.307665s
    

    row: 289, total_rows: 3960
    progress: 7.298%
    wait time: 0:12:47.936093s
    

    row: 290, total_rows: 3960
    progress: 7.323%
    wait time: 0:12:45.890286s
    

    row: 291, total_rows: 3960
    progress: 7.348%
    wait time: 0:12:47.517713s
    

    row: 292, total_rows: 3960
    progress: 7.374%
    wait time: 0:12:45.472907s
    

    row: 293, total_rows: 3960
    progress: 7.399%
    wait time: 0:12:47.099333s
    

    row: 294, total_


    row: 377, total_rows: 3960
    progress: 9.52%
    wait time: 0:12:00.840687s
    

    row: 378, total_rows: 3960
    progress: 9.545%
    wait time: 0:12:02.431654s
    

    row: 379, total_rows: 3960
    progress: 9.571%
    wait time: 0:12:00.438319s
    

    row: 380, total_rows: 3960
    progress: 9.596%
    wait time: 0:12:00.236282s
    

    row: 381, total_rows: 3960
    progress: 9.621%
    wait time: 0:12:00.035952s
    

    row: 382, total_rows: 3960
    progress: 9.646%
    wait time: 0:11:59.833916s
    

    row: 383, total_rows: 3960
    progress: 9.672%
    wait time: 0:11:59.631880s
    

    row: 384, total_rows: 3960
    progress: 9.697%
    wait time: 0:11:59.431549s
    

    row: 385, total_rows: 3960
    progress: 9.722%
    wait time: 0:11:59.229513s
    

    row: 386, total_rows: 3960
    progress: 9.747%
    wait time: 0:11:59.028330s
    

    row: 387, total_rows: 3960
    progress: 9.773%
    wait time: 0:11:58.827147s
    

    row: 388, total_r


    row: 470, total_rows: 3960
    progress: 11.869%
    wait time: 0:11:40.382414s
    

    row: 471, total_rows: 3960
    progress: 11.894%
    wait time: 0:11:41.925273s
    

    row: 472, total_rows: 3960
    progress: 11.919%
    wait time: 0:11:39.981049s
    

    row: 473, total_rows: 3960
    progress: 11.944%
    wait time: 0:11:41.522908s
    

    row: 474, total_rows: 3960
    progress: 11.97%
    wait time: 0:11:41.321726s
    

    row: 475, total_rows: 3960
    progress: 11.995%
    wait time: 0:11:41.120543s
    

    row: 476, total_rows: 3960
    progress: 12.02%
    wait time: 0:11:40.919361s
    

    row: 477, total_rows: 3960
    progress: 12.045%
    wait time: 0:11:40.718179s
    

    row: 478, total_rows: 3960
    progress: 12.071%
    wait time: 0:11:38.776953s
    

    row: 479, total_rows: 3960
    progress: 12.096%
    wait time: 0:11:40.315814s
    

    row: 480, total_rows: 3960
    progress: 12.121%
    wait time: 0:11:38.375587s
    

    row: 48


    row: 563, total_rows: 3960
    progress: 14.217%
    wait time: 0:11:23.416495s
    

    row: 564, total_rows: 3960
    progress: 14.242%
    wait time: 0:11:23.216527s
    

    row: 565, total_rows: 3960
    progress: 14.268%
    wait time: 0:11:23.014131s
    

    row: 566, total_rows: 3960
    progress: 14.293%
    wait time: 0:11:22.814162s
    

    row: 567, total_rows: 3960
    progress: 14.318%
    wait time: 0:11:22.611766s
    

    row: 568, total_rows: 3960
    progress: 14.343%
    wait time: 0:11:22.411797s
    

    row: 569, total_rows: 3960
    progress: 14.369%
    wait time: 0:11:22.211827s
    

    row: 570, total_rows: 3960
    progress: 14.394%
    wait time: 0:11:22.009431s
    

    row: 571, total_rows: 3960
    progress: 14.419%
    wait time: 0:11:21.809460s
    

    row: 572, total_rows: 3960
    progress: 14.444%
    wait time: 0:11:21.607066s
    

    row: 573, total_rows: 3960
    progress: 14.47%
    wait time: 0:11:21.404672s
    

    row: 5


    row: 656, total_rows: 3960
    progress: 16.566%
    wait time: 0:11:03.053867s
    

    row: 657, total_rows: 3960
    progress: 16.591%
    wait time: 0:11:01.204167s
    

    row: 658, total_rows: 3960
    progress: 16.616%
    wait time: 0:11:02.652503s
    

    row: 659, total_rows: 3960
    progress: 16.641%
    wait time: 0:11:04.099840s
    

    row: 660, total_rows: 3960
    progress: 16.667%
    wait time: 0:11:02.251139s
    

    row: 661, total_rows: 3960
    progress: 16.692%
    wait time: 0:11:03.697477s
    

    row: 662, total_rows: 3960
    progress: 16.717%
    wait time: 0:11:03.497869s
    

    row: 663, total_rows: 3960
    progress: 16.742%
    wait time: 0:11:03.298259s
    

    row: 664, total_rows: 3960
    progress: 16.768%
    wait time: 0:11:03.097076s
    

    row: 665, total_rows: 3960
    progress: 16.793%
    wait time: 0:11:02.895894s
    

    row: 666, total_rows: 3960
    progress: 16.818%
    wait time: 0:11:02.695890s
    

    row: 


    row: 749, total_rows: 3960
    progress: 18.914%
    wait time: 0:10:39.569682s
    

    row: 750, total_rows: 3960
    progress: 18.939%
    wait time: 0:10:39.370501s
    

    row: 751, total_rows: 3960
    progress: 18.965%
    wait time: 0:10:39.171320s
    

    row: 752, total_rows: 3960
    progress: 18.99%
    wait time: 0:10:38.972139s
    

    row: 753, total_rows: 3960
    progress: 19.015%
    wait time: 0:10:38.772959s
    

    row: 754, total_rows: 3960
    progress: 19.04%
    wait time: 0:10:38.573778s
    

    row: 755, total_rows: 3960
    progress: 19.066%
    wait time: 0:10:38.374597s
    

    row: 756, total_rows: 3960
    progress: 19.091%
    wait time: 0:10:36.688498s
    

    row: 757, total_rows: 3960
    progress: 19.116%
    wait time: 0:10:35.003327s
    

    row: 758, total_rows: 3960
    progress: 19.141%
    wait time: 0:10:36.291065s
    

    row: 759, total_rows: 3960
    progress: 19.167%
    wait time: 0:10:34.606822s
    

    row: 76


    row: 842, total_rows: 3960
    progress: 21.263%
    wait time: 0:10:00.762120s
    

    row: 843, total_rows: 3960
    progress: 21.288%
    wait time: 0:09:59.009199s
    

    row: 844, total_rows: 3960
    progress: 21.313%
    wait time: 0:10:00.376769s
    

    row: 845, total_rows: 3960
    progress: 21.338%
    wait time: 0:09:58.624849s
    

    row: 846, total_rows: 3960
    progress: 21.364%
    wait time: 0:09:59.991418s
    

    row: 847, total_rows: 3960
    progress: 21.389%
    wait time: 0:09:58.240499s
    

    row: 848, total_rows: 3960
    progress: 21.414%
    wait time: 0:09:58.047954s
    

    row: 849, total_rows: 3960
    progress: 21.439%
    wait time: 0:09:57.856150s
    

    row: 850, total_rows: 3960
    progress: 21.465%
    wait time: 0:09:57.663604s
    

    row: 851, total_rows: 3960
    progress: 21.49%
    wait time: 0:09:57.471059s
    

    row: 852, total_rows: 3960
    progress: 21.515%
    wait time: 0:09:57.277772s
    

    row: 8


    row: 934, total_rows: 3960
    progress: 23.586%
    wait time: 0:09:41.520921s
    

    row: 935, total_rows: 3960
    progress: 23.611%
    wait time: 0:09:41.329107s
    

    row: 936, total_rows: 3960
    progress: 23.636%
    wait time: 0:09:41.136572s
    

    row: 937, total_rows: 3960
    progress: 23.662%
    wait time: 0:09:40.944757s
    

    row: 938, total_rows: 3960
    progress: 23.687%
    wait time: 0:09:42.265274s
    

    row: 939, total_rows: 3960
    progress: 23.712%
    wait time: 0:09:40.560408s
    

    row: 940, total_rows: 3960
    progress: 23.737%
    wait time: 0:09:40.367873s
    

    row: 941, total_rows: 3960
    progress: 23.763%
    wait time: 0:09:40.175338s
    

    row: 942, total_rows: 3960
    progress: 23.788%
    wait time: 0:09:39.982084s
    

    row: 943, total_rows: 3960
    progress: 23.813%
    wait time: 0:09:39.788831s
    

    row: 944, total_rows: 3960
    progress: 23.838%
    wait time: 0:09:38.088047s
    

    row: 


    row: 1026, total_rows: 3960
    progress: 25.909%
    wait time: 0:09:23.840841s
    

    row: 1027, total_rows: 3960
    progress: 25.934%
    wait time: 0:09:23.648316s
    

    row: 1028, total_rows: 3960
    progress: 25.96%
    wait time: 0:09:23.456491s
    

    row: 1029, total_rows: 3960
    progress: 25.985%
    wait time: 0:09:23.263967s
    

    row: 1030, total_rows: 3960
    progress: 26.01%
    wait time: 0:09:23.071094s
    

    row: 1031, total_rows: 3960
    progress: 26.035%
    wait time: 0:09:22.878221s
    

    row: 1032, total_rows: 3960
    progress: 26.061%
    wait time: 0:09:22.686745s
    

    row: 1033, total_rows: 3960
    progress: 26.086%
    wait time: 0:09:22.493872s
    

    row: 1034, total_rows: 3960
    progress: 26.111%
    wait time: 0:09:22.302396s
    

    row: 1035, total_rows: 3960
    progress: 26.136%
    wait time: 0:09:22.110919s
    

    row: 1036, total_rows: 3960
    progress: 26.162%
    wait time: 0:09:21.919093s
    




    row: 1117, total_rows: 3960
    progress: 28.207%
    wait time: 0:09:09.210979s
    

    row: 1118, total_rows: 3960
    progress: 28.232%
    wait time: 0:09:09.012040s
    

    row: 1119, total_rows: 3960
    progress: 28.258%
    wait time: 0:09:08.813104s
    

    row: 1120, total_rows: 3960
    progress: 28.283%
    wait time: 0:09:08.625684s
    

    row: 1121, total_rows: 3960
    progress: 28.308%
    wait time: 0:09:08.438259s
    

    row: 1122, total_rows: 3960
    progress: 28.333%
    wait time: 0:09:08.239328s
    

    row: 1123, total_rows: 3960
    progress: 28.359%
    wait time: 0:09:08.040400s
    

    row: 1124, total_rows: 3960
    progress: 28.384%
    wait time: 0:09:07.852972s
    

    row: 1125, total_rows: 3960
    progress: 28.409%
    wait time: 0:09:07.665539s
    

    row: 1126, total_rows: 3960
    progress: 28.434%
    wait time: 0:09:08.882837s
    

    row: 1127, total_rows: 3960
    progress: 28.46%
    wait time: 0:09:10.099140s
    



    row: 1208, total_rows: 3960
    progress: 30.505%
    wait time: 0:08:59.882072s
    

    row: 1209, total_rows: 3960
    progress: 30.53%
    wait time: 0:08:59.685566s
    

    row: 1210, total_rows: 3960
    progress: 30.556%
    wait time: 0:08:59.489716s
    

    row: 1211, total_rows: 3960
    progress: 30.581%
    wait time: 0:08:59.293866s
    

    row: 1212, total_rows: 3960
    progress: 30.606%
    wait time: 0:08:59.098015s
    

    row: 1213, total_rows: 3960
    progress: 30.631%
    wait time: 0:08:58.902164s
    

    row: 1214, total_rows: 3960
    progress: 30.657%
    wait time: 0:08:58.705659s
    

    row: 1215, total_rows: 3960
    progress: 30.682%
    wait time: 0:08:58.509808s
    

    row: 1216, total_rows: 3960
    progress: 30.707%
    wait time: 0:08:58.313302s
    

    row: 1217, total_rows: 3960
    progress: 30.732%
    wait time: 0:08:58.116797s
    

    row: 1218, total_rows: 3960
    progress: 30.758%
    wait time: 0:08:57.920945s
    



    row: 1301, total_rows: 3960
    progress: 32.854%
    wait time: 0:08:44.297903s
    

    row: 1302, total_rows: 3960
    progress: 32.879%
    wait time: 0:08:44.101675s
    

    row: 1303, total_rows: 3960
    progress: 32.904%
    wait time: 0:08:43.903546s
    

    row: 1304, total_rows: 3960
    progress: 32.929%
    wait time: 0:08:43.705418s
    

    row: 1305, total_rows: 3960
    progress: 32.955%
    wait time: 0:08:43.509189s
    

    row: 1306, total_rows: 3960
    progress: 32.98%
    wait time: 0:08:43.311061s
    

    row: 1307, total_rows: 3960
    progress: 33.005%
    wait time: 0:08:43.114832s
    

    row: 1308, total_rows: 3960
    progress: 33.03%
    wait time: 0:08:42.916705s
    

    row: 1309, total_rows: 3960
    progress: 33.056%
    wait time: 0:08:42.718578s
    

    row: 1310, total_rows: 3960
    progress: 33.081%
    wait time: 0:08:41.787870s
    

    row: 1311, total_rows: 3960
    progress: 33.106%
    wait time: 0:08:42.324223s
    




    row: 1392, total_rows: 3960
    progress: 35.152%
    wait time: 0:08:28.925526s
    

    row: 1393, total_rows: 3960
    progress: 35.177%
    wait time: 0:08:28.727346s
    

    row: 1394, total_rows: 3960
    progress: 35.202%
    wait time: 0:08:28.529166s
    

    row: 1395, total_rows: 3960
    progress: 35.227%
    wait time: 0:08:28.330986s
    

    row: 1396, total_rows: 3960
    progress: 35.253%
    wait time: 0:08:28.132807s
    

    row: 1397, total_rows: 3960
    progress: 35.278%
    wait time: 0:08:27.934627s
    

    row: 1398, total_rows: 3960
    progress: 35.303%
    wait time: 0:08:27.736447s
    

    row: 1399, total_rows: 3960
    progress: 35.328%
    wait time: 0:08:27.538268s
    

    row: 1400, total_rows: 3960
    progress: 35.354%
    wait time: 0:08:27.339783s
    

    row: 1401, total_rows: 3960
    progress: 35.379%
    wait time: 0:08:27.141298s
    

    row: 1402, total_rows: 3960
    progress: 35.404%
    wait time: 0:08:26.943424s
    


    row: 1483, total_rows: 3960
    progress: 37.449%
    wait time: 0:08:10.891762s
    

    row: 1484, total_rows: 3960
    progress: 37.475%
    wait time: 0:08:10.693582s
    

    row: 1485, total_rows: 3960
    progress: 37.5%
    wait time: 0:08:10.495402s
    

    row: 1486, total_rows: 3960
    progress: 37.525%
    wait time: 0:08:10.296927s
    

    row: 1487, total_rows: 3960
    progress: 37.551%
    wait time: 0:08:10.099042s
    

    row: 1488, total_rows: 3960
    progress: 37.576%
    wait time: 0:08:09.900567s
    

    row: 1489, total_rows: 3960
    progress: 37.601%
    wait time: 0:08:09.702093s
    

    row: 1490, total_rows: 3960
    progress: 37.626%
    wait time: 0:08:09.503913s
    

    row: 1491, total_rows: 3960
    progress: 37.652%
    wait time: 0:08:09.305733s
    

    row: 1492, total_rows: 3960
    progress: 37.677%
    wait time: 0:08:09.107259s
    

    row: 1493, total_rows: 3960
    progress: 37.702%
    wait time: 0:08:08.908786s
    




    row: 1574, total_rows: 3960
    progress: 39.747%
    wait time: 0:07:52.856816s
    

    row: 1575, total_rows: 3960
    progress: 39.773%
    wait time: 0:07:52.658637s
    

    row: 1576, total_rows: 3960
    progress: 39.798%
    wait time: 0:07:52.460741s
    

    row: 1577, total_rows: 3960
    progress: 39.823%
    wait time: 0:07:52.262277s
    

    row: 1578, total_rows: 3960
    progress: 39.848%
    wait time: 0:07:52.064097s
    

    row: 1579, total_rows: 3960
    progress: 39.874%
    wait time: 0:07:51.865918s
    

    row: 1580, total_rows: 3960
    progress: 39.899%
    wait time: 0:07:51.668022s
    

    row: 1581, total_rows: 3960
    progress: 39.924%
    wait time: 0:07:51.469558s
    

    row: 1582, total_rows: 3960
    progress: 39.949%
    wait time: 0:07:51.271662s
    

    row: 1583, total_rows: 3960
    progress: 39.975%
    wait time: 0:07:51.073199s
    

    row: 1584, total_rows: 3960
    progress: 40.0%
    wait time: 0:07:50.875019s
    




    row: 1667, total_rows: 3960
    progress: 42.096%
    wait time: 0:07:34.427742s
    

    row: 1668, total_rows: 3960
    progress: 42.121%
    wait time: 0:07:34.229015s
    

    row: 1669, total_rows: 3960
    progress: 42.146%
    wait time: 0:07:34.031381s
    

    row: 1670, total_rows: 3960
    progress: 42.172%
    wait time: 0:07:33.832655s
    

    row: 1671, total_rows: 3960
    progress: 42.197%
    wait time: 0:07:33.633929s
    

    row: 1672, total_rows: 3960
    progress: 42.222%
    wait time: 0:07:33.436295s
    

    row: 1673, total_rows: 3960
    progress: 42.247%
    wait time: 0:07:33.238660s
    

    row: 1674, total_rows: 3960
    progress: 42.273%
    wait time: 0:07:33.042114s
    

    row: 1675, total_rows: 3960
    progress: 42.298%
    wait time: 0:07:32.845567s
    

    row: 1676, total_rows: 3960
    progress: 42.323%
    wait time: 0:07:32.728251s
    

    row: 1677, total_rows: 3960
    progress: 42.348%
    wait time: 0:07:32.449204s
    


    row: 1759, total_rows: 3960
    progress: 44.419%
    wait time: 0:07:18.397032s
    

    row: 1760, total_rows: 3960
    progress: 44.444%
    wait time: 0:07:18.197851s
    

    row: 1761, total_rows: 3960
    progress: 44.47%
    wait time: 0:07:17.998670s
    

    row: 1762, total_rows: 3960
    progress: 44.495%
    wait time: 0:07:17.799489s
    

    row: 1763, total_rows: 3960
    progress: 44.52%
    wait time: 0:07:17.600309s
    

    row: 1764, total_rows: 3960
    progress: 44.545%
    wait time: 0:07:17.401128s
    

    row: 1765, total_rows: 3960
    progress: 44.571%
    wait time: 0:07:17.201947s
    

    row: 1766, total_rows: 3960
    progress: 44.596%
    wait time: 0:07:17.002766s
    

    row: 1767, total_rows: 3960
    progress: 44.621%
    wait time: 0:07:16.803585s
    

    row: 1768, total_rows: 3960
    progress: 44.646%
    wait time: 0:07:16.604143s
    

    row: 1769, total_rows: 3960
    progress: 44.672%
    wait time: 0:07:16.405224s
    




    row: 1851, total_rows: 3960
    progress: 46.742%
    wait time: 0:06:57.961536s
    

    row: 1852, total_rows: 3960
    progress: 46.768%
    wait time: 0:06:57.763859s
    

    row: 1853, total_rows: 3960
    progress: 46.793%
    wait time: 0:06:57.566181s
    

    row: 1854, total_rows: 3960
    progress: 46.818%
    wait time: 0:06:57.369507s
    

    row: 1855, total_rows: 3960
    progress: 46.843%
    wait time: 0:06:57.169820s
    

    row: 1856, total_rows: 3960
    progress: 46.869%
    wait time: 0:06:56.971138s
    

    row: 1857, total_rows: 3960
    progress: 46.894%
    wait time: 0:06:56.773459s
    

    row: 1858, total_rows: 3960
    progress: 46.919%
    wait time: 0:06:56.576782s
    

    row: 1859, total_rows: 3960
    progress: 46.944%
    wait time: 0:06:56.377098s
    

    row: 1860, total_rows: 3960
    progress: 46.97%
    wait time: 0:06:56.178417s
    

    row: 1861, total_rows: 3960
    progress: 46.995%
    wait time: 0:06:55.980737s
    



    row: 1942, total_rows: 3960
    progress: 49.04%
    wait time: 0:06:39.926919s
    

    row: 1943, total_rows: 3960
    progress: 49.066%
    wait time: 0:06:39.728980s
    

    row: 1944, total_rows: 3960
    progress: 49.091%
    wait time: 0:06:39.530560s
    

    row: 1945, total_rows: 3960
    progress: 49.116%
    wait time: 0:06:39.332620s
    

    row: 1946, total_rows: 3960
    progress: 49.141%
    wait time: 0:06:39.134440s
    

    row: 1947, total_rows: 3960
    progress: 49.167%
    wait time: 0:06:38.936260s
    

    row: 1948, total_rows: 3960
    progress: 49.192%
    wait time: 0:06:38.738080s
    

    row: 1949, total_rows: 3960
    progress: 49.217%
    wait time: 0:06:38.539900s
    

    row: 1950, total_rows: 3960
    progress: 49.242%
    wait time: 0:06:38.341720s
    

    row: 1951, total_rows: 3960
    progress: 49.268%
    wait time: 0:06:38.143540s
    

    row: 1952, total_rows: 3960
    progress: 49.293%
    wait time: 0:06:37.945360s
    



    row: 2034, total_rows: 3960
    progress: 51.364%
    wait time: 0:06:21.694374s
    

    row: 2035, total_rows: 3960
    progress: 51.389%
    wait time: 0:06:21.496423s
    

    row: 2036, total_rows: 3960
    progress: 51.414%
    wait time: 0:06:21.298244s
    

    row: 2037, total_rows: 3960
    progress: 51.439%
    wait time: 0:06:21.100064s
    

    row: 2038, total_rows: 3960
    progress: 51.465%
    wait time: 0:06:20.901654s
    

    row: 2039, total_rows: 3960
    progress: 51.49%
    wait time: 0:06:20.703246s
    

    row: 2040, total_rows: 3960
    progress: 51.515%
    wait time: 0:06:20.505066s
    

    row: 2041, total_rows: 3960
    progress: 51.54%
    wait time: 0:06:20.306886s
    

    row: 2042, total_rows: 3960
    progress: 51.566%
    wait time: 0:06:20.108935s
    

    row: 2043, total_rows: 3960
    progress: 51.591%
    wait time: 0:06:19.910527s
    

    row: 2044, total_rows: 3960
    progress: 51.616%
    wait time: 0:06:19.712347s
    




    row: 2127, total_rows: 3960
    progress: 53.712%
    wait time: 0:06:03.262556s
    

    row: 2128, total_rows: 3960
    progress: 53.737%
    wait time: 0:06:03.064377s
    

    row: 2129, total_rows: 3960
    progress: 53.763%
    wait time: 0:06:02.866198s
    

    row: 2130, total_rows: 3960
    progress: 53.788%
    wait time: 0:06:02.668018s
    

    row: 2131, total_rows: 3960
    progress: 53.813%
    wait time: 0:06:02.469839s
    

    row: 2132, total_rows: 3960
    progress: 53.838%
    wait time: 0:06:02.271660s
    

    row: 2133, total_rows: 3960
    progress: 53.864%
    wait time: 0:06:02.073481s
    

    row: 2134, total_rows: 3960
    progress: 53.889%
    wait time: 0:06:01.875301s
    

    row: 2135, total_rows: 3960
    progress: 53.914%
    wait time: 0:06:01.677122s
    

    row: 2136, total_rows: 3960
    progress: 53.939%
    wait time: 0:06:01.479160s
    

    row: 2137, total_rows: 3960
    progress: 53.965%
    wait time: 0:06:01.281198s
    


    row: 2218, total_rows: 3960
    progress: 56.01%
    wait time: 0:05:45.228452s
    

    row: 2219, total_rows: 3960
    progress: 56.035%
    wait time: 0:05:45.030066s
    

    row: 2220, total_rows: 3960
    progress: 56.061%
    wait time: 0:05:44.832094s
    

    row: 2221, total_rows: 3960
    progress: 56.086%
    wait time: 0:05:44.634122s
    

    row: 2222, total_rows: 3960
    progress: 56.111%
    wait time: 0:05:44.435942s
    

    row: 2223, total_rows: 3960
    progress: 56.136%
    wait time: 0:05:44.237763s
    

    row: 2224, total_rows: 3960
    progress: 56.162%
    wait time: 0:05:44.039376s
    

    row: 2225, total_rows: 3960
    progress: 56.187%
    wait time: 0:05:43.840990s
    

    row: 2226, total_rows: 3960
    progress: 56.212%
    wait time: 0:05:43.643018s
    

    row: 2227, total_rows: 3960
    progress: 56.237%
    wait time: 0:05:43.444632s
    

    row: 2228, total_rows: 3960
    progress: 56.263%
    wait time: 0:05:43.246659s
    



    row: 2309, total_rows: 3960
    progress: 58.308%
    wait time: 0:05:27.194721s
    

    row: 2310, total_rows: 3960
    progress: 58.333%
    wait time: 0:05:26.996541s
    

    row: 2311, total_rows: 3960
    progress: 58.359%
    wait time: 0:05:26.798361s
    

    row: 2312, total_rows: 3960
    progress: 58.384%
    wait time: 0:05:26.600182s
    

    row: 2313, total_rows: 3960
    progress: 58.409%
    wait time: 0:05:26.402002s
    

    row: 2314, total_rows: 3960
    progress: 58.434%
    wait time: 0:05:26.203626s
    

    row: 2315, total_rows: 3960
    progress: 58.46%
    wait time: 0:05:26.005642s
    

    row: 2316, total_rows: 3960
    progress: 58.485%
    wait time: 0:05:25.807463s
    

    row: 2317, total_rows: 3960
    progress: 58.51%
    wait time: 0:05:25.609283s
    

    row: 2318, total_rows: 3960
    progress: 58.535%
    wait time: 0:05:25.411103s
    

    row: 2319, total_rows: 3960
    progress: 58.561%
    wait time: 0:05:25.212924s
    




    row: 2401, total_rows: 3960
    progress: 60.631%
    wait time: 0:05:07.402556s
    

    row: 2402, total_rows: 3960
    progress: 60.657%
    wait time: 0:05:07.205377s
    

    row: 2403, total_rows: 3960
    progress: 60.682%
    wait time: 0:05:07.008197s
    

    row: 2404, total_rows: 3960
    progress: 60.707%
    wait time: 0:05:06.811018s
    

    row: 2405, total_rows: 3960
    progress: 60.732%
    wait time: 0:05:06.613839s
    

    row: 2406, total_rows: 3960
    progress: 60.758%
    wait time: 0:05:06.416659s
    

    row: 2407, total_rows: 3960
    progress: 60.783%
    wait time: 0:05:06.219480s
    

    row: 2408, total_rows: 3960
    progress: 60.808%
    wait time: 0:05:06.022301s
    

    row: 2409, total_rows: 3960
    progress: 60.833%
    wait time: 0:05:05.825121s
    

    row: 2410, total_rows: 3960
    progress: 60.859%
    wait time: 0:05:05.627942s
    

    row: 2411, total_rows: 3960
    progress: 60.884%
    wait time: 0:05:05.430763s
    


    row: 2492, total_rows: 3960
    progress: 62.929%
    wait time: 0:04:49.458713s
    

    row: 2493, total_rows: 3960
    progress: 62.955%
    wait time: 0:04:49.261709s
    

    row: 2494, total_rows: 3960
    progress: 62.98%
    wait time: 0:04:49.064530s
    

    row: 2495, total_rows: 3960
    progress: 63.005%
    wait time: 0:04:48.867351s
    

    row: 2496, total_rows: 3960
    progress: 63.03%
    wait time: 0:04:48.670172s
    

    row: 2497, total_rows: 3960
    progress: 63.056%
    wait time: 0:04:48.472993s
    

    row: 2498, total_rows: 3960
    progress: 63.081%
    wait time: 0:04:48.275639s
    

    row: 2499, total_rows: 3960
    progress: 63.106%
    wait time: 0:04:48.078635s
    

    row: 2500, total_rows: 3960
    progress: 63.131%
    wait time: 0:04:47.881281s
    

    row: 2501, total_rows: 3960
    progress: 63.157%
    wait time: 0:04:47.683928s
    

    row: 2502, total_rows: 3960
    progress: 63.182%
    wait time: 0:04:47.486923s
    




    row: 2583, total_rows: 3960
    progress: 65.227%
    wait time: 0:04:31.514607s
    

    row: 2584, total_rows: 3960
    progress: 65.253%
    wait time: 0:04:31.317593s
    

    row: 2585, total_rows: 3960
    progress: 65.278%
    wait time: 0:04:31.120578s
    

    row: 2586, total_rows: 3960
    progress: 65.303%
    wait time: 0:04:30.923236s
    

    row: 2587, total_rows: 3960
    progress: 65.328%
    wait time: 0:04:30.726221s
    

    row: 2588, total_rows: 3960
    progress: 65.354%
    wait time: 0:04:30.528879s
    

    row: 2589, total_rows: 3960
    progress: 65.379%
    wait time: 0:04:30.331537s
    

    row: 2590, total_rows: 3960
    progress: 65.404%
    wait time: 0:04:30.134522s
    

    row: 2591, total_rows: 3960
    progress: 65.429%
    wait time: 0:04:29.937506s
    

    row: 2592, total_rows: 3960
    progress: 65.455%
    wait time: 0:04:29.740328s
    

    row: 2593, total_rows: 3960
    progress: 65.48%
    wait time: 0:04:29.543149s
    



    row: 2674, total_rows: 3960
    progress: 67.525%
    wait time: 0:04:12.284856s
    

    row: 2675, total_rows: 3960
    progress: 67.551%
    wait time: 0:04:12.088678s
    

    row: 2676, total_rows: 3960
    progress: 67.576%
    wait time: 0:04:11.892347s
    

    row: 2677, total_rows: 3960
    progress: 67.601%
    wait time: 0:04:11.696016s
    

    row: 2678, total_rows: 3960
    progress: 67.626%
    wait time: 0:04:10.858732s
    

    row: 2679, total_rows: 3960
    progress: 67.652%
    wait time: 0:04:10.022448s
    

    row: 2680, total_rows: 3960
    progress: 67.677%
    wait time: 0:04:10.467377s
    

    row: 2681, total_rows: 3960
    progress: 67.702%
    wait time: 0:04:10.911305s
    

    row: 2682, total_rows: 3960
    progress: 67.727%
    wait time: 0:04:10.076021s
    

    row: 2683, total_rows: 3960
    progress: 67.753%
    wait time: 0:04:09.241738s
    

    row: 2684, total_rows: 3960
    progress: 67.778%
    wait time: 0:04:09.684666s
    


    row: 2765, total_rows: 3960
    progress: 69.823%
    wait time: 0:03:54.432662s
    

    row: 2766, total_rows: 3960
    progress: 69.848%
    wait time: 0:03:54.236484s
    

    row: 2767, total_rows: 3960
    progress: 69.874%
    wait time: 0:03:54.040306s
    

    row: 2768, total_rows: 3960
    progress: 69.899%
    wait time: 0:03:53.844128s
    

    row: 2769, total_rows: 3960
    progress: 69.924%
    wait time: 0:03:53.647950s
    

    row: 2770, total_rows: 3960
    progress: 69.949%
    wait time: 0:03:53.451772s
    

    row: 2771, total_rows: 3960
    progress: 69.975%
    wait time: 0:03:53.255594s
    

    row: 2772, total_rows: 3960
    progress: 70.0%
    wait time: 0:03:53.059416s
    

    row: 2773, total_rows: 3960
    progress: 70.025%
    wait time: 0:03:52.863238s
    

    row: 2774, total_rows: 3960
    progress: 70.051%
    wait time: 0:03:52.666919s
    

    row: 2775, total_rows: 3960
    progress: 70.076%
    wait time: 0:03:52.470599s
    




    row: 2857, total_rows: 3960
    progress: 72.146%
    wait time: 0:03:36.384026s
    

    row: 2858, total_rows: 3960
    progress: 72.172%
    wait time: 0:03:36.187980s
    

    row: 2859, total_rows: 3960
    progress: 72.197%
    wait time: 0:03:35.991933s
    

    row: 2860, total_rows: 3960
    progress: 72.222%
    wait time: 0:03:35.795624s
    

    row: 2861, total_rows: 3960
    progress: 72.247%
    wait time: 0:03:35.599577s
    

    row: 2862, total_rows: 3960
    progress: 72.273%
    wait time: 0:03:35.403269s
    

    row: 2863, total_rows: 3960
    progress: 72.298%
    wait time: 0:03:35.207222s
    

    row: 2864, total_rows: 3960
    progress: 72.323%
    wait time: 0:03:35.010913s
    

    row: 2865, total_rows: 3960
    progress: 72.348%
    wait time: 0:03:34.814866s
    

    row: 2866, total_rows: 3960
    progress: 72.374%
    wait time: 0:03:34.618688s
    

    row: 2867, total_rows: 3960
    progress: 72.399%
    wait time: 0:03:34.422510s
    


    row: 2949, total_rows: 3960
    progress: 74.47%
    wait time: 0:03:17.324508s
    

    row: 2950, total_rows: 3960
    progress: 74.495%
    wait time: 0:03:17.634414s
    

    row: 2951, total_rows: 3960
    progress: 74.52%
    wait time: 0:03:16.934153s
    

    row: 2952, total_rows: 3960
    progress: 74.545%
    wait time: 0:03:17.243059s
    

    row: 2953, total_rows: 3960
    progress: 74.571%
    wait time: 0:03:16.543798s
    

    row: 2954, total_rows: 3960
    progress: 74.596%
    wait time: 0:03:16.851704s
    

    row: 2955, total_rows: 3960
    progress: 74.621%
    wait time: 0:03:17.158610s
    

    row: 2956, total_rows: 3960
    progress: 74.646%
    wait time: 0:03:16.460349s
    

    row: 2957, total_rows: 3960
    progress: 74.672%
    wait time: 0:03:16.766254s
    

    row: 2958, total_rows: 3960
    progress: 74.697%
    wait time: 0:03:16.570196s
    

    row: 2959, total_rows: 3960
    progress: 74.722%
    wait time: 0:03:16.374137s
    




    row: 3040, total_rows: 3960
    progress: 76.768%
    wait time: 0:03:00.483832s
    

    row: 3041, total_rows: 3960
    progress: 76.793%
    wait time: 0:03:00.287764s
    

    row: 3042, total_rows: 3960
    progress: 76.818%
    wait time: 0:03:00.091476s
    

    row: 3043, total_rows: 3960
    progress: 76.843%
    wait time: 0:02:59.895189s
    

    row: 3044, total_rows: 3960
    progress: 76.869%
    wait time: 0:02:59.699120s
    

    row: 3045, total_rows: 3960
    progress: 76.894%
    wait time: 0:02:59.502833s
    

    row: 3046, total_rows: 3960
    progress: 76.919%
    wait time: 0:02:59.306655s
    

    row: 3047, total_rows: 3960
    progress: 76.944%
    wait time: 0:02:59.110477s
    

    row: 3048, total_rows: 3960
    progress: 76.97%
    wait time: 0:02:58.914299s
    

    row: 3049, total_rows: 3960
    progress: 76.995%
    wait time: 0:02:58.718121s
    

    row: 3050, total_rows: 3960
    progress: 77.02%
    wait time: 0:02:58.521943s
    




    row: 3131, total_rows: 3960
    progress: 79.066%
    wait time: 0:02:40.972463s
    

    row: 3132, total_rows: 3960
    progress: 79.091%
    wait time: 0:02:40.778187s
    

    row: 3133, total_rows: 3960
    progress: 79.116%
    wait time: 0:02:40.584109s
    

    row: 3134, total_rows: 3960
    progress: 79.141%
    wait time: 0:02:40.802903s
    

    row: 3135, total_rows: 3960
    progress: 79.167%
    wait time: 0:02:41.020696s
    

    row: 3136, total_rows: 3960
    progress: 79.192%
    wait time: 0:02:40.825912s
    

    row: 3137, total_rows: 3960
    progress: 79.217%
    wait time: 0:02:40.630343s
    

    row: 3138, total_rows: 3960
    progress: 79.242%
    wait time: 0:02:40.024196s
    

    row: 3139, total_rows: 3960
    progress: 79.268%
    wait time: 0:02:39.419049s
    

    row: 3140, total_rows: 3960
    progress: 79.293%
    wait time: 0:02:39.634843s
    

    row: 3141, total_rows: 3960
    progress: 79.318%
    wait time: 0:02:39.849637s
    


    row: 3222, total_rows: 3960
    progress: 81.364%
    wait time: 0:02:23.302033s
    

    row: 3223, total_rows: 3960
    progress: 81.389%
    wait time: 0:02:23.107682s
    

    row: 3224, total_rows: 3960
    progress: 81.414%
    wait time: 0:02:22.913681s
    

    row: 3225, total_rows: 3960
    progress: 81.439%
    wait time: 0:02:22.719680s
    

    row: 3226, total_rows: 3960
    progress: 81.465%
    wait time: 0:02:22.525591s
    

    row: 3227, total_rows: 3960
    progress: 81.49%
    wait time: 0:02:22.331327s
    

    row: 3228, total_rows: 3960
    progress: 81.515%
    wait time: 0:02:22.136976s
    

    row: 3229, total_rows: 3960
    progress: 81.54%
    wait time: 0:02:21.942974s
    

    row: 3230, total_rows: 3960
    progress: 81.566%
    wait time: 0:02:21.748624s
    

    row: 3231, total_rows: 3960
    progress: 81.591%
    wait time: 0:02:21.554274s
    

    row: 3232, total_rows: 3960
    progress: 81.616%
    wait time: 0:02:21.359924s
    




    row: 3313, total_rows: 3960
    progress: 83.662%
    wait time: 0:02:05.632154s
    

    row: 3314, total_rows: 3960
    progress: 83.687%
    wait time: 0:02:05.437823s
    

    row: 3315, total_rows: 3960
    progress: 83.712%
    wait time: 0:02:05.243801s
    

    row: 3316, total_rows: 3960
    progress: 83.737%
    wait time: 0:02:05.049701s
    

    row: 3317, total_rows: 3960
    progress: 83.763%
    wait time: 0:02:04.855448s
    

    row: 3318, total_rows: 3960
    progress: 83.788%
    wait time: 0:02:04.661348s
    

    row: 3319, total_rows: 3960
    progress: 83.813%
    wait time: 0:02:04.467095s
    

    row: 3320, total_rows: 3960
    progress: 83.838%
    wait time: 0:02:04.272995s
    

    row: 3321, total_rows: 3960
    progress: 83.864%
    wait time: 0:02:04.078895s
    

    row: 3322, total_rows: 3960
    progress: 83.889%
    wait time: 0:02:04.203695s
    

    row: 3323, total_rows: 3960
    progress: 83.914%
    wait time: 0:02:04.327495s
    


    row: 3405, total_rows: 3960
    progress: 85.985%
    wait time: 0:01:48.878767s
    

    row: 3406, total_rows: 3960
    progress: 86.01%
    wait time: 0:01:48.682590s
    

    row: 3407, total_rows: 3960
    progress: 86.035%
    wait time: 0:01:48.486412s
    

    row: 3408, total_rows: 3960
    progress: 86.061%
    wait time: 0:01:48.290234s
    

    row: 3409, total_rows: 3960
    progress: 86.086%
    wait time: 0:01:48.094056s
    

    row: 3410, total_rows: 3960
    progress: 86.111%
    wait time: 0:01:47.897878s
    

    row: 3411, total_rows: 3960
    progress: 86.136%
    wait time: 0:01:47.701700s
    

    row: 3412, total_rows: 3960
    progress: 86.162%
    wait time: 0:01:47.505522s
    

    row: 3413, total_rows: 3960
    progress: 86.187%
    wait time: 0:01:47.309344s
    

    row: 3414, total_rows: 3960
    progress: 86.212%
    wait time: 0:01:47.113166s
    

    row: 3415, total_rows: 3960
    progress: 86.237%
    wait time: 0:01:46.916988s
    



    row: 3496, total_rows: 3960
    progress: 88.283%
    wait time: 0:01:31.130396s
    

    row: 3497, total_rows: 3960
    progress: 88.308%
    wait time: 0:01:31.037041s
    

    row: 3498, total_rows: 3960
    progress: 88.333%
    wait time: 0:01:30.968245s
    

    row: 3499, total_rows: 3960
    progress: 88.359%
    wait time: 0:01:30.898896s
    

    row: 3500, total_rows: 3960
    progress: 88.384%
    wait time: 0:01:30.701773s
    

    row: 3501, total_rows: 3960
    progress: 88.409%
    wait time: 0:01:30.504650s
    

    row: 3502, total_rows: 3960
    progress: 88.434%
    wait time: 0:01:30.307472s
    

    row: 3503, total_rows: 3960
    progress: 88.46%
    wait time: 0:01:30.110294s
    

    row: 3504, total_rows: 3960
    progress: 88.485%
    wait time: 0:01:29.913116s
    

    row: 3505, total_rows: 3960
    progress: 88.51%
    wait time: 0:01:29.715939s
    

    row: 3506, total_rows: 3960
    progress: 88.535%
    wait time: 0:01:29.518761s
    




    row: 3587, total_rows: 3960
    progress: 90.581%
    wait time: 0:01:13.547708s
    

    row: 3588, total_rows: 3960
    progress: 90.606%
    wait time: 0:01:13.350529s
    

    row: 3589, total_rows: 3960
    progress: 90.631%
    wait time: 0:01:13.153350s
    

    row: 3590, total_rows: 3960
    progress: 90.657%
    wait time: 0:01:12.956215s
    

    row: 3591, total_rows: 3960
    progress: 90.682%
    wait time: 0:01:12.759080s
    

    row: 3592, total_rows: 3960
    progress: 90.707%
    wait time: 0:01:12.561857s
    

    row: 3593, total_rows: 3960
    progress: 90.732%
    wait time: 0:01:12.364722s
    

    row: 3594, total_rows: 3960
    progress: 90.758%
    wait time: 0:01:12.167543s
    

    row: 3595, total_rows: 3960
    progress: 90.783%
    wait time: 0:01:11.970364s
    

    row: 3596, total_rows: 3960
    progress: 90.808%
    wait time: 0:01:11.773185s
    

    row: 3597, total_rows: 3960
    progress: 90.833%
    wait time: 0:01:11.576006s
    


    row: 3678, total_rows: 3960
    progress: 92.879%
    wait time: 0:00:55.745456s
    

    row: 3679, total_rows: 3960
    progress: 92.904%
    wait time: 0:00:55.407522s
    

    row: 3680, total_rows: 3960
    progress: 92.929%
    wait time: 0:00:55.350099s
    

    row: 3681, total_rows: 3960
    progress: 92.955%
    wait time: 0:00:55.291677s
    

    row: 3682, total_rows: 3960
    progress: 92.98%
    wait time: 0:00:54.954741s
    

    row: 3683, total_rows: 3960
    progress: 93.005%
    wait time: 0:00:54.618803s
    

    row: 3684, total_rows: 3960
    progress: 93.03%
    wait time: 0:00:54.559383s
    

    row: 3685, total_rows: 3960
    progress: 93.056%
    wait time: 0:00:54.498965s
    

    row: 3686, total_rows: 3960
    progress: 93.081%
    wait time: 0:00:54.164025s
    

    row: 3687, total_rows: 3960
    progress: 93.106%
    wait time: 0:00:54.102608s
    

    row: 3688, total_rows: 3960
    progress: 93.131%
    wait time: 0:00:53.904495s
    




    row: 3769, total_rows: 3960
    progress: 95.177%
    wait time: 0:00:37.852008s
    

    row: 3770, total_rows: 3960
    progress: 95.202%
    wait time: 0:00:37.653875s
    

    row: 3771, total_rows: 3960
    progress: 95.227%
    wait time: 0:00:37.455652s
    

    row: 3772, total_rows: 3960
    progress: 95.253%
    wait time: 0:00:37.163638s
    

    row: 3773, total_rows: 3960
    progress: 95.278%
    wait time: 0:00:37.059296s
    

    row: 3774, total_rows: 3960
    progress: 95.303%
    wait time: 0:00:36.768280s
    

    row: 3775, total_rows: 3960
    progress: 95.328%
    wait time: 0:00:36.478262s
    

    row: 3776, total_rows: 3960
    progress: 95.354%
    wait time: 0:00:36.281082s
    

    row: 3777, total_rows: 3960
    progress: 95.379%
    wait time: 0:00:36.083902s
    

    row: 3778, total_rows: 3960
    progress: 95.404%
    wait time: 0:00:35.977564s
    

    row: 3779, total_rows: 3960
    progress: 95.429%
    wait time: 0:00:35.870228s
    


    row: 3860, total_rows: 3960
    progress: 97.475%
    wait time: 0:00:19.817913s
    

    row: 3861, total_rows: 3960
    progress: 97.5%
    wait time: 0:00:19.619745s
    

    row: 3862, total_rows: 3960
    progress: 97.525%
    wait time: 0:00:19.421554s
    

    row: 3863, total_rows: 3960
    progress: 97.551%
    wait time: 0:00:19.223387s
    

    row: 3864, total_rows: 3960
    progress: 97.576%
    wait time: 0:00:19.025196s
    

    row: 3865, total_rows: 3960
    progress: 97.601%
    wait time: 0:00:18.827028s
    

    row: 3866, total_rows: 3960
    progress: 97.626%
    wait time: 0:00:18.628849s
    

    row: 3867, total_rows: 3960
    progress: 97.652%
    wait time: 0:00:18.430670s
    

    row: 3868, total_rows: 3960
    progress: 97.677%
    wait time: 0:00:18.232491s
    

    row: 3869, total_rows: 3960
    progress: 97.702%
    wait time: 0:00:18.034311s
    

    row: 3870, total_rows: 3960
    progress: 97.727%
    wait time: 0:00:17.836132s
    




    row: 3951, total_rows: 3960
    progress: 99.773%
    wait time: 0:00:01.783615s
    

    row: 3952, total_rows: 3960
    progress: 99.798%
    wait time: 0:00:01.585436s
    

    row: 3953, total_rows: 3960
    progress: 99.823%
    wait time: 0:00:01.387256s
    

    row: 3954, total_rows: 3960
    progress: 99.848%
    wait time: 0:00:01.189077s
    

    row: 3955, total_rows: 3960
    progress: 99.874%
    wait time: 0:00:00.990897s
    

    row: 3956, total_rows: 3960
    progress: 99.899%
    wait time: 0:00:00.792718s
    

    row: 3957, total_rows: 3960
    progress: 99.924%
    wait time: 0:00:00.594538s
    

    row: 3958, total_rows: 3960
    progress: 99.949%
    wait time: 0:00:00.396359s
    

    row: 3959, total_rows: 3960
    progress: 99.975%
    wait time: 0:00:00.198179s
    

    row: 3960, total_rows: 3960
    progress: 100.0%
    wait time: 0:00:00s
    


In [116]:
df_representation_v4_1 = pd.DataFrame(data_lexicon_attributes).set_index("id")
df_representation_v4 = pd.concat([df_representation_v4, df_representation_v3], axis=1)
df_representation_v4

NameError: name 'data_lexicon_attributes' is not defined

In [432]:
# pickle.dump(df_representation_v4, open("df_representation_v4.pickle", "wb"))
df_representation_v4 = pickle.load(open("df_representation_v4.pickle", "rb"))
df_representation_v4_1 = df_representation_v4[[c for c in df_representation_v4.columns if "lexicon<&>" in c]]
df_representation_v4_1 = pd.concat([df_representation_v4_1, df_representation_v3_1], axis=1)
df_representation_v4_1

Unnamed: 0_level_0,lexicon<&>LiuHu<&>+,lexicon<&>LiuHu<&>-,lexicon<&>sentiwordnet<&>+,lexicon<&>sentiwordnet<&>-,lexicon<&>sentiwordnet<&>o,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,...,BERTweet_758,BERTweet_759,BERTweet_760,BERTweet_761,BERTweet_762,BERTweet_763,BERTweet_764,BERTweet_765,BERTweet_766,BERTweet_767
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000,0,1,0.000,0.000,1.000,18,96,0,18,3,...,0.091612,0.013699,0.048113,0.180316,0.025295,-0.094723,0.094302,-0.039285,0.234481,-0.249347
10001,0,0,0.000,0.250,7.750,23,119,0,25,7,...,0.061139,0.057788,0.016066,0.114631,-0.075937,0.161129,0.088782,0.099523,0.195573,-0.094360
10002,0,0,0.000,0.625,6.375,19,108,0,19,2,...,0.058741,-0.221301,0.011673,0.277279,0.015945,0.010225,0.090561,0.177221,0.102671,-0.183728
10003,0,1,1.125,0.250,5.625,24,134,0,23,1,...,0.041260,0.074323,0.122148,0.129413,0.010133,-0.035607,0.095457,0.020883,0.222822,-0.049975
10004,0,0,0.000,0.250,4.750,24,125,0,25,3,...,0.152741,-0.128246,0.041543,0.219807,0.010124,0.019276,0.062688,0.044342,0.058079,-0.121317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40855,0,2,0.375,0.250,7.375,24,112,0,23,2,...,0.094263,0.087653,-0.054003,-0.014100,-0.015075,0.043808,0.258219,-0.013177,0.014293,0.006944
40856,1,0,1.500,0.500,3.000,20,121,0,21,1,...,0.090047,-0.136675,-0.018746,-0.072087,0.148523,-0.059273,0.200686,0.004628,0.182843,-0.168076
40857,0,0,0.750,0.000,2.250,23,139,0,23,2,...,0.214575,-0.152968,-0.036713,0.054679,-0.097778,-0.037324,0.141307,0.059210,0.080930,-0.205299
40858,0,0,0.125,0.625,3.250,14,83,2,15,5,...,-0.018443,-0.159911,0.110586,0.067621,-0.096703,0.073693,0.085268,-0.123595,0.264524,0.105249


In [436]:
# pickle.dump(df_representation_v4_1, open("df_representation_v4_1.pickle", "wb"))

In [363]:
sen = sentiments[0]

# indexs = df_train[df_train["sen"] == sen].index
# indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin(["low", "high"]))].index

# X = df_representation_v4.loc[indexsLH]#[rel_cols]
# y = df_train.loc[X.index]["int"]

# dic_label_count = y.value_counts().to_dict()
# min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
# max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
# index_label_1 = y[y==min_label].index
# oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1

# X_res, y_res = X.copy(), y.copy()
# for step in range(oversampling_steps):
#     new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
#     copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
#     copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
#     X_res = pd.concat([X_res, copied_sub_X], axis=0)
#     y_res = pd.concat([y_res, copied_sub_y], axis=0)

# X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
# selector = SelectKBest(chi2, k=X.shape[1])
# X_res_ = X_res - X_res.min()
# selector.fit(X_res_, y_res)
# scores_selector = {col: selector.scores_[i] if str(selector.scores_[i]) != "nan" else 0 for i, col in enumerate(X.columns.tolist())}
# ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda x: x[1], reverse=True)]
# print(ranked_cols)

indexs = df_train[df_train["sen"] == sen].index
scores_selector = {col: [] for col in df_representation_v4.columns.tolist()}
comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
for l1 in intensities:
    for l2 in intensities:
        if l1 < l2:
            indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

            X = df_representation_v4.loc[indexsLH]#[rel_cols]
            y = df_train.loc[X.index]["int"]

            dic_label_count = y.value_counts().to_dict()
            min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
            max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
            index_label_1 = y[y==min_label].index
            oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
            X_res, y_res = X.copy(), y.copy()

            for step in range(oversampling_steps):
                new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                X_res = pd.concat([X_res, copied_sub_X], axis=0)
                y_res = pd.concat([y_res, copied_sub_y], axis=0)

            X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
            selector = SelectKBest(chi2, k=X.shape[1])
            X_res_ = X_res - X_res.min()
            selector.fit(X_res_, y_res)
            for i, col in enumerate(X.columns.tolist()):
                scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]


f1_weight = []

for num_cols in range(1, len(ranked_cols)+1, 10):
    X = df_representation_v4.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(f1_weight[-1])

[10, 0.269252565656121, 0.02290469784861938]
0.0
[20, 0.2860016373313831, 0.024583514937789906]
0.0
[30, 0.33076135719898636, 0.017229772075774856]
0.0
[40, 0.3479687056866973, 0.030236108736200792]
0.0
[50, 0.3771521982927939, 0.0746230044378462]
0.0
[60, 0.3996847995111568, 0.04144134271314507]
0.0
[70, 0.4267900688789351, 0.05983731131658159]
0.0
[80, 0.45470957220675745, 0.08410853080338244]
0.0
[90, 0.5011042060899384, 0.09963893271964369]
0.0
[100, 0.5083544506129323, 0.10320718275703097]
0.0
[110, 0.5062030619499056, 0.1024069822254258]
0.0
[120, 0.5049054223742787, 0.1036636277641506]
0.0
[130, 0.5454862629398902, 0.10287608842056734]
0.0
[140, 0.5466147734493683, 0.1039515551946309]
0.0
[150, 0.5458826594113458, 0.10346433981180436]
0.0
[160, 0.5557256350934482, 0.10366834115383063]
0.0
[170, 0.5580013550582306, 0.10446034550950009]
0.0
[180, 0.5592190913496725, 0.11125537541340726]
0.0
[190, 0.6110727233059613, 0.03166581842338372]
0.0
[200, 0.6129803083756296, 0.031134681135

379

In [365]:
best_f1 = sorted(f1_weight, key=lambda x: x[1], reverse=True)[0][0]
best_f1

380

In [175]:
fine_f1_weight = []

for num_cols in range(best_f1-10, best_f1+10, 1):
    X = df_representation_v4.loc[indexs][ranked_cols[:num_cols+1]]
    y = df_train.loc[X.index]["int"]

    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
    # clf.fit(X_train, y_train)
    cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
    test_score = cv_results["test_score"]
    fine_f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
    print(fine_f1_weight[-1])

[711, 0.7135064054634677, 0.02843300232297205]
[712, 0.7135064054634677, 0.02843300232297205]
[713, 0.7152161895872203, 0.02604441533996918]
[714, 0.7152161895872203, 0.02604441533996918]
[715, 0.7152161895872203, 0.02604441533996918]
[716, 0.7154814838404342, 0.02646863771210781]
[717, 0.7175227985191704, 0.02487539412245578]
[718, 0.7148818500130721, 0.02404939261838799]
[719, 0.7148818500130721, 0.02404939261838799]
[720, 0.7148818500130721, 0.02404939261838799]
[721, 0.7148818500130721, 0.02404939261838799]
[722, 0.7148818500130721, 0.02404939261838799]
[723, 0.7140130503602529, 0.023388058654153963]
[724, 0.7139676914418421, 0.023356567742729654]
[725, 0.7124241789966591, 0.022477710945360958]
[726, 0.7133086362027828, 0.021444504015364445]
[727, 0.7140998584816268, 0.0206051731151041]
[728, 0.7140998584816268, 0.0206051731151041]
[729, 0.7140998584816268, 0.0206051731151041]
[730, 0.7140998584816268, 0.0206051731151041]


In [176]:
fine_best_f1 = sorted(fine_f1_weight, key=lambda x: x[1], reverse=True)[0][0]
fine_best_f1

717

In [400]:
def feature_selection_chi2(sen, df_rep, df_train, n0=10):
    indexs = df_train[df_train["sen"] == sen].index
    scores_selector = {col: [] for col in df_rep.columns.tolist()}
    comp_l = [(l1, l2) for l1 in intensities for l2 in intensities if l1 < l2]
    for l1 in intensities:
        for l2 in intensities:
            if l1 < l2:
                indexsLH = df_train.loc[indexs][(df_train.loc[indexs]["int"].isin([l1, l2]))].index

                X = df_rep.loc[indexsLH]#[rel_cols]
                y = df_train.loc[X.index]["int"]

                dic_label_count = y.value_counts().to_dict()
                min_label = min(dic_label_count.items(), key=lambda x: x[1])[0]
                max_label = max(dic_label_count.items(), key=lambda x: x[1])[0]
                index_label_1 = y[y==min_label].index
                oversampling_steps = int(dic_label_count[max_label] / dic_label_count[min_label]) - 1
                X_res, y_res = X.copy(), y.copy()

                for step in range(oversampling_steps):
                    new_indexs = [f"{ix}+{step + 1}" for ix in index_label_1]
                    copied_sub_X = pd.DataFrame(X.loc[index_label_1].values, columns=X.columns, index=new_indexs)
                    copied_sub_y = pd.Series(y.loc[index_label_1].values, index=new_indexs)
                    X_res = pd.concat([X_res, copied_sub_X], axis=0)
                    y_res = pd.concat([y_res, copied_sub_y], axis=0)

                X_res = pd.DataFrame(StandardScaler().fit_transform(X_res), columns=X_res.columns, index=X_res.index)
                selector = SelectKBest(chi2, k=X.shape[1])
                X_res_ = X_res - X_res.min()
                selector.fit(X_res_, y_res)
                for i, col in enumerate(X.columns.tolist()):
                    scores_selector[col] += [selector.scores_[i]] if str(selector.scores_[i]) != "nan" else [0]

    ranked_cols = [x[0] for x in sorted(scores_selector.items(), key=lambda y: max(y[1]), reverse=True)]


    f1_weight = []
    b = []
    p = []
    pp = []
    ppp = []
    for num_cols in range(n0, len(ranked_cols)+1, 10):
        X = df_rep.loc[indexs][ranked_cols[:num_cols+1]]
        y = df_train.loc[X.index]["int"]

        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
        # clf.fit(X_train, y_train)
        cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
        test_score = cv_results["test_score"]
        f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
        print(f1_weight[-1])
        b.append(f1_weight[-1][1])
        p.append(np.mean(b[max(0, len(b)-15-1):]))
        pp.append(p[-1]-p[-min(2, len(p))])
        ppp.append(np.mean(pp[max(0, len(pp)-30-1):]))
        pppc = np.mean([int(x<0) for x in ppp[max(0, len(ppp)-5-1):]])
        print(pppc)
        if int(pppc) == 1:
            break

    best_f1 = sorted(f1_weight, key=lambda x: x[1], reverse=True)[0][0]

    fine_f1_weight = []

    for num_cols in range(best_f1-10, best_f1+10, 1):
        X = df_rep.loc[indexs][ranked_cols[:num_cols+1]]
        y = df_train.loc[X.index]["int"]

        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        clf = make_pipeline(StandardScaler(), SVC(kernel="rbf", gamma='auto', class_weight="balanced"))
        # clf.fit(X_train, y_train)
        cv_results = cross_validate(clf, X, y, cv=5, scoring="f1_weighted")
        test_score = cv_results["test_score"]
        fine_f1_weight.append([num_cols, np.mean(test_score), np.std(test_score)])
        print(fine_f1_weight[-1])

    fine_best_f1 = sorted(fine_f1_weight, key=lambda x: x[1], reverse=True)[0]
    return fine_best_f1, ranked_cols

In [446]:
sen = sentiments[3]
print(sen)
df_rep = df_representation_v4_1

fine_best_f1, ranked_cols = feature_selection_chi2(sen, df_rep, df_train, n0=61)
fine_best_f1

sadness
[61, 0.5618359080669355, 0.04365959786628209]
0.0
[71, 0.5471965203651374, 0.034257810100853416]
0.5
[81, 0.5732344570230656, 0.043976209985787114]
0.6666666666666666
[91, 0.5392367191450962, 0.06671093798143386]
0.75
[101, 0.5815966709136859, 0.04707140537110987]
0.8
[111, 0.5814169367035197, 0.040202709432892995]
0.6666666666666666
[121, 0.6005862202614274, 0.022529123848951973]
0.6666666666666666
[131, 0.6156320873592984, 0.03356069529131624]
0.5
[141, 0.6182268655432943, 0.03231804903612055]
0.3333333333333333
[151, 0.6104892773703442, 0.026646565402526457]
0.16666666666666666
[161, 0.6097924866435024, 0.020446185203755983]
0.0
[171, 0.6240952756268285, 0.030487922552965067]
0.0
[181, 0.6141501083694252, 0.01734628662035174]
0.0
[191, 0.6225563463645949, 0.016916496026816937]
0.0
[201, 0.6425712243351848, 0.022608768729893834]
0.0
[211, 0.6400780110285347, 0.026599658498892315]
0.0
[221, 0.6517812516652721, 0.03228739502356252]
0.0
[231, 0.6488058355298479, 0.03089229126609

[263, 0.6621621917665539, 0.03577512484762911]

In [None]:
[300, 0.6497708645932712, 0.013279920290572246]

In [447]:
pickle.dump(ranked_cols[:fine_best_f1[0]+1], open(f"cols_selected_{sen}_v4_1.pickle", "wb"))

In [179]:
# Datasets que deberán predecir para la competencia.
target = {
    'anger': pd.read_csv('https://raw.githubusercontent.com/dccuchile/CC6205/master/assignments/assignment_1/data/target/anger-target.txt', sep='\t', names=['id', 'tweet', 'class', 'sentiment_intensity'], na_values=['NONE']),
    'fear': pd.read_csv('https://raw.githubusercontent.com/dccuchile/CC6205/master/assignments/assignment_1/data/target/fear-target.txt', sep='\t', names=['id', 'tweet', 'class', 'sentiment_intensity'], na_values=['NONE']),
    'joy': pd.read_csv('https://raw.githubusercontent.com/dccuchile/CC6205/master/assignments/assignment_1/data/target/joy-target.txt', sep='\t', names=['id', 'tweet', 'class', 'sentiment_intensity'], na_values=['NONE']),
    'sadness': pd.read_csv('https://raw.githubusercontent.com/dccuchile/CC6205/master/assignments/assignment_1/data/target/sadness-target.txt', sep='\t', names=['id', 'tweet', 'class', 'sentiment_intensity'], na_values=['NONE'])
}

In [197]:
df_test = pd.DataFrame()
for sen in sentiments:
    df_test = pd.concat([df_test, target[sen]], axis=0)
df_test = df_test.set_index("id")

In [198]:
pickle.dump(df_test, open("df_test.pickle", "wb"))

In [183]:
import time

In [448]:
data_all_attributes = []
times = []
for k, ix in enumerate(df_test.index):
    start_time = time.time()
    tweet = df_test.loc[ix]["tweet"]
    o = {"id": ix}
    o = {**o, **get_retro_attrib(tweet)}
    o = {**o, **get_punct_attrib(tweet)}
    o = {**o, **get_emojilib_attrib(tweet)}
    o = {**o, **get_linguistics_attrib(tweet)}
    o = {**o, **get_n_grams_attrib(tweet)}
    data_all_attributes.append(o)
    dt = time.time()-start_time
    times.append(dt)
    print(f"""
    row: {k+1}, total_rows: {df_test.shape[0]}
    progress: {np.round(100*(k+1)/df_test.shape[0], 3)}%
    wait time: {datetime.timedelta(seconds = np.mean(times)*(df_test.shape[0]-k-1))}s
    """)


    row: 1, total_rows: 3142
    progress: 0.032%
    wait time: 0:10:06.763667s
    

    row: 2, total_rows: 3142
    progress: 0.064%
    wait time: 0:05:37.856805s
    

    row: 3, total_rows: 3142
    progress: 0.095%
    wait time: 0:04:08.207004s
    

    row: 4, total_rows: 3142
    progress: 0.127%
    wait time: 0:03:24.155161s
    

    row: 5, total_rows: 3142
    progress: 0.159%
    wait time: 0:02:56.459541s
    

    row: 6, total_rows: 3142
    progress: 0.191%
    wait time: 0:02:38.512039s
    

    row: 7, total_rows: 3142
    progress: 0.223%
    wait time: 0:02:21.651624s
    

    row: 8, total_rows: 3142
    progress: 0.255%
    wait time: 0:02:10.571254s
    

    row: 9, total_rows: 3142
    progress: 0.286%
    wait time: 0:02:01.949490s
    

    row: 10, total_rows: 3142
    progress: 0.318%
    wait time: 0:01:53.794846s
    

    row: 11, total_rows: 3142
    progress: 0.35%
    wait time: 0:01:45.980883s
    

    row: 12, total_rows: 3142
    progres


    row: 98, total_rows: 3142
    progress: 3.119%
    wait time: 0:00:49.230457s
    

    row: 99, total_rows: 3142
    progress: 3.151%
    wait time: 0:00:49.240129s
    

    row: 100, total_rows: 3142
    progress: 3.183%
    wait time: 0:00:49.188505s
    

    row: 101, total_rows: 3142
    progress: 3.215%
    wait time: 0:00:49.016985s
    

    row: 102, total_rows: 3142
    progress: 3.246%
    wait time: 0:00:49.206581s
    

    row: 103, total_rows: 3142
    progress: 3.278%
    wait time: 0:00:49.244366s
    

    row: 104, total_rows: 3142
    progress: 3.31%
    wait time: 0:00:49.017950s
    

    row: 105, total_rows: 3142
    progress: 3.342%
    wait time: 0:00:48.882536s
    

    row: 106, total_rows: 3142
    progress: 3.374%
    wait time: 0:00:48.778111s
    

    row: 107, total_rows: 3142
    progress: 3.405%
    wait time: 0:00:48.561857s
    

    row: 108, total_rows: 3142
    progress: 3.437%
    wait time: 0:00:48.321316s
    

    row: 109, total_row


    row: 206, total_rows: 3142
    progress: 6.556%
    wait time: 0:00:42.161455s
    

    row: 207, total_rows: 3142
    progress: 6.588%
    wait time: 0:00:42.113781s
    

    row: 208, total_rows: 3142
    progress: 6.62%
    wait time: 0:00:42.066456s
    

    row: 209, total_rows: 3142
    progress: 6.652%
    wait time: 0:00:42.047561s
    

    row: 210, total_rows: 3142
    progress: 6.684%
    wait time: 0:00:41.944867s
    

    row: 211, total_rows: 3142
    progress: 6.715%
    wait time: 0:00:41.829174s
    

    row: 212, total_rows: 3142
    progress: 6.747%
    wait time: 0:00:41.852816s
    

    row: 213, total_rows: 3142
    progress: 6.779%
    wait time: 0:00:41.807259s
    

    row: 214, total_rows: 3142
    progress: 6.811%
    wait time: 0:00:41.734651s
    

    row: 215, total_rows: 3142
    progress: 6.843%
    wait time: 0:00:41.703489s
    

    row: 216, total_rows: 3142
    progress: 6.875%
    wait time: 0:00:41.740284s
    

    row: 217, total_r


    row: 313, total_rows: 3142
    progress: 9.962%
    wait time: 0:00:37.086400s
    

    row: 314, total_rows: 3142
    progress: 9.994%
    wait time: 0:00:37.081429s
    

    row: 315, total_rows: 3142
    progress: 10.025%
    wait time: 0:00:37.022498s
    

    row: 316, total_rows: 3142
    progress: 10.057%
    wait time: 0:00:36.999703s
    

    row: 317, total_rows: 3142
    progress: 10.089%
    wait time: 0:00:36.976967s
    

    row: 318, total_rows: 3142
    progress: 10.121%
    wait time: 0:00:36.963190s
    

    row: 319, total_rows: 3142
    progress: 10.153%
    wait time: 0:00:36.940562s
    

    row: 320, total_rows: 3142
    progress: 10.185%
    wait time: 0:00:36.891515s
    

    row: 321, total_rows: 3142
    progress: 10.216%
    wait time: 0:00:36.877909s
    

    row: 322, total_rows: 3142
    progress: 10.248%
    wait time: 0:00:36.846770s
    

    row: 323, total_rows: 3142
    progress: 10.28%
    wait time: 0:00:36.807024s
    

    row: 324


    row: 418, total_rows: 3142
    progress: 13.304%
    wait time: 0:00:34.214699s
    

    row: 419, total_rows: 3142
    progress: 13.335%
    wait time: 0:00:34.205071s
    

    row: 420, total_rows: 3142
    progress: 13.367%
    wait time: 0:00:34.150019s
    

    row: 421, total_rows: 3142
    progress: 13.399%
    wait time: 0:00:34.101670s
    

    row: 422, total_rows: 3142
    progress: 13.431%
    wait time: 0:00:34.079326s
    

    row: 423, total_rows: 3142
    progress: 13.463%
    wait time: 0:00:34.057029s
    

    row: 424, total_rows: 3142
    progress: 13.495%
    wait time: 0:00:34.009126s
    

    row: 425, total_rows: 3142
    progress: 13.526%
    wait time: 0:00:33.993412s
    

    row: 426, total_rows: 3142
    progress: 13.558%
    wait time: 0:00:33.971321s
    

    row: 427, total_rows: 3142
    progress: 13.59%
    wait time: 0:00:33.942925s
    

    row: 428, total_rows: 3142
    progress: 13.622%
    wait time: 0:00:33.901929s
    

    row: 4


    row: 511, total_rows: 3142
    progress: 16.264%
    wait time: 0:00:32.031028s
    

    row: 512, total_rows: 3142
    progress: 16.295%
    wait time: 0:00:32.007731s
    

    row: 513, total_rows: 3142
    progress: 16.327%
    wait time: 0:00:31.994751s
    

    row: 514, total_rows: 3142
    progress: 16.359%
    wait time: 0:00:31.976646s
    

    row: 515, total_rows: 3142
    progress: 16.391%
    wait time: 0:00:31.968784s
    

    row: 516, total_rows: 3142
    progress: 16.423%
    wait time: 0:00:31.935433s
    

    row: 517, total_rows: 3142
    progress: 16.454%
    wait time: 0:00:31.912347s
    

    row: 518, total_rows: 3142
    progress: 16.486%
    wait time: 0:00:31.904523s
    

    row: 519, total_rows: 3142
    progress: 16.518%
    wait time: 0:00:31.891613s
    

    row: 520, total_rows: 3142
    progress: 16.55%
    wait time: 0:00:31.868617s
    

    row: 521, total_rows: 3142
    progress: 16.582%
    wait time: 0:00:31.830567s
    

    row: 5


    row: 621, total_rows: 3142
    progress: 19.764%
    wait time: 0:00:30.006959s
    

    row: 622, total_rows: 3142
    progress: 19.796%
    wait time: 0:00:29.979272s
    

    row: 623, total_rows: 3142
    progress: 19.828%
    wait time: 0:00:29.967839s
    

    row: 624, total_rows: 3142
    progress: 19.86%
    wait time: 0:00:29.960440s
    

    row: 625, total_rows: 3142
    progress: 19.892%
    wait time: 0:00:29.948995s
    

    row: 626, total_rows: 3142
    progress: 19.924%
    wait time: 0:00:29.933524s
    

    row: 627, total_rows: 3142
    progress: 19.955%
    wait time: 0:00:29.914053s
    

    row: 628, total_rows: 3142
    progress: 19.987%
    wait time: 0:00:29.902627s
    

    row: 629, total_rows: 3142
    progress: 20.019%
    wait time: 0:00:29.887199s
    

    row: 630, total_rows: 3142
    progress: 20.051%
    wait time: 0:00:29.851829s
    

    row: 631, total_rows: 3142
    progress: 20.083%
    wait time: 0:00:29.824519s
    

    row: 6


    row: 713, total_rows: 3142
    progress: 22.693%
    wait time: 0:00:28.534917s
    

    row: 714, total_rows: 3142
    progress: 22.724%
    wait time: 0:00:28.524065s
    

    row: 715, total_rows: 3142
    progress: 22.756%
    wait time: 0:00:28.523401s
    

    row: 716, total_rows: 3142
    progress: 22.788%
    wait time: 0:00:28.509133s
    

    row: 717, total_rows: 3142
    progress: 22.82%
    wait time: 0:00:28.498257s
    

    row: 718, total_rows: 3142
    progress: 22.852%
    wait time: 0:00:28.487380s
    

    row: 719, total_rows: 3142
    progress: 22.884%
    wait time: 0:00:28.476501s
    

    row: 720, total_rows: 3142
    progress: 22.915%
    wait time: 0:00:28.462253s
    

    row: 721, total_rows: 3142
    progress: 22.947%
    wait time: 0:00:28.437925s
    

    row: 722, total_rows: 3142
    progress: 22.979%
    wait time: 0:00:28.420354s
    

    row: 723, total_rows: 3142
    progress: 23.011%
    wait time: 0:00:28.389412s
    

    row: 7


    row: 810, total_rows: 3142
    progress: 25.78%
    wait time: 0:00:27.448762s
    

    row: 811, total_rows: 3142
    progress: 25.812%
    wait time: 0:00:27.449192s
    

    row: 812, total_rows: 3142
    progress: 25.843%
    wait time: 0:00:27.435221s
    

    row: 813, total_rows: 3142
    progress: 25.875%
    wait time: 0:00:27.429859s
    

    row: 814, total_rows: 3142
    progress: 25.907%
    wait time: 0:00:27.421612s
    

    row: 815, total_rows: 3142
    progress: 25.939%
    wait time: 0:00:27.416209s
    

    row: 816, total_rows: 3142
    progress: 25.971%
    wait time: 0:00:27.402228s
    

    row: 817, total_rows: 3142
    progress: 26.003%
    wait time: 0:00:27.393949s
    

    row: 818, total_rows: 3142
    progress: 26.034%
    wait time: 0:00:27.385662s
    

    row: 819, total_rows: 3142
    progress: 26.066%
    wait time: 0:00:27.377359s
    

    row: 820, total_rows: 3142
    progress: 26.098%
    wait time: 0:00:27.357710s
    

    row: 8


    row: 917, total_rows: 3142
    progress: 29.185%
    wait time: 0:00:25.987031s
    

    row: 918, total_rows: 3142
    progress: 29.217%
    wait time: 0:00:25.978577s
    

    row: 919, total_rows: 3142
    progress: 29.249%
    wait time: 0:00:25.970117s
    

    row: 920, total_rows: 3142
    progress: 29.281%
    wait time: 0:00:25.959228s
    

    row: 921, total_rows: 3142
    progress: 29.313%
    wait time: 0:00:25.943508s
    

    row: 922, total_rows: 3142
    progress: 29.344%
    wait time: 0:00:25.925390s
    

    row: 923, total_rows: 3142
    progress: 29.376%
    wait time: 0:00:25.902483s
    

    row: 924, total_rows: 3142
    progress: 29.408%
    wait time: 0:00:25.891619s
    

    row: 925, total_rows: 3142
    progress: 29.44%
    wait time: 0:00:25.871160s
    

    row: 926, total_rows: 3142
    progress: 29.472%
    wait time: 0:00:25.860307s
    

    row: 927, total_rows: 3142
    progress: 29.504%
    wait time: 0:00:25.842278s
    

    row: 9


    row: 1022, total_rows: 3142
    progress: 32.527%
    wait time: 0:00:24.635603s
    

    row: 1023, total_rows: 3142
    progress: 32.559%
    wait time: 0:00:24.628937s
    

    row: 1024, total_rows: 3142
    progress: 32.591%
    wait time: 0:00:24.618114s
    

    row: 1025, total_rows: 3142
    progress: 32.623%
    wait time: 0:00:24.605225s
    

    row: 1026, total_rows: 3142
    progress: 32.654%
    wait time: 0:00:24.590272s
    

    row: 1027, total_rows: 3142
    progress: 32.686%
    wait time: 0:00:24.577393s
    

    row: 1028, total_rows: 3142
    progress: 32.718%
    wait time: 0:00:24.560400s
    

    row: 1029, total_rows: 3142
    progress: 32.75%
    wait time: 0:00:24.541376s
    

    row: 1030, total_rows: 3142
    progress: 32.782%
    wait time: 0:00:24.538776s
    

    row: 1031, total_rows: 3142
    progress: 32.813%
    wait time: 0:00:24.534109s
    

    row: 1032, total_rows: 3142
    progress: 32.845%
    wait time: 0:00:24.513048s
    



    row: 1113, total_rows: 3142
    progress: 35.423%
    wait time: 0:00:23.425748s
    

    row: 1114, total_rows: 3142
    progress: 35.455%
    wait time: 0:00:23.415051s
    

    row: 1115, total_rows: 3142
    progress: 35.487%
    wait time: 0:00:23.402532s
    

    row: 1116, total_rows: 3142
    progress: 35.519%
    wait time: 0:00:23.386381s
    

    row: 1117, total_rows: 3142
    progress: 35.551%
    wait time: 0:00:23.379315s
    

    row: 1118, total_rows: 3142
    progress: 35.582%
    wait time: 0:00:23.374049s
    

    row: 1119, total_rows: 3142
    progress: 35.614%
    wait time: 0:00:23.368763s
    

    row: 1120, total_rows: 3142
    progress: 35.646%
    wait time: 0:00:23.363464s
    

    row: 1121, total_rows: 3142
    progress: 35.678%
    wait time: 0:00:23.347317s
    

    row: 1122, total_rows: 3142
    progress: 35.71%
    wait time: 0:00:23.336589s
    

    row: 1123, total_rows: 3142
    progress: 35.742%
    wait time: 0:00:23.324060s
    



    row: 1204, total_rows: 3142
    progress: 38.32%
    wait time: 0:00:22.177457s
    

    row: 1205, total_rows: 3142
    progress: 38.351%
    wait time: 0:00:22.166925s
    

    row: 1206, total_rows: 3142
    progress: 38.383%
    wait time: 0:00:22.156391s
    

    row: 1207, total_rows: 3142
    progress: 38.415%
    wait time: 0:00:22.144251s
    

    row: 1208, total_rows: 3142
    progress: 38.447%
    wait time: 0:00:22.128906s
    

    row: 1209, total_rows: 3142
    progress: 38.479%
    wait time: 0:00:22.119974s
    

    row: 1210, total_rows: 3142
    progress: 38.511%
    wait time: 0:00:22.107839s
    

    row: 1211, total_rows: 3142
    progress: 38.542%
    wait time: 0:00:22.089321s
    

    row: 1212, total_rows: 3142
    progress: 38.574%
    wait time: 0:00:22.074010s
    

    row: 1213, total_rows: 3142
    progress: 38.606%
    wait time: 0:00:22.058710s
    

    row: 1214, total_rows: 3142
    progress: 38.638%
    wait time: 0:00:22.041830s
    



    row: 1299, total_rows: 3142
    progress: 41.343%
    wait time: 0:00:20.909774s
    

    row: 1300, total_rows: 3142
    progress: 41.375%
    wait time: 0:00:20.902207s
    

    row: 1301, total_rows: 3142
    progress: 41.407%
    wait time: 0:00:20.887549s
    

    row: 1302, total_rows: 3142
    progress: 41.439%
    wait time: 0:00:20.870071s
    

    row: 1303, total_rows: 3142
    progress: 41.47%
    wait time: 0:00:20.856848s
    

    row: 1304, total_rows: 3142
    progress: 41.502%
    wait time: 0:00:20.843628s
    

    row: 1305, total_rows: 3142
    progress: 41.534%
    wait time: 0:00:20.830414s
    

    row: 1306, total_rows: 3142
    progress: 41.566%
    wait time: 0:00:20.814390s
    

    row: 1307, total_rows: 3142
    progress: 41.598%
    wait time: 0:00:20.796973s
    

    row: 1308, total_rows: 3142
    progress: 41.63%
    wait time: 0:00:20.782379s
    

    row: 1309, total_rows: 3142
    progress: 41.661%
    wait time: 0:00:20.771998s
    




    row: 1394, total_rows: 3142
    progress: 44.367%
    wait time: 0:00:19.730484s
    

    row: 1395, total_rows: 3142
    progress: 44.398%
    wait time: 0:00:19.723865s
    

    row: 1396, total_rows: 3142
    progress: 44.43%
    wait time: 0:00:19.708468s
    

    row: 1397, total_rows: 3142
    progress: 44.462%
    wait time: 0:00:19.696834s
    

    row: 1398, total_rows: 3142
    progress: 44.494%
    wait time: 0:00:19.685200s
    

    row: 1399, total_rows: 3142
    progress: 44.526%
    wait time: 0:00:19.669826s
    

    row: 1400, total_rows: 3142
    progress: 44.558%
    wait time: 0:00:19.659444s
    

    row: 1401, total_rows: 3142
    progress: 44.589%
    wait time: 0:00:19.647816s
    

    row: 1402, total_rows: 3142
    progress: 44.621%
    wait time: 0:00:19.634947s
    

    row: 1403, total_rows: 3142
    progress: 44.653%
    wait time: 0:00:19.623322s
    

    row: 1404, total_rows: 3142
    progress: 44.685%
    wait time: 0:00:19.612936s
    



    row: 1502, total_rows: 3142
    progress: 47.804%
    wait time: 0:00:18.425168s
    

    row: 1503, total_rows: 3142
    progress: 47.836%
    wait time: 0:00:18.411504s
    

    row: 1504, total_rows: 3142
    progress: 47.868%
    wait time: 0:00:18.394577s
    

    row: 1505, total_rows: 3142
    progress: 47.899%
    wait time: 0:00:18.378215s
    

    row: 1506, total_rows: 3142
    progress: 47.931%
    wait time: 0:00:18.366886s
    

    row: 1507, total_rows: 3142
    progress: 47.963%
    wait time: 0:00:18.354339s
    

    row: 1508, total_rows: 3142
    progress: 47.995%
    wait time: 0:00:18.340710s
    

    row: 1509, total_rows: 3142
    progress: 48.027%
    wait time: 0:00:18.329253s
    

    row: 1510, total_rows: 3142
    progress: 48.059%
    wait time: 0:00:18.315633s
    

    row: 1511, total_rows: 3142
    progress: 48.09%
    wait time: 0:00:18.303100s
    

    row: 1512, total_rows: 3142
    progress: 48.122%
    wait time: 0:00:18.290571s
    



    row: 1598, total_rows: 3142
    progress: 50.859%
    wait time: 0:00:17.255776s
    

    row: 1599, total_rows: 3142
    progress: 50.891%
    wait time: 0:00:17.246371s
    

    row: 1600, total_rows: 3142
    progress: 50.923%
    wait time: 0:00:17.233103s
    

    row: 1601, total_rows: 3142
    progress: 50.955%
    wait time: 0:00:17.224658s
    

    row: 1602, total_rows: 3142
    progress: 50.987%
    wait time: 0:00:17.216205s
    

    row: 1603, total_rows: 3142
    progress: 51.018%
    wait time: 0:00:17.205824s
    

    row: 1604, total_rows: 3142
    progress: 51.05%
    wait time: 0:00:17.191602s
    

    row: 1605, total_rows: 3142
    progress: 51.082%
    wait time: 0:00:17.178346s
    

    row: 1606, total_rows: 3142
    progress: 51.114%
    wait time: 0:00:17.164139s
    

    row: 1607, total_rows: 3142
    progress: 51.146%
    wait time: 0:00:17.151850s
    

    row: 1608, total_rows: 3142
    progress: 51.178%
    wait time: 0:00:17.137665s
    



    row: 1694, total_rows: 3142
    progress: 53.915%
    wait time: 0:00:16.078716s
    

    row: 1695, total_rows: 3142
    progress: 53.947%
    wait time: 0:00:16.065823s
    

    row: 1696, total_rows: 3142
    progress: 53.978%
    wait time: 0:00:16.052935s
    

    row: 1697, total_rows: 3142
    progress: 54.01%
    wait time: 0:00:16.040902s
    

    row: 1698, total_rows: 3142
    progress: 54.042%
    wait time: 0:00:16.028021s
    

    row: 1699, total_rows: 3142
    progress: 54.074%
    wait time: 0:00:16.016845s
    

    row: 1700, total_rows: 3142
    progress: 54.106%
    wait time: 0:00:16.004821s
    

    row: 1701, total_rows: 3142
    progress: 54.137%
    wait time: 0:00:15.991951s
    

    row: 1702, total_rows: 3142
    progress: 54.169%
    wait time: 0:00:15.981625s
    

    row: 1703, total_rows: 3142
    progress: 54.201%
    wait time: 0:00:15.971298s
    

    row: 1704, total_rows: 3142
    progress: 54.233%
    wait time: 0:00:15.957589s
    



    row: 1790, total_rows: 3142
    progress: 56.97%
    wait time: 0:00:14.926519s
    

    row: 1791, total_rows: 3142
    progress: 57.002%
    wait time: 0:00:14.912437s
    

    row: 1792, total_rows: 3142
    progress: 57.034%
    wait time: 0:00:14.904394s
    

    row: 1793, total_rows: 3142
    progress: 57.066%
    wait time: 0:00:14.893331s
    

    row: 1794, total_rows: 3142
    progress: 57.097%
    wait time: 0:00:14.882268s
    

    row: 1795, total_rows: 3142
    progress: 57.129%
    wait time: 0:00:14.873458s
    

    row: 1796, total_rows: 3142
    progress: 57.161%
    wait time: 0:00:14.861642s
    

    row: 1797, total_rows: 3142
    progress: 57.193%
    wait time: 0:00:14.848330s
    

    row: 1798, total_rows: 3142
    progress: 57.225%
    wait time: 0:00:14.836520s
    

    row: 1799, total_rows: 3142
    progress: 57.257%
    wait time: 0:00:14.826954s
    

    row: 1800, total_rows: 3142
    progress: 57.288%
    wait time: 0:00:14.816638s
    



    row: 1884, total_rows: 3142
    progress: 59.962%
    wait time: 0:00:13.841379s
    

    row: 1885, total_rows: 3142
    progress: 59.994%
    wait time: 0:00:13.830381s
    

    row: 1886, total_rows: 3142
    progress: 60.025%
    wait time: 0:00:13.819383s
    

    row: 1887, total_rows: 3142
    progress: 60.057%
    wait time: 0:00:13.809051s
    

    row: 1888, total_rows: 3142
    progress: 60.089%
    wait time: 0:00:13.799382s
    

    row: 1889, total_rows: 3142
    progress: 60.121%
    wait time: 0:00:13.789710s
    

    row: 1890, total_rows: 3142
    progress: 60.153%
    wait time: 0:00:13.777382s
    

    row: 1891, total_rows: 3142
    progress: 60.185%
    wait time: 0:00:13.764394s
    

    row: 1892, total_rows: 3142
    progress: 60.216%
    wait time: 0:00:13.751412s
    

    row: 1893, total_rows: 3142
    progress: 60.248%
    wait time: 0:00:13.741738s
    

    row: 1894, total_rows: 3142
    progress: 60.28%
    wait time: 0:00:13.731401s
    



    row: 1991, total_rows: 3142
    progress: 63.367%
    wait time: 0:00:12.641399s
    

    row: 1992, total_rows: 3142
    progress: 63.399%
    wait time: 0:00:12.631009s
    

    row: 1993, total_rows: 3142
    progress: 63.431%
    wait time: 0:00:12.620618s
    

    row: 1994, total_rows: 3142
    progress: 63.463%
    wait time: 0:00:12.609649s
    

    row: 1995, total_rows: 3142
    progress: 63.495%
    wait time: 0:00:12.598680s
    

    row: 1996, total_rows: 3142
    progress: 63.526%
    wait time: 0:00:12.588286s
    

    row: 1997, total_rows: 3142
    progress: 63.558%
    wait time: 0:00:12.577316s
    

    row: 1998, total_rows: 3142
    progress: 63.59%
    wait time: 0:00:12.565200s
    

    row: 1999, total_rows: 3142
    progress: 63.622%
    wait time: 0:00:12.552514s
    

    row: 2000, total_rows: 3142
    progress: 63.654%
    wait time: 0:00:12.540405s
    

    row: 2001, total_rows: 3142
    progress: 63.686%
    wait time: 0:00:12.528299s
    



    row: 2095, total_rows: 3142
    progress: 66.677%
    wait time: 0:00:11.488822s
    

    row: 2096, total_rows: 3142
    progress: 66.709%
    wait time: 0:00:11.476869s
    

    row: 2097, total_rows: 3142
    progress: 66.741%
    wait time: 0:00:11.465916s
    

    row: 2098, total_rows: 3142
    progress: 66.773%
    wait time: 0:00:11.452472s
    

    row: 2099, total_rows: 3142
    progress: 66.805%
    wait time: 0:00:11.440030s
    

    row: 2100, total_rows: 3142
    progress: 66.836%
    wait time: 0:00:11.428586s
    

    row: 2101, total_rows: 3142
    progress: 66.868%
    wait time: 0:00:11.416151s
    

    row: 2102, total_rows: 3142
    progress: 66.9%
    wait time: 0:00:11.405206s
    

    row: 2103, total_rows: 3142
    progress: 66.932%
    wait time: 0:00:11.393766s
    

    row: 2104, total_rows: 3142
    progress: 66.964%
    wait time: 0:00:11.381834s
    

    row: 2105, total_rows: 3142
    progress: 66.996%
    wait time: 0:00:11.370398s
    




    row: 2189, total_rows: 3142
    progress: 69.669%
    wait time: 0:00:10.421287s
    

    row: 2190, total_rows: 3142
    progress: 69.701%
    wait time: 0:00:10.409514s
    

    row: 2191, total_rows: 3142
    progress: 69.733%
    wait time: 0:00:10.397744s
    

    row: 2192, total_rows: 3142
    progress: 69.764%
    wait time: 0:00:10.386410s
    

    row: 2193, total_rows: 3142
    progress: 69.796%
    wait time: 0:00:10.375077s
    

    row: 2194, total_rows: 3142
    progress: 69.828%
    wait time: 0:00:10.363313s
    

    row: 2195, total_rows: 3142
    progress: 69.86%
    wait time: 0:00:10.352847s
    

    row: 2196, total_rows: 3142
    progress: 69.892%
    wait time: 0:00:10.341517s
    

    row: 2197, total_rows: 3142
    progress: 69.924%
    wait time: 0:00:10.330619s
    

    row: 2198, total_rows: 3142
    progress: 69.955%
    wait time: 0:00:10.319290s
    

    row: 2199, total_rows: 3142
    progress: 69.987%
    wait time: 0:00:10.307105s
    



    row: 2281, total_rows: 3142
    progress: 72.597%
    wait time: 0:00:09.392528s
    

    row: 2282, total_rows: 3142
    progress: 72.629%
    wait time: 0:00:09.382412s
    

    row: 2283, total_rows: 3142
    progress: 72.661%
    wait time: 0:00:09.371916s
    

    row: 2284, total_rows: 3142
    progress: 72.693%
    wait time: 0:00:09.361796s
    

    row: 2285, total_rows: 3142
    progress: 72.724%
    wait time: 0:00:09.350546s
    

    row: 2286, total_rows: 3142
    progress: 72.756%
    wait time: 0:00:09.339298s
    

    row: 2287, total_rows: 3142
    progress: 72.788%
    wait time: 0:00:09.329173s
    

    row: 2288, total_rows: 3142
    progress: 72.82%
    wait time: 0:00:09.319046s
    

    row: 2289, total_rows: 3142
    progress: 72.852%
    wait time: 0:00:09.307424s
    

    row: 2290, total_rows: 3142
    progress: 72.884%
    wait time: 0:00:09.296177s
    

    row: 2291, total_rows: 3142
    progress: 72.915%
    wait time: 0:00:09.285675s
    



    row: 2390, total_rows: 3142
    progress: 76.066%
    wait time: 0:00:08.186460s
    

    row: 2391, total_rows: 3142
    progress: 76.098%
    wait time: 0:00:08.175298s
    

    row: 2392, total_rows: 3142
    progress: 76.13%
    wait time: 0:00:08.164765s
    

    row: 2393, total_rows: 3142
    progress: 76.162%
    wait time: 0:00:08.153918s
    

    row: 2394, total_rows: 3142
    progress: 76.194%
    wait time: 0:00:08.143070s
    

    row: 2395, total_rows: 3142
    progress: 76.225%
    wait time: 0:00:08.132534s
    

    row: 2396, total_rows: 3142
    progress: 76.257%
    wait time: 0:00:08.122309s
    

    row: 2397, total_rows: 3142
    progress: 76.289%
    wait time: 0:00:08.111148s
    

    row: 2398, total_rows: 3142
    progress: 76.321%
    wait time: 0:00:08.099056s
    

    row: 2399, total_rows: 3142
    progress: 76.353%
    wait time: 0:00:08.087279s
    

    row: 2400, total_rows: 3142
    progress: 76.384%
    wait time: 0:00:08.075505s
    



    row: 2483, total_rows: 3142
    progress: 79.026%
    wait time: 0:00:07.153718s
    

    row: 2484, total_rows: 3142
    progress: 79.058%
    wait time: 0:00:07.142903s
    

    row: 2485, total_rows: 3142
    progress: 79.09%
    wait time: 0:00:07.132088s
    

    row: 2486, total_rows: 3142
    progress: 79.122%
    wait time: 0:00:07.121802s
    

    row: 2487, total_rows: 3142
    progress: 79.153%
    wait time: 0:00:07.110195s
    

    row: 2488, total_rows: 3142
    progress: 79.185%
    wait time: 0:00:07.099644s
    

    row: 2489, total_rows: 3142
    progress: 79.217%
    wait time: 0:00:07.088303s
    

    row: 2490, total_rows: 3142
    progress: 79.249%
    wait time: 0:00:07.076703s
    

    row: 2491, total_rows: 3142
    progress: 79.281%
    wait time: 0:00:07.065628s
    

    row: 2492, total_rows: 3142
    progress: 79.313%
    wait time: 0:00:07.055076s
    

    row: 2493, total_rows: 3142
    progress: 79.344%
    wait time: 0:00:07.043742s
    



    row: 2575, total_rows: 3142
    progress: 81.954%
    wait time: 0:00:06.143825s
    

    row: 2576, total_rows: 3142
    progress: 81.986%
    wait time: 0:00:06.133247s
    

    row: 2577, total_rows: 3142
    progress: 82.018%
    wait time: 0:00:06.122450s
    

    row: 2578, total_rows: 3142
    progress: 82.05%
    wait time: 0:00:06.111213s
    

    row: 2579, total_rows: 3142
    progress: 82.081%
    wait time: 0:00:06.099979s
    

    row: 2580, total_rows: 3142
    progress: 82.113%
    wait time: 0:00:06.088310s
    

    row: 2581, total_rows: 3142
    progress: 82.145%
    wait time: 0:00:06.077515s
    

    row: 2582, total_rows: 3142
    progress: 82.177%
    wait time: 0:00:06.066937s
    

    row: 2583, total_rows: 3142
    progress: 82.209%
    wait time: 0:00:06.056358s
    

    row: 2584, total_rows: 3142
    progress: 82.241%
    wait time: 0:00:06.045778s
    

    row: 2585, total_rows: 3142
    progress: 82.272%
    wait time: 0:00:06.034981s
    



    row: 2666, total_rows: 3142
    progress: 84.85%
    wait time: 0:00:05.153017s
    

    row: 2667, total_rows: 3142
    progress: 84.882%
    wait time: 0:00:05.142402s
    

    row: 2668, total_rows: 3142
    progress: 84.914%
    wait time: 0:00:05.131787s
    

    row: 2669, total_rows: 3142
    progress: 84.946%
    wait time: 0:00:05.120638s
    

    row: 2670, total_rows: 3142
    progress: 84.978%
    wait time: 0:00:05.110198s
    

    row: 2671, total_rows: 3142
    progress: 85.01%
    wait time: 0:00:05.099933s
    

    row: 2672, total_rows: 3142
    progress: 85.041%
    wait time: 0:00:05.088961s
    

    row: 2673, total_rows: 3142
    progress: 85.073%
    wait time: 0:00:05.077815s
    

    row: 2674, total_rows: 3142
    progress: 85.105%
    wait time: 0:00:05.067195s
    

    row: 2675, total_rows: 3142
    progress: 85.137%
    wait time: 0:00:05.056225s
    

    row: 2676, total_rows: 3142
    progress: 85.169%
    wait time: 0:00:05.045256s
    




    row: 2774, total_rows: 3142
    progress: 88.288%
    wait time: 0:00:04.000295s
    

    row: 2775, total_rows: 3142
    progress: 88.32%
    wait time: 0:00:03.989443s
    

    row: 2776, total_rows: 3142
    progress: 88.351%
    wait time: 0:00:03.978195s
    

    row: 2777, total_rows: 3142
    progress: 88.383%
    wait time: 0:00:03.967475s
    

    row: 2778, total_rows: 3142
    progress: 88.415%
    wait time: 0:00:03.956886s
    

    row: 2779, total_rows: 3142
    progress: 88.447%
    wait time: 0:00:03.945772s
    

    row: 2780, total_rows: 3142
    progress: 88.479%
    wait time: 0:00:03.935051s
    

    row: 2781, total_rows: 3142
    progress: 88.511%
    wait time: 0:00:03.924199s
    

    row: 2782, total_rows: 3142
    progress: 88.542%
    wait time: 0:00:03.913735s
    

    row: 2783, total_rows: 3142
    progress: 88.574%
    wait time: 0:00:03.903656s
    

    row: 2784, total_rows: 3142
    progress: 88.606%
    wait time: 0:00:03.893315s
    



    row: 2869, total_rows: 3142
    progress: 91.311%
    wait time: 0:00:02.978491s
    

    row: 2870, total_rows: 3142
    progress: 91.343%
    wait time: 0:00:02.967496s
    

    row: 2871, total_rows: 3142
    progress: 91.375%
    wait time: 0:00:02.956312s
    

    row: 2872, total_rows: 3142
    progress: 91.407%
    wait time: 0:00:02.945554s
    

    row: 2873, total_rows: 3142
    progress: 91.439%
    wait time: 0:00:02.934654s
    

    row: 2874, total_rows: 3142
    progress: 91.47%
    wait time: 0:00:02.923567s
    

    row: 2875, total_rows: 3142
    progress: 91.502%
    wait time: 0:00:02.912389s
    

    row: 2876, total_rows: 3142
    progress: 91.534%
    wait time: 0:00:02.901490s
    

    row: 2877, total_rows: 3142
    progress: 91.566%
    wait time: 0:00:02.890407s
    

    row: 2878, total_rows: 3142
    progress: 91.598%
    wait time: 0:00:02.879326s
    

    row: 2879, total_rows: 3142
    progress: 91.63%
    wait time: 0:00:02.868612s
    




    row: 2962, total_rows: 3142
    progress: 94.271%
    wait time: 0:00:01.961485s
    

    row: 2963, total_rows: 3142
    progress: 94.303%
    wait time: 0:00:01.950595s
    

    row: 2964, total_rows: 3142
    progress: 94.335%
    wait time: 0:00:01.939584s
    

    row: 2965, total_rows: 3142
    progress: 94.367%
    wait time: 0:00:01.928695s
    

    row: 2966, total_rows: 3142
    progress: 94.398%
    wait time: 0:00:01.917864s
    

    row: 2967, total_rows: 3142
    progress: 94.43%
    wait time: 0:00:01.906974s
    

    row: 2968, total_rows: 3142
    progress: 94.462%
    wait time: 0:00:01.895907s
    

    row: 2969, total_rows: 3142
    progress: 94.494%
    wait time: 0:00:01.885251s
    

    row: 2970, total_rows: 3142
    progress: 94.526%
    wait time: 0:00:01.874302s
    

    row: 2971, total_rows: 3142
    progress: 94.558%
    wait time: 0:00:01.863181s
    

    row: 2972, total_rows: 3142
    progress: 94.589%
    wait time: 0:00:01.852178s
    



    row: 3053, total_rows: 3142
    progress: 97.167%
    wait time: 0:00:00.967825s
    

    row: 3054, total_rows: 3142
    progress: 97.199%
    wait time: 0:00:00.956897s
    

    row: 3055, total_rows: 3142
    progress: 97.231%
    wait time: 0:00:00.946056s
    

    row: 3056, total_rows: 3142
    progress: 97.263%
    wait time: 0:00:00.935241s
    

    row: 3057, total_rows: 3142
    progress: 97.295%
    wait time: 0:00:00.924287s
    

    row: 3058, total_rows: 3142
    progress: 97.327%
    wait time: 0:00:00.913389s
    

    row: 3059, total_rows: 3142
    progress: 97.358%
    wait time: 0:00:00.902519s
    

    row: 3060, total_rows: 3142
    progress: 97.39%
    wait time: 0:00:00.891569s
    

    row: 3061, total_rows: 3142
    progress: 97.422%
    wait time: 0:00:00.880646s
    

    row: 3062, total_rows: 3142
    progress: 97.454%
    wait time: 0:00:00.869804s
    

    row: 3063, total_rows: 3142
    progress: 97.486%
    wait time: 0:00:00.858858s
    


In [449]:
df_representation_test_v1_1 = pd.DataFrame(data_all_attributes).set_index("id").fillna(0)
df_representation_test_v1_1

Unnamed: 0_level_0,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,retro<&>num_tokens_upper,retro<&>prop_vowels,retro<&>len_max_rep_char,retro<&>max_char_fre_per_token(o),retro<&>max_char_fre_per_token(s),...,linguistics<&>lemma<&>@sainsburys,linguistics<&>lemma<&>chafford,"linguistics<&>bigram<&>('car', 'park')","linguistics<&>bigram<&>('park', 'light')","linguistics<&>bigram<&>('light', '?')","linguistics<&>bigram<&>('bit', 'scary')","linguistics<&>trigram<&>('car', 'park', 'light')","linguistics<&>trigram<&>('park', 'light', '?')","linguistics<&>trigram<&>('night', '!', 'thank')","linguistics<&>trigram<&>('!', 'thank', '.')"
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10941,21,109,0,21,1,0,0.370787,2,2,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10942,8,60,2,10,6,5,0.150943,2,3,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10943,23,115,0,23,4,0,0.344086,2,2,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10944,19,98,0,20,4,0,0.375000,2,1,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10945,21,131,3,22,4,1,0.288288,2,3,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41528,7,41,0,6,3,1,0.257143,2,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41529,22,132,0,23,3,2,0.324324,2,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41530,25,139,1,25,1,0,0.382609,2,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41531,15,80,0,17,2,0,0.348485,2,1,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [450]:
pickle.dump(df_representation_test_v1_1, open("df_representation_test_v1_1.pickle", "wb"))

In [209]:
data_bertweet = []
col_names = [f"BERTweet_{i}" for i in range(768)]
for k, ix in enumerate(df_test.index):
    print(k+1, df_test.shape[0], 100*(k+1)/df_test.shape[0])
    tweet = df_test.loc[ix]["tweet"]
    input_ids = torch.tensor([tokenizer.encode(tweet)])
    with torch.no_grad():
        outputs = bertweet(input_ids)
        hidden_states = outputs[0]

    token_embeddings = np.array([ll.numpy() for ll in hidden_states[0]])
    sentence_embedding = np.mean(token_embeddings, axis=0)
    o = {"id": ix}
    o = {**o, **dict(zip(col_names, sentence_embedding))}
    data_bertweet.append(o)

1 3142 0.031826861871419476
2 3142 0.06365372374283895
3 3142 0.09548058561425843
4 3142 0.1273074474856779
5 3142 0.15913430935709738
6 3142 0.19096117122851686
7 3142 0.22278803309993633
8 3142 0.2546148949713558
9 3142 0.2864417568427753
10 3142 0.31826861871419476
11 3142 0.35009548058561424
12 3142 0.3819223424570337
13 3142 0.4137492043284532
14 3142 0.44557606619987267
15 3142 0.47740292807129214
16 3142 0.5092297899427116
17 3142 0.5410566518141311
18 3142 0.5728835136855506
19 3142 0.60471037555697
20 3142 0.6365372374283895
21 3142 0.668364099299809
22 3142 0.7001909611712285
23 3142 0.732017823042648
24 3142 0.7638446849140674
25 3142 0.7956715467854869
26 3142 0.8274984086569064
27 3142 0.8593252705283259
28 3142 0.8911521323997453
29 3142 0.9229789942711648
30 3142 0.9548058561425843
31 3142 0.9866327180140039
32 3142 1.0184595798854232
33 3142 1.0502864417568427
34 3142 1.0821133036282622
35 3142 1.1139401654996817
36 3142 1.1457670273711011
37 3142 1.1775938892425206
38 

306 3142 9.739019732654361
307 3142 9.77084659452578
308 3142 9.8026734563972
309 3142 9.834500318268619
310 3142 9.866327180140038
311 3142 9.898154042011457
312 3142 9.929980903882877
313 3142 9.961807765754296
314 3142 9.993634627625717
315 3142 10.025461489497136
316 3142 10.057288351368555
317 3142 10.089115213239975
318 3142 10.120942075111394
319 3142 10.152768936982813
320 3142 10.184595798854232
321 3142 10.216422660725652
322 3142 10.248249522597073
323 3142 10.280076384468492
324 3142 10.311903246339911
325 3142 10.34373010821133
326 3142 10.37555697008275
327 3142 10.407383831954169
328 3142 10.439210693825588
329 3142 10.471037555697007
330 3142 10.502864417568428
331 3142 10.534691279439848
332 3142 10.566518141311267
333 3142 10.598345003182686
334 3142 10.630171865054105
335 3142 10.661998726925525
336 3142 10.693825588796944
337 3142 10.725652450668363
338 3142 10.757479312539784
339 3142 10.789306174411204
340 3142 10.821133036282623
341 3142 10.852959898154042
342 31

603 3142 19.191597708465945
604 3142 19.223424570337365
605 3142 19.255251432208784
606 3142 19.287078294080203
607 3142 19.318905155951622
608 3142 19.35073201782304
609 3142 19.38255887969446
610 3142 19.41438574156588
611 3142 19.446212603437303
612 3142 19.478039465308722
613 3142 19.50986632718014
614 3142 19.54169318905156
615 3142 19.57352005092298
616 3142 19.6053469127944
617 3142 19.63717377466582
618 3142 19.669000636537238
619 3142 19.700827498408657
620 3142 19.732654360280076
621 3142 19.764481222151495
622 3142 19.796308084022915
623 3142 19.828134945894334
624 3142 19.859961807765753
625 3142 19.891788669637172
626 3142 19.92361553150859
627 3142 19.955442393380014
628 3142 19.987269255251434
629 3142 20.019096117122853
630 3142 20.050922978994272
631 3142 20.08274984086569
632 3142 20.11457670273711
633 3142 20.14640356460853
634 3142 20.17823042647995
635 3142 20.21005728835137
636 3142 20.241884150222788
637 3142 20.273711012094207
638 3142 20.305537873965626
639 314

902 3142 28.70782940802037
903 3142 28.73965626989179
904 3142 28.77148313176321
905 3142 28.803309993634628
906 3142 28.835136855506047
907 3142 28.866963717377466
908 3142 28.898790579248885
909 3142 28.930617441120305
910 3142 28.962444302991724
911 3142 28.994271164863143
912 3142 29.026098026734562
913 3142 29.057924888605985
914 3142 29.089751750477404
915 3142 29.121578612348824
916 3142 29.153405474220243
917 3142 29.185232336091662
918 3142 29.21705919796308
919 3142 29.2488860598345
920 3142 29.28071292170592
921 3142 29.31253978357734
922 3142 29.34436664544876
923 3142 29.376193507320178
924 3142 29.408020369191597
925 3142 29.439847231063016
926 3142 29.471674092934435
927 3142 29.503500954805855
928 3142 29.535327816677274
929 3142 29.567154678548697
930 3142 29.598981540420116
931 3142 29.630808402291535
932 3142 29.662635264162954
933 3142 29.694462126034374
934 3142 29.726288987905793
935 3142 29.758115849777212
936 3142 29.78994271164863
937 3142 29.82176957352005
938

1198 3142 38.12858052196054
1199 3142 38.16040738383195
1200 3142 38.192234245703375
1201 3142 38.22406110757479
1202 3142 38.25588796944621
1203 3142 38.28771483131763
1204 3142 38.31954169318905
1205 3142 38.35136855506047
1206 3142 38.38319541693189
1207 3142 38.41502227880331
1208 3142 38.44684914067473
1209 3142 38.47867600254615
1210 3142 38.51050286441757
1211 3142 38.54232972628899
1212 3142 38.574156588160406
1213 3142 38.60598345003183
1214 3142 38.637810311903245
1215 3142 38.66963717377467
1216 3142 38.70146403564608
1217 3142 38.733290897517506
1218 3142 38.76511775938892
1219 3142 38.796944621260344
1220 3142 38.82877148313176
1221 3142 38.86059834500318
1222 3142 38.892425206874606
1223 3142 38.92425206874602
1224 3142 38.956078930617444
1225 3142 38.98790579248886
1226 3142 39.01973265436028
1227 3142 39.0515595162317
1228 3142 39.08338637810312
1229 3142 39.11521323997454
1230 3142 39.14704010184596
1231 3142 39.178866963717375
1232 3142 39.2106938255888
1233 3142 39.2

1492 3142 47.48567791215786
1493 3142 47.517504774029284
1494 3142 47.5493316359007
1495 3142 47.58115849777212
1496 3142 47.61298535964354
1497 3142 47.64481222151496
1498 3142 47.67663908338638
1499 3142 47.7084659452578
1500 3142 47.740292807129215
1501 3142 47.77211966900064
1502 3142 47.803946530872054
1503 3142 47.83577339274348
1504 3142 47.86760025461489
1505 3142 47.899427116486315
1506 3142 47.93125397835773
1507 3142 47.96308084022915
1508 3142 47.994907702100576
1509 3142 48.02673456397199
1510 3142 48.058561425843415
1511 3142 48.09038828771483
1512 3142 48.12221514958625
1513 3142 48.15404201145767
1514 3142 48.18586887332909
1515 3142 48.21769573520051
1516 3142 48.24952259707193
1517 3142 48.281349458943346
1518 3142 48.31317632081477
1519 3142 48.345003182686185
1520 3142 48.37683004455761
1521 3142 48.40865690642902
1522 3142 48.440483768300446
1523 3142 48.47231063017186
1524 3142 48.504137492043284
1525 3142 48.53596435391471
1526 3142 48.56779121578612
1527 3142 48

1785 3142 56.81094844048377
1786 3142 56.842775302355186
1787 3142 56.87460216422661
1788 3142 56.906429026098024
1789 3142 56.93825588796945
1790 3142 56.97008274984086
1791 3142 57.001909611712286
1792 3142 57.0337364735837
1793 3142 57.065563335455124
1794 3142 57.09739019732655
1795 3142 57.12921705919796
1796 3142 57.161043921069385
1797 3142 57.1928707829408
1798 3142 57.224697644812224
1799 3142 57.25652450668364
1800 3142 57.28835136855506
1801 3142 57.32017823042648
1802 3142 57.3520050922979
1803 3142 57.38383195416932
1804 3142 57.41565881604074
1805 3142 57.447485677912155
1806 3142 57.47931253978358
1807 3142 57.511139401654994
1808 3142 57.54296626352642
1809 3142 57.57479312539783
1810 3142 57.606619987269255
1811 3142 57.63844684914068
1812 3142 57.67027371101209
1813 3142 57.702100572883516
1814 3142 57.73392743475493
1815 3142 57.765754296626355
1816 3142 57.79758115849777
1817 3142 57.82940802036919
1818 3142 57.86123488224061
1819 3142 57.89306174411203
1820 3142 57

2078 3142 66.13621896880967
2079 3142 66.1680458306811
2080 3142 66.19987269255252
2081 3142 66.23169955442394
2082 3142 66.26352641629535
2083 3142 66.29535327816677
2084 3142 66.3271801400382
2085 3142 66.35900700190962
2086 3142 66.39083386378103
2087 3142 66.42266072565245
2088 3142 66.45448758752387
2089 3142 66.4863144493953
2090 3142 66.5181413112667
2091 3142 66.54996817313813
2092 3142 66.58179503500955
2093 3142 66.61362189688097
2094 3142 66.64544875875238
2095 3142 66.6772756206238
2096 3142 66.70910248249523
2097 3142 66.74092934436665
2098 3142 66.77275620623807
2099 3142 66.80458306810948
2100 3142 66.8364099299809
2101 3142 66.86823679185233
2102 3142 66.90006365372375
2103 3142 66.93189051559516
2104 3142 66.96371737746658
2105 3142 66.995544239338
2106 3142 67.02737110120943
2107 3142 67.05919796308083
2108 3142 67.09102482495226
2109 3142 67.12285168682368
2110 3142 67.1546785486951
2111 3142 67.18650541056653
2112 3142 67.21833227243793
2113 3142 67.25015913430936
2

2375 3142 75.58879694462127
2376 3142 75.62062380649267
2377 3142 75.6524506683641
2378 3142 75.68427753023552
2379 3142 75.71610439210694
2380 3142 75.74793125397835
2381 3142 75.77975811584977
2382 3142 75.8115849777212
2383 3142 75.84341183959262
2384 3142 75.87523870146404
2385 3142 75.90706556333545
2386 3142 75.93889242520687
2387 3142 75.9707192870783
2388 3142 76.00254614894972
2389 3142 76.03437301082113
2390 3142 76.06619987269255
2391 3142 76.09802673456397
2392 3142 76.1298535964354
2393 3142 76.1616804583068
2394 3142 76.19350732017823
2395 3142 76.22533418204965
2396 3142 76.25716104392107
2397 3142 76.2889879057925
2398 3142 76.3208147676639
2399 3142 76.35264162953533
2400 3142 76.38446849140675
2401 3142 76.41629535327817
2402 3142 76.44812221514958
2403 3142 76.479949077021
2404 3142 76.51177593889243
2405 3142 76.54360280076385
2406 3142 76.57542966263526
2407 3142 76.60725652450668
2408 3142 76.6390833863781
2409 3142 76.67091024824953
2410 3142 76.70273711012094
24

2673 3142 85.07320178230427
2674 3142 85.10502864417569
2675 3142 85.1368555060471
2676 3142 85.16868236791852
2677 3142 85.20050922978994
2678 3142 85.23233609166137
2679 3142 85.26416295353278
2680 3142 85.2959898154042
2681 3142 85.32781667727562
2682 3142 85.35964353914704
2683 3142 85.39147040101847
2684 3142 85.42329726288988
2685 3142 85.4551241247613
2686 3142 85.48695098663272
2687 3142 85.51877784850414
2688 3142 85.55060471037555
2689 3142 85.58243157224697
2690 3142 85.6142584341184
2691 3142 85.64608529598982
2692 3142 85.67791215786123
2693 3142 85.70973901973265
2694 3142 85.74156588160407
2695 3142 85.7733927434755
2696 3142 85.8052196053469
2697 3142 85.83704646721833
2698 3142 85.86887332908975
2699 3142 85.90070019096117
2700 3142 85.9325270528326
2701 3142 85.964353914704
2702 3142 85.99618077657543
2703 3142 86.02800763844685
2704 3142 86.05983450031827
2705 3142 86.09166136218968
2706 3142 86.1234882240611
2707 3142 86.15531508593253
2708 3142 86.18714194780395
27

2968 3142 94.46212603437301
2969 3142 94.49395289624444
2970 3142 94.52577975811585
2971 3142 94.55760661998727
2972 3142 94.58943348185869
2973 3142 94.62126034373011
2974 3142 94.65308720560152
2975 3142 94.68491406747295
2976 3142 94.71674092934437
2977 3142 94.74856779121579
2978 3142 94.7803946530872
2979 3142 94.81222151495862
2980 3142 94.84404837683005
2981 3142 94.87587523870147
2982 3142 94.90770210057288
2983 3142 94.9395289624443
2984 3142 94.97135582431572
2985 3142 95.00318268618715
2986 3142 95.03500954805857
2987 3142 95.06683640992998
2988 3142 95.0986632718014
2989 3142 95.13049013367282
2990 3142 95.16231699554424
2991 3142 95.19414385741565
2992 3142 95.22597071928708
2993 3142 95.2577975811585
2994 3142 95.28962444302992
2995 3142 95.32145130490133
2996 3142 95.35327816677275
2997 3142 95.38510502864418
2998 3142 95.4169318905156
2999 3142 95.44875875238701
3000 3142 95.48058561425843
3001 3142 95.51241247612985
3002 3142 95.54423933800128
3003 3142 95.576066199872

In [214]:
pickle.dump(pd.DataFrame(data_bertweet).set_index("id"), open("df_representation_test_v2.pickle", "wb"))
df_representation_test_v2 = pickle.load(open("df_representation_test_v2.pickle", "rb"))

In [451]:
df_representation_test_v3_1 = df_representation_test_v1_1
df_representation_test_v3_1 = pd.concat([df_representation_test_v3_1, df_representation_test_v2], axis=1)
df_representation_test_v3_1

Unnamed: 0_level_0,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,retro<&>num_tokens_upper,retro<&>prop_vowels,retro<&>len_max_rep_char,retro<&>max_char_fre_per_token(o),retro<&>max_char_fre_per_token(s),...,BERTweet_758,BERTweet_759,BERTweet_760,BERTweet_761,BERTweet_762,BERTweet_763,BERTweet_764,BERTweet_765,BERTweet_766,BERTweet_767
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10941,21,109,0,21,1,0,0.370787,2,2,2,...,0.123334,0.062934,0.063844,-0.114385,-0.223249,-0.082935,0.096100,0.134355,-0.089524,0.052725
10942,8,60,2,10,6,5,0.150943,2,3,2,...,0.114743,-0.102170,0.210606,0.015215,-0.190899,-0.090932,-0.051202,0.083870,0.000473,0.243169
10943,23,115,0,23,4,0,0.344086,2,2,2,...,-0.032676,0.090184,0.130342,-0.079150,-0.150815,0.017163,0.346057,-0.046520,-0.133298,-0.233223
10944,19,98,0,20,4,0,0.375000,2,1,2,...,-0.017395,-0.029215,-0.080877,0.040828,0.020043,-0.034798,0.240827,0.083698,0.198686,0.055953
10945,21,131,3,22,4,1,0.288288,2,3,2,...,0.052519,-0.201800,-0.094245,0.137064,-0.070205,-0.114456,0.057291,0.062598,0.147984,-0.147572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41528,7,41,0,6,3,1,0.257143,2,1,1,...,-0.070892,-0.112746,0.078887,0.042544,0.017418,-0.242335,0.189593,0.099153,0.250478,0.002542
41529,22,132,0,23,3,2,0.324324,2,1,1,...,0.145892,-0.139355,0.015763,0.104158,0.099488,0.047747,0.095338,-0.118079,0.227708,-0.134026
41530,25,139,1,25,1,0,0.382609,2,1,1,...,0.093361,-0.109361,0.008455,0.067768,-0.013469,-0.081498,-0.004565,0.038196,-0.042650,-0.140450
41531,15,80,0,17,2,0,0.348485,2,1,2,...,0.108233,-0.009540,-0.049508,0.043545,-0.108673,-0.095933,0.196386,-0.061105,0.095339,0.049243


In [452]:
pickle.dump(df_representation_test_v3_1, open("df_representation_test_v3_1.pickle", "wb"))

In [223]:
data_lexicon_attributes = []
times = []
for k, ix in enumerate(df_test.index):
    start_time = time.time()
    tweet = df_test.loc[ix]["tweet"]
    o = {"id": ix}
    o = {**o, **get_lexicon_attrib(tweet)}
    data_lexicon_attributes.append(o)
    dt = time.time()-start_time
    times.append(dt)
    print(f"""
    row: {k+1}, total_rows: {df_test.shape[0]}
    progress: {np.round(100*(k+1)/df_test.shape[0], 3)}%
    wait time: {datetime.timedelta(seconds = np.median(times)*(df_test.shape[0]-k-1))}s
    """)


    row: 1, total_rows: 3142
    progress: 0.032%
    wait time: 0:13:16.259178s
    

    row: 2, total_rows: 3142
    progress: 0.064%
    wait time: 0:09:13.576164s
    

    row: 3, total_rows: 3142
    progress: 0.095%
    wait time: 0:13:15.752168s
    

    row: 4, total_rows: 3142
    progress: 0.127%
    wait time: 0:12:43.573287s
    

    row: 5, total_rows: 3142
    progress: 0.159%
    wait time: 0:13:11.243043s
    

    row: 6, total_rows: 3142
    progress: 0.191%
    wait time: 0:13:12.991234s
    

    row: 7, total_rows: 3142
    progress: 0.223%
    wait time: 0:13:10.738585s
    

    row: 8, total_rows: 3142
    progress: 0.255%
    wait time: 0:12:46.302730s
    

    row: 9, total_rows: 3142
    progress: 0.286%
    wait time: 0:12:21.882309s
    

    row: 10, total_rows: 3142
    progress: 0.318%
    wait time: 0:12:15.947239s
    

    row: 11, total_rows: 3142
    progress: 0.35%
    wait time: 0:12:10.015808s
    

    row: 12, total_rows: 3142
    progres


    row: 95, total_rows: 3142
    progress: 3.024%
    wait time: 0:10:06.562587s
    

    row: 96, total_rows: 3142
    progress: 3.055%
    wait time: 0:10:12.631912s
    

    row: 97, total_rows: 3142
    progress: 3.087%
    wait time: 0:10:18.697121s
    

    row: 98, total_rows: 3142
    progress: 3.119%
    wait time: 0:10:20.019092s
    

    row: 99, total_rows: 3142
    progress: 3.151%
    wait time: 0:10:18.290752s
    

    row: 100, total_rows: 3142
    progress: 3.183%
    wait time: 0:10:11.827405s
    

    row: 101, total_rows: 3142
    progress: 3.215%
    wait time: 0:10:05.368174s
    

    row: 102, total_rows: 3142
    progress: 3.246%
    wait time: 0:10:11.425152s
    

    row: 103, total_rows: 3142
    progress: 3.278%
    wait time: 0:10:17.478014s
    

    row: 104, total_rows: 3142
    progress: 3.31%
    wait time: 0:10:11.022898s
    

    row: 105, total_rows: 3142
    progress: 3.342%
    wait time: 0:10:04.571899s
    

    row: 106, total_rows: 


    row: 188, total_rows: 3142
    progress: 5.983%
    wait time: 0:10:50.469574s
    

    row: 189, total_rows: 3142
    progress: 6.015%
    wait time: 0:10:47.292364s
    

    row: 190, total_rows: 3142
    progress: 6.047%
    wait time: 0:10:45.597625s
    

    row: 191, total_rows: 3142
    progress: 6.079%
    wait time: 0:10:43.903885s
    

    row: 192, total_rows: 3142
    progress: 6.111%
    wait time: 0:10:36.305946s
    

    row: 193, total_rows: 3142
    progress: 6.143%
    wait time: 0:10:28.713010s
    

    row: 194, total_rows: 3142
    progress: 6.174%
    wait time: 0:10:25.382288s
    

    row: 195, total_rows: 3142
    progress: 6.206%
    wait time: 0:10:28.286620s
    

    row: 196, total_rows: 3142
    progress: 6.238%
    wait time: 0:10:35.443158s
    

    row: 197, total_rows: 3142
    progress: 6.27%
    wait time: 0:10:27.860229s
    

    row: 198, total_rows: 3142
    progress: 6.302%
    wait time: 0:10:27.644226s
    

    row: 199, total_r


    row: 281, total_rows: 3142
    progress: 8.943%
    wait time: 0:09:38.447633s
    

    row: 282, total_rows: 3142
    progress: 8.975%
    wait time: 0:09:39.676709s
    

    row: 283, total_rows: 3142
    progress: 9.007%
    wait time: 0:09:38.043265s
    

    row: 284, total_rows: 3142
    progress: 9.039%
    wait time: 0:09:39.271341s
    

    row: 285, total_rows: 3142
    progress: 9.071%
    wait time: 0:09:37.638898s
    

    row: 286, total_rows: 3142
    progress: 9.102%
    wait time: 0:09:32.988582s
    

    row: 287, total_rows: 3142
    progress: 9.134%
    wait time: 0:09:37.234530s
    

    row: 288, total_rows: 3142
    progress: 9.166%
    wait time: 0:09:32.587329s
    

    row: 289, total_rows: 3142
    progress: 9.198%
    wait time: 0:09:36.830163s
    

    row: 290, total_rows: 3142
    progress: 9.23%
    wait time: 0:09:32.186077s
    

    row: 291, total_rows: 3142
    progress: 9.262%
    wait time: 0:09:36.425796s
    

    row: 292, total_r


    row: 373, total_rows: 3142
    progress: 11.871%
    wait time: 0:09:33.711192s
    

    row: 374, total_rows: 3142
    progress: 11.903%
    wait time: 0:09:33.507301s
    

    row: 375, total_rows: 3142
    progress: 11.935%
    wait time: 0:09:33.303408s
    

    row: 376, total_rows: 3142
    progress: 11.967%
    wait time: 0:09:33.092917s
    

    row: 377, total_rows: 3142
    progress: 11.999%
    wait time: 0:09:32.882429s
    

    row: 378, total_rows: 3142
    progress: 12.031%
    wait time: 0:09:32.678534s
    

    row: 379, total_rows: 3142
    progress: 12.062%
    wait time: 0:09:32.474635s
    

    row: 380, total_rows: 3142
    progress: 12.094%
    wait time: 0:09:33.641428s
    

    row: 381, total_rows: 3142
    progress: 12.126%
    wait time: 0:09:34.807225s
    

    row: 382, total_rows: 3142
    progress: 12.158%
    wait time: 0:09:35.979595s
    

    row: 383, total_rows: 3142
    progress: 12.19%
    wait time: 0:09:34.390849s
    

    row: 3


    row: 465, total_rows: 3142
    progress: 14.799%
    wait time: 0:09:11.963902s
    

    row: 466, total_rows: 3142
    progress: 14.831%
    wait time: 0:09:13.100083s
    

    row: 467, total_rows: 3142
    progress: 14.863%
    wait time: 0:09:11.551527s
    

    row: 468, total_rows: 3142
    progress: 14.895%
    wait time: 0:09:11.345021s
    

    row: 469, total_rows: 3142
    progress: 14.927%
    wait time: 0:09:11.139152s
    

    row: 470, total_rows: 3142
    progress: 14.959%
    wait time: 0:09:12.273327s
    

    row: 471, total_rows: 3142
    progress: 14.99%
    wait time: 0:09:10.726777s
    

    row: 472, total_rows: 3142
    progress: 15.022%
    wait time: 0:09:10.520271s
    

    row: 473, total_rows: 3142
    progress: 15.054%
    wait time: 0:09:10.314402s
    

    row: 474, total_rows: 3142
    progress: 15.086%
    wait time: 0:09:10.107896s
    

    row: 475, total_rows: 3142
    progress: 15.118%
    wait time: 0:09:09.901391s
    

    row: 4


    row: 557, total_rows: 3142
    progress: 17.728%
    wait time: 0:08:50.405526s
    

    row: 558, total_rows: 3142
    progress: 17.759%
    wait time: 0:08:50.037389s
    

    row: 559, total_rows: 3142
    progress: 17.791%
    wait time: 0:08:49.669378s
    

    row: 560, total_rows: 3142
    progress: 17.823%
    wait time: 0:08:48.337160s
    

    row: 561, total_rows: 3142
    progress: 17.855%
    wait time: 0:08:47.005816s
    

    row: 562, total_rows: 3142
    progress: 17.887%
    wait time: 0:08:46.801014s
    

    row: 563, total_rows: 3142
    progress: 17.919%
    wait time: 0:08:46.597442s
    

    row: 564, total_rows: 3142
    progress: 17.95%
    wait time: 0:08:46.392641s
    

    row: 565, total_rows: 3142
    progress: 17.982%
    wait time: 0:08:46.187840s
    

    row: 566, total_rows: 3142
    progress: 18.014%
    wait time: 0:08:45.984268s
    

    row: 567, total_rows: 3142
    progress: 18.046%
    wait time: 0:08:45.780696s
    

    row: 5


    row: 649, total_rows: 3142
    progress: 20.656%
    wait time: 0:08:29.033227s
    

    row: 650, total_rows: 3142
    progress: 20.687%
    wait time: 0:08:28.829042s
    

    row: 651, total_rows: 3142
    progress: 20.719%
    wait time: 0:08:28.624857s
    

    row: 652, total_rows: 3142
    progress: 20.751%
    wait time: 0:08:28.420672s
    

    row: 653, total_rows: 3142
    progress: 20.783%
    wait time: 0:08:28.216487s
    

    row: 654, total_rows: 3142
    progress: 20.815%
    wait time: 0:08:28.013785s
    

    row: 655, total_rows: 3142
    progress: 20.847%
    wait time: 0:08:27.811082s
    

    row: 656, total_rows: 3142
    progress: 20.878%
    wait time: 0:08:27.605414s
    

    row: 657, total_rows: 3142
    progress: 20.91%
    wait time: 0:08:27.399747s
    

    row: 658, total_rows: 3142
    progress: 20.942%
    wait time: 0:08:27.195562s
    

    row: 659, total_rows: 3142
    progress: 20.974%
    wait time: 0:08:26.991377s
    

    row: 6


    row: 741, total_rows: 3142
    progress: 23.584%
    wait time: 0:08:10.248207s
    

    row: 742, total_rows: 3142
    progress: 23.616%
    wait time: 0:08:08.843822s
    

    row: 743, total_rows: 3142
    progress: 23.647%
    wait time: 0:08:07.440439s
    

    row: 744, total_rows: 3142
    progress: 23.679%
    wait time: 0:08:07.236968s
    

    row: 745, total_rows: 3142
    progress: 23.711%
    wait time: 0:08:07.034069s
    

    row: 746, total_rows: 3142
    progress: 23.743%
    wait time: 0:08:06.830598s
    

    row: 747, total_rows: 3142
    progress: 23.775%
    wait time: 0:08:06.627699s
    

    row: 748, total_rows: 3142
    progress: 23.806%
    wait time: 0:08:07.621713s
    

    row: 749, total_rows: 3142
    progress: 23.838%
    wait time: 0:08:08.614727s
    

    row: 750, total_rows: 3142
    progress: 23.87%
    wait time: 0:08:07.214343s
    

    row: 751, total_rows: 3142
    progress: 23.902%
    wait time: 0:08:08.206357s
    

    row: 7


    row: 833, total_rows: 3142
    progress: 26.512%
    wait time: 0:07:53.775872s
    

    row: 834, total_rows: 3142
    progress: 26.544%
    wait time: 0:07:53.570685s
    

    row: 835, total_rows: 3142
    progress: 26.575%
    wait time: 0:07:53.365499s
    

    row: 836, total_rows: 3142
    progress: 26.607%
    wait time: 0:07:53.160312s
    

    row: 837, total_rows: 3142
    progress: 26.639%
    wait time: 0:07:52.955126s
    

    row: 838, total_rows: 3142
    progress: 26.671%
    wait time: 0:07:52.749115s
    

    row: 839, total_rows: 3142
    progress: 26.703%
    wait time: 0:07:52.543105s
    

    row: 840, total_rows: 3142
    progress: 26.735%
    wait time: 0:07:52.192751s
    

    row: 841, total_rows: 3142
    progress: 26.766%
    wait time: 0:07:51.842523s
    

    row: 842, total_rows: 3142
    progress: 26.798%
    wait time: 0:07:51.782506s
    

    row: 843, total_rows: 3142
    progress: 26.83%
    wait time: 0:07:51.432404s
    

    row: 8


    row: 926, total_rows: 3142
    progress: 29.472%
    wait time: 0:07:34.692725s
    

    row: 927, total_rows: 3142
    progress: 29.504%
    wait time: 0:07:34.486747s
    

    row: 928, total_rows: 3142
    progress: 29.535%
    wait time: 0:07:34.141942s
    

    row: 929, total_rows: 3142
    progress: 29.567%
    wait time: 0:07:34.076375s
    

    row: 930, total_rows: 3142
    progress: 29.599%
    wait time: 0:07:33.871980s
    

    row: 931, total_rows: 3142
    progress: 29.631%
    wait time: 0:07:33.666003s
    

    row: 932, total_rows: 3142
    progress: 29.663%
    wait time: 0:07:33.321451s
    

    row: 933, total_rows: 3142
    progress: 29.694%
    wait time: 0:07:33.255631s
    

    row: 934, total_rows: 3142
    progress: 29.726%
    wait time: 0:07:32.911205s
    

    row: 935, total_rows: 3142
    progress: 29.758%
    wait time: 0:07:32.566905s
    

    row: 936, total_rows: 3142
    progress: 29.79%
    wait time: 0:07:31.398829s
    

    row: 9


    row: 1017, total_rows: 3142
    progress: 32.368%
    wait time: 0:07:16.019003s
    

    row: 1018, total_rows: 3142
    progress: 32.4%
    wait time: 0:07:15.814324s
    

    row: 1019, total_rows: 3142
    progress: 32.432%
    wait time: 0:07:15.609645s
    

    row: 1020, total_rows: 3142
    progress: 32.463%
    wait time: 0:07:15.404459s
    

    row: 1021, total_rows: 3142
    progress: 32.495%
    wait time: 0:07:15.199273s
    

    row: 1022, total_rows: 3142
    progress: 32.527%
    wait time: 0:07:14.993582s
    

    row: 1023, total_rows: 3142
    progress: 32.559%
    wait time: 0:07:14.788901s
    

    row: 1024, total_rows: 3142
    progress: 32.591%
    wait time: 0:07:14.583715s
    

    row: 1025, total_rows: 3142
    progress: 32.623%
    wait time: 0:07:14.378530s
    

    row: 1026, total_rows: 3142
    progress: 32.654%
    wait time: 0:07:14.173344s
    

    row: 1027, total_rows: 3142
    progress: 32.686%
    wait time: 0:07:13.968158s
    




    row: 1108, total_rows: 3142
    progress: 35.264%
    wait time: 0:06:55.315218s
    

    row: 1109, total_rows: 3142
    progress: 35.296%
    wait time: 0:06:55.110547s
    

    row: 1110, total_rows: 3142
    progress: 35.328%
    wait time: 0:06:54.906845s
    

    row: 1111, total_rows: 3142
    progress: 35.36%
    wait time: 0:06:54.703143s
    

    row: 1112, total_rows: 3142
    progress: 35.391%
    wait time: 0:06:54.498472s
    

    row: 1113, total_rows: 3142
    progress: 35.423%
    wait time: 0:06:54.293802s
    

    row: 1114, total_rows: 3142
    progress: 35.455%
    wait time: 0:06:54.088407s
    

    row: 1115, total_rows: 3142
    progress: 35.487%
    wait time: 0:06:53.883013s
    

    row: 1116, total_rows: 3142
    progress: 35.519%
    wait time: 0:06:53.678828s
    

    row: 1117, total_rows: 3142
    progress: 35.551%
    wait time: 0:06:53.474643s
    

    row: 1118, total_rows: 3142
    progress: 35.582%
    wait time: 0:06:53.271665s
    



    row: 1199, total_rows: 3142
    progress: 38.16%
    wait time: 0:06:34.788150s
    

    row: 1200, total_rows: 3142
    progress: 38.192%
    wait time: 0:06:34.584734s
    

    row: 1201, total_rows: 3142
    progress: 38.224%
    wait time: 0:06:34.381318s
    

    row: 1202, total_rows: 3142
    progress: 38.256%
    wait time: 0:06:34.177902s
    

    row: 1203, total_rows: 3142
    progress: 38.288%
    wait time: 0:06:33.974486s
    

    row: 1204, total_rows: 3142
    progress: 38.32%
    wait time: 0:06:33.771071s
    

    row: 1205, total_rows: 3142
    progress: 38.351%
    wait time: 0:06:33.568117s
    

    row: 1206, total_rows: 3142
    progress: 38.383%
    wait time: 0:06:33.365164s
    

    row: 1207, total_rows: 3142
    progress: 38.415%
    wait time: 0:06:33.162210s
    

    row: 1208, total_rows: 3142
    progress: 38.447%
    wait time: 0:06:32.958795s
    

    row: 1209, total_rows: 3142
    progress: 38.479%
    wait time: 0:06:32.755841s
    




    row: 1290, total_rows: 3142
    progress: 41.057%
    wait time: 0:06:15.273489s
    

    row: 1291, total_rows: 3142
    progress: 41.088%
    wait time: 0:06:15.865662s
    

    row: 1292, total_rows: 3142
    progress: 41.12%
    wait time: 0:06:15.776619s
    

    row: 1293, total_rows: 3142
    progress: 41.152%
    wait time: 0:06:15.459540s
    

    row: 1294, total_rows: 3142
    progress: 41.184%
    wait time: 0:06:15.370374s
    

    row: 1295, total_rows: 3142
    progress: 41.216%
    wait time: 0:06:15.281084s
    

    row: 1296, total_rows: 3142
    progress: 41.248%
    wait time: 0:06:15.078120s
    

    row: 1297, total_rows: 3142
    progress: 41.279%
    wait time: 0:06:14.874716s
    

    row: 1298, total_rows: 3142
    progress: 41.311%
    wait time: 0:06:14.557884s
    

    row: 1299, total_rows: 3142
    progress: 41.343%
    wait time: 0:06:14.468348s
    

    row: 1300, total_rows: 3142
    progress: 41.375%
    wait time: 0:06:14.265383s
    



    row: 1381, total_rows: 3142
    progress: 43.953%
    wait time: 0:05:52.519594s
    

    row: 1382, total_rows: 3142
    progress: 43.985%
    wait time: 0:05:52.319832s
    

    row: 1383, total_rows: 3142
    progress: 44.017%
    wait time: 0:05:52.119231s
    

    row: 1384, total_rows: 3142
    progress: 44.048%
    wait time: 0:05:51.919468s
    

    row: 1385, total_rows: 3142
    progress: 44.08%
    wait time: 0:05:51.719706s
    

    row: 1386, total_rows: 3142
    progress: 44.112%
    wait time: 0:05:52.398297s
    

    row: 1387, total_rows: 3142
    progress: 44.144%
    wait time: 0:05:53.075888s
    

    row: 1388, total_rows: 3142
    progress: 44.176%
    wait time: 0:05:51.996933s
    

    row: 1389, total_rows: 3142
    progress: 44.208%
    wait time: 0:05:52.673522s
    

    row: 1390, total_rows: 3142
    progress: 44.239%
    wait time: 0:05:53.348903s
    

    row: 1391, total_rows: 3142
    progress: 44.271%
    wait time: 0:05:54.023282s
    



    row: 1472, total_rows: 3142
    progress: 46.849%
    wait time: 0:05:39.317693s
    

    row: 1473, total_rows: 3142
    progress: 46.881%
    wait time: 0:05:39.114707s
    

    row: 1474, total_rows: 3142
    progress: 46.913%
    wait time: 0:05:38.911324s
    

    row: 1475, total_rows: 3142
    progress: 46.945%
    wait time: 0:05:38.707941s
    

    row: 1476, total_rows: 3142
    progress: 46.976%
    wait time: 0:05:38.402079s
    

    row: 1477, total_rows: 3142
    progress: 47.008%
    wait time: 0:05:38.096341s
    

    row: 1478, total_rows: 3142
    progress: 47.04%
    wait time: 0:05:37.995834s
    

    row: 1479, total_rows: 3142
    progress: 47.072%
    wait time: 0:05:37.895205s
    

    row: 1480, total_rows: 3142
    progress: 47.104%
    wait time: 0:05:37.692219s
    

    row: 1481, total_rows: 3142
    progress: 47.136%
    wait time: 0:05:37.488836s
    

    row: 1482, total_rows: 3142
    progress: 47.167%
    wait time: 0:05:37.285850s
    



    row: 1563, total_rows: 3142
    progress: 49.745%
    wait time: 0:05:20.828867s
    

    row: 1564, total_rows: 3142
    progress: 49.777%
    wait time: 0:05:21.414813s
    

    row: 1565, total_rows: 3142
    progress: 49.809%
    wait time: 0:05:20.422498s
    

    row: 1566, total_rows: 3142
    progress: 49.841%
    wait time: 0:05:20.219313s
    

    row: 1567, total_rows: 3142
    progress: 49.873%
    wait time: 0:05:20.016128s
    

    row: 1568, total_rows: 3142
    progress: 49.905%
    wait time: 0:05:19.812943s
    

    row: 1569, total_rows: 3142
    progress: 49.936%
    wait time: 0:05:19.609758s
    

    row: 1570, total_rows: 3142
    progress: 49.968%
    wait time: 0:05:19.406573s
    

    row: 1571, total_rows: 3142
    progress: 50.0%
    wait time: 0:05:19.203388s
    

    row: 1572, total_rows: 3142
    progress: 50.032%
    wait time: 0:05:19.785334s
    

    row: 1573, total_rows: 3142
    progress: 50.064%
    wait time: 0:05:20.366279s
    




    row: 1654, total_rows: 3142
    progress: 52.642%
    wait time: 0:05:02.339046s
    

    row: 1655, total_rows: 3142
    progress: 52.673%
    wait time: 0:05:02.135862s
    

    row: 1656, total_rows: 3142
    progress: 52.705%
    wait time: 0:05:01.932677s
    

    row: 1657, total_rows: 3142
    progress: 52.737%
    wait time: 0:05:01.729492s
    

    row: 1658, total_rows: 3142
    progress: 52.769%
    wait time: 0:05:01.526307s
    

    row: 1659, total_rows: 3142
    progress: 52.801%
    wait time: 0:05:01.323122s
    

    row: 1660, total_rows: 3142
    progress: 52.833%
    wait time: 0:05:01.122764s
    

    row: 1661, total_rows: 3142
    progress: 52.864%
    wait time: 0:05:00.916753s
    

    row: 1662, total_rows: 3142
    progress: 52.896%
    wait time: 0:05:00.713568s
    

    row: 1663, total_rows: 3142
    progress: 52.928%
    wait time: 0:05:00.510383s
    

    row: 1664, total_rows: 3142
    progress: 52.96%
    wait time: 0:05:00.307198s
    



    row: 1745, total_rows: 3142
    progress: 55.538%
    wait time: 0:04:42.476308s
    

    row: 1746, total_rows: 3142
    progress: 55.57%
    wait time: 0:04:42.261458s
    

    row: 1747, total_rows: 3142
    progress: 55.602%
    wait time: 0:04:42.046627s
    

    row: 1748, total_rows: 3142
    progress: 55.633%
    wait time: 0:04:41.844277s
    

    row: 1749, total_rows: 3142
    progress: 55.665%
    wait time: 0:04:41.641927s
    

    row: 1750, total_rows: 3142
    progress: 55.697%
    wait time: 0:04:41.439909s
    

    row: 1751, total_rows: 3142
    progress: 55.729%
    wait time: 0:04:41.237891s
    

    row: 1752, total_rows: 3142
    progress: 55.761%
    wait time: 0:04:41.035541s
    

    row: 1753, total_rows: 3142
    progress: 55.792%
    wait time: 0:04:40.833192s
    

    row: 1754, total_rows: 3142
    progress: 55.824%
    wait time: 0:04:40.631174s
    

    row: 1755, total_rows: 3142
    progress: 55.856%
    wait time: 0:04:40.429155s
    



    row: 1836, total_rows: 3142
    progress: 58.434%
    wait time: 0:04:22.091217s
    

    row: 1837, total_rows: 3142
    progress: 58.466%
    wait time: 0:04:22.543609s
    

    row: 1838, total_rows: 3142
    progress: 58.498%
    wait time: 0:04:21.689852s
    

    row: 1839, total_rows: 3142
    progress: 58.53%
    wait time: 0:04:20.837095s
    

    row: 1840, total_rows: 3142
    progress: 58.561%
    wait time: 0:04:20.636603s
    

    row: 1841, total_rows: 3142
    progress: 58.593%
    wait time: 0:04:20.436731s
    

    row: 1842, total_rows: 3142
    progress: 58.625%
    wait time: 0:04:20.887122s
    

    row: 1843, total_rows: 3142
    progress: 58.657%
    wait time: 0:04:21.336512s
    

    row: 1844, total_rows: 3142
    progress: 58.689%
    wait time: 0:04:21.784746s
    

    row: 1845, total_rows: 3142
    progress: 58.721%
    wait time: 0:04:22.231980s
    

    row: 1846, total_rows: 3142
    progress: 58.752%
    wait time: 0:04:22.029951s
    



    row: 1928, total_rows: 3142
    progress: 61.362%
    wait time: 0:04:02.413221s
    

    row: 1929, total_rows: 3142
    progress: 61.394%
    wait time: 0:04:01.606939s
    

    row: 1930, total_rows: 3142
    progress: 61.426%
    wait time: 0:04:02.013858s
    

    row: 1931, total_rows: 3142
    progress: 61.458%
    wait time: 0:04:01.208576s
    

    row: 1932, total_rows: 3142
    progress: 61.489%
    wait time: 0:04:01.009107s
    

    row: 1933, total_rows: 3142
    progress: 61.521%
    wait time: 0:04:00.810214s
    

    row: 1934, total_rows: 3142
    progress: 61.553%
    wait time: 0:04:01.215133s
    

    row: 1935, total_rows: 3142
    progress: 61.585%
    wait time: 0:04:01.619051s
    

    row: 1936, total_rows: 3142
    progress: 61.617%
    wait time: 0:04:01.419158s
    

    row: 1937, total_rows: 3142
    progress: 61.649%
    wait time: 0:04:01.219263s
    

    row: 1938, total_rows: 3142
    progress: 61.68%
    wait time: 0:04:01.621612s
    



    row: 2019, total_rows: 3142
    progress: 64.258%
    wait time: 0:03:43.679817s
    

    row: 2020, total_rows: 3142
    progress: 64.29%
    wait time: 0:03:43.480770s
    

    row: 2021, total_rows: 3142
    progress: 64.322%
    wait time: 0:03:43.281456s
    

    row: 2022, total_rows: 3142
    progress: 64.354%
    wait time: 0:03:43.082409s
    

    row: 2023, total_rows: 3142
    progress: 64.386%
    wait time: 0:03:42.883362s
    

    row: 2024, total_rows: 3142
    progress: 64.418%
    wait time: 0:03:42.684447s
    

    row: 2025, total_rows: 3142
    progress: 64.449%
    wait time: 0:03:42.485533s
    

    row: 2026, total_rows: 3142
    progress: 64.481%
    wait time: 0:03:42.844444s
    

    row: 2027, total_rows: 3142
    progress: 64.513%
    wait time: 0:03:43.202355s
    

    row: 2028, total_rows: 3142
    progress: 64.545%
    wait time: 0:03:43.002439s
    

    row: 2029, total_rows: 3142
    progress: 64.577%
    wait time: 0:03:42.801992s
    



    row: 2110, total_rows: 3142
    progress: 67.155%
    wait time: 0:03:25.554382s
    

    row: 2111, total_rows: 3142
    progress: 67.187%
    wait time: 0:03:25.355202s
    

    row: 2112, total_rows: 3142
    progress: 67.218%
    wait time: 0:03:25.156021s
    

    row: 2113, total_rows: 3142
    progress: 67.25%
    wait time: 0:03:24.956841s
    

    row: 2114, total_rows: 3142
    progress: 67.282%
    wait time: 0:03:24.757292s
    

    row: 2115, total_rows: 3142
    progress: 67.314%
    wait time: 0:03:24.558479s
    

    row: 2116, total_rows: 3142
    progress: 67.346%
    wait time: 0:03:24.358932s
    

    row: 2117, total_rows: 3142
    progress: 67.377%
    wait time: 0:03:24.160118s
    

    row: 2118, total_rows: 3142
    progress: 67.409%
    wait time: 0:03:23.960571s
    

    row: 2119, total_rows: 3142
    progress: 67.441%
    wait time: 0:03:23.761757s
    

    row: 2120, total_rows: 3142
    progress: 67.473%
    wait time: 0:03:23.562211s
    



    row: 2202, total_rows: 3142
    progress: 70.083%
    wait time: 0:03:07.176876s
    

    row: 2203, total_rows: 3142
    progress: 70.115%
    wait time: 0:03:07.029915s
    

    row: 2204, total_rows: 3142
    progress: 70.146%
    wait time: 0:03:06.831070s
    

    row: 2205, total_rows: 3142
    progress: 70.178%
    wait time: 0:03:06.631555s
    

    row: 2206, total_rows: 3142
    progress: 70.21%
    wait time: 0:03:06.432710s
    

    row: 2207, total_rows: 3142
    progress: 70.242%
    wait time: 0:03:06.233195s
    

    row: 2208, total_rows: 3142
    progress: 70.274%
    wait time: 0:03:05.982130s
    

    row: 2209, total_rows: 3142
    progress: 70.306%
    wait time: 0:03:05.731176s
    

    row: 2210, total_rows: 3142
    progress: 70.337%
    wait time: 0:03:05.354009s
    

    row: 2211, total_rows: 3142
    progress: 70.369%
    wait time: 0:03:04.977224s
    

    row: 2212, total_rows: 3142
    progress: 70.401%
    wait time: 0:03:04.956254s
    



    row: 2293, total_rows: 3142
    progress: 72.979%
    wait time: 0:02:49.104332s
    

    row: 2294, total_rows: 3142
    progress: 73.011%
    wait time: 0:02:48.905151s
    

    row: 2295, total_rows: 3142
    progress: 73.043%
    wait time: 0:02:48.705971s
    

    row: 2296, total_rows: 3142
    progress: 73.074%
    wait time: 0:02:48.506790s
    

    row: 2297, total_rows: 3142
    progress: 73.106%
    wait time: 0:02:48.307610s
    

    row: 2298, total_rows: 3142
    progress: 73.138%
    wait time: 0:02:48.108429s
    

    row: 2299, total_rows: 3142
    progress: 73.17%
    wait time: 0:02:47.909248s
    

    row: 2300, total_rows: 3142
    progress: 73.202%
    wait time: 0:02:47.710168s
    

    row: 2301, total_rows: 3142
    progress: 73.234%
    wait time: 0:02:47.511088s
    

    row: 2302, total_rows: 3142
    progress: 73.265%
    wait time: 0:02:47.312107s
    

    row: 2303, total_rows: 3142
    progress: 73.297%
    wait time: 0:02:47.112726s
    



    row: 2384, total_rows: 3142
    progress: 75.875%
    wait time: 0:02:32.117260s
    

    row: 2385, total_rows: 3142
    progress: 75.907%
    wait time: 0:02:31.537745s
    

    row: 2386, total_rows: 3142
    progress: 75.939%
    wait time: 0:02:31.337382s
    

    row: 2387, total_rows: 3142
    progress: 75.971%
    wait time: 0:02:31.137021s
    

    row: 2388, total_rows: 3142
    progress: 76.003%
    wait time: 0:02:30.565529s
    

    row: 2389, total_rows: 3142
    progress: 76.034%
    wait time: 0:02:29.995023s
    

    row: 2390, total_rows: 3142
    progress: 76.066%
    wait time: 0:02:29.790089s
    

    row: 2391, total_rows: 3142
    progress: 76.098%
    wait time: 0:02:29.596629s
    

    row: 2392, total_rows: 3142
    progress: 76.13%
    wait time: 0:02:29.766773s
    

    row: 2393, total_rows: 3142
    progress: 76.162%
    wait time: 0:02:29.198236s
    

    row: 2394, total_rows: 3142
    progress: 76.194%
    wait time: 0:02:29.367395s
    



    row: 2475, total_rows: 3142
    progress: 78.771%
    wait time: 0:02:12.864117s
    

    row: 2476, total_rows: 3142
    progress: 78.803%
    wait time: 0:02:12.992894s
    

    row: 2477, total_rows: 3142
    progress: 78.835%
    wait time: 0:02:13.120687s
    

    row: 2478, total_rows: 3142
    progress: 78.867%
    wait time: 0:02:12.920664s
    

    row: 2479, total_rows: 3142
    progress: 78.899%
    wait time: 0:02:12.720640s
    

    row: 2480, total_rows: 3142
    progress: 78.931%
    wait time: 0:02:12.520300s
    

    row: 2481, total_rows: 3142
    progress: 78.962%
    wait time: 0:02:12.319961s
    

    row: 2482, total_rows: 3142
    progress: 78.994%
    wait time: 0:02:12.119937s
    

    row: 2483, total_rows: 3142
    progress: 79.026%
    wait time: 0:02:11.919912s
    

    row: 2484, total_rows: 3142
    progress: 79.058%
    wait time: 0:02:11.719573s
    

    row: 2485, total_rows: 3142
    progress: 79.09%
    wait time: 0:02:11.519548s
    



    row: 2567, total_rows: 3142
    progress: 81.7%
    wait time: 0:01:54.538032s
    

    row: 2568, total_rows: 3142
    progress: 81.731%
    wait time: 0:01:54.621504s
    

    row: 2569, total_rows: 3142
    progress: 81.763%
    wait time: 0:01:54.703990s
    

    row: 2570, total_rows: 3142
    progress: 81.795%
    wait time: 0:01:54.503945s
    

    row: 2571, total_rows: 3142
    progress: 81.827%
    wait time: 0:01:54.303900s
    

    row: 2572, total_rows: 3142
    progress: 81.859%
    wait time: 0:01:54.103718s
    

    row: 2573, total_rows: 3142
    progress: 81.891%
    wait time: 0:01:53.903536s
    

    row: 2574, total_rows: 3142
    progress: 81.922%
    wait time: 0:01:53.703354s
    

    row: 2575, total_rows: 3142
    progress: 81.954%
    wait time: 0:01:53.503172s
    

    row: 2576, total_rows: 3142
    progress: 81.986%
    wait time: 0:01:53.586239s
    

    row: 2577, total_rows: 3142
    progress: 82.018%
    wait time: 0:01:53.668306s
    




    row: 2660, total_rows: 3142
    progress: 84.659%
    wait time: 0:01:37.210998s
    

    row: 2661, total_rows: 3142
    progress: 84.691%
    wait time: 0:01:37.249683s
    

    row: 2662, total_rows: 3142
    progress: 84.723%
    wait time: 0:01:36.807632s
    

    row: 2663, total_rows: 3142
    progress: 84.755%
    wait time: 0:01:36.366581s
    

    row: 2664, total_rows: 3142
    progress: 84.787%
    wait time: 0:01:36.404267s
    

    row: 2665, total_rows: 3142
    progress: 84.819%
    wait time: 0:01:36.440954s
    

    row: 2666, total_rows: 3142
    progress: 84.85%
    wait time: 0:01:36.239055s
    

    row: 2667, total_rows: 3142
    progress: 84.882%
    wait time: 0:01:36.037155s
    

    row: 2668, total_rows: 3142
    progress: 84.914%
    wait time: 0:01:35.834689s
    

    row: 2669, total_rows: 3142
    progress: 84.946%
    wait time: 0:01:35.632225s
    

    row: 2670, total_rows: 3142
    progress: 84.978%
    wait time: 0:01:35.430324s
    



    row: 2751, total_rows: 3142
    progress: 87.556%
    wait time: 0:01:18.662491s
    

    row: 2752, total_rows: 3142
    progress: 87.588%
    wait time: 0:01:18.266137s
    

    row: 2753, total_rows: 3142
    progress: 87.619%
    wait time: 0:01:18.260126s
    

    row: 2754, total_rows: 3142
    progress: 87.651%
    wait time: 0:01:18.058943s
    

    row: 2755, total_rows: 3142
    progress: 87.683%
    wait time: 0:01:17.857760s
    

    row: 2756, total_rows: 3142
    progress: 87.715%
    wait time: 0:01:17.463407s
    

    row: 2757, total_rows: 3142
    progress: 87.747%
    wait time: 0:01:17.070055s
    

    row: 2758, total_rows: 3142
    progress: 87.778%
    wait time: 0:01:16.869873s
    

    row: 2759, total_rows: 3142
    progress: 87.81%
    wait time: 0:01:16.669691s
    

    row: 2760, total_rows: 3142
    progress: 87.842%
    wait time: 0:01:16.469509s
    

    row: 2761, total_rows: 3142
    progress: 87.874%
    wait time: 0:01:16.269327s
    



    row: 2844, total_rows: 3142
    progress: 90.516%
    wait time: 0:00:59.654153s
    

    row: 2845, total_rows: 3142
    progress: 90.547%
    wait time: 0:00:59.453901s
    

    row: 2846, total_rows: 3142
    progress: 90.579%
    wait time: 0:00:59.253790s
    

    row: 2847, total_rows: 3142
    progress: 90.611%
    wait time: 0:00:59.053679s
    

    row: 2848, total_rows: 3142
    progress: 90.643%
    wait time: 0:00:58.853497s
    

    row: 2849, total_rows: 3142
    progress: 90.675%
    wait time: 0:00:58.653315s
    

    row: 2850, total_rows: 3142
    progress: 90.707%
    wait time: 0:00:58.599261s
    

    row: 2851, total_rows: 3142
    progress: 90.738%
    wait time: 0:00:58.252951s
    

    row: 2852, total_rows: 3142
    progress: 90.77%
    wait time: 0:00:58.197896s
    

    row: 2853, total_rows: 3142
    progress: 90.802%
    wait time: 0:00:57.852587s
    

    row: 2854, total_rows: 3142
    progress: 90.834%
    wait time: 0:00:57.796532s
    



    row: 2936, total_rows: 3142
    progress: 93.444%
    wait time: 0:00:41.649822s
    

    row: 2937, total_rows: 3142
    progress: 93.475%
    wait time: 0:00:41.447614s
    

    row: 2938, total_rows: 3142
    progress: 93.507%
    wait time: 0:00:41.245431s
    

    row: 2939, total_rows: 3142
    progress: 93.539%
    wait time: 0:00:41.043247s
    

    row: 2940, total_rows: 3142
    progress: 93.571%
    wait time: 0:00:40.841064s
    

    row: 2941, total_rows: 3142
    progress: 93.603%
    wait time: 0:00:40.638880s
    

    row: 2942, total_rows: 3142
    progress: 93.635%
    wait time: 0:00:40.436697s
    

    row: 2943, total_rows: 3142
    progress: 93.666%
    wait time: 0:00:40.234514s
    

    row: 2944, total_rows: 3142
    progress: 93.698%
    wait time: 0:00:40.032330s
    

    row: 2945, total_rows: 3142
    progress: 93.73%
    wait time: 0:00:39.830147s
    

    row: 2946, total_rows: 3142
    progress: 93.762%
    wait time: 0:00:39.627963s
    



    row: 3027, total_rows: 3142
    progress: 96.34%
    wait time: 0:00:23.136027s
    

    row: 3028, total_rows: 3142
    progress: 96.372%
    wait time: 0:00:22.877794s
    

    row: 3029, total_rows: 3142
    progress: 96.404%
    wait time: 0:00:22.620562s
    

    row: 3030, total_rows: 3142
    progress: 96.435%
    wait time: 0:00:22.476429s
    

    row: 3031, total_rows: 3142
    progress: 96.467%
    wait time: 0:00:22.331295s
    

    row: 3032, total_rows: 3142
    progress: 96.499%
    wait time: 0:00:22.075064s
    

    row: 3033, total_rows: 3142
    progress: 96.531%
    wait time: 0:00:21.819834s
    

    row: 3034, total_rows: 3142
    progress: 96.563%
    wait time: 0:00:21.619652s
    

    row: 3035, total_rows: 3142
    progress: 96.595%
    wait time: 0:00:21.419470s
    

    row: 3036, total_rows: 3142
    progress: 96.626%
    wait time: 0:00:21.272335s
    

    row: 3037, total_rows: 3142
    progress: 96.658%
    wait time: 0:00:21.019106s
    



    row: 3119, total_rows: 3142
    progress: 99.268%
    wait time: 0:00:04.604185s
    

    row: 3120, total_rows: 3142
    progress: 99.3%
    wait time: 0:00:04.403998s
    

    row: 3121, total_rows: 3142
    progress: 99.332%
    wait time: 0:00:04.203811s
    

    row: 3122, total_rows: 3142
    progress: 99.363%
    wait time: 0:00:04.003634s
    

    row: 3123, total_rows: 3142
    progress: 99.395%
    wait time: 0:00:03.803457s
    

    row: 3124, total_rows: 3142
    progress: 99.427%
    wait time: 0:00:03.603275s
    

    row: 3125, total_rows: 3142
    progress: 99.459%
    wait time: 0:00:03.403093s
    

    row: 3126, total_rows: 3142
    progress: 99.491%
    wait time: 0:00:03.210918s
    

    row: 3127, total_rows: 3142
    progress: 99.523%
    wait time: 0:00:03.002729s
    

    row: 3128, total_rows: 3142
    progress: 99.554%
    wait time: 0:00:02.809554s
    

    row: 3129, total_rows: 3142
    progress: 99.586%
    wait time: 0:00:02.602365s
    



In [454]:
df_representation_test_v4 = pd.DataFrame(data_lexicon_attributes).set_index("id")
df_representation_test_v4_1 = pd.concat([df_representation_test_v4, df_representation_test_v3_1], axis=1)
df_representation_test_v4_1

Unnamed: 0_level_0,lexicon<&>LiuHu<&>+,lexicon<&>LiuHu<&>-,lexicon<&>sentiwordnet<&>+,lexicon<&>sentiwordnet<&>-,lexicon<&>sentiwordnet<&>o,retro<&>num_tokens,retro<&>lenght,retro<&>num_numbs,retro<&>num_alpha,retro<&>num_with_uppercase,...,BERTweet_758,BERTweet_759,BERTweet_760,BERTweet_761,BERTweet_762,BERTweet_763,BERTweet_764,BERTweet_765,BERTweet_766,BERTweet_767
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10941,0,0,0.750,0.250,6.000,21,109,0,21,1,...,0.123334,0.062934,0.063844,-0.114385,-0.223249,-0.082935,0.096100,0.134355,-0.089524,0.052725
10942,0,0,0.000,0.000,1.000,8,60,2,10,6,...,0.114743,-0.102170,0.210606,0.015215,-0.190899,-0.090932,-0.051202,0.083870,0.000473,0.243169
10943,0,0,0.000,0.625,5.375,23,115,0,23,4,...,-0.032676,0.090184,0.130342,-0.079150,-0.150815,0.017163,0.346057,-0.046520,-0.133298,-0.233223
10944,1,1,0.000,0.625,2.375,19,98,0,20,4,...,-0.017395,-0.029215,-0.080877,0.040828,0.020043,-0.034798,0.240827,0.083698,0.198686,0.055953
10945,0,0,0.000,0.000,2.000,21,131,3,22,4,...,0.052519,-0.201800,-0.094245,0.137064,-0.070205,-0.114456,0.057291,0.062598,0.147984,-0.147572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41528,0,1,0.125,0.250,0.625,7,41,0,6,3,...,-0.070892,-0.112746,0.078887,0.042544,0.017418,-0.242335,0.189593,0.099153,0.250478,0.002542
41529,1,0,0.000,0.125,3.875,22,132,0,23,3,...,0.145892,-0.139355,0.015763,0.104158,0.099488,0.047747,0.095338,-0.118079,0.227708,-0.134026
41530,0,1,0.875,1.000,7.125,25,139,1,25,1,...,0.093361,-0.109361,0.008455,0.067768,-0.013469,-0.081498,-0.004565,0.038196,-0.042650,-0.140450
41531,0,0,0.125,0.000,2.875,15,80,0,17,2,...,0.108233,-0.009540,-0.049508,0.043545,-0.108673,-0.095933,0.196386,-0.061105,0.095339,0.049243


In [455]:
pickle.dump(df_representation_test_v4_1, open("df_representation_test_v4_1.pickle", "wb"))