# Creating base texts

<br>

**Language: Python**

This notebook shows the process used for creating the three base texts (B1, B2, C1) from the normalized texts (notebooks 01, 02, 03). The base texts are very similar to the normalized texts, but with slightly modified collocations to ensure a more even distribution across levels. See dissertation section 5.3.

**Notebook contents:**
- [Initial setup](#Initial-setup)
- [Creating base text B1](#Creating-base-text-B1)
- [Creating base text B2](#Creating-base-text-B2)
- [Creating base text C1](#Creating-base-text-C1)

## Initial setup

In [1]:
# Import necessary modules

import pandas as pd
import pprint
from IPython.core.interactiveshell import InteractiveShell
import joblib

In [2]:
# Set preferred notebook format

# NOTE(review): %pprint appears to toggle the mode rather than set it (the cell output reports the
# resulting state), so re-running this cell without a kernel restart may switch it back on — confirm.
%pprint # Turn off pretty printing
InteractiveShell.ast_node_interactivity = "all" # Show all output, not just last item
pd.set_option('display.max_columns', 999) # Allow viewing of all columns

Pretty printing has been turned OFF


In [3]:
# Import necessary dictionaries
# kband_dict is indexed below with (lemma, POS-tag) tuples and yields the K-band paired with each tuple.

kband_dict_path = '../../COCA_data/COCA_2020_lemma_Kband_dict.pkl'
kband_dict = joblib.load(kband_dict_path) # All items lower-case

In [4]:
# Unpickle normalized texts (one pickled DataFrame per proficiency level)

norm_pickle_paths = [
    '../docs/B1_orig&norm.pkl',
    '../docs/B2_orig&norm.pkl',
    '../docs/C1_orig&norm.pkl',
]
B1_norm, B2_norm, C1_norm = map(joblib.load, norm_pickle_paths)

In [5]:
# Merge texts and keep only normalized versions

merged_texts = pd.concat([B1_norm, B2_norm, C1_norm])
# The normalized rows are selected by the index labels they carry in the pickled frames (1, 3 and 5)
is_norm_row = merged_texts.index.isin([1, 3, 5])
texts_df = merged_texts[is_norm_row].reset_index(drop=True)
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,text_len,MLC,CNC,grammar_errors_per_100,punc_errors_per_100,vocD,AG,bigram_range,mean_MI,absent_prop,mean_tscore,col_errors_per_100,correct_cols_per_100,K10to16_p,K3to9_p,K1to2_p,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,K1to2_cols_K,K3to9_cols_K,K10to16_cols_K,bad_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",250,6.41,0.641,6.0,7.6,48.723,0.379,0.095,2.522,0.985,111.672,8.0,4.8,0.0,0.25,0.75,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...",9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",250,7.314,0.8,2.8,2.4,43.457,0.938,0.171,2.878,0.964,153.301,3.2,8.8,0.0,0.273,0.727,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...",16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",250,11.591,2.045,0.8,3.2,70.776,1.455,0.117,3.229,0.927,92.031,2.0,13.2,0.091,0.364,0.545,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...",18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll..."


In [6]:
# Create correct_cols column (sum of the three K-band collocation counts)

texts_df['correct_cols'] = (
    texts_df['K1to2_cols']
    .add(texts_df['K3to9_cols'])
    .add(texts_df['K10to16_cols'])
)

In [7]:
# Create col_errors column (number of entries in each text's bad_cols list)

texts_df['col_errors'] = texts_df['bad_cols'].map(len)

In [8]:
# Define the non-collocation (lemma, POS-tag) lists for each text; the kband_non_cols column is built from them in the next cell

B1_non_cols = [('I', 'p'), ('about', 'i'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('in', 'i'), ('family', 'n'), ('be', 'v'), ('and', 'c'), ('then', 'r'), ('be', 'v'), ('because', 'c'), ('I', 'p'),('around', 'i'), ('me', 'p'), ('at', 'i'), ('our', 'a'), ('they', 'p'), ('want', 'v'), ('they', 'p'), ('have', 'v'), ('everything', 'p'), ('give', 'v'), ('to', 'i'), ('their', 'a'), ('child', 'n'), ('and', 'c'), ('also', 'r'), ('could', 'v'), ('buy', 'v'), ('thing', 'n'), ('like', 'i'),('but', 'c'), ('their', 'a'), ('behavior', 'n'), ('them', 'p'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('by', 'i'), ('wealthy', 'j'), ('parent', 'n'), ('they', 'p'), ('be', 'v'), ('strong', 'j'), ('that', 'd'), ('mean', 'v'), ('they', 'p'), ('can', 'v'), ('be', 'v'), ('of', 'i'), ('be', 'v'), ('adult', 'n'), ('they', 'p'), ('work', 'v'), ('to', 't'), ('that', 'd'), ('could', 'v'), ('buy', 'v'), ('everything', 'p'), ('they', 'p'), ('want', 'v'), ('I', 'p'), ('I', 'p'), ('start', 'v'), ('and', 'c'), ('for', 'i'), ('money', 'n'), ('I', 'p'), ('have', 'v'), ('and', 'c'), ('I', 'p'), ('get', 'v'), ('a', 'a'), ('money', 'n'), ('for', 'i'), ('myself', 'p'), ('however', 'r'), ('my', 'a'), ('to', 't'), ('child', 'n'), ('age', 'n'), ('and', 'c'), ('I', 'p'), ('child', 'n'), ('do', 'v'), ('work', 'n'), ('by', 'i'), ('another', 'd'), ('country', 'n'), ('that', 'c'), ('country', 'n'), ('have', 'v'), ('a', 'a'), ('child', 'n'), ('they', 'p'), ('start', 'v'), ('work', 'n'), ('when', 'c'), ('they', 'p'), ('fifteen', 'm'), ('and', 'c'), ('it', 'p'), ('be', 'v'),('they', 'p'), ('for', 'i'), ('their', 'a'), ('pocket', 'n'), ('money', 'n'), ('and', 'c'), ('be', 'v'), ('good', 'j'), ('they', 'p'), ('buy', 'v'), ('something', 'p'), ('or', 'c'), ('that', 'd'), ('be', 'v'), ('good', 'j'), ('they', 'p'), ('could', 'v'), ('buy', 'v'), ('something', 'p'), ('now', 'r'), ('and', 'c'), ('also', 'r'), ('our', 'a'), ('do', 'v'), ('this', 'd'), ('but', 'c'), ('many', 'd'), ('child', 'n'), ('their', 'a'), 
('parent', 'n'), ('which', 'd'), ('person', 'n'), ('get', 'v'), ('a', 'a'), ('pocket', 'n'), ('money', 'n'), ('but', 'c'), ('I', 'p'), ('be', 'v'), ('that', 'd'), ('point', 'n'), ('I', 'p'), ('think', 'v'), ('if', 'c'), ('child', 'n'), ('have', 'v'), ('a', 'a'), ('work', 'n'), ('and', 'c'), ('they', 'p'), ('they', 'p'), ('their', 'a'), ('adult', 'n'), ('life', 'n'), ('after', 'c'), ('they', 'p'), ('must', 'v'), ('be', 'v'), ('parent', 'n')]
B2_non_cols = [('I', 'p'), ('a', 'a'), ('such', 'i'), ('as', 'i'), ('the', 'a'), ('discipline', 'n'), ('they', 'p'), ('be', 'v'), ('use', 'v'), ('to', 't'), ('be', 'v'), ('where', 'c'), ('money', 'n'), ('do', 'v'), ('not', 'x'), ('compare', 'v'), ('to', 'i'), ('this', 'd'), ('a', 'a'), ('child', 'n'), ('who', 'p'), ('be', 'v'), ('use', 'v'), ('to', 't'),('whenever', 'r'), ('they', 'p'), ('want', 'v'), ('something', 'p'), ('the', 'a'), ('money', 'n'), ('be', 'v'), ('to', 'i'), ('them', 'p'), ('adulthood', 'n'), ('they', 'p'), ('watch', 'v'), ('their', 'a'), ('parent', 'n'), ('every','a'),('day', 'n'), ('just', 'r'), ('to', 't'), ('they', 'p'), ('to', 't'),('they', 'p'), ('too', 'r'), ('have', 'v'), ('work', 'n'), ('hard', 'r'), ('for', 'i'), ('their', 'a'), ('future', 'n'), ('a', 'a'), ('child', 'n'), ('that', 'c'), ('come', 'v'), ('from', 'i'), ('a', 'a'), ('wealthy', 'j'), ('family', 'n'), ('do', 'v'), ('not', 'x'), ('always', 'r'), ('have', 'v'), ('the', 'a'), ('advantage', 'n'), ('this', 'd'), ('be', 'v'), ('because', 'c'), ('their', 'a'), ('eye', 'n'), ('be', 'v'), ('that', 'c'), ('their', 'a'), ('parent', 'n'), ('have', 'v'), ('they', 'p'), ('also', 'r'), ('have', 'v'), ('a', 'a'), ('disadvantage', 'n'), ('of', 'i'), ('a', 'a'), ('commonly', 'r'), ('wealthy', 'j'), ('parent', 'n'), ('so', 'c'), ('they', 'p'), ('buy', 'v'), ('them', 'p'), ('car', 'n'), ('toy', 'n'), ('but', 'c'), ('they', 'p'), ('the', 'a'), ('a', 'a'), ('child', 'n'), ('be', 'v'), ('that', 'c'), ('they', 'p'), ('will', 'v'), ('grow', 'v'), ('up', 'r'), ('and', 'c'), ('think', 'v'), ('that', 'd'), ('money', 'n'), ('be', 'v'), ('everything', 'p'), ('that', 'c'), ('the', 'a'), ('be', 'v'), ('money', 'n'), ('the', 'a'), ('obstacle', 'n'), ('be', 'v'), ('they', 'p'), ('do', 'v'), ('not', 'x'), ('know', 'v'), ('how', 'r'), ('to', 't'), ('get', 'v'), ('it', 'p'), ('they', 'p'), ('have', 'v'), ('be', 'v'), ('spoil', 'v'), ('all', 'd'), ('the', 'a'), ('time', 'n'), ('so', 'r'), ('do', 'v'), ('not', 
'x'), ('to', 'i'), ('discover', 'v'), ('money', 'n'), ('make', 'v'), ('only', 'j'), ('money', 'n'), ('spending', 'n'), ('child', 'n'), ('from', 'i'), ('will', 'v'), ('for', 'i'), ('money', 'n'), ('they', 'p'), ('do', 'v'), ('know', 'v'), ('how', 'r'), ('to', 't'), ('of', 'i'), ('adult', 'n'), ('because', 'c'), ('they', 'p'), ('have', 'v'), ('be', 'v'), ('observe', 'v'), ('since', 'c'), ('they', 'p'), ('be', 'v'), ('a', 'a'), ('child', 'n')]
C1_non_cols = [('I', 'p'), ('do', 'v'), ('that', 'c'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('in', 'i'), ('child', 'n'), ('of', 'i'), ('problem', 'n'), ('of', 'i'), ('adult', 'n'), ('life', 'n'), ('on', 'i'), ('a', 'a'), ('and', 'c'), ('these', 'd'), ('child', 'n'), ('begin', 'v'), ('to', 't'), ('see', 'v'), ('in', 'i'), ('their', 'a'), ('their', 'a'), ('parent', 'n'), ('own', 'd'), ('struggle', 'n'), ('to', 'i'), ('them', 'p'), ('these', 'd'), ('child', 'n'), ('be', 'v'), ('as', 'i'), ('a', 'a'), ('adult', 'n'), ('many', 'd'), ('child', 'n'), ('work', 'v'), ('to', 't'), ('either', 'r'), ('collect', 'v'), ('some', 'd'), ('pocket', 'n'), ('money', 'n'), ('or', 'c'), ('even', 'r'),('their', 'a'), ('family', 'n'), ('income', 'n'), ('be', 'v'), ('the', 'a'), ('many', 'd'), ('child', 'n'), ('who', 'p'), ('they', 'p'), ('be', 'v'), ('to', 'i'), ('their', 'a'), ('family', 'n'), ('labor', 'n'), ('or', 'c'), ('income', 'n'), ('child', 'n'), ('of', 'i'), ('poor', 'j'), ('family', 'n'), ('also', 'r'), ('be', 'v'), ('they', 'p'), ('tend', 'v'), ('to', 't'), ('would', 'v'), ('be', 'v'), ('Mister', 'n'), ('bill', 'n'), ('gate', 'n'), ('corporation', 'n'), ('he', 'p'), ('have', 'v'), ('a', 'a'), ('but', 'c'), ('he', 'p'), ('use', 'v'), ('his', 'a'), ('talent', 'n'), ('and', 'c'), ('motivation', 'n'), ('to', 't'), ('set', 'v'), ('up', 'r'), ('the', 'a'), ('world', 'n'), ('large', 'j'), ('however', 'r'), ('there', 'e'), ('be', 'v'), ('some', 'd'), ('problem', 'n'), ('that', 'c'), ('child', 'n'), ('from', 'i'), ('encounter', 'v'), ('many', 'd'), ('of', 'i'), ('these', 'd'), ('child', 'n'), ('who', 'p'), ('be', 'v'), ('while', 'c'), ('work', 'v'), ('may', 'v'), ('they', 'p'), ('often', 'r'), ('this', 'd'), ('however', 'r'), ('be', 'v'), ('a', 'a'), ('small', 'j'), ('group', 'n'), ('child', 'n'), ('with', 'i'), ('be', 'v'), ('able', 'j'), ('to', 't'), ('deal', 'v'), ('with', 'i'), ('problem', 'n'), ('of', 'i'), ('adult', 'n'), ('life', 'n'), ('because', 'i'), ('of', 'i'), 
('and', 'c')]

In [9]:
# Pair every non-collocation lemma with its K-band, giving (kband, (lemma, POS-tag)) tuples
B1_non_cols_kband, B2_non_cols_kband, C1_non_cols_kband = (
    [(kband_dict[lemma], lemma) for lemma in non_cols]
    for non_cols in (B1_non_cols, B2_non_cols, C1_non_cols)
)

# One list per row, in the same B1, B2, C1 order as texts_df
texts_df['kband_non_cols'] = [B1_non_cols_kband, B2_non_cols_kband, C1_non_cols_kband]
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,text_len,MLC,CNC,grammar_errors_per_100,punc_errors_per_100,vocD,AG,bigram_range,mean_MI,absent_prop,mean_tscore,col_errors_per_100,correct_cols_per_100,K10to16_p,K3to9_p,K1to2_p,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,K1to2_cols_K,K3to9_cols_K,K10to16_cols_K,bad_cols,correct_cols,col_errors,kband_non_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",250,6.41,0.641,6.0,7.6,48.723,0.379,0.095,2.522,0.985,111.672,8.0,4.8,0.0,0.25,0.75,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...",9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...",12,20,"[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",250,7.314,0.8,2.8,2.4,43.457,0.938,0.171,2.878,0.964,153.301,3.2,8.8,0.0,0.273,0.727,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...",16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp...",22,8,"[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",250,11.591,2.045,0.8,3.2,70.776,1.455,0.117,3.229,0.927,92.031,2.0,13.2,0.091,0.364,0.545,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...",18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...",33,5,"[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."


In [10]:
# Keep only relevant columns

relevant_cols = [
    'text_id', 'text', 'lemmas_NLTK', 'lemmas_CLAWS',
    'correct_cols', 'col_errors',
    'K1to2_cols', 'K3to9_cols', 'K10to16_cols',
    'kband_cols', 'K1to2_cols_K', 'K3to9_cols_K', 'K10to16_cols_K',
    'bad_cols', 'kband_non_cols',
]
texts_df = texts_df[relevant_cols]

In [11]:
# Rename columns to match upcoming text_versions notebooks
# The count columns drop their "_cols" suffix and the "_K" list columns take over the "_cols" names.
# Each original label is mapped exactly once, so a single rename covers both steps.

column_renames = {
    'K1to2_cols': 'K1to2',
    'K3to9_cols': 'K3to9',
    'K10to16_cols': 'K10to16',
    'K1to2_cols_K': 'K1to2_cols',
    'K3to9_cols_K': 'K3to9_cols',
    'K10to16_cols_K': 'K10to16_cols',
}
texts_df = texts_df.rename(columns=column_renames)

In [12]:
# Display the trimmed and renamed table
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,correct_cols,col_errors,K1to2,K3to9,K10to16,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,bad_cols,kband_non_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,20,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,8,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",33,5,18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."


## Creating base text B1
Remove two inaccurate collocations without adding any accurate ones.

In [13]:
# Inaccurate collocations recorded for the normalized B1 text
texts_df.loc[texts_df['text_id'] == 'B1_norm', 'bad_cols'].iloc[0]

['disagree that point', 'show that situation', 'country parents', 'is not effect to', 'from twenty ages', 'social experience', 'age is late', 'work by (children ages)', 'culture about', 'could their money', 'country children', 'accept the money by', 'study at money', 'positive school', 'prepared their life', 'work my country', 'very disagree', 'prepare with many problems', 'for future time', 'had a work']

1. country children --> country's children
2. had a work --> had to work

In [14]:
# New B1 base text (text_id = text1)

text1_text = "I disagree that point about children brought up in families are prepared their life and then are good parents. because, I show that situation around me at our country parents. They want they had everything give to their children and also could buying things like positive school. but, their behavior is not good effect to them\nOn the other hand, children brought up by wealthy parents, they are strong, that means they can be prepare with many problems of being adults. They working to have money, that could buy everything they want\nIn my case, I start work from twenty ages I start work My country and work as a journalist for money I had social experience and I got a money for myself. however, My age is late to work by children ages and I heard about children doing work by another countries that countries had a culture about children They start work when they fifteen, and it is very young. They doing work for their pocket money and is good.\nthey could their money buy something or entrance to the bank. that is good they could buy something. now and for future time. also, our country's children do this. but, many children accept the money by their parents. which persons got a pocket money over the twenty ages. but I am very disagree that point. I think, if children had to work and they study at money, they perfectly prepared their adult life after they must be parents."

In [15]:
# New B1 base text lemmas_NLTK

text1_lemmasNLTK = ['I', 'disagree', 'that', 'point', 'about', 'child', 'bring', 'up', 'in', 'family', 'be', 'prepare', 'their', 'life', 'and', 'then', 'be', 'good', 'parent', '.', 'because', ',', 'I', 'show', 'that', 'situation', 'around', 'me', 'at', 'our', 'country', 'parent', '.', 'They', 'want', 'they', 'have', 'everything', 'give', 'to', 'their', 'child', 'and', 'also', 'could', 'buy', 'thing', 'like', 'positive', 'school', '.', 'but', ',', 'their', 'behavior', 'be', 'not', 'good', 'effect', 'to', 'them', 'on', 'the', 'other', 'hand', ',', 'child', 'bring', 'up', 'by', 'wealthy', 'parent', ',', 'they', 'be', 'strong', ',', 'that', 'mean', 'they', 'can', 'be', 'prepare', 'with', 'many', 'problem', 'of', 'be', 'adult', '.', 'They', 'work', 'to', 'have', 'money', ',', 'that', 'could', 'buy', 'everything', 'they', 'want', 'in', 'my', 'case', ',', 'I', 'start', 'work', 'from', 'twenty', 'age', 'I', 'start', 'work', 'my', 'country', 'and', 'work', 'as', 'a', 'journalist', 'for', 'money', 'I', 'have', 'social', 'experience', 'and', 'I', 'get', 'a', 'money', 'for', 'myself', '.', 'however', ',', 'my', 'age', 'be', 'late', 'to', 'work', 'by', 'child', 'age', 'and', 'I', 'heard', 'about', 'child', 'do', 'work', 'by', 'another', 'country', 'that', 'country', 'have', 'a', 'culture', 'about', 'child', 'They', 'start', 'work', 'when', 'they', 'fifteen', ',', 'and', 'it', 'be', 'very', 'young', '.', 'They', 'do', 'work', 'for', 'their', 'pocket', 'money', 'and', 'be', 'good', '.', 'they', 'could', 'their', 'money', 'buy', 'something', 'or', 'entrance', 'to', 'the', 'bank', '.', 'that', 'be', 'good', 'they', 'could', 'buy', 'something', '.', 'now', 'and', 'for', 'future', 'time', '.', 'also', ',', 'our', "country's", 'child', 'do', 'this', '.', 'but', ',', 'many', 'child', 'accept', 'the', 'money', 'by', 'their', 'parent', '.', 'which', 'person', 'get', 'a', 'pocket', 'money', 'over', 'the', 'twenty', 'age', '.', 'but', 'I', 'be', 'very', 'disagree', 'that', 'point', '.', 
'I', 'think', ',', 'if', 'child', 'have', 'to', 'work', 'and', 'they', 'study', 'at', 'money', ',', 'they', 'perfectly', 'prepare', 'their', 'adult', 'life', 'after', 'they', 'must', 'be', 'parent', '.']

In [16]:
# New B1 base text lemmas_CLAWS

text1_lemmasCLAWS = [('I', 'p'), ('disagree', 'v'), ('that', 'd'), ('point', 'n'), ('about', 'i'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('in', 'i'), ('family', 'n'), ('be', 'v'), ('prepare', 'v'), ('their', 'a'), ('life', 'n'), ('and', 'c'), ('then', 'r'), ('be', 'v'), ('good', 'j'), ('parent', 'n'), ('because', 'c'), ('I', 'p'), ('show', 'v'), ('that', 'd'), ('situation', 'n'), ('around', 'i'), ('me', 'p'), ('at', 'i'), ('our', 'a'), ('country', 'n'), ('parent', 'n'), ('they', 'p'), ('want', 'v'), ('they', 'p'), ('have', 'v'), ('everything', 'p'), ('give', 'v'), ('to', 'i'), ('their', 'a'), ('child', 'n'), ('and', 'c'), ('also', 'r'), ('could', 'v'), ('buy', 'v'), ('thing', 'n'), ('like', 'i'), ('positive', 'j'), ('school', 'n'), ('but', 'c'), ('their', 'a'), ('behavior', 'n'), ('be', 'v'), ('not', 'x'), ('good', 'j'), ('effect', 'n'), ('to', 'i'), ('them', 'a'), ('on', 'i'), ('the', 'a'), ('other', 'j'), ('hand', 'n'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('by', 'i'), ('wealthy', 'j'), ('parent', 'n'), ('they', 'p'), ('be', 'v'), ('strong', 'j'), ('that', 'd'), ('mean', 'v'), ('they', 'p'), ('can', 'v'), ('be', 'v'), ('prepare', 'v'), ('with', 'i'), ('many', 'd'), ('problem', 'n'), ('of', 'i'), ('be', 'v'), ('adult', 'n'), ('they', 'p'), ('work', 'v'), ('to', 't'), ('have', 'v'), ('money', 'n'), ('that', 'd'), ('could', 'v'), ('buy', 'v'), ('everything', 'p'), ('they', 'p'), ('want', 'v'), ('in', 'i'), ('my', 'a'), ('case', 'n'), ('I', 'p'), ('start', 'v'), ('work', 'n'), ('from', 'i'), ('twenty', 'm'), ('age', 'n'), ('I', 'p'), ('start', 'v'), ('work', 'v'), ('my', 'a'), ('country', 'n'), ('and', 'c'), ('work', 'v'), ('as', 'i'), ('a', 'a'), ('journalist', 'n'), ('for', 'i'), ('money', 'n'), ('I', 'p'), ('have', 'v'), ('social', 'j'), ('experience', 'n'), ('and', 'c'), ('I', 'p'), ('get', 'v'), ('a', 'a'), ('money', 'n'), ('for', 'i'), ('myself', 'p'), ('however', 'r'), ('my', 'a'), ('age', 'n'), ('be', 'v'), ('late', 'j'), ('to', 't'), ('work', 
'v'), ('by', 'i'), ('child', 'n'), ('age', 'n'), ('and', 'c'), ('I', 'p'), ('hear', 'v'), ('about', 'i'), ('child', 'n'), ('do', 'v'), ('work', 'n'), ('by', 'i'), ('another', 'd'), ('country', 'n'), ('that', 'c'), ('country', 'n'), ('have', 'v'), ('a', 'a'), ('culture', 'n'), ('about', 'i'), ('child', 'n'), ('they', 'p'), ('start', 'v'), ('work', 'n'), ('when', 'c'), ('they', 'p'), ('fifteen', 'm'), ('and', 'c'), ('it', 'p'), ('be', 'v'), ('very', 'r'), ('young', 'j'), ('they', 'p'), ('do', 'v'), ('work', 'n'), ('for', 'i'), ('their', 'a'), ('pocket', 'n'), ('money', 'n'), ('and', 'c'), ('be', 'v'), ('good', 'j'), ('they', 'p'), ('could', 'v'), ('their', 'a'), ('money', 'n'), ('buy', 'v'), ('something', 'p'), ('or', 'c'), ('entrance', 'n'), ('to', 'i'), ('the', 'a'), ('bank', 'n'), ('that', 'd'), ('be', 'v'), ('good', 'j'), ('they', 'p'), ('could', 'v'), ('buy', 'v'), ('something', 'p'), ('now', 'r'), ('and', 'c'), ('for', 'i'), ('future', 'j'), ('time', 'n'), ('also', 'r'), ('our', 'a'), ("country's", 'd'), ('child', 'n'), ('do', 'v'), ('this', 'd'), ('but', 'c'), ('many', 'd'), ('child', 'n'), ('accept', 'v'), ('the', 'a'), ('money', 'n'), ('by', 'i'), ('their', 'a'), ('parent', 'n'), ('which', 'd'), ('person', 'n'), ('get', 'v'), ('a', 'a'), ('pocket', 'n'), ('money', 'n'), ('over', 'i'), ('the', 'a'), ('twenty', 'm'), ('age', 'n'), ('but', 'c'), ('I', 'p'), ('be', 'v'), ('very', 'r'), ('disagree', 'v'), ('that', 'd'), ('point', 'n'), ('I', 'p'), ('think', 'v'), ('if', 'c'), ('child', 'n'), ('have', 'v'), ('to', 't'), ('work', 'n'), ('and', 'c'), ('they', 'p'), ('study', 'v'), ('at', 'i'), ('money', 'n'), ('they', 'p'), ('perfectly', 'r'), ('prepare', 'v'), ('their', 'a'), ('adult', 'n'), ('life', 'n'), ('after', 'c'), ('they', 'p'), ('must', 'v'), ('be', 'v'), ('parent', 'n')]

In [17]:
# New B1 base bad cols
# Same as the B1_norm bad_cols list shown above, minus 'country children' and 'had a work' (20 -> 18 entries)

text1_bad_cols = ['disagree that point', 'show that situation', 'country parents', 'is not effect to', 'from twenty ages', 'social experience', 'age is late', 'work by (children ages)', 'culture about', 'could their money', 'accept the money by', 'study at money', 'positive school', 'prepared their life', 'work my country', 'very disagree', 'prepare with many problems', 'for future time']

In [18]:
# New B1 base col columns (carried over unchanged from the B1_norm row)
# NOTE(review): kband_non_cols is reused as-is even though the base text was edited — confirm it still
# matches the non-collocation words of the new text.

is_b1_norm = texts_df['text_id'] == 'B1_norm'
text1_kband_cols = texts_df.loc[is_b1_norm, 'kband_cols'].iloc[0]
text1_K1to2_cols = texts_df.loc[is_b1_norm, 'K1to2_cols'].iloc[0]
text1_K3to9_cols = texts_df.loc[is_b1_norm, 'K3to9_cols'].iloc[0]
text1_K10to16_cols = texts_df.loc[is_b1_norm, 'K10to16_cols'].iloc[0]
text1_kband_non_cols = texts_df.loc[is_b1_norm, 'kband_non_cols'].iloc[0]

In [19]:
# Create new row for B1 base
# Counts are taken from the lists themselves so they always agree with the list columns.

text1 = {
    'text_id': 'text1',
    'text': text1_text,
    'lemmas_NLTK': text1_lemmasNLTK,
    'lemmas_CLAWS': text1_lemmasCLAWS,
    'correct_cols': len(text1_kband_cols),
    'col_errors': len(text1_bad_cols),
    'K1to2': len(text1_K1to2_cols),
    'K3to9': len(text1_K3to9_cols),
    'K10to16': len(text1_K10to16_cols),
    'kband_cols': text1_kband_cols,
    'K1to2_cols': text1_K1to2_cols,
    'K3to9_cols': text1_K3to9_cols,
    'K10to16_cols': text1_K10to16_cols,
    'bad_cols': text1_bad_cols,
    'kband_non_cols': text1_kband_non_cols,
}

# DataFrame.append is deprecated (pandas 1.4) and removed in pandas 2.0, so the row is added by
# concatenating a one-row frame instead. Any 'text1' row left by an earlier run of this cell is
# dropped first, so re-running the cell does not duplicate the row.
texts_df = pd.concat(
    [texts_df[texts_df['text_id'] != 'text1'], pd.DataFrame([text1])],
    ignore_index=True,
)

  texts_df = texts_df.append(text1, ignore_index=True)


In [20]:
# Display the table with the new text1 (B1 base) row appended
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,correct_cols,col_errors,K1to2,K3to9,K10to16,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,bad_cols,kband_non_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,20,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,8,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",33,5,18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."
3,text1,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,18,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."


## Creating base text B2
Add four inaccurate collocations without removing any accurate ones.

1. usually the money is gave --> the money is easily gave (original bad col)
2. love from the family --> a family love life (original bad col)
3. problems of adults --> adult life problems (original bad col)
4. all the time --> in every time (new bad col, all K1)

In [21]:
# New B2 base text (text_id = text11)

text11_text = 'I greatly support the idea.\nraised in a certain psychological values. Such as the value of hard work, discipline, they are used to be in the condition where money does not come easily. Oppose to it, a child who comes from a wealthy family is used to have money all the time. Whenever they want something, the money is easily gave to them.\nwell-trained to face adulthood. They watched their parent every day worked very hard just to put food in the table. They have the advantage to see the reality and embrace it, set their mind that they too have work hard for their future. A child that came from a wealthy family does not always have the advantage. This is because their eyes are blinded by the power of money, that their parent has. They also have a disadvantage of a family love life. Commonly wealthy parents express love by money, so they bought them cars, expensive clothes, toys, but they are never home. The impact to a child is that they will grow up and think that money is everything, that the source of happiness is money. The obstacle is they do not know how to get it, they have been spoiled in every time, so does not have the time to discovered the art of money making, only money spending. On the contrary, children from  will grow up with the sense of respect for money, they do know how to face adult life problems because they have been observing since they were a child.'

In [22]:
# New B2 base text lemmas_NLTK

# 'oppose', 'to', 'it' follows text11_text ("Oppose to it") and text11_lemmasCLAWS (('it', 'p')); this list previously had 'this' here
text11_lemmasNLTK = ['I', 'greatly', 'support', 'the', 'idea', '.', 'raise', 'in', 'a', 'certain', 'psychological', 'value', '.', 'such', 'as', 'the', 'value', 'of', 'hard', 'work', ',', 'discipline', ',', 'they', 'be', 'use', 'to', 'be', 'in', 'the', 'condition', 'where', 'money', 'do', 'not', 'come', 'easily', '.', 'oppose', 'to', 'it', ',', 'a', 'child', 'who', 'come', 'from', 'a', 'wealthy', 'family', 'be', 'use', 'to', 'have', 'money', 'all', 'the', 'time', '.', 'Whenever', 'they', 'want', 'something', ',', 'the', 'money', 'be', 'easily', 'give', 'to', 'them', '.', 'well-trained', 'to', 'face', 'adulthood', '.', 'They', 'watch', 'their', 'parent', 'every day', 'work', 'very', 'hard', 'just', 'to', 'put', 'food', 'in', 'the', 'table', '.', 'They', 'have', 'the', 'advantage', 'to', 'see', 'the', 'reality', 'and', 'embrace', 'it', ',', 'set', 'their', 'mind', 'that', 'they', 'too', 'have', 'work', 'hard', 'for', 'their', 'future', '.', 'A', 'child', 'that', 'come', 'from', 'a', 'wealthy', 'family', 'do', 'not', 'always', 'have', 'the', 'advantage', '.', 'this', 'be', 'because', 'their', 'eye', 'be', 'blind', 'by', 'the', 'power', 'of', 'money', ',', 'that', 'their', 'parent', 'have', '.', 'They', 'also', 'have', 'a', 'disadvantage', 'of', 'a', 'family', 'love', 'life', '.', 'commonly', 'wealthy', 'parent', 'express', 'love', 'by', 'money', ',', 'so', 'they', 'buy', 'them', 'car', ',', 'expensive', 'clothes', ',', 'toy', ',', 'but', 'they', 'be', 'never', 'home', '.', 'the', 'impact', 'to', 'a', 'child', 'be', 'that', 'they', 'will', 'grow', 'up', 'and', 'think', 'that', 'money', 'be', 'everything', ',', 'that', 'the', 'source', 'of', 'happiness', 'be', 'money', '.', 'the', 'obstacle', 'be', 'they', 'do', 'not', 'know', 'how', 'to', 'get', 'it', ',', 'they', 'have', 'be', 'spoil', 'in', 'every', 'time', ',', 'so', 'do', 'not', 'have', 'the','time', 'to', 'discovered', 'the', 'art', 'of', 'money', 'making', ',', 'only', 'money', 'spending', '.', 'on', 'the', 
'contrary', ',', 'child', 'from', 'will', 'grow', 'up', 'with', 'the', 'sense', 'of', 'respect', 'for', 'money', ',', 'they', 'do', 'know', 'how', 'to', 'face', 'adult', 'life', 'problem', 'because', 'they', 'have', 'be', 'observe', 'since', 'they', 'be', 'a', 'child', '.']

In [23]:
# New B2 base text lemmas_CLAWS

text11_lemmasCLAWS = [('I', 'p'), ('greatly', 'r'), ('support', 'v'), ('the', 'a'), ('idea', 'n'), ('raise', 'v'), ('in', 'i'), ('a', 'a'), ('certain', 'j'), ('psychological', 'j'), ('value', 'n'), ('such', 'i'), ('as', 'i'), ('the', 'a'), ('value', 'n'), ('of', 'i'), ('hard', 'j'), ('work', 'n'), ('discipline', 'n'), ('they', 'p'), ('be', 'v'), ('use', 'v'), ('to', 't'), ('be', 'v'), ('in', 'i'), ('the', 'a'), ('condition', 'n'), ('where', 'c'), ('money', 'n'), ('do', 'v'), ('not', 'x'), ('come', 'v'), ('easily', 'r'), ('oppose', 'v'), ('to', 'i'), ('it', 'p'), ('a', 'a'), ('child', 'n'), ('who', 'p'), ('come', 'v'), ('from', 'i'), ('a', 'a'), ('wealthy', 'j'), ('family', 'n'), ('be', 'v'), ('use', 'v'), ('to', 't'), ('have', 'v'), ('money', 'n'), ('all', 'd'), ('the', 'a'), ('time', 'n'), ('whenever', 'r'), ('they', 'p'), ('want', 'v'), ('something', 'p'), ('the', 'a'), ('money', 'n'), ('be', 'v'), ('easily', 'r'), ('give', 'v'), ('to', 'i'), ('them', 'p'), ('well-trained', 'j'), ('to', 't'), ('face', 'v'), ('adulthood', 'n'), ('they', 'p'), ('watch', 'v'), ('their', 'a'), ('parent', 'n'), ('every','a'),('day', 'n'), ('work', 'v'), ('very', 'r'), ('hard', 'r'), ('just', 'r'), ('to', 't'), ('put', 'v'), ('food', 'n'), ('in', 'i'), ('the', 'a'), ('table', 'n'), ('they', 'p'), ('have', 'v'), ('the', 'a'), ('advantage', 'n'), ('to', 't'), ('see', 'v'), ('the', 'a'), ('reality', 'n'), ('and', 'c'), ('embrace', 'v'), ('it', 'p'), ('set', 'v'), ('their', 'a'), ('mind', 'n'), ('that', 'c'), ('they', 'p'), ('too', 'r'), ('have', 'v'), ('work', 'n'), ('hard', 'r'), ('for', 'i'), ('their', 'a'), ('future', 'n'), ('a', 'a'), ('child', 'n'), ('that', 'c'), ('come', 'v'), ('from', 'i'), ('a', 'a'), ('wealthy', 'j'), ('family', 'n'), ('do', 'v'), ('not', 'x'), ('always', 'r'), ('have', 'v'), ('the', 'a'), ('advantage', 'n'), ('this', 'd'), ('be', 'v'), ('because', 'c'), ('their', 'a'), ('eye', 'n'), ('be', 'v'), ('blind', 'v'), ('by', 'i'), ('the', 'a'), ('power', 'n'), ('of', 
'i'), ('money', 'n'), ('that', 'c'), ('their', 'a'), ('parent', 'n'), ('have', 'v'), ('they', 'p'), ('also', 'r'), ('have', 'v'), ('a', 'a'), ('disadvantage', 'n'), ('of', 'i'), ('a', 'a'), ('family', 'n'), ('love', 'n'), ('life', 'n'), ('commonly', 'r'), ('wealthy', 'j'), ('parent', 'n'), ('express', 'v'), ('love', 'n'), ('by', 'i'), ('money', 'n'), ('so', 'c'), ('they', 'p'), ('buy', 'v'), ('them', 'p'), ('car', 'n'), ('expensive', 'j'), ('clothes', 'n'), ('toy', 'n'), ('but', 'c'), ('they', 'p'), ('be', 'v'), ('never', 'r'), ('home', 'r'), ('the', 'a'), ('impact', 'n'), ('to', 'i'), ('a', 'a'), ('child', 'n'), ('be', 'v'), ('that', 'c'), ('they', 'p'), ('will', 'v'), ('grow', 'v'), ('up', 'r'), ('and', 'c'), ('think', 'v'), ('that', 'd'), ('money', 'n'), ('be', 'v'), ('everything', 'p'), ('that', 'c'), ('the', 'a'), ('source', 'n'), ('of', 'i'), ('happiness', 'n'), ('be', 'v'), ('money', 'n'), ('the', 'a'), ('obstacle', 'n'), ('be', 'v'), ('they', 'p'), ('do', 'v'), ('not', 'x'), ('know', 'v'), ('how', 'r'), ('to', 't'), ('get', 'v'), ('it', 'p'), ('they', 'p'), ('have', 'v'), ('be', 'v'), ('spoil', 'v'), ('in', 'i'), ('every', 'a'), ('time', 'n'), ('so', 'r'), ('do', 'v'), ('not', 'x'), ('have', 'v'), ('the','a'),('time', 'n'), ('to', 'i'), ('discover', 'v'), ('the', 'a'), ('art', 'n'), ('of', 'i'), ('money', 'n'), ('make', 'v'), ('only', 'j'), ('money', 'n'), ('spending', 'n'), ('on', 'i'), ('the', 'a'), ('contrary', 'n'), ('child', 'n'), ('from', 'i'), ('will', 'v'), ('grow', 'v'), ('up', 'r'), ('with', 'i'), ('the', 'a'), ('sense', 'n'), ('of', 'i'), ('respect', 'n'), ('for', 'i'), ('money', 'n'), ('they', 'p'), ('do', 'v'), ('know', 'v'), ('how', 'r'), ('to', 't'), ('face', 'v'), ('adult', 'n'), ('life', 'n'), ('problem', 'n'), ('because', 'c'), ('they', 'p'), ('have', 'v'), ('be', 'v'), ('observe', 'v'), ('since', 'c'), ('they', 'p'), ('be', 'v'), ('a', 'a'), ('child', 'n')]

In [24]:
# New B2 base col columns
# The B2 base text reuses the collocation lists stored on the B2_norm row as-is.
# Select that row once and unpack the four list-valued columns in order.

(text11_kband_cols,
 text11_K1to2_cols,
 text11_K3to9_cols,
 text11_K10to16_cols) = (
    texts_df
    .loc[texts_df.text_id == 'B2_norm',
         ['kband_cols', 'K1to2_cols', 'K3to9_cols', 'K10to16_cols']]
    .iloc[0]
)

In [25]:
# New B2 base bad cols
# Inaccurate collocations in the B2 base text, one per line so additions and
# removals are easy to review.

text11_bad_cols = [
    'raise in (values)',
    'psychological values',
    'well-trained to face adulthood',
    'oppose to it',
    'money is easily gave',
    'put food in the table',
    'set their mind that',
    'family love life',
    'express love by money',
    'impact to',
    'in every time',
    'adult life problems',
]

# Enforce the expected count instead of relying on a visual check of the output;
# this length is later stored as the 'col_errors' value for text11.
assert len(text11_bad_cols) == 12, "B2 base text should have exactly 12 bad collocations"
len(text11_bad_cols) # Should be 12

12

In [26]:
# New B2 base non cols
# Hand-entered list of (int, (lemma, POS-tag)) pairs for the B2 base text.
# Presumably the int is the word's K-band and the list holds the tokens that are
# not part of any collocation -- TODO confirm against the earlier notebooks.
# NOTE(review): ('compare', 'v') does not occur anywhere in text11_lemmasCLAWS
# (the lemma list has 'oppose to it' at that position), and the
# ('all', 'd'), ('the', 'a'), ('time', 'n') run after ('spoil', 'v') appears to
# correspond to 'in every time' in the lemma list, which is listed in
# text11_bad_cols. Confirm whether these entries are stale before relying on them.

text11_kband_non_cols = [(1, ('I', 'p')), (1, ('a', 'a')), (1, ('such', 'i')), (1, ('as', 'i')), (1, ('the', 'a')), (3, ('discipline', 'n')), (1, ('they', 'p')), (1, ('be', 'v')), (1, ('use', 'v')), (1, ('to', 't')), (1, ('be', 'v')), (1, ('where', 'c')), (1, ('money', 'n')), (1, ('do', 'v')), (1, ('not', 'x')), (1, ('compare', 'v')), (1, ('to', 'i')), (1, ('this', 'd')), (1, ('a', 'a')), (1, ('child', 'n')), (1, ('who', 'p')), (1, ('be', 'v')), (1, ('use', 'v')), (1, ('to', 't')), (3, ('whenever', 'r')), (1, ('they', 'p')), (1, ('want', 'v')), (1, ('something', 'p')), (1, ('the', 'a')), (1, ('money', 'n')), (1, ('be', 'v')), (1, ('to', 'i')), (1, ('them', 'p')), (9, ('adulthood', 'n')), (1, ('they', 'p')), (1, ('watch', 'v')), (1, ('their', 'a')), (1, ('parent', 'n')), (1, ('every','a')),(1,('day', 'n')), (1, ('just', 'r')), (1, ('to', 't')), (1, ('they', 'p')), (1, ('to', 't')), (1, ('they', 'p')), (1, ('too', 'r')), (1, ('have', 'v')), (1, ('work', 'n')), (1, ('hard', 'r')), (1, ('for', 'i')), (1, ('their', 'a')), (1, ('future', 'n')), (1, ('a', 'a')), (1, ('child', 'n')), (1, ('that', 'c')), (1, ('come', 'v')), (1, ('from', 'i')), (1, ('a', 'a')), (3, ('wealthy', 'j')), (1, ('family', 'n')), (1, ('do', 'v')), (1, ('not', 'x')), (1, ('always', 'r')), (1, ('have', 'v')), (1, ('the', 'a')), (2, ('advantage', 'n')), (1, ('this', 'd')), (1, ('be', 'v')), (1, ('because', 'c')), (1, ('their', 'a')), (1, ('eye', 'n')), (1, ('be', 'v')), (1, ('that', 'c')), (1, ('their', 'a')), (1, ('parent', 'n')), (1, ('have', 'v')), (1, ('they', 'p')), (1, ('also', 'r')), (1, ('have', 'v')), (1, ('a', 'a')), (7, ('disadvantage', 'n')), (1, ('of', 'i')), (1, ('a', 'a')), (4, ('commonly', 'r')), (3, ('wealthy', 'j')), (1, ('parent', 'n')), (1, ('so', 'c')), (1, ('they', 'p')), (1, ('buy', 'v')), (1, ('them', 'p')), (1, ('car', 'n')), (3, ('toy', 'n')), (1, ('but', 'c')), (1, ('they', 'p')), (1, ('the', 'a')), (1, ('a', 'a')), (1, ('child', 'n')), (1, ('be', 'v')), (1, ('that', 'c')), 
(1, ('they', 'p')), (1, ('will', 'v')), (1, ('grow', 'v')), (1, ('up', 'r')), (1, ('and', 'c')), (1, ('think', 'v')), (1, ('that', 'd')), (1, ('money', 'n')), (1, ('be', 'v')), (1, ('everything', 'p')), (1, ('that', 'c')), (1, ('the', 'a')), (1, ('be', 'v')), (1, ('money', 'n')), (1, ('the', 'a')), (5, ('obstacle', 'n')), (1, ('be', 'v')), (1, ('they', 'p')), (1, ('do', 'v')), (1, ('not', 'x')), (1, ('know', 'v')), (1, ('how', 'r')), (1, ('to', 't')), (1, ('get', 'v')), (1, ('it', 'p')), (1, ('they', 'p')), (1, ('have', 'v')), (1, ('be', 'v')), (7, ('spoil', 'v')), (1, ('all', 'd')), (1, ('the', 'a')), (1, ('time', 'n')), (1, ('so', 'r')), (1, ('do', 'v')), (1, ('not', 'x')), (1, ('to', 'i')), (2, ('discover', 'v')), (1, ('money', 'n')), (1, ('make', 'v')), (1, ('only', 'j')), (1, ('money', 'n')), (3, ('spending', 'n')), (1, ('child', 'n')), (1, ('from', 'i')), (1, ('will', 'v')), (1, ('for', 'i')), (1, ('money', 'n')), (1, ('they', 'p')), (1, ('do', 'v')), (1, ('know', 'v')), (1, ('how', 'r')), (1, ('to', 't')), (1, ('because', 'c')), (1, ('they', 'p')), (1, ('have', 'v')), (1, ('be', 'v')), (2, ('observe', 'v')), (1, ('since', 'c')), (1, ('they', 'p')), (1, ('be', 'v')), (1, ('a', 'a')), (1, ('child', 'n'))]

In [27]:
# Create new row for B2 base
# The count fields are derived from the list lengths so they cannot drift from
# the lists stored alongside them.

text11 = {
    'text_id': 'text11',
    'text': text11_text,
    'lemmas_NLTK': text11_lemmasNLTK,
    'lemmas_CLAWS': text11_lemmasCLAWS,
    'correct_cols': len(text11_kband_cols),
    'col_errors': len(text11_bad_cols),
    'K1to2': len(text11_K1to2_cols),
    'K3to9': len(text11_K3to9_cols),
    'K10to16': len(text11_K10to16_cols),
    'kband_cols': text11_kband_cols,
    'K1to2_cols': text11_K1to2_cols,
    'K3to9_cols': text11_K3to9_cols,
    'K10to16_cols': text11_K10to16_cols,
    'bad_cols': text11_bad_cols,
    'kband_non_cols': text11_kband_non_cols,
}

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the
# row is added with pd.concat instead. The dict is wrapped in a list so that it
# becomes ONE row whose list-valued entries are stored as single cells.
# Any existing row with the same text_id is dropped first, so re-running this
# cell does not append a duplicate.
texts_df = pd.concat(
    [texts_df[texts_df.text_id != text11['text_id']], pd.DataFrame([text11])],
    ignore_index=True,
)

  texts_df = texts_df.append(text11, ignore_index=True)


In [28]:
# Inspect the dataframe after adding the B2 base row (text_id = text11)
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,correct_cols,col_errors,K1to2,K3to9,K10to16,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,bad_cols,kband_non_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,20,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,8,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",33,5,18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."
3,text1,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,18,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
4,text11,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,12,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raise in (values), psychological values, well...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."


## Creating base text C1
Add one inaccurate collocation and remove one accurate collocation. Both changes are achieved with a single substitution:

1. direct contribution -> straight contribution

In [29]:
# Look up the kband_dict value for each word involved in the substitution
# (direct -> straight, modifying 'contribution'). Keys are (lemma, POS-tag) tuples.
# All three values are echoed because ast_node_interactivity is set to "all"
# in the setup cell.
kband_dict[('direct','j')]
kband_dict[('straight','j')]
kband_dict[('contribution','n')]

2

3

3

In [30]:
# New C1 base text (text_id = text21)
# Contains the substituted phrase "straight contribution".
# NOTE: the wording is data, not prose to be proofread -- phrases such as
# 'in the weekends' are listed in text21_bad_cols, so do not "correct" this string.

text21_text = "I do agree to the statement that children brought up in poor families.\nChildren of poor parents are prematurely exposed to problems of adult life e.g. learning to survive on a low family income and sacrificing luxuries for essential items. These children began to see the 'realities' of life in their home or social environment. Their parents own struggles serve as an example to them.\nThese children are taught necessary skills for survival as an adult from a very early age. Many children eg work in the weekends to either collect some pocket money or even contribute to their families' income. A good example is the many children who accompany their parents to sell produce at the market. They are making a straight contribution to their families in terms of labor or income.\nChildren of poor families also are highly motivated. They tend to set high goals to improve their economic and social situation. A relevant example would be Mister Bill Gates (founder of Microsoft Corporation) He had an impoverished background but he used his talent and motivation to set up the worlds largest computer organization.\nHowever, there are some problems that children from poor backgrounds encounter. Many of these children who are 'robbed' of their childhood eg while working, may feel cheated. They often turn to crime. This however, is a small group.\nIn summing up, children with impoverished backgrounds are able to deal with problems of adult life because of early exposure, family role models and sheer motivation."

In [31]:
# New C1 base text lemmas_NLTK
# Flat list of lemma strings for text21_text; punctuation marks are kept as
# separate tokens. Includes the substituted lemma 'straight' before 'contribution'.
# (Presumably NLTK output, per the variable name -- hard-coded here rather than recomputed.)

text21_lemmasNLTK = ['I', 'do', 'agree', 'to', 'the', 'statement', 'that', 'child', 'bring', 'up', 'in', 'poor', 'family', '.', 'child', 'of', 'poor', 'parent', 'be', 'prematurely', 'expose', 'to', 'problem', 'of', 'adult', 'life', 'e.g', '.', 'learn', 'to', 'survive', 'on', 'a', 'low', 'family', 'income', 'and', 'sacrifice', 'luxury', 'for', 'essential', 'item', '.', 'these', 'child', 'begin', 'to', 'see', 'the', "'", 'reality', "'", 'of', 'life', 'in', 'their', 'home', 'or', 'social', 'environment', '.', 'their', 'parent', 'own', 'struggle', 'serve', 'as', 'a', 'example', 'to', 'them', '.', 'these', 'child', 'be', 'teach', 'necessary', 'skill', 'for', 'survival', 'as', 'a', 'adult', 'from', 'a', 'very', 'early', 'age', '.', 'many', 'child', 'eg', 'work', 'in', 'the', 'weekend', 'to', 'either', 'collect', 'some', 'pocket', 'money', 'or', 'even', 'contribute', 'to', 'their', 'family', "'", 'income', '.', 'A', 'good', 'example', 'be', 'the', 'many', 'child', 'who', 'accompany', 'their', 'parent', 'to', 'sell', 'produce', 'at', 'the', 'market', '.', 'They', 'be', 'make', 'a', 'straight', 'contribution', 'to', 'their', 'family', 'in', 'term', 'of', 'labor', 'or', 'income', '.', 'child', 'of', 'poor', 'family', 'also', 'be', 'highly', 'motivated', '.', 'They', 'tend', 'to', 'set', 'high', 'goal', 'to', 'improve', 'their', 'economic', 'and', 'social', 'situation', '.', 'A', 'relevant', 'example', 'would', 'be', 'Mister', 'Bill', 'Gates', '(', 'founder', 'of', 'Microsoft', 'Corporation', ')', 'He', 'have', 'a', 'impoverished', 'background', 'but', 'he', 'use', 'his', 'talent', 'and', 'motivation', 'to', 'set', 'up', 'the', 'world', 'large', 'computer', 'organization', '.', 'however', ',', 'there', 'be', 'some', 'problem', 'that', 'child', 'from', 'poor', 'background', 'encounter', '.', 'many', 'of', 'these', 'child', 'who', 'be', "'", 'rob', "'", 'of', 'their', 'childhood', 'eg', 'while', 'work', ',', 'may', 'feel', 'cheat', '.', 'They', 'often', 'turn', 'to', 'crime', 
'.', 'this', 'however', ',', 'be', 'a', 'small', 'group', '.', 'in', 'sum', 'up', ',', 'child', 'with', 'impoverished', 'background', 'be', 'able', 'to', 'deal', 'with', 'problem', 'of', 'adult', 'life', 'because', 'of', 'early', 'exposure', ',', 'family', 'role', 'model', 'and', 'sheer', 'motivation', '.']

In [32]:
# New C1 base text lemmas_CLAWS
# List of (lemma, POS-tag) tuples for text21_text; unlike lemmas_NLTK this list
# carries no punctuation tokens. Includes the substituted pair ('straight', 'j').
# NOTE(review): some tags/lemmas here differ from the hand-entered collocation
# lists, e.g. ('produce', 'v') and ('terms', 'i') here vs ('produce', 'n') and
# ('term', 'n') in text21_kband_cols -- presumably raw tagger output; confirm
# that downstream matching does not depend on them agreeing.

text21_lemmasCLAWS = [('I', 'p'), ('do', 'v'), ('agree', 'v'), ('to', 'i'), ('the', 'a'), ('statement', 'n'), ('that', 'c'), ('child', 'n'), ('bring', 'v'), ('up', 'r'), ('in', 'i'), ('poor', 'j'), ('family', 'n'), ('child', 'n'), ('of', 'i'), ('poor', 'j'), ('parent', 'n'), ('be', 'v'), ('prematurely', 'r'), ('expose', 'v'), ('to', 'i'), ('problem', 'n'), ('of', 'i'), ('adult', 'n'), ('life', 'n'), ('eg', 'r'), ('learn', 'v'), ('to', 't'), ('survive', 'v'), ('on', 'i'), ('a', 'a'), ('low', 'j'), ('family', 'n'), ('income', 'n'), ('and', 'c'), ('sacrifice', 'v'), ('luxury', 'n'), ('for', 'i'), ('essential', 'j'), ('item', 'n'), ('these', 'd'), ('child', 'n'), ('begin', 'v'), ('to', 't'), ('see', 'v'), ('the', 'a'), ("'realities", 'n'), ('of', 'i'), ('life', 'n'), ('in', 'i'), ('their', 'a'), ('home', 'n'), ('or', 'c'), ('social', 'j'), ('environment', 'n'), ('their', 'a'), ('parent', 'n'), ('own', 'd'), ('struggle', 'n'), ('serve', 'v'), ('as', 'i'), ('a', 'a'), ('example', 'n'), ('to', 'i'), ('them', 'p'), ('these', 'd'), ('child', 'n'), ('be', 'v'), ('teach', 'v'), ('necessary', 'j'), ('skill', 'n'), ('for', 'i'), ('survival', 'n'), ('as', 'i'), ('a', 'a'), ('adult', 'n'), ('from', 'i'), ('a', 'a'), ('very', 'r'), ('early', 'j'), ('age', 'n'), ('many', 'd'), ('child', 'n'), ('eg', 'r'), ('work', 'v'), ('in', 'i'), ('the', 'a'), ('weekend', 'n'), ('to', 't'), ('either', 'r'), ('collect', 'v'), ('some', 'd'), ('pocket', 'n'), ('money', 'n'), ('or', 'c'), ('even', 'r'), ('contribute', 'v'), ('to', 'i'), ('their', 'a'), ('family', 'n'), ('income', 'n'), ('a', 'a'), ('good', 'j'), ('example', 'n'), ('be', 'v'), ('the', 'a'), ('many', 'd'), ('child', 'n'), ('who', 'p'), ('accompany', 'v'), ('their', 'a'), ('parent', 'n'), ('to', 't'), ('sell', 'v'), ('produce', 'v'), ('at', 'i'), ('the', 'a'), ('market', 'n'), ('they', 'p'), ('be', 'v'), ('make', 'v'), ('a', 'a'), ('straight', 'j'), ('contribution', 'n'), ('to', 'i'), ('their', 'a'), ('family', 'n'), ('in', 'i'), 
('terms', 'i'), ('of', 'i'), ('labor', 'n'), ('or', 'c'), ('income', 'n'), ('child', 'n'), ('of', 'i'), ('poor', 'j'), ('family', 'n'), ('also', 'r'), ('be', 'v'), ('highly', 'r'), ('motivated', 'j'), ('they', 'p'), ('tend', 'v'), ('to', 't'), ('set', 'v'), ('high', 'j'), ('goal', 'n'), ('to', 't'), ('improve', 'v'), ('their', 'a'), ('economic', 'j'), ('and', 'c'), ('social', 'j'), ('situation', 'n'), ('a', 'a'), ('relevant', 'j'), ('example', 'n'), ('would', 'v'), ('be', 'v'), ('Mister', 'n'), ('bill', 'n'), ('gate', 'n'), ('founder', 'n'), ('of', 'i'), ('microsoft', 'n'), ('corporation', 'n'), ('he', 'p'), ('have', 'v'), ('a', 'a'), ('impoverished', 'j'), ('background', 'n'), ('but', 'c'), ('he', 'p'), ('use', 'v'), ('his', 'a'), ('talent', 'n'), ('and', 'c'), ('motivation', 'n'), ('to', 't'), ('set', 'v'), ('up', 'r'), ('the', 'a'), ('world', 'n'), ('large', 'j'), ('computer', 'n'), ('organization', 'n'), ('however', 'r'), ('there', 'e'), ('be', 'v'), ('some', 'd'), ('problem', 'n'), ('that', 'c'), ('child', 'n'), ('from', 'i'), ('poor', 'j'), ('background', 'n'), ('encounter', 'v'), ('many', 'd'), ('of', 'i'), ('these', 'd'), ('child', 'n'), ('who', 'p'), ('be', 'v'), ('rob', 'v'), ('of', 'i'), ('their', 'a'), ('childhood', 'n'), ('eg', 'r'), ('while', 'c'), ('work', 'v'), ('may', 'v'), ('feel', 'v'), ('cheated', 'j'), ('they', 'p'), ('often', 'r'), ('turn', 'v'), ('to', 'i'), ('crime', 'n'), ('this', 'd'), ('however', 'r'), ('be', 'v'), ('a', 'a'), ('small', 'j'), ('group', 'n'), ('in', 'i'), ('sum', 'v'), ('up', 'r'), ('child', 'n'), ('with', 'i'), ('impoverished', 'j'), ('background', 'n'), ('be', 'v'), ('able', 'j'), ('to', 't'), ('deal', 'v'), ('with', 'i'), ('problem', 'n'), ('of', 'i'), ('adult', 'n'), ('life', 'n'), ('because', 'i'), ('of', 'i'), ('early', 'j'), ('exposure', 'n'), ('family', 'n'), ('role', 'n'), ('model', 'n'), ('and', 'c'), ('sheer', 'j'), ('motivation', 'n')]

In [33]:
# New C1 collocations
# K3-9 collocations of the C1 base text, one (band, [(lemma, tag), ...]) entry per line.

new_text21_K3to9_cols = [
    (7, [('sacrifice', 'v'), ('luxury', 'n'), ('for', 'i')]),
    (3, [('essential', 'j'), ('item', 'n')]),
    (3, [('skill', 'n'), ('for', 'i'), ('survival', 'n')]),
    (4, [('accompany', 'v'), ('their', 'a'), ('parent', 'n')]),
    (8, [('sell', 'v'), ('produce', 'n')]),
    (3, [('a', 'a'), ('relevant', 'j'), ('example', 'n')]),
    (3, [('founder', 'n'), ('of', 'i')]),
    (5, [('rob', 'v'), ('of', 'i'), ('their', 'a'), ('childhood', 'n')]),
    (4, [('feel', 'v'), ('cheat', 'v')]),
    (3, [('early', 'j'), ('exposure', 'n')]),
    (5, [('sheer', 'j'), ('motivation', 'n')]),
]

In [34]:
# New C1 base bad cols
# Inaccurate collocations in the C1 base text, including the newly introduced
# 'making a straight contribution'.

text21_bad_cols = [
    'agree to the statement',
    'in the weekends',
    'collect some pocket money',
    'computer organization',
    'making a straight contribution',
    'in summing up',
]

In [35]:
# New C1 kband cols
# All accurate collocations of the C1 base text in text order,
# one (band, [(lemma, tag), ...]) entry per line.

text21_kband_cols = [
    (1, [('poor', 'j'), ('family', 'n')]),
    (1, [('poor', 'j'), ('parent', 'n')]),
    (14, [('be', 'v'), ('prematurely', 'r'), ('expose', 'v'), ('to', 'i')]),
    (2, [('learn', 'v'), ('to', 't'), ('survive', 'v')]),
    (2, [('low', 'j'), ('family', 'n'), ('income', 'n')]),
    (7, [('sacrifice', 'v'), ('luxury', 'n'), ('for', 'i')]),
    (3, [('essential', 'j'), ('item', 'n')]),
    (1, [('reality', 'n'), ('of', 'i'), ('life', 'n')]),
    (1, [('home', 'n'), ('or', 'c'), ('social', 'j'), ('environment', 'n')]),
    (1, [('serve', 'v'), ('as', 'i'), ('a', 'a'), ('example', 'n')]),
    (2, [('teach', 'v'), ('necessary', 'j'), ('skill', 'n')]),
    (3, [('skill', 'n'), ('for', 'i'), ('survival', 'n')]),
    (1, [('from', 'i'), ('a', 'a'), ('very', 'r'), ('early', 'j'), ('age', 'n')]),
    (2, [('contribute', 'v'), ('to', 'i')]),
    (1, [('good', 'j'), ('example', 'n')]),
    (4, [('accompany', 'v'), ('their', 'a'), ('parent', 'n')]),
    (8, [('sell', 'v'), ('produce', 'n')]),
    (1, [('at', 'i'), ('the', 'a'), ('market', 'n')]),
    (1, [('in', 'i'), ('term', 'n'), ('of', 'i')]),
    (10, [('highly', 'r'), ('motivated', 'j')]),
    (1, [('set', 'v'), ('high', 'j'), ('goal', 'n')]),
    (2, [('improve', 'v'), ('their', 'a'), ('economic', 'j'), ('and', 'c'), ('social', 'j'), ('situation', 'n')]),
    (3, [('a', 'a'), ('relevant', 'j'), ('example', 'n')]),
    (3, [('founder', 'n'), ('of', 'i')]),
    (10, [('impoverished', 'j'), ('background', 'n')]),
    (2, [('poor', 'j'), ('background', 'n')]),
    (5, [('rob', 'v'), ('of', 'i'), ('their', 'a'), ('childhood', 'n')]),
    (4, [('feel', 'v'), ('cheat', 'v')]),
    (1, [('turn', 'v'), ('to', 'i'), ('crime', 'n')]),
    (3, [('early', 'j'), ('exposure', 'n')]),
    (1, [('family', 'n'), ('role', 'n'), ('model', 'n')]),
    (5, [('sheer', 'j'), ('motivation', 'n')]),
]

In [36]:
# New C1 base col columns
# Only the K3-9 list changes for the C1 base text; the K1-2, K10-16 and
# non-collocation lists are taken unchanged from the C1_norm row.

text21_K3to9_cols = new_text21_K3to9_cols

(text21_K1to2_cols,
 text21_K10to16_cols,
 text21_kband_non_cols) = (
    texts_df
    .loc[texts_df.text_id == 'C1_norm',
         ['K1to2_cols', 'K10to16_cols', 'kband_non_cols']]
    .iloc[0]
)

In [37]:
# Create new row for C1 base
# The count fields are derived from the list lengths so they cannot drift from
# the lists stored alongside them.

text21 = {
    'text_id': 'text21',
    'text': text21_text,
    'lemmas_NLTK': text21_lemmasNLTK,
    'lemmas_CLAWS': text21_lemmasCLAWS,
    'correct_cols': len(text21_kband_cols),
    'col_errors': len(text21_bad_cols),
    'K1to2': len(text21_K1to2_cols),
    'K3to9': len(text21_K3to9_cols),
    'K10to16': len(text21_K10to16_cols),
    'kband_cols': text21_kband_cols,
    'K1to2_cols': text21_K1to2_cols,
    'K3to9_cols': text21_K3to9_cols,
    'K10to16_cols': text21_K10to16_cols,
    'bad_cols': text21_bad_cols,
    'kband_non_cols': text21_kband_non_cols,
}

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the
# row is added with pd.concat instead. The dict is wrapped in a list so that it
# becomes ONE row whose list-valued entries are stored as single cells.
# Any existing row with the same text_id is dropped first, so re-running this
# cell does not append a duplicate.
texts_df = pd.concat(
    [texts_df[texts_df.text_id != text21['text_id']], pd.DataFrame([text21])],
    ignore_index=True,
)

  texts_df = texts_df.append(text21, ignore_index=True)


In [38]:
# Inspect the dataframe after adding the C1 base row (text_id = text21)
texts_df

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,correct_cols,col_errors,K1to2,K3to9,K10to16,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,bad_cols,kband_non_cols
0,B1_norm,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,20,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,B2_norm,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,8,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raised in (values), psychological values, opp...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,C1_norm,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",33,5,18,12,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."
3,text1,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,18,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
4,text11,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,12,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raise in (values), psychological values, well...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
5,text21,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",32,6,18,11,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."


In [39]:
# Pickle base_texts dataframe

# Select the three base texts by text_id rather than by position: a positional
# slice (iloc[3:]) silently picks up wrong or duplicated rows if texts_df was
# built by re-running cells out of order.
base_text_ids = ['text1', 'text11', 'text21']
base_texts = texts_df[texts_df.text_id.isin(base_text_ids)].reset_index(drop=True)

# Fail loudly before writing the pickle if a base text is missing, duplicated or
# out of order -- the fix is Restart Kernel -> Run All.
assert base_texts.text_id.to_list() == base_text_ids, (
    f"Expected base texts {base_text_ids}, got {base_texts.text_id.to_list()}"
)

base_texts
joblib.dump(base_texts,'../docs/base_texts.pkl')

Unnamed: 0,text_id,text,lemmas_NLTK,lemmas_CLAWS,correct_cols,col_errors,K1to2,K3to9,K10to16,kband_cols,K1to2_cols,K3to9_cols,K10to16_cols,bad_cols,kband_non_cols
0,text1,I disagree that point about children brought u...,"[I, disagree, that, point, about, child, bring...","[(I, p), (disagree, v), (that, d), (point, n),...",12,18,9,3,0,"[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(1, [('good', 'j'), ('effect', 'n')]), (1, [(...","[(4, [('entrance', 'n'), ('to', 'i'), ('the', ...",[],"[disagree that point, show that situation, cou...","[(1, (I, p)), (1, (about, i)), (1, (child, n))..."
1,text11,I greatly support the idea.\nraised in a certa...,"[I, greatly, support, the, idea, ., raise, in,...","[(I, p), (greatly, r), (support, v), (the, a),...",22,12,16,6,0,"[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(1, [('support', 'v'), ('the', 'a'), ('idea',...","[(3, [('come', 'v'), ('from', 'i'), ('a', 'a')...",[],"[raise in (values), psychological values, well...","[(1, (I, p)), (1, (a, a)), (1, (such, i)), (1,..."
2,text21,I do agree to the statement that children brou...,"[I, do, agree, to, the, statement, that, child...","[(I, p), (do, v), (agree, v), (to, i), (the, a...",32,6,18,11,3,"[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(1, [('poor', 'j'), ('family', 'n')]), (1, [(...","[(7, [('sacrifice', 'v'), ('luxury', 'n'), ('f...","[(14, [('be', 'v'), ('prematurely', 'r'), ('ex...","[agree to the statement, in the weekends, coll...","[(1, (I, p)), (1, (do, v)), (1, (that, c)), (1..."


['../docs/base_texts.pkl']

[Back to top](#Creating-base-texts)