### Keyword and Concordance Analysis
Author: Caroline Jung \
Last modified: 5/9/24

#### Define General Functions

In [10]:
import pandas as pd

In [31]:
pd.set_option("display.max_colwidth",1000)

In [11]:
def get_cognates(language):
    """Load the English cognate list for ``language``.

    Reads the first line of
    ``cognate-data/<language>_english_cognates.txt`` (the language name is
    lower-cased) and splits it on ``", "``.

    Args:
        language: Language name such as ``"german"``; case is ignored.

    Returns:
        list[str]: The cognates in file order.
    """
    path = f"cognate-data/{language.lower()}_english_cognates.txt"
    # The context manager closes the handle even if reading fails.
    with open(path) as handle:
        first_line = handle.readline()
    # readline() keeps the line terminator; strip it so the last cognate is
    # stored as "word" rather than "word\n" (which could never match a token).
    return first_line.rstrip("\n").split(", ")

In [12]:
def read_keywords(file, lang):
    """Read one keyword list, one keyword per line.

    Args:
        file: File name inside the ``keyword-<lang>`` directory,
            e.g. ``"w_int.txt"``.
        lang: Language suffix of the directory, e.g. ``"german"``.

    Returns:
        list[str]: The lines of the file with newline characters stripped
        from both ends; other whitespace is left untouched.
    """
    # Use a separate name for the handle (the parameter ``file`` is the file
    # name) and let the context manager close it.
    with open(f"keyword-{lang}/{file}") as handle:
        return [line.strip("\n") for line in handle]
# NOTE: the keyword files are based on a minimum frequency of 25 and on likelihood against the native reference corpus.

def compile_all_text(file):
    """Pool every line of ``file`` into one list of space-separated tokens.

    Each line (one participant) is followed by a single space, the pieces are
    concatenated, and the result is split on ``" "``.

    Args:
        file: Path of the text file to read.

    Returns:
        list[str]: The tokens. Because only the space character is used as a
        separator, the last token of each line keeps its trailing newline,
        consecutive spaces yield empty strings, and the list ends with an
        empty string (an empty file gives ``[""]``).
    """
    # Context manager guarantees the handle is closed if reading raises.
    with open(file, "r") as handle:
        lines = handle.readlines()
    # join() builds the pooled string in one pass instead of repeated "+=".
    pooled = "".join(line + " " for line in lines)
    return pooled.split(" ")

#### Native speakers

In [13]:
# Cognate lists used by the counting cells below.
german_cognates = get_cognates("german")
spanish_cognates = get_cognates("spanish")

# One row per modality; the cognate columns are left empty here.
text_order = ["written", "spoken"]
natives = pd.DataFrame(
    index=text_order,
    columns=["text", "german_cognates", "spanish_cognates"],
)

# Tokenise native_written.txt and native_spoken.txt, in row order.
text_data = [compile_all_text(f"native_{modality}.txt") for modality in text_order]

natives.loc[:,"text"] = text_data
natives

Unnamed: 0,text,german_cognates,spanish_cognates
written,"[A, man, is, walking, down, an, unpaved, stree...",,
spoken,"[ok, so, in, the, video, there, is, a, man, at...",,


In [14]:
# Sizes of the two cognate lists. A notebook cell echoes only the value of its
# last expression, so the single number recorded as this cell's output is
# len(spanish_cognates); the German length is computed but not displayed.
len(german_cognates)
len(spanish_cognates)

473

In [15]:
# now, can count cognates
def count_cognates_native(row):
    """Count German and Spanish cognates in one native-speaker text.

    Args:
        row: Index label of ``natives`` whose ``"text"`` token list is
            searched ("written" or "spoken" in this notebook).

    Returns:
        tuple[list, list]: ``(german, spanish)``. Each element is a list of
        ``(relative_frequency, absolute_frequency, cognate)`` tuples for the
        cognates that occur as exact tokens, sorted in descending order.
        The relative frequency is per thousand tokens, rounded to 3 decimals.
    """
    from collections import Counter

    # The text and its length do not depend on the cognate, so fetch them once.
    text = natives.loc[f"{row}", "text"]
    num_words = len(text)
    token_counts = Counter(text)  # one pass instead of a scan per cognate

    def tally(cognates):
        """Return the sorted frequency tuples for the cognates present in the text."""
        found = []
        for cognate in cognates:
            freq = token_counts.get(cognate, 0)  # absolute frequency
            if freq:
                # relative frequency, per thousand words
                freq_relative = round((freq/num_words)*1000,3)
                found.append((freq_relative, freq, cognate))
        return sorted(found, reverse=True)

    return tally(german_cognates), tally(spanish_cognates)

In [70]:
# Each call returns both the German and the Spanish list, so count once per
# modality and store both results instead of calling twice per modality.
for modality in ("written", "spoken"):
    german_counts, spanish_counts = count_cognates_native(modality)
    natives.loc[modality, "german_cognates"] = german_counts
    natives.loc[modality, "spanish_cognates"] = spanish_counts
natives

Unnamed: 0,text,german_cognates,spanish_cognates
written,"[A, man, is, walking, down, an, unpaved, street., Things, are, being, thrown, from, the, windows., The, road, is, cluttered, and, hazardous., He, gets, hit, with, trash,, a, mixture, of, dirt, and, rock,, that, was, thrown, from, the, window, of, an, overhead, apartment., He, then, sighs, and, smokes, a, cigarette., Shortly, after, he, sees, a, baby, abandoned, on, the, side, of, the, hazardous, street., He, quickly, picks, up, the, baby,, as, he, turns, around, he, spots, a, mother, with, a, stroller., He, mistakenly, thinks, that, the, baby, most, be, hers, and, puts, the, baby, in, the, stroller, while, the, mother, is, ...]","[(37.815, 135, baby), (17.367, 62, man), (0.56, 2, moment), (0.56, 2, cigarette), (0.28, 1, storm), (0.28, 1, paper), (0.28, 1, ground), (0.28, 1, fall), (0.28, 1, end)]","[(5.322, 19, found), (0.56, 2, abandon), (0.28, 1, perspective), (0.28, 1, context), (0.28, 1, attitude)]"
spoken,"[ok, so, in, the, video, there, is, a, man, at, the, beginning, and, he, walks, around, pretty, carelessly, he, is, walking, through, a, construction, zone, /, and, kinda, looks, like, somebody, that, just, doesn't, have, a, clue, /, uh, so, he, gets, hit, /, by, some, like, scaffolding, or, something, that, falls, /, uh, and, then, /, he, decides, the, solution, to, that, is, to, smoke, a, cigarette, so, he, goes, for, that, and, then, he, sees, a, baby, /, uh, that, just, left, in, the, street, and, he, picks, the, baby, up, /, uh, has, it, then, kinda, decides, ...]","[(36.85, 270, baby), (10.919, 80, man), (1.911, 14, ground), (0.819, 6, cigarette), (0.409, 3, scene), (0.409, 3, moment), (0.409, 3, idea), (0.273, 2, system), (0.273, 2, end), (0.136, 1, wonder), (0.136, 1, title), (0.136, 1, paper), (0.136, 1, character)]","[(2.457, 18, found), (0.273, 2, assume), (0.273, 2, area), (0.136, 1, reverse), (0.136, 1, obvious), (0.136, 1, context), (0.136, 1, brief), (0.136, 1, attitude)]"


#### German Speakers

In [43]:
# GERMAN DATA
german_cognates = get_cognates("german")

# Row labels of the German table: modality followed by proficiency level.
german_text_order = [
    "written_intermediate",
    "written_advanced",
    "spoken_intermediate",
    "spoken_advanced",
]

# Empty table; the columns are filled in by the following cells.
german = pd.DataFrame(
    index=german_text_order,
    columns=[
        "keywords",
        "keyword_cognates",
        "text",
        "text_cognates",
        "text_new_cognates",
    ],
)

In [44]:
# fill in the dataframe
# Keyword file stems, listed in the same order as german_text_order.
keyword_order = ["w_int", "w_adv", "s_int", "s_adv"]
keyword_data, text_data = [], []

# Walk the short keyword stems and the long row labels in lockstep.
for stem, label in zip(keyword_order, german_text_order):
    keyword_data.append(read_keywords(f"{stem}.txt", "german"))
    text_data.append(compile_all_text(f"german_{label}.txt"))

german.loc[:,"keywords"] = keyword_data
german.loc[:,"text"] = text_data

In [59]:
# now, can count cognates
def count_cognates(row, text):
    """Count German cognates in one column of a ``german`` row.

    NOTE: a later cell of this notebook defines another ``count_cognates``
    for the Spanish data, which replaces this one once that cell has run.

    Args:
        row: Index label of ``german`` (e.g. ``"written_advanced"``).
        text: ``"keywords"`` or ``"text"``, naming the column to search.
            Any other value is searched directly, as in the previous
            implementation.

    Returns:
        list[tuple]: ``(relative_frequency, absolute_frequency, cognate)``
        for every cognate that occurs, sorted in descending order (most
        frequent first). The relative frequency is per thousand entries of
        the searched sequence, rounded to 2 decimals.
    """
    # Resolve the column once, under a separate name; previously the ``text``
    # parameter itself was overwritten during the first loop iteration.
    if text == "keywords" or text == "text":
        tokens = german.loc[f"{row}", text]
    else:
        tokens = text
    num_words = len(tokens)

    all_cognates = []
    for cognate in german_cognates:
        freq = tokens.count(cognate) # absolute frequency (0 when absent)
        if freq:
            freq_relative = round((freq/num_words)*1000,2) # relative frequency, per thousand words
            all_cognates.append((freq_relative, freq, cognate))

    return sorted(all_cognates, reverse=True) # top used cognates first

In [60]:
# fill in df with keywords and concordances
keyword_cognates, text_cognates = [], []
# Iterate the row labels directly; each label is counted in both columns.
for label in german_text_order:
    keyword_cognates.append(count_cognates(label, "keywords"))
    text_cognates.append(count_cognates(label, "text"))

german.loc[:, "keyword_cognates"] = keyword_cognates
german.loc[:, "text_cognates"] = text_cognates

In [61]:
# new cognates (not mentioned by natives)
def count_new_cognates(row):
    """Count German cognates used by learners but absent from the native list.

    The native comparison list is ``natives.loc[<modality>, "german_cognates"]``
    where the modality is "spoken" if that word occurs in ``row`` and
    "written" otherwise.

    NOTE: a later cell of this notebook defines another ``count_new_cognates``
    for the Spanish data, which replaces this one once that cell has run.

    Args:
        row: Index label of ``german``; must contain "spoken" or "written".

    Returns:
        list[tuple]: ``(relative_frequency, absolute_frequency, cognate)``
        for each cognate found in the row's ``"text"`` tokens but not in the
        native list, sorted in descending order. The relative frequency is
        per thousand tokens, rounded to 1 decimal.

    Raises:
        ValueError: If ``row`` names neither modality (previously this
            surfaced as an unbound-variable error).
    """
    from collections import Counter

    if "spoken" in row:
        modality = "spoken"
    elif "written" in row:
        modality = "written"
    else:
        raise ValueError(f"row must contain 'spoken' or 'written': {row!r}")

    # The third tuple element is the cognate; a set gives O(1) membership tests.
    native_cognates = {entry[2] for entry in natives.loc[modality, "german_cognates"]}

    # Fetch and count the learner text once rather than once per cognate.
    text = german.loc[f"{row}", "text"]
    num_words = len(text)
    token_counts = Counter(text)

    all_cognates = []
    for cognate in german_cognates:
        if cognate in native_cognates: # already used by natives
            continue
        freq = token_counts.get(cognate, 0) # absolute frequency
        if freq: # used by german speakers
            freq_relative = round((freq/num_words)*1000,1) # relative frequency, per thousand words
            all_cognates.append((freq_relative, freq, cognate))

    return sorted(all_cognates, reverse=True) # top used cognates first

In [62]:
# final df
# One list of "new" cognates per row label, in row order.
text_new_cognates = [count_new_cognates(label) for label in german_text_order]
german.loc[:, "text_new_cognates"] = text_new_cognates
german

Unnamed: 0,keywords,keyword_cognates,text,text_cognates,text_new_cognates
written_intermediate,"[chaplin, his, by, walks, but, her, down, as, for, away, at, out, take, an, finds, after, when, who, not, into, sees, stroller, care, again, street, where, this, found, so, puts, police, while, tries, which, officer, br, takes, orphan]",[],"[Charlie, Chaplin, is, walking, down, a, street, with, quite, shabby, looking, houses, when, he, suddenly, got, hit, by, some, trash, that, seemed, to, be, thrown, out, of, a, window, (which, is, not, visible, to, the, audience, in, the, screen, ), and, should, have, fallen, into, the, waste, can, next, to, Chaplin., <br/>, , <br/>, Chaplin, lights, another, cigarette,, throws, his, gloves, into, the, trash, and, looks, to, the, other, side, where, he, finds, a, baby, covered, in, a, blanket, lying, on, the, Floor., He, looks, up, to, the, window, from, where, the, trash, was, thrown, out, and, picks, the, baby, ...]","[(27.06, 224, baby), (12.08, 100, man), (1.21, 10, cigarette), (1.09, 9, person), (0.97, 8, paper), (0.97, 8, moment), (0.97, 8, end), (0.97, 8, character), (0.85, 7, scene), (0.85, 7, ground), (0.36, 3, house), (0.36, 3, hand), (0.24, 2, fall), (0.24, 2, action), (0.12, 1, title), (0.12, 1, number), (0.12, 1, notice), (0.12, 1, idea)]","[(1.1, 9, person), (1.0, 8, character), (0.8, 7, scene), (0.4, 3, house), (0.4, 3, hand), (0.2, 2, action), (0.1, 1, title), (0.1, 1, number), (0.1, 1, notice), (0.1, 1, idea)]"
written_advanced,"[chaplin, his, by, walks, down, after, her, for, away, out, an, finds, again, into, where, s, this, police, but, at, officer, when, takes, as, stroller, not, puts, found, sees, take, from, picks, who, street, care, next, orphan, which, pram, has, runs, so, while, old, note, tries, love, sits, around, another, gets, put, off, cigarette, mother, place, moment, looks, notices, starts, charles, decides, can, arms, leaves, find]","[(15.15, 1, moment), (15.15, 1, cigarette)]","[In, the, video, the, famous, actor, Charlie, Chaplin, walks, down, a, alley,, while, people, throw, things,, such, as, bricks, out, of, their, window, and, hit, him., Afterwards, Chaplin, lights, a, cigarette, and, smokes,, until, he, sees, a, baby, lying, on, the, floor., While, he, tries, to, find, the, parents, of, the, baby,, he, gets, admonished, by, a, woman, with, a, baby, buggy, and, two, police, officers., Finally,, he, ends, up, finding, a, note, in, the, blanket, of, the, baby,, in, which, the, finder, of, the, baby, is, asked, to, take, care, of, it., Chaplin, takes, the, baby,, walks, away, and, ...]","[(27.01, 367, baby), (9.72, 132, man), (1.18, 16, moment), (0.96, 13, scene), (0.96, 13, cigarette), (0.81, 11, ground), (0.74, 10, paper), (0.59, 8, idea), (0.29, 4, person), (0.29, 4, house), (0.22, 3, end), (0.15, 2, plan), (0.15, 2, hand), (0.15, 2, fall), (0.15, 2, character), (0.15, 2, action), (0.07, 1, title), (0.07, 1, number), (0.07, 1, minute), (0.07, 1, line), (0.07, 1, family)]","[(1.0, 13, scene), (0.6, 8, idea), (0.3, 4, person), (0.3, 4, house), (0.1, 2, plan), (0.1, 2, hand), (0.1, 2, character), (0.1, 2, action), (0.1, 1, title), (0.1, 1, number), (0.1, 1, minute), (0.1, 1, line), (0.1, 1, family)]"
spoken_intermediate,"[n, yeah, or, uh, take, i, there, street, out, where, tries, comes, has, as, think, not, from, do, which, takes, care, what, wants, into, who, found, old, around, doesn, because, gets, orphan, looks, when, puts, picks, runs, officer, an, mother, thinks, another, decides, about, after, goes, pram, can, before]",[],"[Ok, this, story, is, about, toch, uh, Charles, Chaplin, get, in, touch, with, the, baby, and, it, starts, when, he, walks, down, a, shabby, side, road, /, and, people, uh, threw, their, trash, out, of, the, window, and, /, he, was, hit, by, some, trash, and, then, he, um, put, on, a, new, cigarette, /, and, he, heard, a, baby, screaming, and, he, looked, down, on, the, ground, where, the, baby, lies, /, and, he, picks, it, up, /, and, uh, /, when, he, looks, at, the, baby, behind, him, there’s, a, woman, /, uh, , carrying, a, ba=baggy, with, ...]","[(34.73, 452, baby), (9.91, 129, man), (1.77, 23, ground), (1.54, 20, cigarette), (1.15, 15, moment), (0.85, 11, end), (0.77, 10, person), (0.61, 8, character), (0.46, 6, paper), (0.31, 4, idea), (0.31, 4, house), (0.23, 3, scene), (0.23, 3, minute), (0.15, 2, wagon), (0.15, 2, notice), (0.15, 2, hand), (0.15, 2, fall), (0.08, 1, son), (0.08, 1, plan), (0.08, 1, finger), (0.08, 1, action)]","[(0.8, 10, person), (0.3, 4, house), (0.2, 3, minute), (0.2, 2, wagon), (0.2, 2, notice), (0.2, 2, hand), (0.2, 2, fall), (0.1, 1, son), (0.1, 1, plan), (0.1, 1, finger), (0.1, 1, action)]"
spoken_advanced,"[n, uh, there, or, out, take, like, when, where, takes, into, street, has, an, puts, care, yeah, not, i, old, tries, doesn, thinks, comes, looks, gets, officer, picks, next, can, from, do, runs, around, who, pram, what, orphan, found, as, after, mother, also, another, about, think, says, love, which, see, something, cigarette, seems, sits, scene, get, kind, goes, be, decides, gives, some, please, wants, angry, really, inside, policeman, corner, same, because, tells, other, off, little, starts, know, child, letter, video, trash, does, ground, find, note, hit, before]","[(11.49, 1, scene), (11.49, 1, ground), (11.49, 1, cigarette)]","[so, in, the, video, is, Charlie, Chaplin, who, uh, walks, through, a, street, and, people, throw, their, trash, out, of, the, window, and, in, the, corner, of, the, street, he, finds, a, baby, and, he, tries, to, find, the, mother, of, the, baby, and, then, a, woman, walks, by, and, she, and, he, thinks, that, it’s, her, baby, and, tries, to, give, it, back, to, her, and, she, tells, him, that, it, was, not, her, baby, and, is, kind, of, mad, with, him, so, he, takes, the, bay, back, and, tries, to, put, it, back, into, the, corner, and, a, ...]","[(29.77, 599, baby), (8.8, 177, man), (1.84, 37, cigarette), (1.79, 36, scene), (1.39, 28, ground), (0.89, 18, person), (0.75, 15, moment), (0.7, 14, end), (0.55, 11, wagon), (0.45, 9, paper), (0.35, 7, house), (0.25, 5, notice), (0.25, 5, hand), (0.15, 3, garage), (0.15, 3, fall), (0.15, 3, character), (0.15, 3, arm), (0.1, 2, problem), (0.1, 2, idea), (0.05, 1, wonder), (0.05, 1, title), (0.05, 1, minute), (0.05, 1, finger), (0.05, 1, family), (0.05, 1, curve)]","[(0.9, 18, person), (0.5, 11, wagon), (0.3, 7, house), (0.2, 5, notice), (0.2, 5, hand), (0.1, 3, garage), (0.1, 3, fall), (0.1, 3, arm), (0.1, 2, problem), (0.0, 1, minute), (0.0, 1, finger), (0.0, 1, family), (0.0, 1, curve)]"


In [30]:
# calculate percentage of cognates in concordance
# NOTE(review): the numerator is the number of entries in "text_cognates"
# (distinct cognates found), not their summed token frequency - confirm that
# this is the intended measure.
for row in german.index:
    found_cognates = german.loc[row, "text_cognates"]
    tokens = german.loc[row, "text"]
    share = len(found_cognates) / len(tokens)
    print(row, "\t", round(share * 100, 2))

written_intermediate 	 0.22
written_advanced 	 0.15
spoken_intermediate 	 0.16
spoken_advanced 	 0.12


#### Spanish Speakers

In [63]:
spanish_cognates = get_cognates("spanish")

# Row labels of the Spanish table: modality followed by proficiency level.
spanish_text_order = [
    "written_beginner",
    "written_intermediate",
    "written_advanced",
    "spoken_beginner",
    "spoken_intermediate",
    "spoken_advanced",
]

# Empty table; the columns are filled in by the following cells.
spanish = pd.DataFrame(
    index=spanish_text_order,
    columns=[
        "keywords",
        "keyword_cognates",
        "text",
        "text_cognates",
        "text_new_cognates",
    ],
)

In [64]:
# fill in the dataframe
# Keyword file stems, listed in the same order as spanish_text_order.
keyword_order = ["w_beg", "w_int", "w_adv", "s_beg", "s_int", "s_adv"]
keyword_data, text_data = [], []

# Walk the short keyword stems and the long row labels in lockstep.
for stem, label in zip(keyword_order, spanish_text_order):
    keyword_data.append(read_keywords(f"{stem}.txt", "spanish"))
    text_data.append(compile_all_text(f"spanish_{label}.txt"))

spanish.loc[:,"keywords"] = keyword_data
spanish.loc[:,"text"] = text_data

In [65]:
def count_cognates(row, text):
    """Count Spanish cognates in one column of a ``spanish`` row.

    NOTE: this definition replaces the German ``count_cognates`` defined in
    an earlier cell of this notebook.

    Args:
        row: Index label of ``spanish`` (e.g. ``"written_advanced"``).
        text: ``"keywords"`` or ``"text"``, naming the column to search.
            Any other value is searched directly, as in the previous
            implementation.

    Returns:
        list[tuple]: ``(relative_frequency, absolute_frequency, cognate)``
        for every cognate that occurs, sorted in descending order (most
        frequent first). The relative frequency is per thousand entries of
        the searched sequence, rounded to 1 decimal.
    """
    # Resolve the column once, under a separate name; previously the ``text``
    # parameter itself was overwritten during the first loop iteration.
    if text == "keywords" or text == "text":
        tokens = spanish.loc[f"{row}", text]
    else:
        tokens = text
    num_words = len(tokens)

    all_cognates = []
    for cognate in spanish_cognates:
        freq = tokens.count(cognate) # absolute frequency (0 when absent)
        if freq:
            freq_relative = round((freq/num_words)*1000,1) # relative frequency, per thousand words
            all_cognates.append((freq_relative, freq, cognate))

    return sorted(all_cognates, reverse=True) # top used cognates first

In [66]:
# fill in df with keywords and concordance
keyword_cognates, text_cognates = [], []
# Iterate the row labels directly; each label is counted in both columns.
for label in spanish_text_order:
    keyword_cognates.append(count_cognates(label, "keywords"))
    text_cognates.append(count_cognates(label, "text"))

spanish.loc[:, "keyword_cognates"] = keyword_cognates
spanish.loc[:, "text_cognates"] = text_cognates

In [68]:
def count_new_cognates(row):
    """Count Spanish cognates used by learners but absent from the native list.

    The native comparison list is ``natives.loc[<modality>, "spanish_cognates"]``
    where the modality is "spoken" if that word occurs in ``row`` and
    "written" otherwise.

    NOTE: this definition replaces the German ``count_new_cognates`` defined
    in an earlier cell of this notebook.

    Args:
        row: Index label of ``spanish``; must contain "spoken" or "written".

    Returns:
        list[tuple]: ``(relative_frequency, absolute_frequency, cognate)``
        for each cognate found in the row's ``"text"`` tokens but not in the
        native list, sorted in descending order. The relative frequency is
        per thousand tokens, rounded to 1 decimal.

    Raises:
        ValueError: If ``row`` names neither modality (previously this
            surfaced as an unbound-variable error).
    """
    from collections import Counter

    if "spoken" in row:
        modality = "spoken"
    elif "written" in row:
        modality = "written"
    else:
        raise ValueError(f"row must contain 'spoken' or 'written': {row!r}")

    # The third tuple element is the cognate; a set gives O(1) membership tests.
    native_cognates = {entry[2] for entry in natives.loc[modality, "spanish_cognates"]}

    # Fetch and count the learner text once rather than once per cognate.
    text = spanish.loc[f"{row}", "text"]
    num_words = len(text)
    token_counts = Counter(text)

    all_cognates = []
    for cognate in spanish_cognates:
        if cognate in native_cognates: # already used by natives
            continue
        freq = token_counts.get(cognate, 0) # absolute frequency
        if freq: # used by spanish speakers
            freq_relative = round((freq/num_words)*1000,1) # relative frequency, per thousand words
            all_cognates.append((freq_relative, freq, cognate))

    return sorted(all_cognates, reverse=True) # top used cognates first

In [69]:
# final df
# One list of "new" cognates per row label, in row order.
text_new_cognates = [count_new_cognates(label) for label in spanish_text_order]
spanish.loc[:, "text_new_cognates"] = text_new_cognates
spanish

Unnamed: 0,keywords,keyword_cognates,text,text_cognates,text_new_cognates
written_beginner,"[charles, floor, give, chaplin, leave, cart, ran, said, women]",[],"[Charles, Chaplis, is, a, on, famous, person., He, was, wearing, a, suit, and, a, cane., In, this, , video, story,, we, can, see, him., He, tries, to, get, to, rid, of, a, baby,, but, it, is, impossible., , <br/>, Charles, Chaplis, is, hopeless, to, get, rid, of, the, baby., He, tries, , to, put, the, baby, in, the, cart, of, a, one, mother., He, tries, leave, the, baby, on, the, floor, but, the, police, come, and, he, picked, up, the, baby, again., More, late,, he, tries, to, leave, the, baby, in, the, hands, of, an, older, man., Finally,, he, reads, ...]","[(9.0, 9, found), (1.0, 1, abandon)]",[]
written_intermediate,"[chaplin, but, so, again, when, this, charles, street, his, who, take, her, an, finds, takes, found, care, for, sees, where, at, leaves, there, saw, has, old, leave, not, finally, same, police, away, orphan, him, tries, note, gives, after, see, policeman, video, while, give, by, decides, love, can, suddenly, another, thinks, from, had, down, we, out]","[(18.2, 1, found)]","[This, video, is, starring, Charles, Chaplin., First,, he, is, walking, down, the, street, with, some, misfortunes, when, he, finds, a, baby, in, the, trash., After,, he, sees, a, baby, cart, with, a, lady,, so, he, think, that, the, baby, is, hers, and, Charles, places, the, baby, in, the, baby, cart., Nevertheless,, the, baby, isn't, hers,, so, she, attack, to, Charles., Moreover,, Chaplin, tries, to, trick, a, man, to, give, him, the, baby,, but, this, man, returns, the, baby, to, the, baby, cart., When, Chaplin, thinks, that, he, has, got, to, return, the, baby,, the, lady, accuse, him, of, gives, hers, ...]","[(4.4, 48, found), (0.5, 5, abandon), (0.2, 2, sole), (0.2, 2, previous), (0.1, 1, site), (0.1, 1, similar), (0.1, 1, retain), (0.1, 1, policy), (0.1, 1, extract), (0.1, 1, brief)]","[(0.2, 2, sole), (0.2, 2, previous), (0.1, 1, site), (0.1, 1, similar), (0.1, 1, retain), (0.1, 1, policy), (0.1, 1, extract), (0.1, 1, brief)]"
written_advanced,"[this, when, so, chaplin, at, take, finds, an, sees, him, but, street, there, takes, not, from, as, decides, tries, down, where, old, care, away, has, what, leaves, who, out, leave, trolley, same, after, officer, police, do, while, starts, one, runs, lady, another, policeman, gives, about, mother, place]",[],"[At, the, beginning, of, the, scene,, Chaplin, appears, walking, through, a, street,, and, he, does, not, seem, very, worried., He, goes, under, a, window, where, a, man, is, throwing, something, that, provoques, some, dust,, this, is, ignored, by, the, protagonist., He, keeps, on, walking, and, while, he, grabs, his, flexible, walking, stick,, a, lot, of, rubbish, coming, from, above, him,, falls, over, Charlie, making, him, dirty, and, also, scaring, him., Then, he, decides, to, take, his, gloves, off, and, throwing, them, to, the, rubbish, and, to, smoke, a, cigarette, he, took, from, a, metallic, box, that, he, had, in, his, ...]","[(3.6, 32, found), (0.5, 4, previous), (0.3, 3, sole), (0.3, 3, option), (0.2, 2, final), (0.1, 1, sum), (0.1, 1, sequence), (0.1, 1, image), (0.1, 1, flexible), (0.1, 1, area)]","[(0.5, 4, previous), (0.3, 3, sole), (0.3, 3, option), (0.2, 2, final), (0.1, 1, sum), (0.1, 1, sequence), (0.1, 1, image), (0.1, 1, flexible), (0.1, 1, area)]"
spoken_beginner,"[n, uh, charles, hhh, protagonist, cart, floor, letter, policeman, when, chaplin, finally, gave, was, this, see, older, smoking, say, return, car]",[],"[Charles, Charplin, is, /, one, famous, person, /, he, was, wearing, a, suit, and, a, cane, /, in, this, video, story, we, can, see, him, /, he, tries, to, get, a, right, /, of, a, baby, but, it, is, impossible, /, Charles, Chaplin, is, hopeless, to, get, right, of, the, baby, /, he, tries, to, put, the, baby, in, the, cart, of, one, mother, /, he, tries, leave, the, baby, on, the, floor, but, the, police, come, and, he, pick, up, the, baby, again, /, more, late, he, tries, to, leave, the, baby, in, the, hands, of, an, older, man, ...]","[(5.8, 12, found), (0.5, 1, previous)]","[(0.5, 1, previous)]"
spoken_intermediate,"[uh, n, hhh, street, charles, when, chaplin, take, who, found, see, has, there, care, policeman, leave, because, do, video, takes, what, doesn, from, same, where, give, note, saw, thinks, another, an, decides, finally, can, suddenly, tries, orphan, know, place, i, car, gives, start, angry, out, after, leaves, not, him, appears, says, mother, we, gets, get, old, floor, other, like, about, some, run, love, find, starts, have, as, while, look, again, to]","[(14.1, 1, found)]","[Charles, is, walking, down, a, very, poor, street, when, he, finds, a, baby, /, hhh, he, take, the, baby, and, he, place, in, a, lady's, baby, carriage, who, was, with, another, baby, /, hhh, the, lady, returns, the, baby, to, Charles, and, he, tries, to, leave, the, baby, hhh, where, he, found, /, hhh, but, when, he, saw, the, police, /, hhh, she, gets, the, baby, back, and, gives, the, baby, to, a, homeless, /, hhh, the, homeless, leaves, the, baby, again, in, the, lady's, baby, carriage, /, hhh, and, at, that, moment, /, Charles, was, walking, around, that, place, ...]","[(4.0, 79, found), (0.5, 10, abandon), (0.3, 5, previous), (0.2, 3, site), (0.1, 2, final), (0.1, 2, assume), (0.1, 1, remove), (0.1, 1, project), (0.1, 1, option), (0.1, 1, extract), (0.1, 1, encounter), (0.1, 1, dispose), (0.1, 1, deny), (0.1, 1, brief)]","[(0.5, 10, abandon), (0.3, 5, previous), (0.2, 3, site), (0.1, 2, final), (0.1, 1, remove), (0.1, 1, project), (0.1, 1, option), (0.1, 1, extract), (0.1, 1, encounter), (0.1, 1, dispose), (0.1, 1, deny)]"
spoken_advanced,"[hhh, charles, when, chaplin, him, n, who, while, because, suddenly, policeman, leave, trolley, do, uh, see, floor, video, thinks, another, can, says, gives, had, leaves, about, place, runs, gets, from, kid, know, one, finally, doesn, some, mother, start, pick, cigarette, smoking, looking, inside, something, again, sits, abandoned, left, find, into, he, didn, we, end, please, saw, someone, off, umbrella, love, moment, get, keep, first, well, arms]",[],"[Charlie, Chaplin, was, walking, in, the, street, while, he, found, a, baby, on, the, ground, hhh, he, did, not, know, where, the, baby, come, from, hhh, he, tried, to, find, it, but, he, did, not, get, it, hhh, he, thought, that, the, solution, is, giving, up, the, baby, to, a, woman, but, the, woman, did, not, want, another, another, baby, another, one, hhh, secondly, Charlie, Chaplin, gave, the, baby, to, an, old, man, but, he, he, did, not, want, him, hhh, the, old, man, put, the, baby, in, the, same, place, that, Charlie, Chaplin, did, hhh, with, the, woman, hhh, ...]","[(3.2, 48, found), (0.3, 5, abandon), (0.3, 4, option), (0.2, 3, previous), (0.1, 2, strategy), (0.1, 1, sum), (0.1, 1, sole), (0.1, 1, site), (0.1, 1, remove), (0.1, 1, normal), (0.1, 1, initial), (0.1, 1, final), (0.1, 1, despite), (0.1, 1, assume), (0.1, 1, area)]","[(0.3, 5, abandon), (0.3, 4, option), (0.2, 3, previous), (0.1, 2, strategy), (0.1, 1, sum), (0.1, 1, sole), (0.1, 1, site), (0.1, 1, remove), (0.1, 1, normal), (0.1, 1, initial), (0.1, 1, final), (0.1, 1, despite)]"


In [55]:
# count percentage of cognates in concordance
# NOTE(review): the numerator is the number of entries in "text_cognates"
# (distinct cognates found), not their summed token frequency - confirm that
# this is the intended measure.
for row in spanish.index:
    found_cognates = spanish.loc[row, "text_cognates"]
    tokens = spanish.loc[row, "text"]
    share = len(found_cognates) / len(tokens)
    print(row, "\t", round(share * 100, 2))

written_beginner 	 0.2
written_intermediate 	 0.09
written_advanced 	 0.11
spoken_beginner 	 0.1
spoken_intermediate 	 0.07
spoken_advanced 	 0.1
