In [1]:
import pandas as pd
import numpy as np
from glob2 import glob
import re
import nltk
import os 
# NOTE(review): hardcoded, user-specific absolute path. Every relative path used
# later in this notebook (the '*.txt' glob and the CSV outputs) resolves against it.
os.chdir('/Users/gracelyons/Desktop/MSDS/Capstone/Transcripts/Teacher and Avatar/')

from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.lancaster import LancasterStemmer

In [2]:
# Every .txt transcript in the working directory, in sorted filename order.
transcripts = sorted(glob('*.txt'))

# Index level names for the token table, outermost level first.
OHCO = [
    'speaker_id',
    'line_num',
    'sent_num',
    'token_num',
]

In [3]:
def create_files(trans_list, OHCO=OHCO, tokenizer = nltk.WhitespaceTokenizer(), ws = False, 
                 remove_blank_strings = True, add_stop_words = True, add_stems = True,
                 stems = [PorterStemmer(), SnowballStemmer('english'), LancasterStemmer()], 
                 alternate_rank = False, bag = 'speaker_id', count_method = 'n', 
                 tf_method = 'sum', tf_norm_k = 0.5, idf_method = 'standard', save_work = False):
    """Build library, document, token, vocabulary, bag-of-words and TF-IDF tables
    from a list of transcript text files.

    Parameters
    ----------
    trans_list : iterable of str
        Paths of transcript files. The text before the first '_' in each name
        (with any 'th' removed) must parse as an int; it becomes ``speaker_id``.
    OHCO : list of str
        Four index level names for the token table, outermost first.
    tokenizer : object with a ``tokenize(str)`` method
        Only used when ``ws`` is True.
    ws : bool
        If True, split sentences with ``tokenizer``; otherwise use
        ``nltk.word_tokenize``.
    remove_blank_strings : bool
        Drop tokens whose normalized ``term_str`` is empty.
    add_stop_words : bool
        Add a 0/1 ``stop`` column to the vocabulary (NLTK English stop words).
    add_stems : bool
        Add one ``stemmer_<name>`` column per entry of ``stems``.
    stems : list
        Stemmer objects exposing ``stem(str)``.
    alternate_rank : bool
        If True, ``term_rank`` is a dense rank of ``n`` (ties share a rank) and
        the vocabulary keeps its alphabetical order. If False, the vocabulary
        is sorted by descending ``n`` and ranked 1..V by position.
    bag : str
        One of the names in ``OHCO``; documents are defined by the OHCO levels
        up to and including this one.
    count_method : str
        Column of the bag-of-words table used as the count: 'n' or 'c'.
    tf_method : str
        'sum', 'max', 'log', 'raw', 'double_norm' or 'binary'.
    tf_norm_k : float
        Smoothing constant used by the 'double_norm' term frequency.
    idf_method : str
        'standard', 'max' or 'smooth'.
    save_work : bool
        If True, write LIB.csv, DOC.csv, TOKEN.csv, BOW.csv and TFIDF.csv to
        the current working directory.

    Returns
    -------
    tuple
        ``(library, docs, tokens, vocab, bow, tfidf)`` as pandas DataFrames.

    Raises
    ------
    ValueError
        If ``trans_list`` is empty, or ``bag``, ``tf_method`` or ``idf_method``
        is not one of the supported values.
    """
    # Fail fast on bad options instead of erroring after the (slow) tokenizing step.
    tf_methods = ('sum', 'max', 'log', 'raw', 'double_norm', 'binary')
    idf_methods = ('standard', 'max', 'smooth')
    if tf_method not in tf_methods:
        raise ValueError("tf_method must be one of %s, got %r" % (tf_methods, tf_method))
    if idf_method not in idf_methods:
        raise ValueError("idf_method must be one of %s, got %r" % (idf_methods, idf_method))
    if bag not in OHCO:
        raise ValueError("bag must be one of %s, got %r" % (list(OHCO), bag))

    trans_list = list(trans_list)
    if not trans_list:
        raise ValueError("trans_list is empty: no transcript files to process")

    my_lib = []
    my_doc = []

    def word_tokenize(x):
        """Tokenize one sentence and return a Series of (token, POS tag) tuples."""
        if ws:
            # the tokenizer can be swapped through the `tokenizer` argument
            words = tokenizer.tokenize(x)
        else:
            words = nltk.word_tokenize(x)
        return pd.Series(nltk.pos_tag(words))

    for trans_file in trans_list:

        # Get ID from filename
        speaker_id = int(trans_file.split('_')[0].replace('th', ''))
        print("File ID:", speaker_id)

        # Import file as lines; the context manager guarantees the handle is closed
        with open(trans_file, 'r', encoding='utf-8-sig') as handle:
            raw_lines = handle.readlines()

        lines_df = pd.DataFrame(raw_lines, columns=['line_str'])
        lines_df['line_str'] = lines_df['line_str'].str.strip()
        lines_df['speaker_id'] = speaker_id
        # drop lines with speaker
        lines_df = lines_df[~lines_df['line_str'].str.contains('Speaker ', regex=False)]
        # drop empty lines
        lines_df = lines_df[~lines_df['line_str'].str.match(r'^\s*$')].reset_index(drop = True)
        lines_df.index.name = 'line_num'

        # Group
        lines_df = lines_df.groupby(OHCO[:2]).line_str.apply(lambda x: '\n'.join(x)).to_frame()

        # Set index
        lines_df.index.names = OHCO[:-2]

        # Register
        my_lib.append((speaker_id, trans_file))
        my_doc.append(lines_df)

    docs = pd.concat(my_doc)
    library = pd.DataFrame(my_lib, columns=['speaker_id', 'book_file']).set_index('speaker_id')

    print('Tokenizing')
    # Lines to Sentences
    tokens = docs.line_str.apply(lambda x: pd.Series(nltk.sent_tokenize(x))).stack()\
                .to_frame().rename(columns = {0: 'sent_str'})

    # Sentences to Tokens
    tokens = tokens.sent_str.apply(word_tokenize).stack().to_frame().rename(columns = {0: 'pos_tuple'})

    # Grab info from tuple
    tokens['pos'] = tokens.pos_tuple.apply(lambda x: x[1])
    tokens['token_str'] = tokens.pos_tuple.apply(lambda x: x[0])

    # Add index and do some cleaning
    tokens.index.names = OHCO
    # regex=True is required: newer pandas treats the pattern literally by default,
    # which would leave punctuation and underscores in term_str.
    tokens['term_str'] = tokens['token_str'].str.lower().str.replace(r'[\W_]', '', regex=True)
    if remove_blank_strings:
        # copy so the later column assignment does not write into a slice
        tokens = tokens[tokens.term_str != ''].copy()

    # Reduce and extract vocabulary from tokens table
    print('creating vocab')
    # One row per distinct term, alphabetical, with its corpus count `n`.
    # rename_axis/reset_index(name=...) avoids relying on the column names that
    # value_counts().reset_index() produces, which differ between pandas versions.
    vocab = tokens.term_str.value_counts().sort_index()\
            .rename_axis('term_str').reset_index(name = 'n')
    vocab.index.name = 'term_id'
    vocab['num'] = vocab.term_str.str.match(r"\d+").astype('int')

    # Add stopwords
    if add_stop_words:
        stop_words = set(nltk.corpus.stopwords.words('english'))
        vocab['stop'] = vocab.term_str.isin(stop_words).astype('int')

    # Add stems
    # Used Porter, Snowball, and Lancaster stemmers for this but there are other options
    if add_stems:
        for stem in stems:
            # derive a short lowercase label from the stemmer's string form
            str_stem = str(stem).split('Stemmer')[0].split('.')[-1].replace('<', '').lower()
            vocab['stemmer_' + str_stem] = vocab.term_str.apply(stem.stem)

    # update token and vocab tables
    tokens['term_id'] = tokens.term_str.map(vocab.reset_index().set_index('term_str').term_id)
    # most frequent POS tag per term
    vocab['pos_max'] = tokens.groupby(['term_id', 'pos']).count().iloc[:,0].unstack().idxmax(axis = 1)

    if alternate_rank:
        # Terms with equal counts share a rank; ranks are consecutive from 1 (dense rank).
        vocab['term_rank'] = vocab.n.rank(method = 'dense', ascending = False).astype('int')
    else:
        vocab = vocab.sort_values('n', ascending = False).reset_index()
        vocab.index.name = 'term_rank'
        vocab = vocab.reset_index().set_index('term_id')
        vocab['term_rank'] = vocab['term_rank'] + 1

    vocab['p'] = vocab.n/vocab.n.sum() # prior, or marginal, probability of a term
    vocab['h'] = vocab.p * np.log2(1/vocab.p) # self entropy of each word

    # Create BOW
    print('creating bag-of-words')
    print('bag is: ' + bag)
    # a bag is identified by every OHCO level down to (and including) `bag`
    bag_levels = OHCO[:OHCO.index(bag) + 1]

    # default bag is speaker
    bow = tokens.groupby(bag_levels + ['term_id']).term_id.count().to_frame()\
            .rename(columns = {'term_id': 'n'})
    bow['c'] = bow.n.astype('bool').astype('int')

    # Create TFIDF
    print('creating TFIDF')
    # document-term count matrix: one row per bag, one column per term_id
    dtcm = bow[count_method].unstack().fillna(0).astype('int')

    print('tf method: ', tf_method)
    counts_t = dtcm.T  # terms x documents, so column reductions are per document
    if tf_method == 'sum':
        tf = counts_t / counts_t.sum()
    elif tf_method == 'max':
        tf = counts_t / counts_t.max()
    elif tf_method == 'log':
        tf = np.log10(1 + counts_t)
    elif tf_method == 'raw':
        tf = counts_t
    elif tf_method == 'double_norm':
        tf = counts_t / counts_t.max()
        # tf[tf > 0] masks absent terms to NaN so they do not receive the constant k
        tf = tf_norm_k + (1 - tf_norm_k) * tf[tf > 0]
    elif tf_method == 'binary':
        tf = counts_t.astype('bool').astype('int')
    tf = tf.T

    doc_freq = dtcm[dtcm > 0].count()  # number of bags containing each term
    n_docs = dtcm.shape[0]

    print('idf method: ', idf_method)
    if idf_method == 'standard':
        idf = np.log10(n_docs / doc_freq)
    elif idf_method == 'max':
        idf = np.log10(doc_freq.max() / doc_freq)
    elif idf_method == 'smooth':
        idf = np.log10((1 + n_docs) / (1 + doc_freq)) + 1

    tfidf = tf * idf

    # update vocab and bow
    bow['tf'] = tf.stack()
    bow['tfidf'] = tfidf.stack()

    vocab['df'] = doc_freq
    vocab['idf'] = idf
    vocab['tfidf_mean'] = tfidf[tfidf > 0].mean().fillna(0)
    vocab['tfidf_sum'] = tfidf.sum()
    vocab['tfidf_median'] = tfidf[tfidf > 0].median().fillna(0)
    vocab['tfidf_max'] = tfidf.max()

    # save work as csvs
    # NOTE(review): the vocab table is returned but not written here - confirm whether
    # a VOCAB.csv is wanted as well.
    if save_work:
        library.to_csv('LIB.csv')
        docs.to_csv('DOC.csv')
        tokens.to_csv('TOKEN.csv')
        bow.to_csv('BOW.csv')
        tfidf.to_csv('TFIDF.csv')

    print("Done.")
    return library, docs, tokens, vocab, bow, tfidf

In [4]:
# Build all tables with the default options. save_work=True also writes
# LIB.csv, DOC.csv, TOKEN.csv, BOW.csv and TFIDF.csv to the working directory.
LIB, DOC, TOKEN, VOCAB, BOW, TFIDF = create_files(transcripts, save_work = True)

File ID: 201
File ID: 202
File ID: 203
File ID: 204
File ID: 205
File ID: 206
File ID: 207
File ID: 208
File ID: 209
File ID: 210
File ID: 211
File ID: 212
File ID: 213
File ID: 214
File ID: 215
File ID: 216
File ID: 217
File ID: 218
File ID: 219
File ID: 220
File ID: 221
File ID: 222
File ID: 223
File ID: 224
File ID: 225
File ID: 226
File ID: 227
File ID: 228
File ID: 229
File ID: 229
File ID: 301
File ID: 302
File ID: 303
File ID: 304
File ID: 305
File ID: 306
File ID: 307
File ID: 308
File ID: 309
File ID: 310
File ID: 311
File ID: 312
File ID: 313
File ID: 314
File ID: 315
File ID: 316
File ID: 317
File ID: 318
File ID: 319
File ID: 320
File ID: 321
File ID: 322
File ID: 323
File ID: 324
File ID: 325
File ID: 326
File ID: 327
File ID: 328
File ID: 329
File ID: 330
File ID: 331
File ID: 332
File ID: 333
File ID: 334
File ID: 335
File ID: 336
File ID: 337
File ID: 338
File ID: 339
File ID: 340
File ID: 341
File ID: 342
File ID: 343
File ID: 344
File ID: 345
File ID: 346
File ID: 347

  tokens['term_str'] = tokens['token_str'].str.lower().str.replace('[\W_]', '')


creating vocab
creating bag-of-words
bag is: speaker_id
creating TFIDF
tf method:  sum
idf method:  standard
Done.


In [5]:
# Spot-check 10 random transcript lines. No random_state is passed, so the rows
# shown change on every run.
DOC.sample(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,line_str
speaker_id,line_num,Unnamed: 2_level_1
348,29,"Right? Yeah, very true."
220,24,Yeah. So I love that idea of excitement. So I ...
352,44,Yeah. Like when we're maybe transitioning from...
305,24,Mm-hmm. <affirmative>. Jayla.
211,5,"Yes, exactly. A norm is a rule that everyone a..."
213,7,"Hi, Jayla."
357,3,"Say the norm. Oh, okay. So I just go. Okay, ev..."
319,31,"Okay, cool. We gotta meet at the flag then if ..."
208,20,"Well, not, I mean, I guess I, I just was wonde..."
333,7,Nina.


In [6]:
# First 10 rows of the document table, indexed by (speaker_id, line_num).
DOC.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,line_str
speaker_id,line_num,Unnamed: 2_level_1
201,0,Begin simulation.
201,1,Good morning class. I'm Ms. Tachi. How are you...
201,2,"Good teacher. Good, good,"
201,3,Good. That's good to hear. So today we're gonn...
201,4,"Um,"
201,5,"Yeah. Good. Emily, Carlos, you guys Nice. So w..."
201,6,"Yeah, I got text from my mom. Um, I have, um, ..."
201,7,"Okay. Um, Emily, um, after small group, we can..."
201,8,Sorry.
201,9,"No worries. Yeah, we can talk about it. Um, Ca..."


In [7]:
# Library table: speaker_id -> source filename (column 'book_file').
LIB.head()

Unnamed: 0_level_0,book_file
speaker_id,Unnamed: 1_level_1
201,201_1.24.20_S_SC.txt
202,202_1.30.20_S_SC.txt
203,203_1.30.20_S_SC.txt
204,204_1.30.20_S_SC.txt
205,205_1.30.20_S_SC.txt


In [8]:
# First 10 tokens with their POS tag, raw token_str, normalized term_str and term_id.
TOKEN.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pos_tuple,pos,token_str,term_str,term_id
speaker_id,line_num,sent_num,token_num,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
201,0,0,0,"(Begin, DT)",DT,Begin,begin,154
201,0,0,1,"(simulation, NN)",NN,simulation,simulation,1469
201,1,0,0,"(Good, JJ)",JJ,Good,good,670
201,1,0,1,"(morning, NN)",NN,morning,morning,1021
201,1,0,2,"(class, NN)",NN,class,class,282
201,1,1,0,"(I, PRP)",PRP,I,i,772
201,1,1,1,"('m, VBP)",VBP,'m,m,948
201,1,1,2,"(Ms., NNP)",NNP,Ms.,ms,1033
201,1,1,3,"(Tachi, NNP)",NNP,Tachi,tachi,1602
201,1,2,0,"(How, WRB)",WRB,How,how,761


In [9]:
# 10 random tokens. No random_state is passed, so the rows shown change on every run.
TOKEN.sample(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pos_tuple,pos,token_str,term_str,term_id
speaker_id,line_num,sent_num,token_num,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
226,39,3,5,"(be, VB)",VB,be,be,146
344,44,1,10,"(first, JJ)",JJ,first,first,599
353,2,1,0,"(How, WRB)",WRB,How,how,761
327,6,3,2,"(you, PRP)",PRP,you,you,1913
357,36,8,5,"(you, PRP)",PRP,you,you,1913
304,21,2,5,"(our, PRP$)",PRP$,our,our,1130
313,6,3,1,"(can, MD)",MD,can,can,236
308,45,1,7,"(boring, NN)",NN,boring,boring,190
341,9,2,14,"(him, PRP)",PRP,him,him,734
311,35,2,11,"(so, RB)",RB,so,so,1494


In [10]:
# Top of the vocabulary. With the default alternate_rank=False the table is sorted
# by descending count n, so these are the most frequent terms.
VOCAB.head(10)

Unnamed: 0_level_0,term_rank,term_str,n,num,stop,stemmer_porter,stemmer_snowball,stemmer_lancaster,pos_max,p,h,df,idf,tfidf_mean,tfidf_sum,tfidf_median,tfidf_max
term_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
772,1,i,2809,0,1,i,i,i,PRP,0.037129,0.17641,89,0.0,0.0,0.0,0.0,0.0
1913,2,you,2347,0,1,you,you,you,PRP,0.031022,0.155438,89,0.0,0.0,0.0,0.0,0.0
1827,3,we,2187,0,1,we,we,we,PRP,0.028907,0.147786,89,0.0,0.0,0.0,0.0,0.0
1694,4,to,2016,0,1,to,to,to,TO,0.026647,0.13936,89,0.0,0.0,0.0,0.0,0.0
1648,5,that,1936,0,1,that,that,that,DT,0.02559,0.135325,89,0.0,0.0,0.0,0.0,0.0
1382,6,s,1356,0,1,s,s,s,VBZ,0.017923,0.103991,89,0.0,0.0,0.0,0.0,0.0
71,7,and,1267,0,1,and,and,and,CC,0.016747,0.098806,89,0.0,0.0,0.0,0.0,0.0
1494,8,so,1256,0,1,so,so,so,RB,0.016601,0.098157,89,0.0,0.0,0.0,0.0,0.0
1755,9,um,1125,0,0,um,um,um,NNP,0.01487,0.090282,89,0.0,0.0,0.0,0.0,0.0
6,10,a,1088,0,1,a,a,a,DT,0.014381,0.088007,89,0.0,0.0,0.0,0.0,0.0


In [11]:
# 10 random vocabulary rows. No random_state is passed, so the rows shown change on every run.
VOCAB.sample(10)

Unnamed: 0_level_0,term_rank,term_str,n,num,stop,stemmer_porter,stemmer_snowball,stemmer_lancaster,pos_max,p,h,df,idf,tfidf_mean,tfidf_sum,tfidf_median,tfidf_max
term_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1134,1719,outbursts,1,0,0,outburst,outburst,outburst,NNS,1.3e-05,0.000214,1,1.94939,0.0022,0.0022,0.0022,0.0022
384,1459,cute,1,0,0,cute,cute,cut,NN,1.3e-05,0.000214,1,1.94939,0.002645,0.002645,0.002645,0.002645
1747,323,two,33,0,0,two,two,two,CD,0.000436,0.004869,18,0.694118,0.001471,0.026486,0.001017,0.00548
448,680,dog,6,0,0,dog,dog,dog,NN,7.9e-05,0.00108,4,1.34733,0.00245,0.009801,0.001542,0.005215
906,1182,lessons,2,0,0,lesson,lesson,lesson,NNS,2.6e-05,0.000402,2,1.64836,0.002014,0.004029,0.002014,0.002258
1899,715,wrote,5,0,0,wrote,wrote,wrot,VBD,6.6e-05,0.000918,4,1.34733,0.002035,0.00814,0.001558,0.003656
1894,493,wow,11,0,0,wow,wow,wow,NNP,0.000145,0.001853,9,0.995147,0.001603,0.014428,0.00123,0.002906
1287,1775,reasons,1,0,0,reason,reason,reason,NNS,1.3e-05,0.000214,1,1.94939,0.002565,0.002565,0.002565,0.002565
77,1560,announcements,1,0,0,announc,announc,annount,NNS,1.3e-05,0.000214,1,1.94939,0.002225,0.002225,0.002225,0.002225
947,1864,lunchtime,1,0,0,lunchtim,lunchtim,lunchtim,NN,1.3e-05,0.000214,1,1.94939,0.002094,0.002094,0.002094,0.002094


In [12]:
# Bag-of-words rows, one per (speaker_id, term_id): count n, presence flag c, tf and tfidf.
BOW.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,c,tf,tfidf
speaker_id,term_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
201,6,5,1,0.007102,0.0
201,11,7,1,0.009943,4.9e-05
201,23,5,1,0.007102,0.00618
201,28,1,1,0.00142,0.000736
201,38,4,1,0.005682,0.001738
201,52,4,1,0.005682,5.6e-05
201,57,1,1,0.00142,0.001486
201,60,2,1,0.002841,0.001301
201,70,1,1,0.00142,0.000343
201,71,18,1,0.025568,0.0


In [13]:
# 10 random bag-of-words rows. No random_state is passed, so the rows shown change on every run.
BOW.sample(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,c,tf,tfidf
speaker_id,term_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
360,1090,2,1,0.002928,0.00027
350,866,1,1,0.001074,0.001771
214,1542,2,1,0.001947,0.000676
341,784,4,1,0.004061,0.000914
305,609,1,1,0.001289,0.000539
222,65,1,1,0.001316,0.000567
306,1903,8,1,0.009569,4.7e-05
206,1661,1,1,0.001074,0.000409
356,492,1,1,0.001433,0.00136
314,414,1,1,0.001245,0.002428


In [14]:
# TF-IDF matrix: one row per speaker_id, one column per term_id.
TFIDF.head(10)

term_id,0,1,2,3,4,5,6,7,8,9,...,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923
speaker_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000106,0.0,0.000862,0.00129,0.0,0.0,0.0,0.0,0.0
202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000176,0.0,0.0,0.001071,0.0,0.0,0.0,0.0,0.0
206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002094,0.0,...,0.0,0.000128,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,7.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209,0.002558,0.0,0.002558,0.002558,0.0,0.0,0.0,0.0,0.0,0.0,...,0.002558,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# 10 random rows of the TF-IDF matrix. No random_state is passed, so the rows shown change on every run.
TFIDF.sample(10)

term_id,0,1,2,3,4,5,6,7,8,9,...,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923
speaker_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.001525,0.001803
205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000176,0.0,0.0,0.001071,0.0,0.0,0.0,0.0,0.0
339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,7.6e-05,0.0,0.000616,0.0,0.0,0.0,0.0,0.0,0.0
337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5e-05,0.0,0.0,0.0,0.001521,0.0,0.0,0.0,0.0
304,0.0,0.001823,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,7.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000151,0.0,0.0,0.004609,0.0,0.0,0.0,0.0,0.0
344,0.0,0.0,0.0,0.0,0.001533,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000136,0.0,0.001381,0.0,0.0,0.0,0.0,0.0,0.0
