In [22]:
import os
import pandas as pd

from typing import Tuple

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
DATASET_FOLDER = 'feedback-prize-2021'

def load_data_csv() -> pd.DataFrame:
    return pd.read_csv(
        os.path.join(DATASET_FOLDER, 'train.csv'), 
        dtype={'discourse_id': 'int64', 'discourse_start': int, 'discourse_end': int})

def load_file(file_id: str, folder: str = 'train') -> str:
    path = os.path.join(DATASET_FOLDER, folder, file_id + '.txt')
    with open(path, 'r') as f:
        text = f.read()
    return text

def load_texts(folder: str = 'train') -> pd.Series:
    data_path = os.path.join(DATASET_FOLDER, folder)

    def read(filename):
        with open(os.path.join(data_path, filename), 'r') as f:
            text = f.read()
        return text

    return pd.Series({fname.replace('.txt', ''): read(fname) for fname in os.listdir(data_path)})     

def load_dataset() -> Tuple[pd.Series, pd.DataFrame]:
    return load_texts(), load_data_csv()


In [23]:
texts, data = load_dataset()

In [7]:
labels = data['discourse_type'].unique()

print(labels)

['Lead' 'Position' 'Evidence' 'Claim' 'Concluding Statement'
 'Counterclaim' 'Rebuttal']


In [27]:
test_texts = load_texts('test')

test_texts

0FB0700DAF44    During a group project, have you ever asked a ...
18409261F5C2    80% of Americans believe seeking multiple opin...
D46BCB48440A    When people ask for advice,they sometimes talk...
D72CB1C11673    Making choices in life can be very difficult. ...
DF920E0A7337    Have you ever asked more than one person for h...
dtype: object

In [21]:
parts_no_label = {}

for group_id, group in data.groupby(by='id'):
    init = -1
    for _, row in group.iterrows():
        g_init = row.discourse_start
        if g_init > init + 1:
            part = texts[group_id][init: g_init]
            if len(part) > 100:
                if group_id not in parts_no_label.keys():
                    parts_no_label[group_id] = [part]
                else:
                    parts_no_label[group_id].append(part)
        init = row.discourse_end
    if len(parts_no_label) > 1:
        break

for g, l in parts_no_label.items():
    print(g)
    print(l)
    

0045BE2791A2
["Why would you want bad grades? you should pass. Also it just doesn't seem right. Or you could get a scholarship into a college for sports but it wouldn't boost your grade up all the way to get in a great college. "]
004EA492DA04
["\n\nCommunity service should be something that somebody wants to do, not something that they have to do. Some people don't have time to do community service and some people have obligated their time to other things such as piano, dance, football, basketball, and etc. ", '\nIf my reasons did not convince you then, I think you must have a good reason why you want to do all the community service. ']


In [31]:
# example_id = '423A1CA112E2'
# example_id = '00203C45FC55'
# example_id = '0491C7BFA9B4'

example_id = '004EA492DA04'

example_text = texts[example_id]
group = data.groupby(by='id').get_group(example_id)

In [32]:
example_text

"Dear Principal,\n\nI think that making someone choose to do community service is the right thing to do. To make people do community service is unfair to the people who have after-school activities, tutoring, and other things they have committed to.\n\nCommunity service should be something that somebody wants to do, not something that they have to do. Some people don't have time to do community service and some people have obligated their time to other things such as piano, dance, football, basketball, and etc. It would be nice for people to give back ,but people can do it on their own time, not on a schedule.\n\nIf you did make people do community service, kids will not have extra time to do homework or study. You might see some of the smarter kids grades drop and that means that if you had kids that aren't already doing well in school would be worse in school than usual. I also think that it would put more work on the faculty. Which also means that you would have to start paying more

In [33]:
group

Unnamed: 0,id,discourse_id,discourse_start,discourse_end,discourse_text,discourse_type,discourse_type_num,predictionstring
68446,004EA492DA04,1615153210579,17,102,I think that making someone choose to do commu...,Position,Position 1,2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
68447,004EA492DA04,1615153219763,102,246,To make people do community service is unfair ...,Claim,Claim 1,18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 3...
68448,004EA492DA04,1615153264329,512,613,"It would be nice for people to give back ,but ...",Evidence,Evidence 1,85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 1...
68449,004EA492DA04,1615153278009,614,714,"If you did make people do community service, k...",Claim,Claim 2,107 108 109 110 111 112 113 114 115 116 117 11...
68450,004EA492DA04,1615153291966,714,879,You might see some of the smarter kids grades ...,Evidence,Evidence 2,126 127 128 129 130 131 132 133 134 135 136 13...
68451,004EA492DA04,1615153308612,879,936,I also think that it would put more work on th...,Claim,Claim 3,158 159 160 161 162 163 164 165 166 167 168 169
68452,004EA492DA04,1615153321443,936,1193,Which also means that you would have to start ...,Evidence,Evidence 3,170 171 172 173 174 175 176 177 178 179 180 18...
68453,004EA492DA04,1615247034583,1317,1814,I have had many good reasons why you should no...,Concluding Statement,Concluding Statement 1,242 243 244 245 246 247 248 249 250 251 252 25...


In [34]:
group['discourse_text'][68453]

"I have had many good reasons why you should not make everyone do community service like, money, after school activities and other reasons i hope I have changed your mind. I look forward to what you will choose I think that community service should be something that people want to do and not what you are making them do. This might make people think that community service is a punishment and it isn't, so I feel like they might think that this is a bad thing to do and never want to do it again. "

In [35]:
example_text[1317:]

"I have had many good reasons why you should not make everyone do community service like, money, after school activities and other reasons i hope I have changed your mind. I look forward to what you will choose I think that community service should be something that people want to do and not what you are making them do. This might make people think that community service is a punishment and it isn't, so I feel like they might think that this is a bad thing to do and never want to do it again. I hope that you have taking this letter seriously and have listed your options. Always give things second thoughts because one thing you might think is a good idea, but then later find out that it isn't.\n\nFrom, a student      "