In [5]:
import pandas as pd
import numpy as np
import os

def REINDEX(array):
    '''Return the positions that put `array` in ascending order.

    The result is a list of integer positions (not index labels): element k
    of the result is the position in `array` of the k-th smallest value.
    Equal values keep their original relative order (stable ordering).
    Positions holding NaN are omitted from the result.

    array: 1-D array-like (NumPy array, pandas Series, list, ...).
    Returns: list of Python ints.
    '''
    values = np.asarray(array)
    # A stable argsort yields, for each distinct value in ascending order,
    # the positions where it occurs in their original order.
    order = np.argsort(values, kind='stable')
    ordered_values = values[order]
    # NaN is the only value that is not equal to itself; leave it out.
    comparable = ordered_values == ordered_values
    return order[comparable].tolist()

def main(df, exp_thresh=4.5, plaus_thresh=5, val_thresh=2, condition=None, drop=True):
    '''Filter rated sentences by expectancy, plausibility and valence, then
    split them into two groups by their 'Index' value.

    Reads the columns 'Index', 'Cloze', 'Exp. mean', 'Plaus. mean' and
    'Val. mean' of `df`; `df` itself is not modified. Rows without a 'Cloze'
    or 'Index' value are ignored.

    exp_thresh: Zero-cloze sentences with 'Exp. mean' > exp_thresh are dropped,
        together with the higher-cloze sentences sharing their 'Index'.
        Higher-cloze sentences whose 'Index' has no retained zero-cloze
        sentence are dropped as well. For instance, with 5.75 the zero-cloze
        sentences rated above 5.75 are dropped; with 8 nothing is dropped,
        as expectancy was rated on a scale of 1 to 7.

    plaus_thresh: Sentences with 'Plaus. mean' < plaus_thresh are dropped.

    val_thresh: Valence is measured as the distance of 'Val. mean' from 5.
        If condition="Emo", sentences with distance < val_thresh are dropped.
        If condition="Neu", sentences with distance > val_thresh are dropped.
        For any other condition no valence filtering is applied.

    condition: Label used for the valence filter and as prefix of the printed
        counts ("Emo_SC", "Neu_WC", ...). With None the prefix is omitted.

    drop: If true, sentences whose 'Index' occurs only once after filtering
        (no counterpart left) are removed and counts are reported in frames
        (rows // 2); otherwise counts are reported in sentences.

    Returns: (sc, wc) where sc holds the rows with Index % 4 in {0, 1} and wc
        the rows with Index % 4 in {2, 3} (presumably two constraint
        conditions; confirm against the stimulus list). Rows are ordered by
        'Index', zero-cloze sentence first within an 'Index'; row labels are
        positions in the filtered table before singleton removal.
    '''
    # `display` is only injected as a builtin by IPython/Jupyter; fall back to
    # print so the function also runs in a plain interpreter.
    try:
        from IPython.display import display as show
    except ImportError:
        show = print

    valence_midpoint = 5  # valence cut-offs are measured from this value

    # Stable sort by cloze: zero-cloze rows come first and, after the stable
    # sort by 'Index' below, precede their higher-cloze counterpart.
    rated = df.dropna(subset=['Cloze', 'Index']).sort_values('Cloze', kind='mergesort')

    zero_cloze = rated['Cloze'] == 0
    zero_kept = zero_cloze & (rated['Exp. mean'] <= exp_thresh)
    # Higher-cloze sentences survive only alongside a retained zero-cloze one.
    partner_kept = ~zero_cloze & rated['Index'].isin(rated.loc[zero_kept, 'Index'])
    new_df = rated[zero_kept | partner_kept].sort_values('Index', kind='mergesort')

    new_df = new_df[new_df['Plaus. mean'] >= plaus_thresh]
    upper, lower = valence_midpoint + val_thresh, valence_midpoint - val_thresh
    if condition == 'Emo':
        new_df = new_df[(new_df['Val. mean'] >= upper) | (new_df['Val. mean'] <= lower)]
    elif condition == 'Neu':
        new_df = new_df[(new_df['Val. mean'] <= upper) & (new_df['Val. mean'] >= lower)]
    # Always renumber, whatever the condition, so row labels are positions.
    new_df = new_df.reset_index(drop=True)

    # drops sentences that do not have a counterpart
    if drop:
        new_df = new_df[new_df.duplicated('Index', keep=False)]

    remainder = new_df['Index'] % 4
    sc = new_df[remainder.isin([0, 1])]
    wc = new_df[remainder.isin([2, 3])]

    prefix = '' if condition is None else f'{condition}_'
    unit = 'frames' if drop else 'sentences'
    for label, subset in (('SC', sc), ('WC', wc)):
        # A frame is a pair of sentences, so halve the row count when paired.
        count = len(subset) // 2 if drop else len(subset)
        print(f'number of {prefix}{label} {unit} =', count)
        show(subset)
        print('\n')

    return sc, wc

In [7]:
# Variant 1: keep every sentence that passes the filters, even when its
# counterpart within the same frame was filtered out (drop=False).
os.chdir('/home/amandalin047/july_ratings/results/1st_round')
df = pd.read_excel('1st_all_means.xlsx')

emo_sc, emo_wc = main(
    df,
    condition='Emo',
    val_thresh=1.8,
    plaus_thresh=4.5,
    exp_thresh=8,
    drop=False,
)
neu_sc, neu_wc = main(
    df,
    condition='Neu',
    val_thresh=0.9,
    plaus_thresh=4.5,
    exp_thresh=8,
    drop=False,
)

number of Emo_SC sentences = 48


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
3,12,你這樣花光家產完全不知孝順，別到父母都掛了才後悔。,0.866667,5.733333,5.8,2.428571,4.285714
8,28,他痛苦到鐵了心要自殺並留下絕筆，便於今晨寫下遺書。,0.933333,5.6,6.133333,1.785714,4.428571
9,32,不滿官員暴利欺壓，憤怒的鄉民聚集凱道大聲喊話。,0.0,4.533333,5.2,3.0,3.714286
10,32,不滿官員暴利欺壓，憤怒的鄉民聚集凱道大聲抗議。,0.888889,6.266667,6.4,3.0,3.714286
13,44,他車禍重傷血肉模糊狀況淒慘，搶救的希望全沒了。,0.0,5.2,5.4,2.071429,4.428571
14,44,他車禍重傷血肉模糊狀況淒慘，搶救的希望渺茫。,0.866667,5.666667,6.2,2.071429,4.428571
16,56,聽到女兒被綁架犯毆打的聲音，小雅當場放聲大哭。,0.888889,6.066667,6.0,2.142857,5.214286
17,60,我精心籌備浪漫的派對，要給女友大大的驚喜。,1.0,6.333333,6.333333,6.857143,4.285714
18,64,她沉魚落雁傾城傾國，連西施都會讚歎她的美貌。,0.8,5.466667,5.666667,6.928571,3.357143
19,68,發現老公外遇偷吃，妻子氣不過決定和他談判。,0.0,4.6,5.666667,2.785714,4.642857




number of Emo_WC sentences = 47


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
0,6,為了獲取個人私利，他竟不惜放棄家人的名譽。,0.111111,4.666667,5.733333,2.5,3.357143
1,10,他總愛設計卑鄙騙局誘人上當，行徑又狡猾又難猜。,0.0,4.333333,5.333333,2.142857,4.0
2,10,他總愛設計卑鄙騙局誘人上當，行徑又狡猾又陰險。,0.111111,6.4,6.333333,2.142857,4.0
4,14,他哮喘急性發作又不要命地熬夜做工，已經快要缺氧。,0.0,3.6,5.066667,2.785714,4.071429
5,14,他哮喘急性發作又不要命地熬夜做工，已經快要死去。,0.125,4.2,5.6,2.785714,4.071429
6,22,最愛的奶奶突然間去世，小明臉上的神情變得很冷淡。,0.0,3.533333,4.733333,2.428571,5.0
7,22,最愛的奶奶突然間去世，小明臉上的神情變得很黯然。,0.133333,5.733333,6.4,2.428571,5.0
11,38,少年時我成天酗酒吸毒，當時我覺得我的生命好無趣。,0.133333,4.066667,4.933333,2.785714,3.857143
12,42,罹患極重度憂鬱症後，他常像洩了氣的皮球一樣神色憔悴。,0.133333,5.066667,5.666667,2.214286,3.714286
15,50,總是第一名又努力的阿邱，在學習方面有非常棒的成就。,0.2,5.933333,6.466667,6.928571,2.428571




number of Neu_SC sentences = 82


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
5,9,人與人相處靠的是真心，人與人相遇則是契機。,0.0,4.533333,5.933333,5.357143,1.785714
6,9,人與人相處靠的是真心，人與人相遇則是緣分。,0.8,6.333333,6.733333,5.357143,1.785714
7,17,這一個問題可以問一下設計師，聽聽他的意見。,0.8,6.4,6.666667,5.142857,2.357143
11,29,我上次出國行李安檢卡關，還好後來有順利趕上來了。,0.0,4.933333,5.733333,5.5,3.285714
12,29,我上次出國行李安檢卡關，還好後來有順利趕上飛機。,0.733333,6.333333,6.4,5.5,3.285714
...,...,...,...,...,...,...,...
148,1365,夜晚時應避免在屋內大聲吵鬧，以免吵到保全。,0.0,2.588235,4.705882,4.571429,2.214286
149,1365,夜晚時應避免在屋內大聲吵鬧，以免吵到鄰居。,0.9,6.823529,6.882353,4.571429,2.214286
150,1376,你不能什麼都想要，這樣實在太貪心。,0.9,6.705882,6.705882,4.357143,3.214286
151,1405,小華明天要考英文，難怪他今天這麼認真在背筆記。,0.0,4.941176,6.470588,5.0,1.928571




number of Neu_WC sentences = 71


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
0,2,男星在臉書放與辣妹親暱的合照，粉絲們說是姐姐。,0.0,3.533333,5.066667,4.285714,3.142857
1,2,男星在臉書放與辣妹親暱的合照，粉絲們說是女友。,0.066667,5.0,5.533333,4.285714,3.142857
2,3,每天上學都是爸爸接送的小明，放學時會在門口等爸爸。,0.4,6.066667,6.066667,5.071429,1.5
3,7,這位演員說他拍片的原因，是想要展現出他的理念。,0.0,5.266667,5.866667,5.785714,2.357143
4,7,這位演員說他拍片的原因，是想要展現出他的魅力。,0.2,5.266667,5.733333,5.785714,2.357143
...,...,...,...,...,...,...,...
96,579,我開了一家公司，在財務方面都請下屬來處理。,0.333333,6.352941,6.529412,4.857143,1.928571
97,583,注重膚質保養的人都會擦這罐，才不會角化。,0.0,4.352941,5.529412,4.928571,2.5
98,583,注重膚質保養的人都會擦這罐，才不會乾燥。,0.333333,5.882353,6.470588,4.928571,2.5
99,587,這部電影的劇情內容，沒想到是以這樣的方式安排。,0.0,5.705882,6.411765,4.642857,3.5






In [9]:
# If we drop singled out sentences
os.chdir('/home/amandalin047/july_ratings/results/1st_round')
df1 = pd.read_excel('1st_all_means.xlsx')
os.chdir('/home/amandalin047/july_ratings/results/2nd_round')
# Empty frame that only carries the 2nd-round column layout; not used in this cell.
df2 = pd.DataFrame(data=None, columns=pd.read_excel('2nd_all_means.xlsx').columns)

# Pass the frame loaded in this cell (df1) so the cell does not depend on a
# `df` variable left over from a previously executed cell.
emo_sc, emo_wc = main(df1, exp_thresh=8, plaus_thresh=4.5, val_thresh=1.8, condition='Emo', drop=True);
neu_sc, neu_wc = main(df1, exp_thresh=8, plaus_thresh=4.5, val_thresh=0.9, condition='Neu', drop=True);

number of Emo_SC frames = 18


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
9,32,不滿官員暴利欺壓，憤怒的鄉民聚集凱道大聲喊話。,0.0,4.533333,5.2,3.0,3.714286
10,32,不滿官員暴利欺壓，憤怒的鄉民聚集凱道大聲抗議。,0.888889,6.266667,6.4,3.0,3.714286
13,44,他車禍重傷血肉模糊狀況淒慘，搶救的希望全沒了。,0.0,5.2,5.4,2.071429,4.428571
14,44,他車禍重傷血肉模糊狀況淒慘，搶救的希望渺茫。,0.866667,5.666667,6.2,2.071429,4.428571
19,68,發現老公外遇偷吃，妻子氣不過決定和他談判。,0.0,4.6,5.666667,2.785714,4.642857
20,68,發現老公外遇偷吃，妻子氣不過決定和他離婚。,0.933333,6.2,6.466667,2.785714,4.642857
23,76,感謝你在我最低潮時給予我無盡的支持，我內心充滿暖意。,0.0,5.2,5.866667,7.071429,4.928571
24,76,感謝你在我最低潮時給予我無盡的支持，我內心充滿感激。,0.722222,6.266667,6.4,7.071429,4.928571
25,77,在森林緩緩漫步並感受微風與陽光，可以為內心帶來暖意。,0.0,4.533333,5.866667,7.0,3.214286
26,77,在森林緩緩漫步並感受微風與陽光，可以為內心帶來平靜。,0.8125,6.466667,6.8,7.0,3.214286




number of Emo_WC frames = 17


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
1,10,他總愛設計卑鄙騙局誘人上當，行徑又狡猾又難猜。,0.0,4.333333,5.333333,2.142857,4.0
2,10,他總愛設計卑鄙騙局誘人上當，行徑又狡猾又陰險。,0.111111,6.4,6.333333,2.142857,4.0
4,14,他哮喘急性發作又不要命地熬夜做工，已經快要缺氧。,0.0,3.6,5.066667,2.785714,4.071429
5,14,他哮喘急性發作又不要命地熬夜做工，已經快要死去。,0.125,4.2,5.6,2.785714,4.071429
6,22,最愛的奶奶突然間去世，小明臉上的神情變得很冷淡。,0.0,3.533333,4.733333,2.428571,5.0
7,22,最愛的奶奶突然間去世，小明臉上的神情變得很黯然。,0.133333,5.733333,6.4,2.428571,5.0
21,70,這棟爬滿藤蔓的凶宅陰森恐怖，滿地都是青苔。,0.0,3.8,5.4,3.0,4.785714
22,70,這棟爬滿藤蔓的凶宅陰森恐怖，滿地都是血跡。,0.133333,4.466667,6.0,3.0,4.785714
27,78,孩子前腳剛出門就遇車禍身亡，父母倆都哭到天亮。,0.0,3.733333,4.733333,1.928571,5.214286
28,78,孩子前腳剛出門就遇車禍身亡，父母倆都哭到斷腸。,0.133333,5.533333,6.466667,1.928571,5.214286




number of Neu_SC frames = 33


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
5,9,人與人相處靠的是真心，人與人相遇則是契機。,0.0,4.533333,5.933333,5.357143,1.785714
6,9,人與人相處靠的是真心，人與人相遇則是緣分。,0.8,6.333333,6.733333,5.357143,1.785714
11,29,我上次出國行李安檢卡關，還好後來有順利趕上來了。,0.0,4.933333,5.733333,5.5,3.285714
12,29,我上次出國行李安檢卡關，還好後來有順利趕上飛機。,0.733333,6.333333,6.4,5.5,3.285714
13,33,他最近有點低潮，看到他可以給他加油喊話。,0.0,5.333333,5.6,5.142857,3.142857
...,...,...,...,...,...,...,...
147,1357,小明放學後都會搭捷運，再到站牌這裡等公車。,0.9,6.470588,6.470588,5.071429,1.571429
148,1365,夜晚時應避免在屋內大聲吵鬧，以免吵到保全。,0.0,2.588235,4.705882,4.571429,2.214286
149,1365,夜晚時應避免在屋內大聲吵鬧，以免吵到鄰居。,0.9,6.823529,6.882353,4.571429,2.214286
151,1405,小華明天要考英文，難怪他今天這麼認真在背筆記。,0.0,4.941176,6.470588,5.0,1.928571




number of Neu_WC frames = 23


Unnamed: 0,Index,Sentences,Cloze,Exp. mean,Plaus. mean,Val. mean,Arous. mean
0,2,男星在臉書放與辣妹親暱的合照，粉絲們說是姐姐。,0.0,3.533333,5.066667,4.285714,3.142857
1,2,男星在臉書放與辣妹親暱的合照，粉絲們說是女友。,0.066667,5.0,5.533333,4.285714,3.142857
3,7,這位演員說他拍片的原因，是想要展現出他的理念。,0.0,5.266667,5.866667,5.785714,2.357143
4,7,這位演員說他拍片的原因，是想要展現出他的魅力。,0.2,5.266667,5.733333,5.785714,2.357143
9,27,今天的課程，主要是為了要讓學生瞭解這個篇章。,0.0,4.733333,6.066667,5.071429,1.571429
10,27,今天的課程，主要是為了要讓學生瞭解這個概念。,0.266667,6.533333,6.8,5.071429,1.571429
32,71,今早和家人出外踏青，看到山上有許多青苔。,0.0,3.266667,5.533333,5.642857,1.857143
33,71,今早和家人出外踏青，看到山上有許多遊客。,0.2,5.333333,6.2,5.642857,1.857143
34,79,他擔任公園清掃人員，打掃完已經快要天亮。,0.0,4.533333,4.933333,5.0,2.5
35,79,他擔任公園清掃人員，打掃完已經快要中午。,0.266667,4.466667,5.866667,5.0,2.5




