In [43]:
import pandas as pd
import utils

In [44]:
# Coordinating conjunctions covered by the analysis
CONJUNCTIONS = [
    'and',
    'or',
    'but',
    'nor',
]

# Category labels grouped into four families. Two conjuncts whose labels
# fall in the same family are treated as a "like" coordination.
NOUN_CATEGORIES = [
    'NN', 'NNS', 'NNP', 'NNPS',
    'NP', 'NX',
]
VERB_CATEGORIES = [
    'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
    'VP',
]
ADJ_CATEGORIES = [
    'JJ', 'JJR', 'JJS',
    'ADJP',
]
ADV_CATEGORIES = [
    'RB', 'RBR', 'RBS',
    'ADVP',
]

# Phrase-level labels only (one taken from each family above); "unlike"
# coordinations are restricted to conjuncts carrying these labels.
PHRASAL_CATEGORIES = [
    'NP',
    'VP',
    'ADJP',
    'ADVP',
]

In [59]:
def likes_df(df):
    '''
    Collects the "like" coordinations of a DataFrame: rows whose two
    conjunct categories both belong to the same category family
    (noun, verb, adjective or adverb).

    Keyword Arguments:
        df -- DataFrame containing coordinations; must provide the
              '1st Conjunct Category' and '2nd Conjunct Category' columns
    Return:
        DataFrame of like coordinations, grouped family by family
        (nouns, verbs, adjectives, adverbs) and re-indexed from 0
    '''

    first = df['1st Conjunct Category']
    second = df['2nd Conjunct Category']

    # The order of the families fixes the row order of the result.
    families = (NOUN_CATEGORIES, VERB_CATEGORIES,
                ADJ_CATEGORIES, ADV_CATEGORIES)

    # One sub-frame per family: both conjuncts must carry a label from it.
    per_family = [df[first.isin(family) & second.isin(family)]
                  for family in families]

    return pd.concat(per_family, axis=0, ignore_index=True)


def unlikes_df(df):
    '''
    Collects the "unlike" coordinations of a DataFrame: rows whose two
    conjuncts are both phrasal categories but differ from one another.

    Keyword Arguments:
        df -- DataFrame containing coordinations; must provide the
              '1st Conjunct Category' and '2nd Conjunct Category' columns
    Return:
        DataFrame of unlike coordinations (original row labels are kept)
    '''

    first = df['1st Conjunct Category']
    second = df['2nd Conjunct Category']

    # Only coordinations of two phrasal categories are considered ...
    both_phrasal = (first.isin(PHRASAL_CATEGORIES)
                    & second.isin(PHRASAL_CATEGORIES))

    # ... and among those, only the ones whose categories differ.
    return df.loc[both_phrasal & (first != second)]

In [46]:
# Load CSV file with coordination samples
samples = pd.read_csv("csv/samples.csv", index_col=None, header=0)

# Load CSV files with raters' judgments
rater1 = pd.read_csv("csv/raters/rater1.csv", index_col=None, header=0)
rater2 = pd.read_csv("csv/raters/rater2.csv", index_col=None, header=0)
rater3 = pd.read_csv("csv/raters/rater3.csv", index_col=None, header=0)

# Take majority of three raters' judgments: a sample is kept as correct
# when at least two of the three raters marked it correct. Combining the
# columns as `r1 & r2 & r3` would instead require all three to agree.
# NOTE(review): assumes each 'Correct?' column is boolean and that the rater
# files are row-aligned with samples.csv -- confirm against the CSVs.
r1 = rater1['Correct?']
r2 = rater2['Correct?']
r3 = rater3['Correct?']
samples['Correct? (Majority)'] = (r1 & r2) | (r1 & r3) | (r2 & r3)

# Keep only the samples judged correct and persist them for later use
correct = samples[samples['Correct? (Majority)']]
correct.to_csv('csv/correct_samples.csv')

In [63]:
# Split the correct samples into like and unlike coordinations
likes = likes_df(correct)
unlikes = unlikes_df(correct)