In [9]:
import pandas as pd

In [10]:
# Load the raw dataset; later cells read its 'title', 'abstract' and 'criteria' columns.
df = pd.read_csv(filepath_or_buffer='raw-datasets/socialisolation.csv')

In [11]:
# Build items.csv: the title and abstract of every row, in shuffled order,
# exported under the column names itemTitle / itemDescription.
items_df = (
    df[['title', 'abstract']]
    .sample(frac=1)  # frac=1 keeps every row but randomises the order
    .rename(columns={'title': 'itemTitle', 'abstract': 'itemDescription'})
)
items_df.to_csv('processed-datasets/socialisolation.items.csv', index=False)

In [12]:
# Build filters.csv.
# `filters` maps a label that may appear in the raw `criteria` column to the
# question exported as `filterDescription`. When the tests are built, a row
# whose `criteria` contains the label gets the answer 'no' for that question.
# Insertion order matters: the one-worker dataset takes the first entry.
filters = {
    'no intervention': 'Does the paper describe an intervention?',
    'no tech': 'Are technology and/or technological solutions involved?',
    'not OA': 'Are the elderly involved?',
    'not SI': 'Is it related to loneliness, social isolation, or social connectedness?',
}

# One row per question, in the same order as the dict.
filters_df = pd.DataFrame({'filterDescription': list(filters.values())})
filters_df.to_csv('processed-datasets/socialisolation.filters.csv', index=False)

In [13]:
# Build tests.csv: one row per (paper, filter) pair with its expected answer.
tests_src_df = df[['title', 'abstract', 'criteria']]

# The answer is 'no' when the filter's label occurs in the paper's `criteria`
# value, and 'yes' otherwise.
# NOTE(review): the `in` check assumes `criteria` is always a string; a missing
# value (NaN) would raise TypeError here -- confirm the raw file has no gaps.
tests = [
    {
        'itemTitle': paper['title'],
        'itemDescription': paper['abstract'],
        'filterDescription': question,
        'answer': 'no' if label in paper['criteria'] else 'yes',
    }
    for _, paper in tests_src_df.iterrows()
    for label, question in filters.items()
]

# Shuffle before exporting so the rows are not grouped by paper.
tests_df = pd.DataFrame(tests).sample(frac=1)
tests_df.to_csv('processed-datasets/socialisolation.tests.csv', index=False)

In [14]:
# Create reduced datasets for just one worker.
#   maxTasksRule = 3
#   votesPerTaskRule = 1
#   initialTestsRule = 2
#   testFrequencyRule = 2

# items.csv -- the first three rows of a fresh shuffle
items_df_1w = (
    df[['title', 'abstract']]
    .sample(frac=1)
    .head(3)
    .rename(columns={'title': 'itemTitle', 'abstract': 'itemDescription'})
)
items_df_1w.to_csv('processed-datasets/socialisolation.items-1w.csv', index=False)

# filters.csv -- only the first filter
filters_df_1w = filters_df.head(1)
filters_df_1w.to_csv('processed-datasets/socialisolation.filters-1w.csv', index=False)

# tests.csv -- at most three shuffled tests that belong to that filter
selected_filter = filters_df_1w.iloc[0]  # row Series; also read by later cells
tests_df_1w = (
    tests_df
    .sample(frac=1)
    .loc[lambda shuffled: shuffled['filterDescription'] == selected_filter['filterDescription']]
    .head(3)
)
tests_df_1w.to_csv('processed-datasets/socialisolation.tests-1w.csv', index=False)

In [15]:
# Build tests-200: at most 200 shuffled tests for the selected filter.
tests_df_200 = (
    tests_df
    .sample(frac=1)
    .loc[lambda shuffled: shuffled['filterDescription'] == selected_filter['filterDescription']]
    .head(200)
)
tests_df_200.to_csv('processed-datasets/socialisolation.tests-200.csv', index=False)

In [16]:
# Build tests-intervention-20: at most 20 shuffled tests for the selected
# filter whose abstract mentions 'intervention' and whose answer is 'yes'.
#
# The abstract check has to go through `.str.contains`. The previous form,
# `'intervention' in df.itemDescription`, tests membership in the Series
# *index labels*, not in its values. It therefore evaluated to a single False,
# which made the whole mask False and the exported file empty.
tests_df_20 = (
    tests_df
    .sample(frac=1)
    .loc[
        lambda candidates: (
            (candidates.filterDescription == selected_filter.filterDescription)
            # Literal, case-sensitive substring match; a missing abstract counts as no match.
            & candidates.itemDescription.str.contains('intervention', regex=False, na=False)
            & (candidates.answer == 'yes')
        ),
        :,
    ]
    .iloc[:20]
)
tests_df_20.to_csv('processed-datasets/socialisolation.tests-intervention-20.csv', index=False)





In [19]:
# Build tests-intervention-small.csv from a separate raw file that carries a
# 'has intervention' column. Every row becomes one test for the
# 'no intervention' filter question.
df_small = pd.read_csv('raw-datasets/socialisolation.tests-intervention-small-raw.csv')
tests_small_src_df = df_small[['title', 'abstract', 'criteria', 'has intervention']]

# The answer is 'yes' only when the 'has intervention' value equals 1.
tests_small = [
    {
        'itemTitle': paper['title'],
        'itemDescription': paper['abstract'],
        'filterDescription': filters['no intervention'],
        'answer': 'yes' if paper['has intervention'] == 1 else 'no',
    }
    for _, paper in tests_small_src_df.iterrows()
]

# Shuffle the rows before exporting.
tests_small_df = pd.DataFrame(tests_small).sample(frac=1)
tests_small_df.to_csv('processed-datasets/socialisolation.tests-intervention-small.csv', index=False)