In [17]:
import pandas as pd
import json

## Constants

In [18]:
# CONSTANTS

# Blueprint of a single record handed to the CM. Fields left as None are
# filled in for each generated record; the remaining ones are fixed values.
TEMPLATE = dict(
    id=None,
    userid=None,
    origin=None,
    source_id="fake90e6d701748f08514b01",
    source="fake",
    pname=None,
    pvalue=None,
    context="application",
    datapoints=0,
    category=None,
)

# Directory prefix prepended to every input/output file name.
FOLDER = "./data/"

# Names of the record fields exported as contribution attributes.
contribActs2 = ['emotions', 'sentiment', 'entities']
contribActs = [
    'English translation',
    'degari_extracted_emotions',
    'plutchik_emotions',
    'emotion_recognition',
    'subjectivity_detection',
    'polarity_classification',
    'intensity_ranking',
    'MFT_Values',
    'EkmanEmotions',
] + contribActs2

# Settings for the contributions section: input file, the field names that
# identify user / artifact / contribution, the attributes to export, and the
# output file name.
C_CONTRIB = {
    "datafile": 'DMH_CombinedStories.json',
    "userIdAtt": 'User ID',
    "artifactIdAtt": 'Object number',
    "contribIdAtt": '_id',
    "contribAtts": contribActs,
    "ugcfile": "ugcContributions.json",
}

# Running counter used as the "id" of generated demographic records.
_ID = 0

## Users

This section transforms demographic data from an LDH dataset into the UGC data sent to the CM by the UM.

In [19]:
# User constants

# Settings for the users section: input file, the fields treated as
# demographic attributes, which of them denotes the explicit community,
# the field holding the user id, and the output file name.
C_USER = {
    "datafile": 'DMH_CombinedStories.json',
    "demographicAtts": ['End-user community'],
    "explicitCommunityAtt": 'End-user community',
    "userIdAtt": 'User ID',
    "ugcfile": "ugcUsers.json",
}

In [20]:
# Read the raw records (decoded as UTF-8) that the user section works from.
userDataPath = FOLDER + C_USER["datafile"]
with open(userDataPath, encoding='utf-8') as userFile:
    userData = json.load(userFile)

In [21]:
# Show the first loaded record to inspect which fields are available.
userData[0]

{'_id': '63738b5eb6bab71ffc679aab',
 'Object name': 'Cup - Myrna',
 'Object number': '44176',
 'Date of recording': '15/3/2022',
 'Context/event/workshop': 'Workshop for a group, at Design Museum',
 'Contributor name': 'Eeva-Kaarina Öyry',
 'User ID': '751188',
 'End-user community': 'Senior citizens',
 'Story ID(#)': '#44176S3',
 'Original language ': 'Finnish',
 'Audio recording file name': 'MYRNA 3, Group recording, 15.3.2022 Helsinki',
 'Start timestamp': '0:58',
 'End timestamp': '1:32',
 'Finnish translation': 'Ja nyt mun mopo tuolta, Tunturimopo, toi mieleen matkan. Et voisin mennä hakemaan sitä Myrnaa takaisin Suomeen, että sitten isosiskon pojatkin sais perinnöksi, perinnöksi nää kahvikupit. Itse en sitä sen takia halunnut, koska se on aina käsin tiskattava. Joo.',
 'English translation': "And now my moped there, the Tunturi moped, reminded me of a trip. So, I could go to get those Myrnas back to Finland, so that the big sister's sons could also inherit these cups. Personally,

In [22]:
# Build, for every distinct user id, the list of demographic records to send.
# Result: postDict maps user id -> list of TEMPLATE-shaped dicts.
postDict = {}
for record in userData:
    uid = record[C_USER["userIdAtt"]]
    # Only the first record seen for a user contributes demographics.
    if uid in postDict:
        continue
    entries = []
    for att in C_USER["demographicAtts"]:
        if att not in record:
            continue
        # The explicit-community attribute is published under a fixed name;
        # any other attribute keeps its own field name.
        pname = "explicit-community" if att == C_USER["explicitCommunityAtt"] else att
        # All keyword names already exist in TEMPLATE, so key order is kept.
        entries.append(dict(
            TEMPLATE,
            id=_ID,
            userid=uid,
            origin=uid,
            pname=pname,
            pvalue=record[att],
            category="demographics",
        ))
        # Advance the notebook-wide id counter after each generated record.
        _ID += 1
    postDict[uid] = entries

In [23]:
# Persist the per-user demographic records as indented JSON.
usersOutPath = FOLDER + C_USER["ugcfile"]
with open(usersOutPath, "w") as usersOut:
    usersOut.write(json.dumps(postDict, indent=2))

In [24]:
# postDict is a dictionary with userids as fields and arrays of demographic data for the CM as values
# To POST users to a CM server in localhost:8080 uncomment next lines
# NOTE: `requests` is not imported in the cells shown here; import it before uncommenting.
# server = "http://localhost:8080"
# for key, value in postDict.items():
#    response=requests.post(f'{server}/v1.1/users/{key}/update-generated-content', json = value)
#    process the result held in response

## Contributions


In [25]:
# Read the raw story records used to build the contributions.
# The encoding is given explicitly: the file contains non-ASCII text, and
# without it open() falls back to the platform's locale encoding, which can
# mis-decode the data or raise on systems whose default is not UTF-8.
# The users section reads this same file with encoding='utf-8'.
with open(FOLDER+C_CONTRIB["datafile"], encoding='utf-8') as f:
    contribData = json.load(f)

In [26]:
# Start with one empty (user id -> records) mapping per contribution attribute.
postDict = dict((attName, {}) for attName in C_CONTRIB["contribAtts"])

In [27]:
# Fill postDict[attribute][user id] with one record per (story, attribute)
# pair, for every configured attribute that is present in the story.
for story in contribData:
    authorId = story[C_CONTRIB["userIdAtt"]]
    # Fields shared by every record derived from this story. All keyword
    # names already exist in TEMPLATE, so the key order is unchanged.
    base = dict(
        TEMPLATE,
        id=story[C_CONTRIB["contribIdAtt"]],
        userid=authorId,
        origin=story[C_CONTRIB["artifactIdAtt"]],
        category="interest",
    )
    for attName in C_CONTRIB["contribAtts"]:
        if attName not in story:
            continue
        entry = dict(base, pname=attName, pvalue=story[attName])
        # Create the user's list on first use, then append to it.
        postDict[attName].setdefault(authorId, []).append(entry)

In [28]:
# Display the attribute -> user id -> records mapping to inspect the result.
postDict

{'English translation': {'751188': [{'id': '63738b5eb6bab71ffc679aab',
    'userid': '751188',
    'origin': '44176',
    'source_id': 'fake90e6d701748f08514b01',
    'source': 'fake',
    'pname': 'English translation',
    'pvalue': "And now my moped there, the Tunturi moped, reminded me of a trip. So, I could go to get those Myrnas back to Finland, so that the big sister's sons could also inherit these cups. Personally, I didn’t want these because they always have to be washed by hand. Yeah.",
    'context': 'application',
    'datapoints': 0,
    'category': 'interest'},
   {'id': '63738b6f619d8873905ef709',
    'userid': '751188',
    'origin': '32361',
    'source_id': 'fake90e6d701748f08514b01',
    'source': 'fake',
    'pname': 'English translation',
    'pvalue': 'I myself have a rug woven by my late mother, “Muisto Karjalasta”. By Norma Heimola. It was made in the 1970s. And she had this situation in the 1950s, after being evacuated, that with the wives of her uncles, they u

# Change pname so the CM knows which text is associated with the interaction

In [29]:
# Maps an original attribute name to the pname it should be published under.
changeNameDict = {'English translation': 'text'}

In [30]:
# Re-key the collected records, applying the renames in changeNameDict both
# to the top-level key and to the pname stored inside each record.
postDict2 = {}
for oldName, byUser in postDict.items():
    if oldName not in changeNameDict:
        # No rename configured: carry the attribute over unchanged.
        postDict2[oldName] = byUser
        continue
    newName = changeNameDict[oldName]
    # The record dicts are shared with postDict, so they are patched in place.
    for records in byUser.values():
        for record in records:
            record['pname'] = newName
    postDict2[newName] = byUser
postDict = postDict2

In [31]:
# Save the file

In [32]:
# Persist the renamed contribution records as indented JSON.
contribOutPath = FOLDER + C_CONTRIB["ugcfile"]
with open(contribOutPath, "w") as contribOut:
    contribOut.write(json.dumps(postDict, indent=2))