In [1]:
import pandas as pd
import json

## Constants

In [2]:
# CONSTANTS

# Skeleton of the record sent to the CM. Fields left as None here are filled in
# per record by the cells that copy this template.
TEMPLATE = dict(
    id=None,
    userid=None,
    origin=None,
    source_id="fake90e6d701748f08514b01",
    source="fake",
    pname=None,
    pvalue=None,
    context="application",
    datapoints=0,
    category=None,
)

# Directory prefix concatenated in front of every data file name that this notebook reads or writes.
FOLDER = "./data/"


# Settings for the contributions section: input file, the record fields that hold
# the user / artifact / contribution ids, the attributes to export, and the output file.
C_CONTRIB = {
    "datafile": 'DMH_127-AudioRecorded-DEGARI.json',
    "userIdAtt": 'User ID',
    "artifactIdAtt": 'Object number',
    "contribIdAtt": '_id',
    "contribAtts": [
        'English translation',
        'degari_extracted_emotions',
        'plutchik_emotions',
        'emotion_recognition',
        'subjectivity_detection',
        'polarity_classification',
        'intensity_ranking',
    ],
    "ugcfile": "ugcContributions.json",
}

# Running integer counter used as the "id" of generated user records; it is
# incremented after each use. Re-running a cell that increments it without
# re-running this one continues from the last value instead of restarting at 0.
_ID = 0

## Users

This section transforms demographic data from an LDH dataset into the UGC data sent to the CM by the UM.

In [3]:
# Settings for the users section: input file, the demographic attributes to export,
# the attribute that is renamed to "explicit-community", the user id field, and the output file.
C_USER = {
    "datafile": 'DMH_127-AudioRecorded-DEGARI.json',
    "demographicAtts": ['End-user community'],
    "explicitCommunityAtt": 'End-user community',
    "userIdAtt": 'User ID',
    "ugcfile": "ugcUsers.json",
}

In [4]:
# Load the raw records. The encoding is pinned to UTF-8 (the encoding JSON text
# is expected to use) because open() otherwise falls back to the platform's
# locale encoding, which can mis-decode the non-ASCII text in this dataset.
with open(FOLDER + C_USER["datafile"], encoding="utf-8") as f:
    userData = json.load(f)

In [5]:
# Show the first loaded record to inspect its structure (the fields sit under the "0" key).
userData[0]

{'0': {'_id': '63738b60fb9758706e727881',
  'Object name': 'Cup - Myrna',
  'Object number': '44176',
  'Date of recording': '17/3/2022',
  'Context/event/workshop': 'Lahti',
  'Contributor name': 'Terttu Valonen',
  'User ID': '677821',
  'End-user community': 'Senior citizens',
  'Story ID(#)': '#44176S11',
  'Original language ': 'Finnish',
  'Audio recording file name': 'MYRNA 5, Terttu Valonen, 17.3.2022 Lahti',
  'Start timestamp': '2:10',
  'End timestamp': '2:47',
  'Finnish translation': 'Ja just tää Myrnan kahvikuppi, että kun mä oon tämmöinen keräilijä, mulla on paljon näitä, että ottaa useampia. Kahdentoista hengen kahviastiasto löytyy kannuineen mun, tota, kodista. Ja tota on kaapit täynnä. Olen... Tykkään näistä isoista. Mulla on iso ruokasali ja siel on nää kaikki pitkät piirongit. Ne on täynnä näitä mistä mä en raaski luopua, enkä vaikken mä niitä paljon käytä, kun silloin tällöin, mutta mä rakastan kaikkea vanhaa.\xa0\xa0',
  'English translation': 'And just this Myrna

In [6]:
# Build, for every record, one CM payload per demographic attribute present in it.
# postDict maps user id -> list of payloads; a later record with the same user id
# replaces the list stored by an earlier one.
postDict = {}
for entry in userData:
    record = entry["0"]  # the record fields sit under the "0" key
    base = TEMPLATE.copy()
    base["category"] = "demographics"
    userKey = record[C_USER["userIdAtt"]]
    base["userid"] = userKey
    payloads = []
    for attName in C_USER["demographicAtts"]:
        if attName not in record:
            continue
        payload = base.copy()
        payload["id"] = _ID
        _ID += 1  # global counter: every generated payload gets a fresh integer id
        payload["origin"] = payload["userid"]
        payload["pvalue"] = record[attName]
        # The explicit-community attribute is exported under a fixed property name.
        payload["pname"] = "explicit-community" if attName == C_USER["explicitCommunityAtt"] else attName
        payloads.append(payload)
    postDict[userKey] = payloads

In [7]:
# Persist the per-user payloads as JSON indented by two spaces.
usersOutPath = FOLDER + C_USER["ugcfile"]
with open(usersOutPath, "w") as fh:
    json.dump(postDict, fh, indent=2)

In [8]:
# postDict is a dictionary with user ids as keys and lists of demographic records for the CM as values
# To POST users to a CM server in localhost:8080, uncomment the next lines (requests must be imported first)
# server = "http://localhost:8080"
# for key, value in postDict.items():
#    response=requests.post(f'{server}/v1.1/users/{key}/update-generated-content', json = value)
#    # process the result in response

## Contributions


In [9]:
# Load the raw contribution records. The encoding is pinned to UTF-8 (the encoding
# JSON text is expected to use) because open() otherwise falls back to the platform's
# locale encoding, which can mis-decode the non-ASCII text in this dataset.
with open(FOLDER + C_CONTRIB["datafile"], encoding="utf-8") as f:
    contribData = json.load(f)

In [10]:
# Start with one independent, empty user-id -> records mapping per exported attribute.
postDict = dict((att, {}) for att in C_CONTRIB["contribAtts"])

In [11]:
# Group contribution payloads by attribute name and then by user id.
for rawEntry in contribData:
    entry = rawEntry["0"]  # the record fields sit under the "0" key
    base = TEMPLATE.copy()
    base["category"] = "interest"
    authorId = entry[C_CONTRIB["userIdAtt"]]
    base["userid"] = authorId
    base["id"] = entry[C_CONTRIB["contribIdAtt"]]
    base["origin"] = entry[C_CONTRIB["artifactIdAtt"]]
    for attName in C_CONTRIB["contribAtts"]:
        if attName not in entry:
            continue
        # Same base fields for every attribute of this contribution; only pname/pvalue differ.
        payload = dict(base, pname=attName, pvalue=entry[attName])
        postDict[attName].setdefault(authorId, []).append(payload)

In [12]:
# Display the nested attribute -> user id -> list-of-payloads dictionary built above.
postDict

{'English translation': {'677821': [{'id': '63738b60fb9758706e727881',
    'userid': '677821',
    'origin': '44176',
    'source_id': 'fake90e6d701748f08514b01',
    'source': 'fake',
    'pname': 'English translation',
    'pvalue': 'And just this Myrna coffee cup, so as I am a collector like this, I have a lot of these, several. At home, I have a coffee set for twelve people, including the pot. And the cupboards are full. I am... I like these big ones. I have a big dining room and there are these long chests of drawers. They are full of these things that I don’t want to give away, although I don’t use them much, only every now and then, but I love everything that is old.\xa0',
    'context': 'application',
    'datapoints': 0,
    'category': 'interest'},
   {'id': '63738b6eb6bab71ffc679ab5',
    'userid': '677821',
    'origin': 'C370',
    'source_id': 'fake90e6d701748f08514b01',
    'source': 'fake',
    'pname': 'English translation',
    'pvalue': "And then there is this pot. H

In [13]:
# Persist the grouped contribution payloads as JSON indented by two spaces.
contribOutPath = FOLDER + C_CONTRIB["ugcfile"]
with open(contribOutPath, "w") as fh:
    json.dump(postDict, fh, indent=2)

In [14]:
# postDict is a dictionary where:
# - Keys are the interaction/contribution types that we are interested in to send to CM
# For example: itRemindsMeOf_text key contains all the contributions with the text of "itRemindsMeOf" (whose pname is itRemindsMeOf.text)
# - Values are dictionaries where keys are userIds and values are a list of contributions for this user of the type described by the key.

# # To POST contributions to a CM server in localhost:8080, uncomment the next lines (requests must be imported first)
# server = "http://localhost:8080"
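# NOTE: the entries of contribTypes must be keys of postDict (in this notebook, the values in C_CONTRIB["contribAtts"])
# contribTypes = ["itRemindsMeOf_text", "itRemindsMeOf_emotions"]
# for ctype in contribTypes:
#    contribsDict = postDict[ctype]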
#    for key, value in contribsDict.items():
#        response=requests.post(f'{server}/v1.1/users/{key}/update-generated-content', json = value)
#        # Process response value