## Description
This script will convert the raw JSON from the UrbanBelonging app to a CSV file containing aggregated results per photo.
It depends on 3 different JSON files:
- An export of the users, with their corresponding organisation values. 
- An export of the routes walked by the participants
- An export of all the photos and corresponding annotations and reactions. 

If you only want to convert the export of all the photos, remove the part of the main script that maps photos to a specific route. The user organisation data is only used to create the unique code at the end of this notebook.

In [2]:
import csv
import json
from datetime import datetime

Retrieve users and demographic group from JSON for later use

In [None]:
# Lookup of user id -> demographic group (None when the export has no group for
# that user). Starts empty and is populated from the user export.
users: dict = {}

In [3]:
# Read the user export and record each user's demographic group in `users`.
with open("Export_Users-1631800777087.json", "r", encoding='utf8') as user_export:
    exported_users = json.load(user_export)['result']['users']

# Users exported without a 'demographicGroup' field are stored with None.
for entry in exported_users:
    users[entry['id']] = entry.get('demographicGroup')

Retrieve routes from JSON to match with photos datetimes later

In [4]:
# Read the walks export and keep, per walk, its id, start/end timestamps and
# the id of the user who created it.
with open('Export_Event_Walks-1632149465145_final.json', newline='', encoding='utf8') as routefile:
    routes = json.load(routefile)

routeList = [
    {
        'routeid': walk['id'],
        'start_time': walk['startedAt'],
        'end_time': walk['endedAt'],
        'userId': walk['createdBy'],
    }
    for walk in routes['result']['walks']
]


Open and process the event file containing all photos

In [5]:
# Timestamp layout used by the 'createdAt', 'startedAt' and 'endedAt' fields.
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Template for counting labels: one counter per predefined category, an 'Other'
# counter for free-text labels, and 'OtherText' collecting those free texts
# separated by '|'. Copied (never mutated) for every tally that is needed.
CATEGORY_TEMPLATE = {
    "Architecture": 0,
    "Urban Environment": 0,
    "Objects": 0,
    "Signs & symbols": 0,
    "Urban Nature": 0,
    "Wind & Weather": 0,
    "Culture": 0,
    "Consumption": 0,
    "People /community": 0,
    "Infrastructure": 0,
    "Memories & associations": 0,
    "Atmosphere": 0,
    "Don't know": 0,
    "Other": 0,
    "OtherText": ''}


def _tally_answer(counts, answer):
    """Count one label in `counts` and report whether it was a known category.

    Labels that are keys of `counts` increment their own counter. Any other
    label increments 'Other' and is appended to 'OtherText' ('|'-separated).

    Returns:
        True if `answer` was counted under its own category, False if it was
        counted as 'Other'.
    """
    # 'OtherText' holds a string, not a counter, so a free-text label that
    # happens to be spelled that way must be treated as free text.
    if answer != 'OtherText' and answer in counts:
        counts[answer] += 1
        return True
    counts['Other'] += 1
    if counts['OtherText']:
        counts['OtherText'] = counts['OtherText'] + '|' + answer
    else:
        counts['OtherText'] = answer
    return False


def _find_route_id(photo, candidate_routes):
    """Return the id of the walk during which `photo` was taken, or None.

    A walk matches when it was created by the same user as the photo and the
    photo's timestamp lies strictly between the walk's start and end time.
    When several walks match, the last one in `candidate_routes` wins.
    """
    route_id = None
    for route in candidate_routes:
        if route['userId'] != photo['createdBy']:
            continue
        startTime = datetime.strptime(route['start_time'], TIMESTAMP_FORMAT)
        endTime = datetime.strptime(route['end_time'], TIMESTAMP_FORMAT)
        photoTime = datetime.strptime(photo['createdAt'], TIMESTAMP_FORMAT)
        if startTime < photoTime < endTime:
            route_id = route['routeid']
    return route_id


with open("../Export_Event-1633420386924.json", "r", encoding='utf-8') as read_file:
    data = json.load(read_file)

# One flat dict per photo; every dict has the same keys in the same order so
# the list can be written to CSV directly.
dataList = []

for photo in data['result']['photos']:

    # Fresh counters for this photo's annotations and reactions.
    slider_categories = {
        'slider_pos': 0,
        'slider_amb': 0,
        'slider_neg': 0}
    annotation_categories = dict(CATEGORY_TEMPLATE)
    reaction_categories = dict(CATEGORY_TEMPLATE)
    pos_reaction_categories = dict(CATEGORY_TEMPLATE)
    neg_reaction_categories = dict(CATEGORY_TEMPLATE)

    # Basic photo info copied from the raw JSON.
    photo_data = {}
    photo_data['photo_url_thumb'] = photo['thumbnails'][2]['url']
    photo_data['photo_url_original'] = photo['imageUrl']
    photo_data['userId'] = photo['createdBy']
    photo_data['photoId'] = photo['id']
    photo_data['createdAt'] = photo['createdAt']
    # No organisation when the user export was not loaded or does not contain
    # the photo's creator (instead of failing with a KeyError).
    photo_data['userOrg'] = users.get(photo['createdBy'])

    # Try to map the photo to a route walked by the same user in whose
    # timeframe the photo was taken.
    photo_data['routeId'] = _find_route_id(photo, routeList)

    # Sometimes location data is not properly attached and is missing.
    location = photo['locationData']
    if location is not None:
        photo_data['latitude'] = location['latitude']
        photo_data['longitude'] = location['longitude']
    else:
        photo_data['latitude'] = None
        photo_data['longitude'] = None

    # Annotations given by the user who took the photo: element 0 carries the
    # slider value, element 1 the list of chosen labels.
    creator_answers = photo['annotationAnswers']
    if creator_answers:
        photo_data['slider_answer'] = creator_answers[0]['sliderAnswer']
        mp_answer = creator_answers[1]['multipleChoiceAnswer']
        photo_data['annot_answerString'] = ', '.join(mp_answer)
        for answer in mp_answer:
            _tally_answer(annotation_categories, answer)
    else:
        photo_data['slider_answer'] = None
        # Keep the column present for un-annotated photos so every row has
        # the same keys.
        photo_data['annot_answerString'] = None
    annotation_categories = {'annot_'+k: v for k, v in annotation_categories.items()}

    # REACTIONS
    # Labels are counted three times to show in which sentiment context they
    # were given, based on the reaction's slider value:
    #  - positive sentiment (slider value >= 4)
    #  - negative sentiment (slider value <= 2)
    #  - regardless of sentiment (all reactions)
    # A slider value of exactly 3 is counted as ambivalent; its labels only
    # appear in the sentiment-independent counts.
    sliderTotal = 0
    annotationCount = len(photo['reactions'])
    photo_data['reaction_count'] = annotationCount
    for reaction in photo['reactions']:
        answers = reaction['answers'][1]['multipleChoiceAnswer']
        sliderAnswer = reaction['answers'][0]['sliderAnswer']

        sentiment_counts = None
        if sliderAnswer >= 4:
            slider_categories['slider_pos'] += 1
            sentiment_counts = pos_reaction_categories
        elif sliderAnswer <= 2:
            slider_categories['slider_neg'] += 1
            sentiment_counts = neg_reaction_categories
        elif sliderAnswer == 3:
            slider_categories['slider_amb'] += 1

        sliderTotal += sliderAnswer

        if sentiment_counts is not None:
            for answer in answers:
                _tally_answer(sentiment_counts, answer)

        # No sentiment (all reactions); report labels outside the category list.
        for answer in answers:
            if not _tally_answer(reaction_categories, answer):
                print('cant find'+answer)

    # Prefix the future column names so we know which tally they belong to.
    # (These prefixes have no underscore, unlike 'annot_'; they are left as-is
    # so the CSV column names do not change.)
    reaction_categories = {'react'+k: v for k, v in reaction_categories.items()}
    pos_reaction_categories = {'pos_react'+k: v for k, v in pos_reaction_categories.items()}
    neg_reaction_categories = {'neg_react'+k: v for k, v in neg_reaction_categories.items()}

    # Average slider value over all reactions (0 when there are none).
    if annotationCount != 0:
        sliderAverage = round(sliderTotal/annotationCount, 1)
    else:
        sliderAverage = 0
    photo_data['sliderAverage'] = sliderAverage

    # Difference between the reactions' average and the creator's own slider
    # value; only defined when both exist.
    if annotationCount != 0 and photo_data['slider_answer'] is not None:
        photo_data['sliderDiff'] = (photo_data['slider_answer']-sliderAverage)*-1
    else:
        photo_data['sliderDiff'] = None

    # Flag photos where that difference is 2 or more in either direction.
    sliderDiff = photo_data['sliderDiff']
    photo_data['largeSliderGap'] = sliderDiff is not None and abs(sliderDiff) >= 2

    # Combine all aggregated data in one dictionary and add it to the list.
    fullDict = {**photo_data, **slider_categories, **annotation_categories, **reaction_categories, **pos_reaction_categories, **neg_reaction_categories}
    dataList.append(fullDict)

cant findMin barndom
cant findObstrucle
cant findInclusion
cant findObstacle
cant findLiv&Død
cant findFredagsbar
cant findHåbefulde studerende
cant findA smile on your lip
cant findSkæbner
cant findNeutral 
cant findNeutral 
cant findNeutral 
cant findNeutral 
cant findNeutral 
cant findKunst
cant findSkæbner
cant findHistorier
cant findHumours
cant findKunst
cant findGammelt er blevet til nyt
cant findSamlingspunkt
cant findOpofrelse
cant findKrig og død
cant findSamarbejde
cant findFamilier i sorg og glæde
cant findRespect
cant findKunst
cant findGod tur til kørestolen
cant findFremtidens ghetto
cant findByggesjusk
cant findEn bænk til at slappe af på 
cant findOp af bakke
cant findUtydeligr
cant findByggesjusk
cant findGrådighed
cant findTømmermandsarkitektur
cant findParkering lag på lag
cant findTransportmiddel
cant findGlæde og sorg
cant findEufori
cant findNationalsymbol
cant findSjæl
cant findHistorier
cant findSammenhold
cant findIkonesk
cant findHensyn
cant findOpsyn
cant fi

# Create Unique code for each photo

In [6]:
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

# Codelist to create unique id for each user to enable link back to specific organisation in later phase (if needed)
codeList = {
        'DDL': 'A',
        'DHF': 'B',
        'ENG': 'C',
        'HUGS': 'D',
        'LGBTQ': 'E',
        'MINO': 'F',
        'SIND':  'G',
        }

# Letter used for photos whose organisation is missing (None) or not listed in
# codeList, so such rows still get a code instead of aborting the run.
UNKNOWN_ORG_CODE = 'X'


def _org_user_key(item):
    """Sort key: by organisation, then by user; rows without organisation last.

    None cannot be compared with strings, so it is mapped to a separate
    leading flag instead of being compared directly.
    """
    org = item['userOrg']
    return (org is None, '' if org is None else org, item['userId'])


sorted_list = sorted(dataList, key=_org_user_key)

# Each photo gets '<org letter><user number>-<photo number>': users are
# numbered from 1 within their organisation letter, photos from 1 within their
# user, both in sorted order. The code is stored on the row as 'uniqueCode'.
usersPerOrgCode = defaultdict(int)
for currentOrg, orgItems in groupby(sorted_list, key=itemgetter('userOrg')):
    orgCode = codeList.get(currentOrg, UNKNOWN_ORG_CODE)
    for currentUser, userItems in groupby(orgItems, key=itemgetter('userId')):
        # Counting per letter (not per organisation) keeps codes unique even
        # when several unknown organisations share the placeholder letter.
        usersPerOrgCode[orgCode] += 1
        userCode = usersPerOrgCode[orgCode]
        for photoNr, item in enumerate(userItems, start=1):
            item['uniqueCode'] = orgCode + str(userCode) + '-' + str(photoNr)

# Write result to CSV

In [7]:
# Column order: keys of the first row, followed by any key that only appears in
# later rows. Taking the header from the first row alone makes DictWriter raise
# ValueError as soon as another row carries an extra key, and fails outright on
# an empty list. Rows lacking a column get an empty cell.
fieldnames = list(dict.fromkeys(key for row in sorted_list for key in row))

with open('dataoutput_test2.csv', 'w', encoding='utf8', newline='') as output_file:
    fc = csv.DictWriter(output_file,
                        fieldnames=fieldnames,
                       )
    fc.writeheader()
    fc.writerows(sorted_list)