In [154]:
import pandas as pd
import json
from IPython.display import display
from pandas.api.types import is_numeric_dtype

In [155]:
def print_df(df, header = "df"):
    """Print a header and the number of rows, then render `df`.

    Parameters
    ----------
    df : object supporting len() and rich display (typically a DataFrame)
    header : str
        Caption printed before the element count.
    """
    summary = "Number of elements: {}".format(len(df))
    # One call, two lines: header first, then the element count
    print(header, summary, sep="\n")
    display(df)
    # Blank separation between consecutive reports
    print("\n")

In [156]:
def columnDistribution(df, column, displayColumns = []):
    """Print and display a profile of one column of a DataFrame.

    The report shows, in order: the rows whose value in `column` is
    non-empty, the rows whose value is empty, the exploded rows (only when
    the column holds lists or dicts), the distinct values and the value
    counts.

    A value counts as empty when it is missing (NaN/None), has length 0
    (empty string or empty list) or equals the placeholder "*".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is copied, never modified.
    column : str
        Name of the column to profile.
    displayColumns : list of str, optional
        Extra columns shown next to `column` in the displayed tables.
        `column` is appended when it is not already listed.

    Returns
    -------
    None
        Everything is emitted through print / display.
    """
    # Work on a private list so neither the caller's list nor the shared
    # default object is ever mutated.
    displayColumns = list(displayColumns)
    if column not in displayColumns:
        displayColumns.append(column)

    df = df.copy()

    # Record missing values BEFORE any string conversion: astype(str) can turn
    # NaN into the literal text "nan", which would then look like a real value.
    is_missing = df[column].isna()

    # Check for number columns (they have no .str accessor, so compare as text)
    if is_numeric_dtype(df[column]):
        df[column] = df[column].astype(str)

    values = df[column]
    is_empty = is_missing | (values.str.len() == 0) | (values == "*") | (values == "")

    # With value
    rows_df = df.loc[~is_empty]
    print_df(rows_df[displayColumns], "Rows with non-empty values in column (" + column + ")")

    # Empty value
    emptyRows_df = df.loc[is_empty]
    print_df(emptyRows_df[displayColumns], "Rows with empty values in column (" + column + ")")

    # Unique values
    # If any value is a list/dict, explode the column first. Every row is
    # inspected (not only the first one): a column mixing scalars and lists
    # would otherwise reach pd.unique with unhashable list values.
    explode_rows_df = rows_df.copy()
    has_nested = rows_df[column].map(lambda value: isinstance(value, (list, dict))).any()
    if has_nested:
        explode_rows_df = explode_rows_df.explode(column)

        # Exploded dataframe
        print("Dataframe exploded in column " + "(" + column + ")")
        display(explode_rows_df[displayColumns])
        print("\n")

    unique_values = pd.unique(explode_rows_df[column])
    print("Different values in column " + "(" + column + ")" + ": " + str(len(unique_values)))
    print("\n")
    print(list(unique_values))
    print("\n")

    # Value distribution
    distribution_rows_df = explode_rows_df[column].value_counts()
    print("Value distribution: ")
    display(distribution_rows_df)
    print("\n")



## DMH - DATA

In [157]:
# Root directory of the datasets, and the sub-directory holding parsed files
folder = 'data'
parsedFolder = "/".join([folder, 'parsed'])

**Artwork data**

Title: DMH - Pop_up_VR_Museum - Objects classification - English
- Description: Classification of all objects from Design Museum Helsinki's permanent collection that are currently in the Pop-up VR Museum. The information about these objects is stored in this dataset in English.
- UUID: 0daa0287-d7f4-4f03-a068-95f43afcc347
- URL: https://spice.kmi.open.ac.uk/dataset/details/111
- Number of items: 63

Each JSON entry includes the following fields (the dataset does not use ontologies):
- _id
- Object_name
- Object
- Special name
- Object number
- Designer
- Production_date
- Collection
- Manufacturer
- Dimension in cm: n x m x p format
- Weight in kg
- Material
- Colour
- Timestamp

In [158]:
# Load the parsed objects-classification dataset (DB111) from parsedFolder.
# NOTE(review): the rendered output of this frame shows mojibake
# (e.g. 'KÃ¤yttÃ¶kokoelma'), while the story datasets render 'ä' correctly.
# Presumably the parsed file was written with the wrong encoding; confirm upstream.
filename = 'DB111_DMH - Pop_up_VR_Museum - Objects classification - English'
objects_df = pd.read_json("{}/{}.json".format(parsedFolder, filename))
objects_df

Unnamed: 0,_id,tittle,Object,Special name,id,author,Production date,Collection,Manufacturer,Dimension in cm,...,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,_updated,year,ColourRGB,image
0,634f19096c70ef5cf152f237,Scandia,CUTLERY,*,44171,"Franck, Kaj",1952,KÃ¤yttÃ¶kokoelma,Hackman Sorsakoski,fork: 17.5 cm (length) x 0.15cm (thickness) k...,...,2022,11,16,11,44,16,1.0,1952.0,"[[109, 114, 120]]",
1,634f1908b7693f159a62e2f6,Savonia,CUTLERY,*,44168,"Babel, Adolf",1967,KÃ¤yttÃ¶kokoelma,Hackman Sorsakoski,fork: 19.5 cm (length) x 0.1 cm (thickness) k...,...,2022,11,16,11,43,14,1.0,1967.0,"[[109, 114, 120]]",
2,636bf322a8e46b2e35302848,Ultima Thule 2332,BOWL,fruit or dessert bowl,14219,"Wirkkala, Tapio",1970,*,"A AhlstrÃ¶m Oy, Iitalan lasitehdas",20x20x9,...,2022,11,9,18,36,18,,1970.0,[],https://designmuseum.api.digimuseum.co/wp-cont...
3,634f1907d8af1b50664089d0,*,JUG,teapot,B561,*,19th century,Antell,*,"16,8 x 14 x 15",...,2022,10,18,22,33,54,1.0,,"[[165, 42, 42], [92, 64, 51]]",https://designmuseum.api.digimuseum.co/wp-cont...
4,634f1903a45b33465c10ceb2,Windbreaker,JACKET,Jacket,44166,"Kellokumpu, Ritva",1984,KÃ¤yttÃ¶kokoelma,Torstai,*,...,2022,10,18,22,31,49,1.0,1984.0,"[[255, 165, 0], [0, 128, 0], [0, 0, 255], [255...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,634f18fee09bfd1d9a04b8ec,Paimio / Armchair 41,CHAIR,armchair,9249,"Aalto, Alvar",1932,*,"Artek, Huonekalutehdas Korhonen",60 x 64 x 80,...,2022,10,18,22,22,6,,1932.0,"[[0, 0, 0], [223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...
60,634f18fed4227a56290cb6c5,Pastille,CHAIR,armchair,44165,"Aarnio, Eero",1968,*,Asko Oy,92 x 92 x 53,...,2022,10,18,22,22,6,,1968.0,"[[255, 255, 0]]",https://designmuseum.api.digimuseum.co/wp-cont...
61,634f18feb7693f159a62e2ef,High chair 616,CHAIR,high chair,9226,"SchultÃ©n, Ben af",1965,*,"Artek Oy, Huonekalutehdas Korhonen Oy ja Norrc...","42,8 x 44 x 77",...,2022,10,18,22,22,6,,1965.0,"[[255, 0, 0], [223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...
62,634f18fd9d480d2bcc4ac79f,Stool 60,CHAIR,stool,44163,"Aalto, Alvar",1933,KÃ¤yttÃ¶kokoelma,Artek,38 x 38 x 44,...,2022,10,18,22,22,5,,1933.0,"[[223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...


In [159]:
# Column names available in the parsed objects frame
objects_df.columns

Index(['_id', 'tittle', 'Object', 'Special name', 'id', 'author',
       'Production date', 'Collection', 'Manufacturer', 'Dimension in cm',
       'Weight in kg', 'Materials', 'Colour', '_datasetid', '_timestamp',
       '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second', '_updated',
       'year', 'ColourRGB', 'image'],
      dtype='object')

In [160]:
# Profile the 'tittle' column (spelled this way in objects_df.columns),
# showing 'id' next to each row for context
columnDistribution(objects_df, 'tittle', displayColumns = ['id'])

Rows with non-empty values in column (tittle)
Number of elements: 60


Unnamed: 0,id,tittle
0,44171,Scandia
1,44168,Savonia
2,14219,Ultima Thule 2332
4,44166,Windbreaker
5,16850,Rautaristi (Iron Cross)
6,13906,Valkoiset hevoset (White Horses)
7,17876,Liekki (Flame)
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours)
9,32627,Seebra (Zebra)
10,16851,Suopursuja (Marsh Tea)




Rows with empty values in column (tittle)
Number of elements: 4


Unnamed: 0,id,tittle
3,B561,*
34,C370,*
55,17248,*
58,16101,*




Different values in column (tittle): 60


['Scandia', 'Savonia', 'Ultima Thule 2332', 'Windbreaker', 'Rautaristi (Iron Cross)', 'Valkoiset hevoset (White Horses)', 'Liekki (Flame)', 'NeljÃ¤ vÃ¤riÃ¤ (Four Colours)', 'Seebra (Zebra)', 'Suopursuja (Marsh Tea)', 'Raitakaita', 'Suo (Marsh)', 'Simpukka (Sea Shell)', 'Flora (model S)', 'Think and drink', 'Warm', 'Teema (model BAU)', 'Ruska (model S)', 'Katrilli', 'Iris (model U2)', 'Myrna (malli AX)', 'Rice porcelain cup (model FK/VA)', 'i-102 (Aroma 2002)', 'I-lasi 2004', 'Ultima Thule (18 cl)', '1718', '2744 cocktail glass', '1610', 'MK', 'BAU, Muumi', '1618', 'Fiskars scissors', 'Pehtoori 2615', 'Subway bench', 'Garbage Bin', 'Jopo', 'Solifer Export', 'Jerry', 'Mobira Talkman 450', 'Asa Futura', 'C 1810 B', 'Planetaariset Laaksot (Planetary Valleys)', 'Bo boo', 'Nokia 9210 Communicator', 'Monrepos, fabric: Keidas', 'Model 4, fabric: Sormus', 'Lappkojs', 'Fasaani 939.015.91 (Pheasant)', 'Jokapoika, fabric: Piccolo', 'Block', 'Viimeinen at

Scandia                                      1
Savonia                                      1
Pehtoori 2615                                1
Subway bench                                 1
Garbage Bin                                  1
Jopo                                         1
Solifer Export                               1
Jerry                                        1
Mobira Talkman 450                           1
Asa Futura                                   1
C 1810 B                                     1
Planetaariset Laaksot (Planetary Valleys)    1
Bo boo                                       1
Nokia 9210 Communicator                      1
Monrepos, fabric: Keidas                     1
Model 4, fabric: Sormus                      1
Lappkojs                                     1
Fasaani 939.015.91 (Pheasant)                1
Jokapoika, fabric: Piccolo                   1
Block                                        1
Viimeinen ateria (The Last Meal)             1
PÃ¤ssi (Ram) 





In [85]:
# Profile the 'author' column, showing 'id' and 'tittle' for context
columnDistribution(objects_df, 'author', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (author)
Number of elements: 63


Unnamed: 0,id,tittle,author
0,44171,Scandia,"Franck, Kaj"
1,44168,Savonia,"Babel, Adolf"
2,14219,Ultima Thule 2332,"Wirkkala, Tapio"
4,44166,Windbreaker,"Kellokumpu, Ritva"
5,16850,Rautaristi (Iron Cross),"KeinÃ¤nen-Baeckmann, Aino"
...,...,...,...
59,9249,Paimio / Armchair 41,"Aalto, Alvar"
60,44165,Pastille,"Aarnio, Eero"
61,9226,High chair 616,"SchultÃ©n, Ben af"
62,44163,Stool 60,"Aalto, Alvar"




Rows with empty values in column (author)
Number of elements: 1


Unnamed: 0,id,tittle,author
3,B561,*,*




Different values in column (author): 48


['Franck, Kaj', 'Babel, Adolf', 'Wirkkala, Tapio', 'Kellokumpu, Ritva', 'KeinÃ¤nen-Baeckmann, Aino', 'Sotavalta, Impi', 'Gallen-Kallela, Akseli', 'Simberg-EhrstrÃ¶m, Uhra', 'Brummer, Eva', 'Raitio, A. W.', 'LeppÃ¤nen, Helena', 'Puotila, Ritva', 'Metsovaara, Marjatta', 'ProcopÃ©, Ulla  Tomula, Esteri', 'Rintaniemi, PÃ¤ivi', 'AlfstrÃ¶m, Tony,  Keaney, Brian', 'ProcopÃ©, Ulla', 'Tarna, Tauno', 'Finch, Alfred William', 'Osol, Olga', 'Holzer-Kjellberg, Friedl', 'Sarpaneva, Timo', 'Hopea, Saara', 'Jansson, Tove  Franck, Kaj  Slotte-Elevant, Tove (decoration)', 'BÃ¤ckstrÃ¶m, Olof,  LindÃ©n, Olavi', 'Nurmesniemi, Antti', 'Rajalin, BÃ¶rje,  Nurmesniemi, Antti', 'Creadesign Oy,   KÃ¤hÃ¶nen, Hannu', 'Hellman,   Rislakki, Eero (colours)', 'Lindh, Richard', 'Rislakki, Eero', 'Pitkonen, Jorma', 'Saura, Samuli', 'Karjalainen, Heikki,  JÃ¤ntti, Mikko', 'WeckstrÃ¶m, BjÃ¶rn', 'Ratia, Ristomatti (model of the bag),  Wakisaka, Katsuji (print)', 'Johansson, Panu'

Franck, Kaj                                                         6
Sarpaneva, Timo                                                     3
Finch, Alfred William                                               3
Aalto, Alvar                                                        3
Nurmesniemi, Vuokko                                                 2
Hopea, Saara                                                        2
Nurmesniemi, Antti                                                  2
Wirkkala, Tapio                                                     2
Aarikka, Kaija                                                      1
Saura, Samuli                                                       1
Hellman,   Rislakki, Eero (colours)                                 1
Lindh, Richard                                                      1
SchultÃ©n, Ben af                                                   1
Rislakki, Eero                                                      1
Pitkonen, Jorma     





In [86]:
# Profile the 'Collection' column, showing 'id' and 'tittle' for context
columnDistribution(objects_df, 'Collection', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (Collection)
Number of elements: 24


Unnamed: 0,id,tittle,Collection
0,44171,Scandia,KÃ¤yttÃ¶kokoelma
1,44168,Savonia,KÃ¤yttÃ¶kokoelma
3,B561,*,Antell
4,44166,Windbreaker,KÃ¤yttÃ¶kokoelma
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours),OpetusministeriÃ¶n ryijykokoelma
9,32627,Seebra (Zebra),OpetusministeriÃ¶n ryijykokoelma
11,44184,Raitakaita,KÃ¤yttÃ¶kokoelma
12,32361,Suo (Marsh),OpetusministeriÃ¶n ryijykokoelma
14,44181,Flora (model S),KÃ¤yttÃ¶kokoelma
15,44182,Think and drink,KÃ¤yttÃ¶kokoelma




Rows with empty values in column (Collection)
Number of elements: 40


Unnamed: 0,id,tittle,Collection
2,14219,Ultima Thule 2332,*
5,16850,Rautaristi (Iron Cross),*
6,13906,Valkoiset hevoset (White Horses),*
7,17876,Liekki (Flame),*
10,16851,Suopursuja (Marsh Tea),*
13,33288,Simpukka (Sea Shell),*
23,7762,i-102 (Aroma 2002),*
24,ILM7606,I-lasi 2004,*
25,14218,Ultima Thule (18 cl),*
26,10026,1718,*




Different values in column (Collection): 5


['KÃ¤yttÃ¶kokoelma', 'Antell', 'OpetusministeriÃ¶n ryijykokoelma', 'Marimekko', 'Vuokko']


Value distribution: 


KÃ¤yttÃ¶kokoelma                    17
OpetusministeriÃ¶n ryijykokoelma     3
Marimekko                            2
Antell                               1
Vuokko                               1
Name: Collection, dtype: int64





In [87]:
# Profile the 'Manufacturer' column, showing 'id' and 'tittle' for context
columnDistribution(objects_df, 'Manufacturer', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (Manufacturer)
Number of elements: 63


Unnamed: 0,id,tittle,Manufacturer
0,44171,Scandia,Hackman Sorsakoski
1,44168,Savonia,Hackman Sorsakoski
2,14219,Ultima Thule 2332,"A AhlstrÃ¶m Oy, Iitalan lasitehdas"
4,44166,Windbreaker,Torstai
5,16850,Rautaristi (Iron Cross),Oy Neovius Ab
...,...,...,...
59,9249,Paimio / Armchair 41,"Artek, Huonekalutehdas Korhonen"
60,44165,Pastille,Asko Oy
61,9226,High chair 616,"Artek Oy, Huonekalutehdas Korhonen Oy ja Norrc..."
62,44163,Stool 60,Artek




Rows with empty values in column (Manufacturer)
Number of elements: 1


Unnamed: 0,id,tittle,Manufacturer
3,B561,*,*




Different values in column (Manufacturer): 47


['Hackman Sorsakoski', 'A AhlstrÃ¶m Oy, Iitalan lasitehdas', 'Torstai', 'Oy Neovius Ab', 'HumalajÃ¤rvi, Kerttu', 'Suomen KÃ¤sityÃ¶n ystÃ¤vÃ¤t /  Asikainen, Ann-Mari ja Lindberg, Merja', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Sillfors, Pirkko', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Ahlblad, Margit', 'Hackman Designor', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Hartikainen, Anneli', 'Oy Finnrya Ab', 'Arabia', 'Amfora', 'Tonfisk', 'Sarvis Oy', 'Ab Iris Oy', 'Iittala', 'A AhlstrÃ¶m Oy, Iittalan lasitehdas', 'NuutajÃ¤rven lasi', 'Fiskars', 'WÃ¤rtsilÃ¤ Oyj Abp, JÃ¤rvenpÃ¤Ã¤n emali', 'W. Rosenlew & Co, Porin konepaja', 'Artekno Oy', 'SÃ¤kkivÃ¤line company', 'Helkama', 'Wilh Bensow Oy', 'HuhtamÃ¤ki Oyj, PyrkijÃ¤ Oy', 'Mobira Oy', 'ASA Radio Oy Turku', 'Fiskars Oyj Abp', 'Lapponia Jewelry Oy', 'Marimekko', 'Nokia Oyj', 'Vuokko Oy', 'Kotilieden Aitta', 'Marimekko Oyj', 'Design House Stockholm', 'Arabia / Bryk, Rut', 'Aarikka Oy', 'Arabia / Salmenhaara, Kyllikki', 'Lilj

Arabia                                                                   7
NuutajÃ¤rven lasi                                                        5
Ab Iris Oy                                                               3
Hackman Sorsakoski                                                       2
Oy Neovius Ab                                                            2
Marimekko                                                                2
Iittala                                                                  2
Marimekko Oyj                                                            1
ASA Radio Oy Turku                                                       1
Fiskars Oyj Abp                                                          1
Lapponia Jewelry Oy                                                      1
Nokia Oyj                                                                1
Vuokko Oy                                                                1
Kotilieden Aitta         





In [88]:
# Profile the 'Materials' column; its cells hold lists, so columnDistribution
# explodes them into one row per material before counting
columnDistribution(objects_df, 'Materials', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (Materials)
Number of elements: 64


Unnamed: 0,id,tittle,Materials
0,44171,Scandia,[metal]
1,44168,Savonia,[metal]
2,14219,Ultima Thule 2332,[glass]
3,B561,*,[stoneware]
4,44166,Windbreaker,[polyesterivanu]
...,...,...,...
59,9249,Paimio / Armchair 41,[birch]
60,44165,Pastille,[fiberglass]
61,9226,High chair 616,"[solidbirch, plywood]"
62,44163,Stool 60,[fabric]




Rows with empty values in column (Materials)
Number of elements: 0


Unnamed: 0,id,tittle,Materials




Dataframe exploded in column (Materials)


Unnamed: 0,id,tittle,Materials
0,44171,Scandia,metal
1,44168,Savonia,metal
2,14219,Ultima Thule 2332,glass
3,B561,*,stoneware
4,44166,Windbreaker,polyesterivanu
...,...,...,...
60,44165,Pastille,fiberglass
61,9226,High chair 616,solidbirch
61,9226,High chair 616,plywood
62,44163,Stool 60,fabric




Different values in column (Materials): 30


['metal', 'glass', 'stoneware', 'polyesterivanu', 'wool', 'cotton', 'porcelain', 'ceramics', 'wood', 'plastic', 'enamel', 'melamine', 'castiron', 'teak', 'fiberglass', 'leather', 'reinforcedplastic', 'castaluminium', 'silver', 'marquis', 'printedcotton', 'faience', 'pine', 'redclay', 'birchplywood', 'pinelaminate(seat)', 'birch', 'solidbirch', 'plywood', 'fabric']


Value distribution: 


glass                 11
metal                  9
wool                   9
ceramics               8
cotton                 7
plastic                6
stoneware              3
fiberglass             2
pine                   2
porcelain              2
wood                   2
redclay                2
teak                   2
birchplywood           1
solidbirch             1
pinelaminate(seat)     1
birch                  1
printedcotton          1
plywood                1
faience                1
leather                1
marquis                1
silver                 1
castaluminium          1
reinforcedplastic      1
castiron               1
melamine               1
enamel                 1
polyesterivanu         1
fabric                 1
Name: Materials, dtype: int64





In [89]:
# Profile the 'Colour' column (list-valued, so it is exploded before counting).
# NOTE(review): the output lists both 'white' and ' white' (leading space) as
# distinct values, so the counts are split; the list items look un-stripped.
columnDistribution(objects_df, 'Colour', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (Colour)
Number of elements: 64


Unnamed: 0,id,tittle,Colour
0,44171,Scandia,[metal]
1,44168,Savonia,[metal]
2,14219,Ultima Thule 2332,[colourless glass]
3,B561,*,"[light brown, dark brown]"
4,44166,Windbreaker,"[orange, green, blue, pink]"
...,...,...,...
59,9249,Paimio / Armchair 41,"[black, birch]"
60,44165,Pastille,[yellow]
61,9226,High chair 616,"[red, birch]"
62,44163,Stool 60,[birch]




Rows with empty values in column (Colour)
Number of elements: 0


Unnamed: 0,id,tittle,Colour




Dataframe exploded in column (Colour)


Unnamed: 0,id,tittle,Colour
0,44171,Scandia,metal
1,44168,Savonia,metal
2,14219,Ultima Thule 2332,colourless glass
3,B561,*,light brown
3,B561,*,dark brown
...,...,...,...
61,9226,High chair 616,red
61,9226,High chair 616,birch
62,44163,Stool 60,birch
63,44164,Pirkka,pine




Different values in column (Colour): 44


['metal', 'colourless glass', 'light brown', 'dark brown', 'orange', ' green', ' blue', ' pink', 'violet', ' yellowish', 'white', ' red', ' brown', '?', 'black', ' white', 'brown', ' light pastel shade', 'blue', 'different tones of red brown', 'red', ' aniline', ' yellow', 'yellow', ' wood', ' gold', 'green gray', ' violet', 'rubin red', ' purple', 'gray', ' black', 'clear', 'green', 'silver', ' orange', 'wood', 'blue tones', ' several colours', ' blue-green', 'rio brown', ' birch', 'birch', 'pine']


Value distribution: 


 white                          10
red                              7
white                            6
brown                            6
black                            5
blue                             5
 black                           5
 green                           5
 red                             4
orange                           4
yellow                           4
metal                            3
wood                             3
 blue                            3
 brown                           3
colourless glass                 3
 violet                          2
green                            2
silver                           2
 birch                           2
gray                             2
?                                2
birch                            1
 orange                          1
 blue-green                      1
blue tones                       1
clear                            1
 several colours                 1
rio brown           





In [90]:
# Profile the 'image' column, showing 'id' and 'tittle' for context
columnDistribution(objects_df, 'image', displayColumns = ['id', 'tittle'])

Rows with non-empty values in column (image)
Number of elements: 42


Unnamed: 0,id,tittle,image
2,14219,Ultima Thule 2332,https://designmuseum.api.digimuseum.co/wp-cont...
3,B561,*,https://designmuseum.api.digimuseum.co/wp-cont...
20,44175,Iris (model U2),https://designmuseum.api.digimuseum.co/wp-cont...
23,7762,i-102 (Aroma 2002),https://designmuseum.api.digimuseum.co/wp-cont...
24,ILM7606,I-lasi 2004,https://designmuseum.api.digimuseum.co/wp-cont...
25,14218,Ultima Thule (18 cl),https://designmuseum.api.digimuseum.co/wp-cont...
26,10026,1718,https://designmuseum.api.digimuseum.co/wp-cont...
27,12595,2744 cocktail glass,https://designmuseum.api.digimuseum.co/wp-cont...
28,22769,1610,https://designmuseum.api.digimuseum.co/wp-cont...
29,AM1699,MK,https://designmuseum.api.digimuseum.co/wp-cont...




Rows with empty values in column (image)
Number of elements: 22


Unnamed: 0,id,tittle,image
0,44171,Scandia,
1,44168,Savonia,
4,44166,Windbreaker,
5,16850,Rautaristi (Iron Cross),
6,13906,Valkoiset hevoset (White Horses),
7,17876,Liekki (Flame),
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours),
9,32627,Seebra (Zebra),
10,16851,Suopursuja (Marsh Tea),
11,44184,Raitakaita,




Different values in column (image): 39


['https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-017298_wirkkala_ultima_thule-2000x1621.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/10/wsi-imageoptim-b561-2000x1500.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/08/wsi-imageoptim-b548-2000x2665.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025419-2000x1333.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-dmpm_20150103_2785-2000x3000.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025608__franck_p-1-1327x1823.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-franck-1610-lh-1667x2379.jpeg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/05/kermakko-mk-am1699-320x320.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/09/wsi

https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-017298_wirkkala_ultima_thule-2000x1621.jpg                                                  2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025419-2000x1333.jpg                                                                        2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-dmpm_20150103_2785-2000x3000.jpg                                                            2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/04/wsi-imageoptim-b548-2000x2665.jpg                                                                          1
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/03/wsi-imageoptim-22320118103827303-320x495.jpg                                                               1
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/09/wsi-imageoptim-38518-899x1299.jpg                          





# DMH Stories (audio)

In [91]:
# Load the DEGARI audio-recorded stories file straight from `folder`.
# NOTE(review): the displayed frame has a column named 0 whose cells are whole
# record dicts, so the story fields are not flattened into columns here.
filename = 'DMH_127-AudioRecorded-DEGARI'
audioStoryDegari_df = pd.read_json("{}/{}.json".format(folder, filename))
audioStoryDegari_df

Unnamed: 0,0,_id,_datasetid,_timestamp,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,_updated
0,"{'_id': '63738b60fb9758706e727881', 'Object na...",63738b60fb9758706e727881,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
1,"{'_id': '63738b5b28b056289c6b6f10', 'Object na...",63738b5b28b056289c6b6f10,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
2,"{'_id': '63738b65461cf10d8073bc8e', 'Object na...",63738b65461cf10d8073bc8e,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
3,"{'_id': '63738b634d6d36651d600e14', 'Object na...",63738b634d6d36651d600e14,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
4,"{'_id': '63738b79461cf10d8073bc9a', 'Object na...",63738b79461cf10d8073bc9a,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101412,2022,11,22,7,16,52,True
...,...,...,...,...,...,...,...,...,...,...,...
117,"{'_id': '63738b64b6bab71ffc679aaf', 'Object na...",63738b64b6bab71ffc679aaf,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101390,2022,11,22,7,16,30,True
118,"{'_id': '63738b6bb6bab71ffc679ab3', 'Object na...",63738b6bb6bab71ffc679ab3,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True
119,"{'_id': '63738b6f619d8873905ef709', 'Object na...",63738b6f619d8873905ef709,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True
120,"{'_id': '63738b55619d8873905ef6f8', 'Object na...",63738b55619d8873905ef6f8,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True


In [92]:
# Load the semantic-annotation dataset (DB134) for the transcribed stories
filename = 'DB134_DMH - Pop-up_VR_Museum - Semantic Annotation for Transcribed Audio-recorded Stories'
audioStoryEmotions_df = pd.read_json("{}/{}.json".format(folder, filename))
audioStoryEmotions_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,emotions,sentiment,toxicity,entities,_updated
0,63738b53b6bab71ffc679aa4,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S3,Finnish,...,12,2,14,50,32,"{'Sadness': 2, 'Interest': 3, 'Joy': 1, 'Anger...",{'Positive': 0.33333333333333304},[],"{'dbr:Screw': {'@types': [''], 'confidence': 0...",
1,63738b52ec6863140217f367,Bicycle - Jopo,11780,15/03/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#751188S1,Finnish,...,12,2,14,50,32,"{'Joy': 2, 'Interest': 1, 'Anticipation': 1, '...",{'Positive': 1},[],{'dbr:Bicycle': {'@types': ['http://dbpedia.or...,
2,63738b53fb9758706e727878,Canister - Jerry,32030,22/03/2022,Laajasalo,Maria Levander,789889,Senior citizens,#32030S1,Finnish,...,12,2,14,50,31,{'Interest': 1},{'Positive': 1},[],{'dbr:Gasoline': {'@types': ['http://dbpedia.o...,
3,63738b53f3d1b15c7d3cd1f0,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S2,Finnish,...,12,2,14,50,31,"{'Joy': 1, 'Disapproval': 1, 'Interest': 2, 'S...",{'Positive': 0.6000000000000001},[],{'dbr:Fishing_net': {'@types': ['http://www.w3...,
4,63738b5578ff61337567e273,Ceramic tile - Viimeinen Ateria,35971,21/09/2022,"Workshop for a group, at Design Museum",Saifuddin,119809,Finnish language students,#35971S2,Finnish,...,12,2,14,50,30,"{'Interest': 2, 'Joy': 1}",{'Positive': 1},[],[],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,63738b7aec6863140217f380,Vase - Aalto vase,41793,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#41793S6,Finnish,...,12,2,14,49,49,"{'Fear': 1, 'Joy': 2, 'Interest': 2, 'Love': 1}",{'Positive': 0.7142857142857141},[],{'dbr:Flea': {'@types': ['http://dbpedia.org/o...,
117,63738b5c78ff61337567e278,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,923304,Senior citizens,#8182S7,Finnish,...,12,2,14,49,48,{'Interest': 2},{'Positive': 1},[],"{'dbr:Coffeemaker': {'@types': [''], 'confiden...",1.0
118,63738b5c78232564042adf4b,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S6,Finnish,...,12,2,14,49,48,{'Interest': 1},{'Positive': 1},[],"{'dbr:Coffeemaker': {'@types': [''], 'confiden...",1.0
119,63738b7bfb9758706e727891,*,B561,21/09/2022,"Workshop for a group, at Design Museum",Hamed Malek Rashed,317892,Finnish language students,#B561S1,English,...,12,2,14,49,48,"{'Anticipation': 1, 'Interest': 1}",{'Positive': 1},[],{'dbr:Teapot': {'@types': ['http://www.w3.org/...,


In [93]:
# Load the values-and-emotions dataset (DB132) for the transcribed stories
filename = 'DB132_DMH - Pop-up_VR_Museum - Values and Emotions for Transcribed Audio-recorded Stories'
audioStoryValues_df = pd.read_json("{}/{}.json".format(folder, filename))
audioStoryValues_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,MFT_Values,EkmanEmotions,Original language.1,_updated
0,63738b53b6bab71ffc679aa4,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S3,Finnish,...,2022,12,1,21,22,58,[],[],,
1,63738b52ec6863140217f367,Bicycle - Jopo,11780,15/03/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#751188S1,Finnish,...,2022,12,1,21,22,58,[],[enjoyment],,
2,63738b54461cf10d8073bc83,Canister – Jerry,32030,27/09/2022,Design Evening,Lauri,213442,General audience,#32030S5,Finnish,...,2022,12,1,21,22,57,[],[],,
3,63738b53fb9758706e727878,Canister - Jerry,32030,22/03/2022,Laajasalo,Maria Levander,789889,Senior citizens,#32030S1,Finnish,...,2022,12,1,21,22,57,[fairness],[],,
4,63738b53f3d1b15c7d3cd1f0,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S2,Finnish,...,2022,12,1,21,22,57,"[harm, fairness]",[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,63738b7aec6863140217f380,Vase - Aalto vase,41793,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#41793S6,Finnish,...,2022,12,1,21,22,17,[care],[],,
118,63738b5c78232564042adf4b,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S6,Finnish,...,2022,12,1,21,22,16,[harm],[],,1.0
119,63738b7bfb9758706e727891,*,B561,21/09/2022,"Workshop for a group, at Design Museum",Hamed Malek Rashed,317892,Finnish language students,#B561S1,English,...,2022,12,1,21,22,16,[harm],[],,
120,63738b5b619d8873905ef6fd,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S5,Finnish,...,2022,12,1,21,22,15,[care],[],,1.0


In [94]:
# Missing story in audioStoryEmotions
df = audioStoryValues_df[['_id', 'Story ID(#)']]
df2 = audioStoryEmotions_df[['_id', 'Story ID(#)']]

audioStoryMerged_df = df.merge(df2.drop_duplicates(), on=['_id'], 
                   how='left', indicator=True)
missing_df = audioStoryMerged_df.loc[ audioStoryMerged_df['_merge'] == 'left_only' ]
missing_df

Unnamed: 0,_id,Story ID(#)_x,Story ID(#)_y,_merge
45,63738b62619d8873905ef701,#44175S1,,left_only


In [95]:
# Column names of the values/emotions stories frame.
# NOTE(review): the output lists both 'Original language' and
# 'Original language ' (trailing space) as separate columns.
audioStoryValues_df.columns

Index(['_id', 'Object name', 'Object number', 'Date of recording',
       'Context/event/workshop', 'Contributor name', 'User ID',
       'End-user community', 'Story ID(#)', 'Original language',
       'Audio recording file name', 'Start timestamp', 'End timestamp',
       'Finnish translation', 'English translation', 'Swedish translation',
       'Other language translation', 'Additional info', '_datasetid',
       '_timestamp', '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second',
       'MFT_Values', 'EkmanEmotions', 'Original language ', '_updated'],
      dtype='object')

**Column distribution (shared columns)**

In [96]:
# Profile the 'Object name' column, showing '_id' and 'Story ID(#)' for context.
# NOTE(review): the output counts 'Canister - Jerry' (hyphen) and
# 'Canister – Jerry' (en dash) as two different values.
columnDistribution(audioStoryValues_df, 'Object name', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (Object name)
Number of elements: 121


Unnamed: 0,_id,Story ID(#),Object name
0,63738b53b6bab71ffc679aa4,#32030S3,Canister - Jerry
1,63738b52ec6863140217f367,#751188S1,Bicycle - Jopo
2,63738b54461cf10d8073bc83,#32030S5,Canister – Jerry
3,63738b53fb9758706e727878,#32030S1,Canister - Jerry
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Canister - Jerry
...,...,...,...
116,63738b7a4d6d36651d600e22,#41793S5,Vase - Aalto vase
117,63738b7aec6863140217f380,#41793S6,Vase - Aalto vase
118,63738b5c78232564042adf4b,#8182S6,Coffee pot - Pehtoori
120,63738b5b619d8873905ef6fd,#8182S5,Coffee pot - Pehtoori




Rows with empty values in column (Object name)
Number of elements: 1


Unnamed: 0,_id,Story ID(#),Object name
119,63738b7bfb9758706e727891,#B561S1,*




Different values in column (Object name): 22


['Canister - Jerry', 'Bicycle - Jopo', 'Canister – Jerry', 'Ceramic tile - Viimeinen Ateria', 'Chair - Pastille', 'Coffee pot - Pehtoori', 'Chair: Pirkka', 'Cup - Myrna', 'Cutlery - Savonia', 'Decorative object - Pässi (Ram)', 'Cup: Iris', 'High chair 616', 'Moped - Solifer export', 'Mug - Moomin mug', 'Pot - Timo Sarpaneva', 'Rug - Suo', 'Scissors - Fiskars', 'Shirt - Jokapoika', 'Smart phone: Nokia 9210 Communicator', 'Stool - Stool 60', 'Stool - Sauna Stool', 'Vase - Aalto vase']


Value distribution: 


Rug - Suo                               16
Decorative object - Pässi (Ram)         16
Cup - Myrna                             15
Chair - Pastille                        12
Moped - Solifer export                  10
Coffee pot - Pehtoori                    9
Vase - Aalto vase                        6
Stool - Stool 60                         5
Shirt - Jokapoika                        4
Scissors - Fiskars                       4
Canister - Jerry                         4
Cutlery - Savonia                        4
Mug - Moomin mug                         3
Pot - Timo Sarpaneva                     3
Ceramic tile - Viimeinen Ateria          2
Canister – Jerry                         2
Bicycle - Jopo                           1
Cup: Iris                                1
Chair: Pirkka                            1
Smart phone: Nokia 9210 Communicator     1
Stool - Sauna Stool                      1
High chair 616                           1
Name: Object name, dtype: int64





In [97]:
# Value distribution of 'Object number' for the audio stories.
# Values are not purely numeric (the recorded output includes 'C370' and 'B561').
columnDistribution(audioStoryValues_df, 'Object number', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (Object number)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),Object number
0,63738b53b6bab71ffc679aa4,#32030S3,32030
1,63738b52ec6863140217f367,#751188S1,11780
2,63738b54461cf10d8073bc83,#32030S5,32030
3,63738b53fb9758706e727878,#32030S1,32030
4,63738b53f3d1b15c7d3cd1f0,#32030S2,32030
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,41793
118,63738b5c78232564042adf4b,#8182S6,8182
119,63738b7bfb9758706e727891,#B561S1,B561
120,63738b5b619d8873905ef6fd,#8182S5,8182




Rows with empty values in column (Object number)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),Object number




Different values in column (Object number): 22


['32030', '11780', '35971', '44165', '8182', '44164', '44176', '44168', '41768', '44175', '9226', '32271', '44185', 'C370', '32361', '44174', '41821', '41377', '44163', '9321', '41793', 'B561']


Value distribution: 


41768    16
32361    16
44176    15
44165    12
32271    10
8182      9
41793     6
32030     6
44163     5
44168     4
44174     4
41821     4
44185     3
C370      3
35971     2
44175     1
9226      1
11780     1
44164     1
41377     1
9321      1
B561      1
Name: Object number, dtype: int64





In [98]:
# Number of audio stories per 'Contributor name'.
# NOTE(review): the recorded output shows what appear to be real contributor
# names — confirm it may be shared, or clear the output before publishing.
columnDistribution(audioStoryValues_df, 'Contributor name', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (Contributor name)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),Contributor name
0,63738b53b6bab71ffc679aa4,#32030S3,Päivi Castren-Kortekangas
1,63738b52ec6863140217f367,#751188S1,Eeva-Kaarina Öyry
2,63738b54461cf10d8073bc83,#32030S5,Lauri
3,63738b53fb9758706e727878,#32030S1,Maria Levander
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Päivi Castren-Kortekangas
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Terttu Valonen
118,63738b5c78232564042adf4b,#8182S6,Anonymous
119,63738b7bfb9758706e727891,#B561S1,Hamed Malek Rashed
120,63738b5b619d8873905ef6fd,#8182S5,Anonymous




Rows with empty values in column (Contributor name)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),Contributor name




Different values in column (Contributor name): 31


['Päivi Castren-Kortekangas', 'Eeva-Kaarina Öyry', 'Lauri', 'Maria Levander', 'Saifuddin', 'Anonymous', 'Mirja. J', 'Olli', 'Riitta Katriina Louhensalo', 'Ana Rodrigues', 'Anita Salmela', 'Elmeri Härkönen', 'Leena Lappalainen', 'Terttu Valonen', 'Marja-Liisa', 'Tuuli Tiihonen', 'Fish', 'Pauliina Härkönen', 'Sofi', 'Helena', 'Veikko Luoti', 'Kaarina Oinonen', 'Kristiina Haara', 'Leena Riitinen', 'Marke Kaskimies', 'Pia Hintikka', 'Sanjeewa Pawantha', 'Anneli Luoti', 'Kirsti L', 'Liisa Kemppi', 'Hamed Malek Rashed']


Value distribution: 


Anonymous                     18
Sofi                          11
Riitta Katriina Louhensalo     9
Eeva-Kaarina Öyry              8
Leena Lappalainen              7
Terttu Valonen                 7
Ana Rodrigues                  6
Marke Kaskimies                5
Fish                           5
Leena Riitinen                 4
Olli                           4
Päivi Castren-Kortekangas      3
Kirsti L                       3
Kristiina Haara                3
Helena                         3
Tuuli Tiihonen                 3
Mirja. J                       3
Marja-Liisa                    2
Anita Salmela                  2
Saifuddin                      2
Maria Levander                 2
Pia Hintikka                   2
Lauri                          2
Pauliina Härkönen              1
Elmeri Härkönen                1
Veikko Luoti                   1
Kaarina Oinonen                1
Sanjeewa Pawantha              1
Anneli Luoti                   1
Liisa Kemppi                   1
Hamed Male





In [99]:
# Number of audio stories per 'User ID'
# (columnDistribution casts numeric columns to str before counting).
columnDistribution(audioStoryValues_df, 'User ID', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (User ID)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),User ID
0,63738b53b6bab71ffc679aa4,#32030S3,789838
1,63738b52ec6863140217f367,#751188S1,751188
2,63738b54461cf10d8073bc83,#32030S5,213442
3,63738b53fb9758706e727878,#32030S1,789889
4,63738b53f3d1b15c7d3cd1f0,#32030S2,789838
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,677821
118,63738b5c78232564042adf4b,#8182S6,501851
119,63738b7bfb9758706e727891,#B561S1,317892
120,63738b5b619d8873905ef6fd,#8182S5,501851




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),User ID




Different values in column (User ID): 41


['789838', '751188', '213442', '789889', '119809', '315375', '234555', '845666', '921848', '344521', '788016', '111222', '147199', '924889', '331897', '594472', '677821', '434969', '626889', '388721', '185280', '254668', '451211', '512341', '964084', '436574', '501851', '766245', '523452', '139120', '766242', '652225', '981129', '133333', '221852', '631141', '111333', '561211', '608474', '317892', '923304']


Value distribution: 


512341    11
788016     9
751188     8
677821     7
594472     7
111222     6
501851     5
254668     5
652225     5
344521     4
766242     4
561211     3
139120     3
436574     3
388721     3
789838     3
845666     3
981129     2
234555     2
626889     2
921848     2
119809     2
789889     2
213442     2
147199     2
315375     2
608474     1
111333     1
631141     1
317892     1
221852     1
133333     1
185280     1
523452     1
766245     1
964084     1
451211     1
434969     1
331897     1
924889     1
923304     1
Name: User ID, dtype: int64





In [100]:
# Number of audio stories per 'End-user community'.
columnDistribution(audioStoryValues_df, 'End-user community', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (End-user community)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),End-user community
0,63738b53b6bab71ffc679aa4,#32030S3,Senior citizens
1,63738b52ec6863140217f367,#751188S1,Senior citizens
2,63738b54461cf10d8073bc83,#32030S5,General audience
3,63738b53fb9758706e727878,#32030S1,Senior citizens
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Senior citizens
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Senior citizens
118,63738b5c78232564042adf4b,#8182S6,Senior citizens
119,63738b7bfb9758706e727891,#B561S1,Finnish language students
120,63738b5b619d8873905ef6fd,#8182S5,Senior citizens




Rows with empty values in column (End-user community)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),End-user community




Different values in column (End-user community): 4


['Senior citizens', 'General audience', 'Finnish language students', 'Asylum seekers']


Value distribution: 


Senior citizens              87
General audience             20
Asylum seekers               12
Finnish language students     3
Name: End-user community, dtype: int64





In [101]:
# Number of audio stories per 'Original language'.
# NOTE(review): the recorded output reports 4 rows with an empty value; the frame
# also has an 'Original language ' column (trailing space), so those rows may
# hold their value there — confirm.
columnDistribution(audioStoryValues_df, 'Original language', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (Original language)
Number of elements: 118


Unnamed: 0,_id,Story ID(#),Original language
0,63738b53b6bab71ffc679aa4,#32030S3,Finnish
1,63738b52ec6863140217f367,#751188S1,Finnish
2,63738b54461cf10d8073bc83,#32030S5,Finnish
3,63738b53fb9758706e727878,#32030S1,Finnish
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Finnish
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Finnish
118,63738b5c78232564042adf4b,#8182S6,Finnish
119,63738b7bfb9758706e727891,#B561S1,English
120,63738b5b619d8873905ef6fd,#8182S5,Finnish




Rows with empty values in column (Original language)
Number of elements: 4


Unnamed: 0,_id,Story ID(#),Original language
9,63738b56fb9758706e72787b,#44165S3,
20,63738b5978ff61337567e276,#44165S10,
83,63738b6f461cf10d8073bc94,#32361S2,
86,63738b704d6d36651d600e1c,#32361S6,




Different values in column (Original language): 2


['Finnish', 'English']


Value distribution: 


Finnish    99
English    19
Name: Original language, dtype: int64





**Column distribution (extra columns)**

In [102]:
# Distribution of 'MFT_Values' for the audio stories.
# The column holds lists, so columnDistribution explodes them first: a story
# tagged with several values is counted once for each of them.
columnDistribution(audioStoryValues_df, 'MFT_Values', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (MFT_Values)
Number of elements: 60


Unnamed: 0,_id,Story ID(#),MFT_Values
3,63738b53fb9758706e727878,#32030S1,[fairness]
4,63738b53f3d1b15c7d3cd1f0,#32030S2,"[harm, fairness]"
5,63738b5578232564042adf46,#35971S1,"[liberty, care]"
6,63738b5578ff61337567e273,#35971S2,[loyalty]
8,63738b56ec6863140217f36a,#44165S2,[harm]
11,63738b57b6bab71ffc679aa7,#44165S5,[liberty]
16,63738b57f3d1b15c7d3cd1f3,#44165S4,[care]
17,63738b59ec6863140217f36c,#44165S12,[loyalty]
22,63738b5b461cf10d8073bc88,#8182S4,[fairness]
24,63738b5af3d1b15c7d3cd1f5,#8182S1,"[harm, fairness]"




Rows with empty values in column (MFT_Values)
Number of elements: 62


Unnamed: 0,_id,Story ID(#),MFT_Values
0,63738b53b6bab71ffc679aa4,#32030S3,[]
1,63738b52ec6863140217f367,#751188S1,[]
2,63738b54461cf10d8073bc83,#32030S5,[]
7,63738b5428b056289c6b6f0b,#32030S4,[]
9,63738b56fb9758706e72787b,#44165S3,[]
...,...,...,...
107,63738b7828b056289c6b6f22,#44163S5,[]
108,63738b774d6d36651d600e20,#9321S1,[]
113,63738b79619d8873905ef70f,#41793S2,[]
116,63738b7a4d6d36651d600e22,#41793S5,[]




Dataframe exploded in column (MFT_Values)


Unnamed: 0,_id,Story ID(#),MFT_Values
3,63738b53fb9758706e727878,#32030S1,fairness
4,63738b53f3d1b15c7d3cd1f0,#32030S2,harm
4,63738b53f3d1b15c7d3cd1f0,#32030S2,fairness
5,63738b5578232564042adf46,#35971S1,liberty
5,63738b5578232564042adf46,#35971S1,care
...,...,...,...
115,63738b7a78ff61337567e28a,#41793S4,fairness
117,63738b7aec6863140217f380,#41793S6,care
118,63738b5c78232564042adf4b,#8182S6,harm
119,63738b7bfb9758706e727891,#B561S1,harm




Different values in column (MFT_Values): 6


['fairness', 'harm', 'liberty', 'care', 'loyalty', 'sanctity']


Value distribution: 


care        27
harm        22
loyalty     19
fairness    18
liberty      2
sanctity     1
Name: MFT_Values, dtype: int64





In [103]:
# Distribution of 'EkmanEmotions' for the audio stories.
# List-valued column: exploded by columnDistribution, so a story with several
# emotions is counted once per emotion.
columnDistribution(audioStoryValues_df, 'EkmanEmotions', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (EkmanEmotions)
Number of elements: 23


Unnamed: 0,_id,Story ID(#),EkmanEmotions
1,63738b52ec6863140217f367,#751188S1,[enjoyment]
5,63738b5578232564042adf46,#35971S1,"[surprise, enjoyment, fear, sadness]"
8,63738b56ec6863140217f36a,#44165S2,[anger]
14,63738b58619d8873905ef6fb,#44165S8,[anger]
15,63738b5878232564042adf49,#44165S9,[enjoyment]
18,63738b58461cf10d8073bc86,#44165S7,[enjoyment]
30,63738b5dfb9758706e72787f,#44176S1,[fear]
33,63738b5e28b056289c6b6f12,#44176S4,[enjoyment]
38,63738b604d6d36651d600e12,#44176S9,[enjoyment]
43,63738b63fb9758706e727883,#41768S1,[enjoyment]




Rows with empty values in column (EkmanEmotions)
Number of elements: 99


Unnamed: 0,_id,Story ID(#),EkmanEmotions
0,63738b53b6bab71ffc679aa4,#32030S3,[]
2,63738b54461cf10d8073bc83,#32030S5,[]
3,63738b53fb9758706e727878,#32030S1,[]
4,63738b53f3d1b15c7d3cd1f0,#32030S2,[]
6,63738b5578ff61337567e273,#35971S2,[]
...,...,...,...
116,63738b7a4d6d36651d600e22,#41793S5,[]
117,63738b7aec6863140217f380,#41793S6,[]
118,63738b5c78232564042adf4b,#8182S6,[]
119,63738b7bfb9758706e727891,#B561S1,[]




Dataframe exploded in column (EkmanEmotions)


Unnamed: 0,_id,Story ID(#),EkmanEmotions
1,63738b52ec6863140217f367,#751188S1,enjoyment
5,63738b5578232564042adf46,#35971S1,surprise
5,63738b5578232564042adf46,#35971S1,enjoyment
5,63738b5578232564042adf46,#35971S1,fear
5,63738b5578232564042adf46,#35971S1,sadness
8,63738b56ec6863140217f36a,#44165S2,anger
14,63738b58619d8873905ef6fb,#44165S8,anger
15,63738b5878232564042adf49,#44165S9,enjoyment
18,63738b58461cf10d8073bc86,#44165S7,enjoyment
30,63738b5dfb9758706e72787f,#44176S1,fear




Different values in column (EkmanEmotions): 6


['enjoyment', 'surprise', 'fear', 'sadness', 'anger', 'disgust']


Value distribution: 


enjoyment    12
disgust       6
fear          3
sadness       3
surprise      2
anger         2
Name: EkmanEmotions, dtype: int64





In [104]:
# Distribution of 'emotions' in the semantic-annotation frame for audio stories.
# Non-empty cells are dicts (emotion -> number); exploding keeps only the keys, so
# the counts are "stories mentioning the emotion", not the sum of those numbers.
# NOTE(review): the recorded output contains a value 'False' among the emotion
# names — looks like a data glitch; confirm.
columnDistribution(audioStoryEmotions_df, 'emotions', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (emotions)
Number of elements: 109


Unnamed: 0,_id,Story ID(#),emotions
0,63738b53b6bab71ffc679aa4,#32030S3,"{'Sadness': 2, 'Interest': 3, 'Joy': 1, 'Anger..."
1,63738b52ec6863140217f367,#751188S1,"{'Joy': 2, 'Interest': 1, 'Anticipation': 1, '..."
2,63738b53fb9758706e727878,#32030S1,{'Interest': 1}
3,63738b53f3d1b15c7d3cd1f0,#32030S2,"{'Joy': 1, 'Disapproval': 1, 'Interest': 2, 'S..."
4,63738b5578ff61337567e273,#35971S2,"{'Interest': 2, 'Joy': 1}"
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,"{'Fear': 1, 'Joy': 2, 'Interest': 2, 'Love': 1}"
117,63738b5c78ff61337567e278,#8182S7,{'Interest': 2}
118,63738b5c78232564042adf4b,#8182S6,{'Interest': 1}
119,63738b7bfb9758706e727891,#B561S1,"{'Anticipation': 1, 'Interest': 1}"




Rows with empty values in column (emotions)
Number of elements: 12


Unnamed: 0,_id,Story ID(#),emotions
15,63738b58619d8873905ef6fb,#44165S8,[]
33,63738b5f619d8873905ef6ff,#44176S6,[]
40,63738b61b6bab71ffc679aad,#44176S13,[]
41,63738b6128b056289c6b6f14,#44176S14,[]
53,63738b65619d8873905ef703,#41768S6,[]
56,63738b66ec6863140217f374,#41768S10,[]
59,63738b67f3d1b15c7d3cd1fd,#41768S12,[]
64,63738b69619d8873905ef705,#41768S16,[]
73,63738b6c78232564042adf55,#32271S10,[]
91,63738b7228b056289c6b6f1e,#32361S11,[]




Dataframe exploded in column (emotions)


Unnamed: 0,_id,Story ID(#),emotions
0,63738b53b6bab71ffc679aa4,#32030S3,Sadness
0,63738b53b6bab71ffc679aa4,#32030S3,Interest
0,63738b53b6bab71ffc679aa4,#32030S3,Joy
0,63738b53b6bab71ffc679aa4,#32030S3,Anger
1,63738b52ec6863140217f367,#751188S1,Joy
...,...,...,...
117,63738b5c78ff61337567e278,#8182S7,Interest
118,63738b5c78232564042adf4b,#8182S6,Interest
119,63738b7bfb9758706e727891,#B561S1,Anticipation
119,63738b7bfb9758706e727891,#B561S1,Interest




Different values in column (emotions): 13


['Sadness', 'Interest', 'Joy', 'Anger', 'Anticipation', 'Trust', 'Disapproval', 'Surprise', 'Love', 'Serenity', 'Fear', 'Disgust', 'False']


Value distribution: 


Interest        73
Joy             40
Anticipation    40
Trust           35
Surprise        35
Sadness         22
Anger           16
Fear            15
Love            12
Disgust          8
Disapproval      6
Serenity         2
False            1
Name: emotions, dtype: int64





In [105]:
# Distribution of 'sentiment' in the semantic-annotation frame for audio stories.
# Non-empty cells are dicts such as {'Positive': 1}; only the key (the sentiment
# label) is counted, the numeric score is ignored.
columnDistribution(audioStoryEmotions_df, 'sentiment', displayColumns = ['_id', 'Story ID(#)'])

Rows with non-empty values in column (sentiment)
Number of elements: 112


Unnamed: 0,_id,Story ID(#),sentiment
0,63738b53b6bab71ffc679aa4,#32030S3,{'Positive': 0.33333333333333304}
1,63738b52ec6863140217f367,#751188S1,{'Positive': 1}
2,63738b53fb9758706e727878,#32030S1,{'Positive': 1}
3,63738b53f3d1b15c7d3cd1f0,#32030S2,{'Positive': 0.6000000000000001}
4,63738b5578ff61337567e273,#35971S2,{'Positive': 1}
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,{'Positive': 0.7142857142857141}
117,63738b5c78ff61337567e278,#8182S7,{'Positive': 1}
118,63738b5c78232564042adf4b,#8182S6,{'Positive': 1}
119,63738b7bfb9758706e727891,#B561S1,{'Positive': 1}




Rows with empty values in column (sentiment)
Number of elements: 9


Unnamed: 0,_id,Story ID(#),sentiment
40,63738b61b6bab71ffc679aad,#44176S13,[]
41,63738b6128b056289c6b6f14,#44176S14,[]
51,63738b64b6bab71ffc679aaf,#41768S3,[]
53,63738b65619d8873905ef703,#41768S6,[]
56,63738b66ec6863140217f374,#41768S10,[]
59,63738b67f3d1b15c7d3cd1fd,#41768S12,[]
73,63738b6c78232564042adf55,#32271S10,[]
90,63738b72b6bab71ffc679ab7,#32361S10,[]
102,63738b7678ff61337567e288,#41377S1,[]




Dataframe exploded in column (sentiment)


Unnamed: 0,_id,Story ID(#),sentiment
0,63738b53b6bab71ffc679aa4,#32030S3,Positive
1,63738b52ec6863140217f367,#751188S1,Positive
2,63738b53fb9758706e727878,#32030S1,Positive
3,63738b53f3d1b15c7d3cd1f0,#32030S2,Positive
4,63738b5578ff61337567e273,#35971S2,Positive
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,Positive
117,63738b5c78ff61337567e278,#8182S7,Positive
118,63738b5c78232564042adf4b,#8182S6,Positive
119,63738b7bfb9758706e727891,#B561S1,Positive




Different values in column (sentiment): 3


['Positive', 'Neutral', 'Negative']


Value distribution: 


Positive    86
Neutral     13
Negative    13
Name: sentiment, dtype: int64





# DMH Stories (written)

In [106]:
# Load the semantic-annotation export for the written stories
# (the frame carries the 'emotions', 'sentiment', 'toxicity' and 'entities' columns).
filename = 'DB133_DMH - Pop-up_VR_Museum - Semantic Annotation for Written Stories'
writtenStoryEmotions_df = pd.read_json(f"{folder}/{filename}.json")
writtenStoryEmotions_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Comment ID (#),Original language,...,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,emotions,sentiment,toxicity,entities
0,636c1448b4028608e54d73d6,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,683378,General audience,#9249C4,English,...,2022,12,2,14,47,39,[],{'Positive': 0.7006779},[],[]
1,636c14475d9061195e24abf3,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,171980,General audience,#9249C1,English,...,2022,12,2,14,47,39,[],[],[],[]
2,636c14499b33d34fc4602627,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,906115,General audience,#9249C6,Finnish,...,2022,12,2,14,47,38,{'Interest': 1},{'Positive': 1.82837474},[],[]
3,636c1448566dcc2dea6212aa,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,717077,General audience,#9249C2,English,...,2022,12,2,14,47,38,{'Fear': 1},{'Negative': 1},[],[]
4,636c1448f5d07205d51f45c4,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,623637,General audience,#9249C3,English,...,2022,12,2,14,47,38,[],{'Positive': 1.59080726},[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,636c14d2e6c018559c4a401d,Windbreaker - Torstai,44166,19/01/2022,Online workshop,Leena Wilkman,336160,Senior citizens,#44166C1,Finnish,...,2022,12,2,14,45,27,[],[],[],{'dbr:Estonians': {'@types': ['http://dbpedia....
399,636c14d39b33d34fc4602677,Windbreaker - Torstai,44166,17/03/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#44166C5,Finnish,...,2022,12,2,14,45,26,{'Sadness': 2},{'Negative': 1},[],{'dbr:Finns': {'@types': ['http://dbpedia.org/...
400,636c14d33f72b3416c26d33d,Windbreaker - Torstai,44166,22/03/2022,Laajasalo,Anonymous,601587,Senior citizens,#44166C6,Finnish,...,2022,12,2,14,45,26,[],[],[],[]
401,636c14d3b4028608e54d7427,Windbreaker - Torstai,44166,15/03/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#44166C4,Finnish,...,2022,12,2,14,45,25,[],{'Negative': 1},[],{'dbr:Windbreaker': {'@types': ['http://dbpedi...


In [107]:
# Load the values-and-emotions export for the written stories
# (the frame carries the 'MFT_Values' and 'EkmanEmotions' columns).
filename = 'DB131_DMH - Pop-up_VR_Museum - Values and Emotions for Written Stories'
writtenStoryValues_df = pd.read_json(f"{folder}/{filename}.json")
writtenStoryValues_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Comment ID (#),Original language,...,_datasetid,_timestamp,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,MFT_Values,EkmanEmotions
0,636c14475d9061195e24abf3,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,171980,General audience,#9249C1,English,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805793,2022,11,30,10,56,33,[],[]
1,636c1448566dcc2dea6212aa,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,717077,General audience,#9249C2,English,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805792,2022,11,30,10,56,32,[],[]
2,636c1448f5d07205d51f45c4,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,623637,General audience,#9249C3,English,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805792,2022,11,30,10,56,32,[],[]
3,636c1448b4028608e54d73d6,Armchair - Paimio armchair 41,9249,30/08/2022,Design Evening,Anonymous,683378,General audience,#9249C4,English,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805792,2022,11,30,10,56,32,[],[enjoyment]
4,636c144a0e2530450149d6bd,Armchair - Paimio armchair 41,9249,27/09/2022,Design Evening,Anonymous,321106,General audience,#9249C9,Finnish,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805791,2022,11,30,10,56,31,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,636c14d39b33d34fc4602677,Windbreaker - Torstai,44166,17/03/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#44166C5,Finnish,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805661,2022,11,30,10,54,21,[],[]
402,636c14d33f72b3416c26d33d,Windbreaker - Torstai,44166,22/03/2022,Laajasalo,Anonymous,601587,Senior citizens,#44166C6,Finnish,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805661,2022,11,30,10,54,21,[],[]
403,636c14d2e6c018559c4a401d,Windbreaker - Torstai,44166,19/01/2022,Online workshop,Leena Wilkman,336160,Senior citizens,#44166C1,Finnish,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805661,2022,11,30,10,54,21,[],[]
404,636c14d40282857a3c150481,Windbreaker - Torstai,44166,22/03/2022,Laajasalo,Anonymous,838079,Senior citizens,#44166C7,Finnish,...,5f9f3f35-f36a-4e99-8180-f33988182bc7,1669805660,2022,11,30,10,54,20,[],[sadness]


In [108]:
# Written stories that appear in the values dataset but have no row with the
# same '_id' in the emotions dataset: left-join on '_id' and keep the rows that
# were found only on the left side.
# The merge result gets its own name so that it no longer overwrites
# audioStoryMerged_df (audio-story data) with written-story data.
writtenStoryMerged_df = writtenStoryValues_df[['_id']].merge(
    writtenStoryEmotions_df[['_id']].drop_duplicates(),
    on='_id',
    how='left',
    indicator=True,  # adds the '_merge' column used for the filter below
)
missing_df = writtenStoryMerged_df.loc[writtenStoryMerged_df['_merge'] == 'left_only']
missing_df

Unnamed: 0,_id,_merge
98,636c14690e2530450149d6cf,left_only
265,636c14a40282857a3c150465,left_only
292,636c14ad9742aa76343842e5,left_only


In [109]:
# List every column of the written-story values frame.
# Unlike the audio frame, the identifier column here is 'Comment ID (#)'
# rather than 'Story ID(#)'.
writtenStoryValues_df.columns

Index(['_id', 'Object name', 'Object number', 'Date of recording',
       'Context/event/workshop', 'Contributor name', 'User ID',
       'End-user community', 'Comment ID (#)', 'Original language',
       'Finnish translation', 'English translation', 'Swedish translation',
       'Other language translation', 'Additional info', '_datasetid',
       '_timestamp', '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second',
       'MFT_Values', 'EkmanEmotions'],
      dtype='object')

**Column distribution (shared columns)**

In [110]:
# Identifier columns to show next to the inspected column in the
# written-story columnDistribution calls that follow.
displayColumns = ['_id', 'Comment ID (#)']

In [111]:
# Value distribution of 'Object name' for the written stories.
# NOTE(review): the recorded output contains near-duplicate names that differ
# only by dash type or spelling (e.g. 'Creamer - MK' / 'Creamer – MK',
# 'Bag - Booboo' / 'Bag – Boboo') — the distinct-value count is likely inflated.
columnDistribution(writtenStoryValues_df, 'Object name', displayColumns = displayColumns)

Rows with non-empty values in column (Object name)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),Object name
0,636c14475d9061195e24abf3,#9249C1,Armchair - Paimio armchair 41
1,636c1448566dcc2dea6212aa,#9249C2,Armchair - Paimio armchair 41
2,636c1448f5d07205d51f45c4,#9249C3,Armchair - Paimio armchair 41
3,636c1448b4028608e54d73d6,#9249C4,Armchair - Paimio armchair 41
4,636c144a0e2530450149d6bd,#9249C9,Armchair - Paimio armchair 41
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,Windbreaker - Torstai
402,636c14d33f72b3416c26d33d,#44166C6,Windbreaker - Torstai
403,636c14d2e6c018559c4a401d,#44166C1,Windbreaker - Torstai
404,636c14d40282857a3c150481,#44166C7,Windbreaker - Torstai




Rows with empty values in column (Object name)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),Object name




Different values in column (Object name): 77


['Armchair - Paimio armchair 41', 'Bag - Booboo', 'Aromaglass: i-102 (Aroma 2002)', 'Bag – Boboo', 'Bicycle - Jopo', 'Canister - Jerry', 'Ceramic tile - Viimeinen Ateria', 'Chair - Pastille', 'Chair: Pirkka', 'Cocktail glass - 2744', 'Cocktail glass – 2744', 'Coffee pot - Pehtoori', 'Creamer - MK', 'Creamer – MK', 'Cup - Myrna', 'Cup: Flora', 'Cup: Iris', 'Cup: Ruska', 'Cutlery - Savonia', 'Cup: Raitakaita', 'Cup: Think and Drink', 'Cutlery: Scandia', 'Decorative object - Pässi (Ram)', 'Doll – Lappkojs', 'Doll - Lappkojs', 'Dress – Model 4', 'Dress: Monrepos (fabric: Keidas)', 'Dress - Model 4', 'Drinking glass: 1718', 'Drinking glass: Ultima Thule', 'Garbage bin - City of Helsinki', 'Glass sculpture - Fasaani', 'Glass sculpture – Fasaani', 'High chair – High chair 616', 'Jug - 1610', 'Jug – 1610', 'Jug – 1611', 'Jug – 1612', 'Jug – 1613', 'Juicer - C1810 B', 'Juicer – C 1810 B', 'Lamp - Block', 'Lamp – Block', 'Maternity package', 'Mope

Subway bench                        22
Necklace – Planetaariset laaksot    15
Scissors - Fiskars                  15
Decorative object - Pässi (Ram)     14
Vase - Aalto vase                   14
                                    ..
Cup: Flora                           1
Jug – 1612                           1
Jug – 1611                           1
Cup: Raitakaita                      1
Jug – 1613                           1
Name: Object name, Length: 77, dtype: int64





In [112]:
# Value distribution of 'Object number' for the written stories.
# Values are not purely numeric (the recorded output includes 'AM1699', 'C370', 'B561', 'B548').
columnDistribution(writtenStoryValues_df, 'Object number', displayColumns = displayColumns)

Rows with non-empty values in column (Object number)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),Object number
0,636c14475d9061195e24abf3,#9249C1,9249
1,636c1448566dcc2dea6212aa,#9249C2,9249
2,636c1448f5d07205d51f45c4,#9249C3,9249
3,636c1448b4028608e54d73d6,#9249C4,9249
4,636c144a0e2530450149d6bd,#9249C9,9249
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,44166
402,636c14d33f72b3416c26d33d,#44166C6,44166
403,636c14d2e6c018559c4a401d,#44166C1,44166
404,636c14d40282857a3c150481,#44166C7,44166




Rows with empty values in column (Object number)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),Object number




Different values in column (Object number): 60


['9249', '41581', '7762', '11780', '32030', '35971', '44165', '44164', '12595', '8182', 'AM1699', '44176', '44181', '44175', '44179', '44168', '44184', '44182', '44171', '41768', '38518', '27303', '30074', '10026', '14218', '44167', '13081', '9226', '16101', '22769', '13154', '38467', '40881', '32271', '44185', '44178', '38267', '41379', 'C370', '32361', '33288', '17876', '32628', '16850', '16851', '13906', '44174', '32627', '41821', '18903', '41377', '9321', '44163', '41816', 'B561', '20260', '41793', '17248', 'B548', '44166']


Value distribution: 


41816     22
38267     19
12595     16
13081     16
44174     15
41768     14
41793     14
9249      13
38467     13
9321      12
13154     12
44165     12
32271     11
17248     11
16101     11
20260     10
11780     10
AM1699    10
44167     10
41581      9
9226       9
41821      9
C370       8
22769      8
44168      8
32361      7
8182       7
B561       7
44166      7
35971      6
32030      6
44163      5
44176      5
27303      5
38518      4
44178      3
B548       3
44175      3
44185      3
16851      2
44179      2
41377      2
18903      2
13906      2
40881      2
17876      2
10026      2
14218      2
41379      2
44164      2
30074      2
16850      1
32628      1
32627      1
33288      1
44171      1
44182      1
44184      1
7762       1
44181      1
Name: Object number, dtype: int64





In [113]:
# Number of written stories per 'Contributor name'.
# NOTE(review): the recorded output shows spelling variants that look like one
# person ('Päivi Castrén-Kortekangas', 'Päivi Castren-Kortekangas',
# 'Paivi Castren-Kortekangas') — confirm and normalise if so. The output also
# appears to contain real names; check before sharing.
columnDistribution(writtenStoryValues_df, 'Contributor name', displayColumns = displayColumns)

Rows with non-empty values in column (Contributor name)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),Contributor name
0,636c14475d9061195e24abf3,#9249C1,Anonymous
1,636c1448566dcc2dea6212aa,#9249C2,Anonymous
2,636c1448f5d07205d51f45c4,#9249C3,Anonymous
3,636c1448b4028608e54d73d6,#9249C4,Anonymous
4,636c144a0e2530450149d6bd,#9249C9,Anonymous
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,Riitta Katriina Louhensalo
402,636c14d33f72b3416c26d33d,#44166C6,Anonymous
403,636c14d2e6c018559c4a401d,#44166C1,Leena Wilkman
404,636c14d40282857a3c150481,#44166C7,Anonymous




Rows with empty values in column (Contributor name)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),Contributor name




Different values in column (Contributor name): 27


['Anonymous', 'Eeva-Kaarina Öyry', 'Marke Kaskimies', 'Liisa Nummela', 'Päivi Castrén-Kortekangas', 'Halira', 'Olli', 'Anneli Hietanen', 'Leila Mustanoja-Syysmeri', 'A. R.', 'Lea Lifflander', 'Leena Lappalainen', 'Olli Jaakkola', 'Seija Järvinen', 'Paivi Castren-Kortekangas', 'Saleh', 'Päivi Castren-Kortekangas', 'Anita Salmela', 'Soili Viitaniemi', 'Leea Riittinen', 'Kristiina Haara', 'Leena Wilkman', 'Liisa Kemppi', 'Khallat', 'AnnaLiisa Joukainen', 'Fardoso', 'Riitta Katriina Louhensalo']


Value distribution: 


Anonymous                     366
Leena Wilkman                   4
Eeva-Kaarina Öyry               4
Olli                            3
Leena Lappalainen               3
Fardoso                         2
Liisa Nummela                   2
Halira                          2
Anneli Hietanen                 2
Anita Salmela                   1
AnnaLiisa Joukainen             1
Khallat                         1
Liisa Kemppi                    1
Kristiina Haara                 1
Leea Riittinen                  1
Soili Viitaniemi                1
Seija Järvinen                  1
Päivi Castren-Kortekangas       1
Saleh                           1
Paivi Castren-Kortekangas       1
Olli Jaakkola                   1
Lea Lifflander                  1
A. R.                           1
Leila Mustanoja-Syysmeri        1
Päivi Castrén-Kortekangas       1
Marke Kaskimies                 1
Riitta Katriina Louhensalo      1
Name: Contributor name, dtype: int64





In [114]:
# Number of written stories per 'User ID'
# (columnDistribution casts numeric columns to str before counting).
columnDistribution(writtenStoryValues_df, 'User ID', displayColumns = displayColumns)

Rows with non-empty values in column (User ID)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),User ID
0,636c14475d9061195e24abf3,#9249C1,171980
1,636c1448566dcc2dea6212aa,#9249C2,717077
2,636c1448f5d07205d51f45c4,#9249C3,623637
3,636c1448b4028608e54d73d6,#9249C4,683378
4,636c144a0e2530450149d6bd,#9249C9,321106
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,788016
402,636c14d33f72b3416c26d33d,#44166C6,601587
403,636c14d2e6c018559c4a401d,#44166C1,336160
404,636c14d40282857a3c150481,#44166C7,838079




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),User ID




Different values in column (User ID): 389


['171980', '717077', '623637', '683378', '321106', '854873', '906115', '143940', '747249', '810077', '860129', '499916', '135729', '304144', '494929', '763909', '869688', '635522', '546155', '691509', '687460', '867192', '648008', '315375', '889840', '536507', '733830', '467736', '751188', '652225', '480163', '363051', '994463', '315367', '859853', '923304', '902419', '830662', '420687', '501851', '615657', '810394', '492976', '789838', '760671', '666698', '519875', '446753', '327926', '266015', '958841', '702839', '657293', '337348', '137027', '742369', '227606', '646909', '557386', '906289', '784682', '180106', '807976', '430645', '570664', '478101', '386405', '697639', '896756', '371912', '547625', '898143', '275365', '624865', '373908', '164176', '426054', '719025', '956636', '656728', '796892', '741370', '435167', '646613', '354956', '827861', '163866', '142112', '734629', '602028', '146872', '434969', '253996', '647464', '594472', '20

751188    4
336160    4
446753    4
594472    3
792314    2
         ..
211097    1
488702    1
856559    1
437385    1
838079    1
Name: User ID, Length: 389, dtype: int64





In [115]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'End-user community'.
columnDistribution(
    writtenStoryValues_df,
    'End-user community',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (End-user community)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),End-user community
0,636c14475d9061195e24abf3,#9249C1,General audience
1,636c1448566dcc2dea6212aa,#9249C2,General audience
2,636c1448f5d07205d51f45c4,#9249C3,General audience
3,636c1448b4028608e54d73d6,#9249C4,General audience
4,636c144a0e2530450149d6bd,#9249C9,General audience
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,Senior citizens
402,636c14d33f72b3416c26d33d,#44166C6,Senior citizens
403,636c14d2e6c018559c4a401d,#44166C1,Senior citizens
404,636c14d40282857a3c150481,#44166C7,Senior citizens




Rows with empty values in column (End-user community)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),End-user community




Different values in column (End-user community): 4


['General audience', 'Senior citizens', 'Asylum seekers', 'Finnish language students']


Value distribution: 


General audience             213
Senior citizens              149
Finnish language students     28
Asylum seekers                16
Name: End-user community, dtype: int64





In [116]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Original language'.
# NOTE(review): the recorded output contains the value 'Italian?' --
# presumably an unconfirmed language guess; verify against the source data.
columnDistribution(
    writtenStoryValues_df,
    'Original language',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Original language)
Number of elements: 406


Unnamed: 0,_id,Comment ID (#),Original language
0,636c14475d9061195e24abf3,#9249C1,English
1,636c1448566dcc2dea6212aa,#9249C2,English
2,636c1448f5d07205d51f45c4,#9249C3,English
3,636c1448b4028608e54d73d6,#9249C4,English
4,636c144a0e2530450149d6bd,#9249C9,Finnish
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,Finnish
402,636c14d33f72b3416c26d33d,#44166C6,Finnish
403,636c14d2e6c018559c4a401d,#44166C1,Finnish
404,636c14d40282857a3c150481,#44166C7,Finnish




Rows with empty values in column (Original language)
Number of elements: 0


Unnamed: 0,_id,Comment ID (#),Original language




Different values in column (Original language): 6


['English', 'Finnish', 'Danish', 'German', 'Italian?', 'Spanish']


Value distribution: 


Finnish     289
English     113
Danish        1
German        1
Italian?      1
Spanish       1
Name: Original language, dtype: int64





**Column distribution (extra columns)**

In [117]:
# Report the distribution of 'MFT_Values'; the cells hold lists, so
# columnDistribution explodes them to one row per item before counting.
# NOTE(review): the recorded output counts composite labels such as
# 'care, harm' as a single value -- presumably some list items are
# comma-joined strings; confirm the upstream parsing.
columnDistribution(
    writtenStoryValues_df,
    'MFT_Values',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (MFT_Values)
Number of elements: 66


Unnamed: 0,_id,Comment ID (#),MFT_Values
6,636c14499b33d34fc4602627,#9249C6,[care]
8,636c144a3f72b3416c26d2ed,#9249C7,[liberty]
10,636c144c6b245e0b9926dbb9,#41581C1,[care]
19,636c144ff5d07205d51f45c8,#41581C9,[care]
25,636c14510282857a3c150435,#11780C5,[care]
...,...,...,...
381,636c14cc9b33d34fc4602673,#17248C5,[care]
394,636c14cff5d07205d51f4612,#16101C3,[fairness]
395,636c14d122d37d085f130362,#17248C10,[care]
396,636c14d15d9061195e24ac43,#17248C11,[care]




Rows with empty values in column (MFT_Values)
Number of elements: 340


Unnamed: 0,_id,Comment ID (#),MFT_Values
0,636c14475d9061195e24abf3,#9249C1,[]
1,636c1448566dcc2dea6212aa,#9249C2,[]
2,636c1448f5d07205d51f45c4,#9249C3,[]
3,636c1448b4028608e54d73d6,#9249C4,[]
4,636c144a0e2530450149d6bd,#9249C9,[]
...,...,...,...
401,636c14d39b33d34fc4602677,#44166C5,[]
402,636c14d33f72b3416c26d33d,#44166C6,[]
403,636c14d2e6c018559c4a401d,#44166C1,[]
404,636c14d40282857a3c150481,#44166C7,[]




Dataframe exploded in column (MFT_Values)


Unnamed: 0,_id,Comment ID (#),MFT_Values
6,636c14499b33d34fc4602627,#9249C6,care
8,636c144a3f72b3416c26d2ed,#9249C7,liberty
10,636c144c6b245e0b9926dbb9,#41581C1,care
19,636c144ff5d07205d51f45c8,#41581C9,care
25,636c14510282857a3c150435,#11780C5,care
...,...,...,...
381,636c14cc9b33d34fc4602673,#17248C5,care
394,636c14cff5d07205d51f4612,#16101C3,fairness
395,636c14d122d37d085f130362,#17248C10,care
396,636c14d15d9061195e24ac43,#17248C11,care




Different values in column (MFT_Values): 9


['care', 'liberty', 'care, harm', 'loyalty', 'sanctity', 'harm', 'oppression', 'care, loyalty', 'fairness']


Value distribution: 


care             30
harm             16
loyalty           6
care, harm        5
liberty           3
sanctity          2
oppression        2
care, loyalty     1
fairness          1
Name: MFT_Values, dtype: int64





In [118]:
# Report the distribution of 'EkmanEmotions'; the cells hold lists, so
# columnDistribution explodes them to one row per item before counting.
# NOTE(review): the recorded output counts composite labels such as
# 'surprise, anger' as a single value -- presumably comma-joined strings
# inside the lists; confirm the upstream parsing.
columnDistribution(
    writtenStoryValues_df,
    'EkmanEmotions',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (EkmanEmotions)
Number of elements: 28


Unnamed: 0,_id,Comment ID (#),EkmanEmotions
3,636c1448b4028608e54d73d6,#9249C4,[enjoyment]
36,636c1453f5d07205d51f45ca,#11780C10,[enjoyment]
52,636c145b0282857a3c15043b,#44164C1,[sadness]
55,636c145a6b245e0b9926dbc1,#44165C10,[fear]
57,636c145b3f72b3416c26d2f7,#44165C12,[disgust]
66,636c145e6b245e0b9926dbc3,#12595C6,[enjoyment]
69,636c145f0282857a3c15043d,#12595C9,[enjoyment]
85,636c1464f5d07205d51f45d4,#AM1699C1,[disgust]
106,636c146c9b33d34fc460263b,#44182C1,[enjoyment]
128,636c14740e2530450149d6d5,#44176C26,[surprise]




Rows with empty values in column (EkmanEmotions)
Number of elements: 378


Unnamed: 0,_id,Comment ID (#),EkmanEmotions
0,636c14475d9061195e24abf3,#9249C1,[]
1,636c1448566dcc2dea6212aa,#9249C2,[]
2,636c1448f5d07205d51f45c4,#9249C3,[]
4,636c144a0e2530450149d6bd,#9249C9,[]
5,636c14496b245e0b9926dbb7,#9249C5,[]
...,...,...,...
399,636c14d29742aa76343842fb,#44166C3,[]
400,636c14d10e2530450149d70b,#17248C9,[]
401,636c14d39b33d34fc4602677,#44166C5,[]
402,636c14d33f72b3416c26d33d,#44166C6,[]




Dataframe exploded in column (EkmanEmotions)


Unnamed: 0,_id,Comment ID (#),EkmanEmotions
3,636c1448b4028608e54d73d6,#9249C4,enjoyment
36,636c1453f5d07205d51f45ca,#11780C10,enjoyment
52,636c145b0282857a3c15043b,#44164C1,sadness
55,636c145a6b245e0b9926dbc1,#44165C10,fear
57,636c145b3f72b3416c26d2f7,#44165C12,disgust
66,636c145e6b245e0b9926dbc3,#12595C6,enjoyment
69,636c145f0282857a3c15043d,#12595C9,enjoyment
85,636c1464f5d07205d51f45d4,#AM1699C1,disgust
106,636c146c9b33d34fc460263b,#44182C1,enjoyment
128,636c14740e2530450149d6d5,#44176C26,surprise




Different values in column (EkmanEmotions): 8


['enjoyment', 'sadness', 'fear', 'disgust', 'surprise', 'surprise, anger', 'enjoyment, disgust', 'anger']


Value distribution: 


enjoyment             12
disgust                5
sadness                3
fear                   3
enjoyment, disgust     2
surprise               1
surprise, anger        1
anger                  1
Name: EkmanEmotions, dtype: int64





In [162]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Object number' in the emotions frame.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours and its recorded output shows different display columns than
# the surrounding cells, so the result depends on what displayColumns held
# when it was last run -- re-run the notebook top to bottom to confirm.
columnDistribution(
    writtenStoryEmotions_df,
    'Object number',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Object number)
Number of elements: 403


Unnamed: 0,_id,User ID,Object number
0,636c1448b4028608e54d73d6,683378,9249
1,636c14475d9061195e24abf3,171980,9249
2,636c14499b33d34fc4602627,906115,9249
3,636c1448566dcc2dea6212aa,717077,9249
4,636c1448f5d07205d51f45c4,623637,9249
...,...,...,...
398,636c14d2e6c018559c4a401d,336160,44166
399,636c14d39b33d34fc4602677,788016,44166
400,636c14d33f72b3416c26d33d,601587,44166
401,636c14d3b4028608e54d7427,751188,44166




Rows with empty values in column (Object number)
Number of elements: 0


Unnamed: 0,_id,User ID,Object number




Different values in column (Object number): 60


['9249', '41581', '7762', '11780', '32030', '35971', '44165', '44164', '12595', '8182', 'AM1699', '44176', '44175', '44181', '44184', '44179', '44168', '44182', '41768', '44171', '38518', '27303', '30074', '10026', '14218', '44167', '13081', '9226', '16101', '22769', '13154', '38467', '40881', '32271', '44185', '44178', '38267', '41379', 'C370', '32361', '16850', '33288', '17876', '16851', '32628', '44174', '32627', '13906', '41821', '18903', '41377', '9321', '44163', '41816', 'B561', '20260', '41793', '17248', 'B548', '44166']


Value distribution: 


41816     22
38267     19
12595     16
13081     16
41768     14
41793     14
44174     14
9249      13
38467     13
9321      12
13154     12
44165     12
32271     11
17248     11
16101     11
20260     10
11780     10
AM1699    10
44167     10
41581      9
9226       9
41821      9
C370       8
44168      8
22769      8
8182       7
B561       7
44166      7
35971      6
32361      6
32030      6
44163      5
44176      5
27303      5
38518      4
44178      3
B548       3
44185      3
30074      2
44179      2
41377      2
18903      2
13906      2
40881      2
16851      2
17876      2
10026      2
14218      2
41379      2
44164      2
44175      2
32628      1
33288      1
16850      1
44171      1
44182      1
44184      1
7762       1
44181      1
32627      1
Name: Object number, dtype: int64





In [161]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'User ID' in the emotions frame.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours and its recorded output shows different display columns than
# the surrounding cells, so the result depends on what displayColumns held
# when it was last run -- re-run the notebook top to bottom to confirm.
columnDistribution(
    writtenStoryEmotions_df,
    'User ID',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (User ID)
Number of elements: 403


Unnamed: 0,_id,User ID
0,636c1448b4028608e54d73d6,683378
1,636c14475d9061195e24abf3,171980
2,636c14499b33d34fc4602627,906115
3,636c1448566dcc2dea6212aa,717077
4,636c1448f5d07205d51f45c4,623637
...,...,...
398,636c14d2e6c018559c4a401d,336160
399,636c14d39b33d34fc4602677,788016
400,636c14d33f72b3416c26d33d,601587
401,636c14d3b4028608e54d7427,751188




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,User ID




Different values in column (User ID): 388


['683378', '171980', '906115', '717077', '623637', '810077', '321106', '854873', '143940', '747249', '499916', '135729', '304144', '494929', '763909', '860129', '869688', '635522', '546155', '691509', '687460', '867192', '648008', '315375', '889840', '536507', '733830', '467736', '751188', '652225', '480163', '902419', '363051', '994463', '315367', '501851', '859853', '923304', '492976', '830662', '420687', '666698', '615657', '810394', '327926', '789838', '760671', '702839', '519875', '446753', '742369', '266015', '958841', '657293', '337348', '137027', '180106', '227606', '646909', '570664', '557386', '906289', '784682', '807976', '430645', '371912', '478101', '386405', '275365', '697639', '896756', '164176', '547625', '898143', '719025', '956636', '624865', '373908', '796892', '741370', '426054', '354956', '827861', '656728', '734629', '435167', '646613', '434969', '163866', '142112', '594472', '602028', '146872', '253996', '647464', '21

336160    4
751188    4
446753    4
594472    3
283382    2
         ..
856559    1
680369    1
876846    1
825152    1
838079    1
Name: User ID, Length: 388, dtype: int64





In [119]:
# Report the distribution of 'emotions'. The cells hold dicts, which
# columnDistribution explodes before counting; the recorded output shows
# one row per dictionary key, so each label is counted once per story and
# the numeric score stored against it is not used as a weight.
columnDistribution(
    writtenStoryEmotions_df,
    'emotions',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (emotions)
Number of elements: 230


Unnamed: 0,_id,Comment ID (#),emotions
2,636c14499b33d34fc4602627,#9249C6,{'Interest': 1}
3,636c1448566dcc2dea6212aa,#9249C2,{'Fear': 1}
5,636c144a0282857a3c150431,#9249C8,{'Sadness': 1}
8,636c144b566dcc2dea6212ac,#9249C12,{'Interest': 1}
9,636c144a3f72b3416c26d2ed,#9249C7,"{'Joy': 1, 'Interest': 1, 'Anticipation': 1, '..."
...,...,...,...
392,636c14d122d37d085f130362,#17248C10,{'Interest': 1}
395,636c14d2f5d07205d51f4614,#44166C2,{'Joy': 1}
396,636c14d29742aa76343842fb,#44166C3,{'Interest': 1}
399,636c14d39b33d34fc4602677,#44166C5,{'Sadness': 2}




Rows with empty values in column (emotions)
Number of elements: 173


Unnamed: 0,_id,Comment ID (#),emotions
0,636c1448b4028608e54d73d6,#9249C4,[]
1,636c14475d9061195e24abf3,#9249C1,[]
4,636c1448f5d07205d51f45c4,#9249C3,[]
6,636c144a0e2530450149d6bd,#9249C9,[]
7,636c14496b245e0b9926dbb7,#9249C5,[]
...,...,...,...
394,636c14d09b33d34fc4602675,#16101C6,[]
397,636c14d10e2530450149d70b,#17248C9,[]
398,636c14d2e6c018559c4a401d,#44166C1,[]
400,636c14d33f72b3416c26d33d,#44166C6,[]




Dataframe exploded in column (emotions)


Unnamed: 0,_id,Comment ID (#),emotions
2,636c14499b33d34fc4602627,#9249C6,Interest
3,636c1448566dcc2dea6212aa,#9249C2,Fear
5,636c144a0282857a3c150431,#9249C8,Sadness
8,636c144b566dcc2dea6212ac,#9249C12,Interest
9,636c144a3f72b3416c26d2ed,#9249C7,Joy
...,...,...,...
402,636c14d40282857a3c150481,#44166C7,Anger
402,636c14d40282857a3c150481,#44166C7,Disgust
402,636c14d40282857a3c150481,#44166C7,Fear
402,636c14d40282857a3c150481,#44166C7,Sadness




Different values in column (emotions): 12


['Interest', 'Fear', 'Sadness', 'Joy', 'Anticipation', 'Serenity', 'Disapproval', 'Anger', 'Surprise', 'Disgust', 'Trust', 'Love']


Value distribution: 


Interest        131
Joy              55
Surprise         34
Sadness          29
Anticipation     26
Trust            26
Fear             14
Disgust          13
Love             13
Anger            12
Disapproval       8
Serenity          7
Name: emotions, dtype: int64





In [120]:
# Report the distribution of 'sentiment'. The cells hold dicts, which
# columnDistribution explodes before counting; the recorded output shows
# one row per dictionary key (Positive / Negative / Neutral), so the
# numeric score stored against the label is not part of the counts.
columnDistribution(
    writtenStoryEmotions_df,
    'sentiment',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (sentiment)
Number of elements: 291


Unnamed: 0,_id,Comment ID (#),sentiment
0,636c1448b4028608e54d73d6,#9249C4,{'Positive': 0.7006779}
2,636c14499b33d34fc4602627,#9249C6,{'Positive': 1.82837474}
3,636c1448566dcc2dea6212aa,#9249C2,{'Negative': 1}
4,636c1448f5d07205d51f45c4,#9249C3,{'Positive': 1.59080726}
5,636c144a0282857a3c150431,#9249C8,{'Negative': 1}
...,...,...,...
396,636c14d29742aa76343842fb,#44166C3,{'Neutral': 1}
397,636c14d10e2530450149d70b,#17248C9,{'Positive': 0.5309802}
399,636c14d39b33d34fc4602677,#44166C5,{'Negative': 1}
401,636c14d3b4028608e54d7427,#44166C4,{'Negative': 1}




Rows with empty values in column (sentiment)
Number of elements: 112


Unnamed: 0,_id,Comment ID (#),sentiment
1,636c14475d9061195e24abf3,#9249C1,[]
7,636c14496b245e0b9926dbb7,#9249C5,[]
21,636c144e22d37d085f130316,#41581C6,[]
32,636c1453b4028608e54d73dd,#32030C1,[]
34,636c145222d37d085f130318,#11780C7,[]
...,...,...,...
389,636c14cfb4028608e54d7425,#16101C5,[]
391,636c14d00282857a3c15047f,#17248C8,[]
393,636c14d15d9061195e24ac43,#17248C11,[]
398,636c14d2e6c018559c4a401d,#44166C1,[]




Dataframe exploded in column (sentiment)


Unnamed: 0,_id,Comment ID (#),sentiment
0,636c1448b4028608e54d73d6,#9249C4,Positive
2,636c14499b33d34fc4602627,#9249C6,Positive
3,636c1448566dcc2dea6212aa,#9249C2,Negative
4,636c1448f5d07205d51f45c4,#9249C3,Positive
5,636c144a0282857a3c150431,#9249C8,Negative
...,...,...,...
396,636c14d29742aa76343842fb,#44166C3,Neutral
397,636c14d10e2530450149d70b,#17248C9,Positive
399,636c14d39b33d34fc4602677,#44166C5,Negative
401,636c14d3b4028608e54d7427,#44166C4,Negative




Different values in column (sentiment): 3


['Positive', 'Negative', 'Neutral']


Value distribution: 


Positive    223
Negative     41
Neutral      27
Name: sentiment, dtype: int64





# DMH Stories (combined)

In [163]:
# Load the combined stories export (JSON) from the parsed-data folder and
# preview it; parsedFolder is defined earlier in the notebook.
filename = 'DMH_CombinedStories'
combinedStories_df = pd.read_json(f"{parsedFolder}/{filename}.json")
combinedStories_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,emotions,sentiment,toxicity,entities,Original language.1,MFT_Values,EkmanEmotions,originalEmotions,Comment ID (#),_updated
0,63738b5eb6bab71ffc679aab,Cup - Myrna,44176,15/3/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#44176S3,Finnish,...,"{'anger': 0, 'anticipation': 1, 'disgust': 0, ...",{'Positive': 1},[],{'dbr:Moped': {'@types': ['http://www.w3.org/2...,Finnish,[],[],"{'Joy': 1, 'Anticipation': 1}",,
1,636c14b53f72b3416c26d32b,Stool - Stool 60,44163,08/03/2022,Orimattila,Anonymous,787682,General audience,,Finnish,...,{},{},[],[],Finnish,[],[],{},#44163C2,
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S11,English,...,"{'anger': 0, 'anticipation': 0.25, 'disgust': ...",{'Positive': 0.42857142857142805},[],{'dbr:Coast': {'@types': ['http://dbpedia.org/...,English,"[harm, care, loyalty]",[sadness],"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'...",,
3,63738b71ec6863140217f37a,Rug - Suo,32361,17/3/2022,Lahti,Kristiina Haara,139120,Senior citizens,#32361S7,Finnish,...,"{'anger': 0, 'anticipation': 0.142857142857142...",{'Positive': 1.703720699999999},[],{'dbr:Carpet': {'@types': ['http://www.w3.org/...,Finnish,[],[],"{'Serenity': 1.9041194300000002, 'Interest': 6...",,
4,63738b75b6bab71ffc679ab9,Scissors - Fiskars,44174,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#44174S4,Finnish,...,"{'anger': 1.158730158730158, 'anticipation': 0...",{'Positive': 0.5},[],[],Finnish,[],[],"{'Anger': 1, 'Trust': 2}",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,63738b7a4d6d36651d600e22,Vase - Aalto vase,41793,17/03/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#41793S5,Finnish,...,"{'anger': 1, 'anticipation': 0, 'disgust': 1.6...",{'Negative': 1},[],{'dbr:Aalto_Vase': {'@types': ['http://dbpedia...,Finnish,[],[],"{'Anger': 1, 'Fear': 1, 'Sadness': 2, 'Disgust...",,
524,636c146622d37d085f130324,Creamer – MK,AM1699,27/09/2022,Design Evening,Anonymous,734629,General audience,,Finnish,...,{'interest': 0.54682213},{'Positive': 0.9398848},[],[],Finnish,[],[],{'Interest': 0.54682213},#AM1699C8,
525,636c14bf3f72b3416c26d331,Subway bench,41816,29/09/2022,Maunula,Anonymous,345617,Senior citizens,,Finnish,...,{},{},[],{'dbr:Vuosaari': {'@types': ['http://dbpedia.o...,Finnish,[],[],{},#41816C19,
526,63738b6578232564042adf51,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S7,English,...,"{'anger': 0, 'anticipation': 0, 'disgust': 0, ...",{'Positive': 1},[],[],English,[],[],"{'Interest': 1, 'Surprise': 1, 'Trust': 2}",,


In [164]:
# List every column label of the combined frame.
# NOTE(review): the recorded listing contains both 'Original language '
# (with a trailing space) and 'Original language' -- presumably duplicates
# from merging the two sources; confirm which one downstream cells should use.
combinedStories_df.columns

Index(['_id', 'Object name', 'Object number', 'Date of recording',
       'Context/event/workshop', 'Contributor name', 'User ID',
       'End-user community', 'Story ID(#)', 'Original language ',
       'Audio recording file name', 'Start timestamp', 'End timestamp',
       'Finnish translation', 'English translation', 'Swedish translation',
       'Other language translation', 'Additional info', '_datasetid',
       '_timestamp', '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second', 'caption',
       'dc:description', 'degari_extracted_emotions', 'plutchik_emotions',
       'emotion_recognition', 'concept_parsing', 'subjectivity_detection',
       'polarity_classification', 'intensity_ranking', 'aspect_extraction',
       'personality_prediction', 'depression_categorization',
       'toxicity_spotting', '@id', 'emotions', 'sentiment', 'toxicity',
       'entities', 'Original language', 'MFT_Values', 'EkmanEmotions

In [165]:
# Columns shown next to the inspected column in each columnDistribution
# report (the function appends the inspected column itself to a copy).
# NOTE(review): this rebinds a notebook-level name that earlier cells also
# pass to columnDistribution, so re-running those cells afterwards changes
# what they display.
displayColumns = ['_id']

In [166]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Object name' ('*' placeholders are treated as empty by
# columnDistribution).
# NOTE(review): the recorded output lists near-duplicate labels as separate
# values (e.g. 'Creamer – MK' vs 'Creamer - MK'); presumably these should be
# normalised before counting -- confirm.
columnDistribution(
    combinedStories_df,
    'Object name',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Object name)
Number of elements: 527


Unnamed: 0,_id,Object name
0,63738b5eb6bab71ffc679aab,Cup - Myrna
1,636c14b53f72b3416c26d32b,Stool - Stool 60
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram)
3,63738b71ec6863140217f37a,Rug - Suo
4,63738b75b6bab71ffc679ab9,Scissors - Fiskars
...,...,...
523,63738b7a4d6d36651d600e22,Vase - Aalto vase
524,636c146622d37d085f130324,Creamer – MK
525,636c14bf3f72b3416c26d331,Subway bench
526,63738b6578232564042adf51,Decorative object - Pässi (Ram)




Rows with empty values in column (Object name)
Number of elements: 1


Unnamed: 0,_id,Object name
187,63738b7bfb9758706e727891,*




Different values in column (Object name): 79


['Cup - Myrna', 'Stool - Stool 60', 'Decorative object - Pässi (Ram)', 'Rug - Suo', 'Scissors - Fiskars', 'Vase, A. W. Finch, blue', 'Cocktail glass – 2744', 'Coffee pot - Pehtoori', 'Smart phone: Nokia 9210 Communicator', 'Cup: Flora', 'Chair - Pastille', 'Necklace – Planetaariset laaksot', 'Jug – 1611', 'Pot - Timo Sarpaneva', 'Subway bench', 'Jug – 1610', 'Armchair - Paimio armchair 41', 'Bicycle - Jopo', 'Canister - Jerry', 'Vase - Kyllikki Salmenhaara', 'Maternity package', 'Teapot – Japanese teapot', 'Moped - Solifer export', 'Shirt - Jokapoika', 'Glass sculpture – Fasaani', 'Rug: Simpukka (Sea Shell)', 'Cutlery: Scandia', 'Lamp – Block', 'Creamer – MK', 'Vase, Kyllikki Salmenhaara', 'Cup: Ruska', 'High chair – High chair 616', 'Mug - Moomin mug', 'Cutlery - Savonia', 'Television - Asa Futura', 'Creamer - MK', 'Windbreaker - Torstai', 'Ryijy rug: Suopursuja (Marsh Tea)', 'Dress: Monrepos (fabric: Keidas)', 'Vase - Aalto vase', 'Jui

Decorative object - Pässi (Ram)       30
Chair - Pastille                      24
Rug - Suo                             23
Subway bench                          22
Moped - Solifer export                21
                                      ..
Cup: Think and Drink                   1
High chair 616                         1
Jug – 1611                             1
Ryijy rug: Rautaristi (Iron Cross)     1
Ryijy rug: Seebra (Zebra)              1
Name: Object name, Length: 79, dtype: int64





In [167]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Object number' in the combined frame.
columnDistribution(
    combinedStories_df,
    'Object number',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Object number)
Number of elements: 528


Unnamed: 0,_id,Object number
0,63738b5eb6bab71ffc679aab,44176
1,636c14b53f72b3416c26d32b,44163
2,63738b67fb9758706e727885,41768
3,63738b71ec6863140217f37a,32361
4,63738b75b6bab71ffc679ab9,44174
...,...,...
523,63738b7a4d6d36651d600e22,41793
524,636c146622d37d085f130324,AM1699
525,636c14bf3f72b3416c26d331,41816
526,63738b6578232564042adf51,41768




Rows with empty values in column (Object number)
Number of elements: 0


Unnamed: 0,_id,Object number




Different values in column (Object number): 60


['44176', '44163', '41768', '32361', '44174', '16101', '12595', '8182', '41377', '44181', '44165', '38267', '22769', 'C370', '41816', '9249', '11780', '32030', '17248', '40881', 'B561', '32271', '41821', '13081', '33288', '44171', '38467', 'AM1699', '44179', '9226', '44185', '44168', '20260', '44166', '16851', '30074', '41793', '13154', '44167', 'B548', '44182', '9321', '41581', '14218', '13906', '44175', '35971', '38518', '18903', '10026', '16850', '41379', '27303', '44164', '17876', '44184', '44178', '7762', '32628', '32627']


Value distribution: 


41768     30
44165     24
32361     23
41816     22
32271     21
41793     20
44176     20
38267     19
44174     19
8182      16
12595     16
13081     16
9321      13
9249      13
41821     13
38467     13
44168     12
13154     12
32030     12
11780     11
17248     11
C370      11
16101     11
44167     10
AM1699    10
9226      10
44163     10
20260     10
41581      9
B561       8
22769      8
35971      8
44166      7
44185      6
27303      5
38518      4
44175      4
41377      3
44178      3
44164      3
B548       3
17876      2
41379      2
10026      2
18903      2
30074      2
16851      2
13906      2
14218      2
40881      2
44179      2
44181      1
16850      1
44182      1
33288      1
44171      1
44184      1
7762       1
32628      1
32627      1
Name: Object number, dtype: int64





In [168]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Contributor name' in the combined frame.
# NOTE(review): the recorded output counts spelling variants of what looks
# like the same person separately (e.g. 'Päivi Castrén-Kortekangas',
# 'Päivi Castren-Kortekangas', 'Paivi Castren-Kortekangas') -- confirm and
# normalise if so. The output also prints contributor names in full;
# consider clearing or redacting it before sharing the notebook.
columnDistribution(
    combinedStories_df,
    'Contributor name',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Contributor name)
Number of elements: 528


Unnamed: 0,_id,Contributor name
0,63738b5eb6bab71ffc679aab,Eeva-Kaarina Öyry
1,636c14b53f72b3416c26d32b,Anonymous
2,63738b67fb9758706e727885,Sofi
3,63738b71ec6863140217f37a,Kristiina Haara
4,63738b75b6bab71ffc679ab9,Terttu Valonen
...,...,...
523,63738b7a4d6d36651d600e22,Riitta Katriina Louhensalo
524,636c146622d37d085f130324,Anonymous
525,636c14bf3f72b3416c26d331,Anonymous
526,63738b6578232564042adf51,Sofi




Rows with empty values in column (Contributor name)
Number of elements: 0


Unnamed: 0,_id,Contributor name




Different values in column (Contributor name): 48


['Eeva-Kaarina Öyry', 'Anonymous', 'Sofi', 'Kristiina Haara', 'Terttu Valonen', 'Riitta Katriina Louhensalo', 'Liisa Nummela', 'Olli', 'Päivi Castrén-Kortekangas', 'Marke Kaskimies', 'Anneli Luoti', 'Pia Hintikka', 'Tuuli Tiihonen', 'Ana Rodrigues', 'Leena Lappalainen', 'Fardoso', 'Anita Salmela', 'Fish', 'Marja-Liisa', 'Leena Riitinen', 'Leena Wilkman', 'Leila Mustanoja-Syysmeri', 'Veikko Luoti', 'Kirsti L', 'Anneli Hietanen', 'Seija Järvinen', 'Lauri', 'Päivi Castren-Kortekangas', 'Hamed Malek Rashed', 'Saifuddin', 'A. R.', 'Pauliina Härkönen', 'Mirja. J', 'Liisa Kemppi', 'Saleh', 'Lea Lifflander', 'Kaarina Oinonen', 'AnnaLiisa Joukainen', 'Soili Viitaniemi', 'Helena', 'Olli Jaakkola', 'Halira', 'Maria Levander', 'Sanjeewa Pawantha', 'Leea Riittinen', 'Khallat', 'Paivi Castren-Kortekangas', 'Elmeri Härkönen']


Value distribution: 


Anonymous                     384
Eeva-Kaarina Öyry              12
Sofi                           11
Riitta Katriina Louhensalo     10
Leena Lappalainen              10
Terttu Valonen                  7
Olli                            7
Marke Kaskimies                 6
Ana Rodrigues                   6
Fish                            5
Kristiina Haara                 4
Päivi Castren-Kortekangas       4
Leena Wilkman                   4
Leena Riitinen                  4
Anita Salmela                   3
Tuuli Tiihonen                  3
Mirja. J                        3
Helena                          3
Kirsti L                        3
Liisa Kemppi                    2
Saifuddin                       2
Halira                          2
Maria Levander                  2
Lauri                           2
Anneli Hietanen                 2
Marja-Liisa                     2
Fardoso                         2
Pia Hintikka                    2
Liisa Nummela                   2
Seija Järvinen





In [169]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'User ID' in the combined frame (columnDistribution stringifies
# numeric columns first).
columnDistribution(
    combinedStories_df,
    'User ID',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (User ID)
Number of elements: 528


Unnamed: 0,_id,User ID
0,63738b5eb6bab71ffc679aab,751188
1,636c14b53f72b3416c26d32b,787682
2,63738b67fb9758706e727885,512341
3,63738b71ec6863140217f37a,139120
4,63738b75b6bab71ffc679ab9,677821
...,...,...
523,63738b7a4d6d36651d600e22,788016
524,636c146622d37d085f130324,734629
525,636c14bf3f72b3416c26d331,345617
526,63738b6578232564042adf51,512341




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,User ID




Different values in column (User ID): 413


['751188', '787682', '512341', '139120', '677821', '647498', '384393', '275365', '788016', '719025', '796249', '480163', '703428', '958841', '894906', '215378', '446753', '101381', '466661', '742369', '592912', '854873', '789838', '902419', '492976', '464584', '652225', '221852', '111333', '781657', '605291', '132192', '535800', '981129', '414852', '985925', '232207', '266015', '627927', '122569', '811474', '142112', '393458', '448163', '388721', '491960', '536507', '399203', '772814', '111222', '307500', '501851', '594472', '348608', '244152', '283382', '697639', '778993', '199637', '146872', '656728', '147199', '316813', '254668', '226144', '647464', '426551', '626889', '766242', '810077', '336160', '646613', '908403', '879111', '315375', '489255', '388401', '792314', '861297', '163866', '929262', '501029', '983651', '752324', '646909', '543281', '744996', '466896', '478101', '402459', '720643', '620452', '376777', '364570', '766245', '74

751188    12
512341    11
594472    10
788016    10
677821     7
          ..
739567     1
918705     1
741370     1
810394     1
345617     1
Name: User ID, Length: 413, dtype: int64





In [170]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'End-user community' in the combined frame.
columnDistribution(
    combinedStories_df,
    'End-user community',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (End-user community)
Number of elements: 528


Unnamed: 0,_id,End-user community
0,63738b5eb6bab71ffc679aab,Senior citizens
1,636c14b53f72b3416c26d32b,General audience
2,63738b67fb9758706e727885,Asylum seekers
3,63738b71ec6863140217f37a,Senior citizens
4,63738b75b6bab71ffc679ab9,Senior citizens
...,...,...
523,63738b7a4d6d36651d600e22,Senior citizens
524,636c146622d37d085f130324,General audience
525,636c14bf3f72b3416c26d331,Senior citizens
526,63738b6578232564042adf51,Asylum seekers




Rows with empty values in column (End-user community)
Number of elements: 0


Unnamed: 0,_id,End-user community




Different values in column (End-user community): 4


['Senior citizens', 'General audience', 'Asylum seekers', 'Finnish language students']


Value distribution: 


Senior citizens              236
General audience             233
Finnish language students     31
Asylum seekers                28
Name: End-user community, dtype: int64





In [171]:
# Report non-empty vs. empty rows, the distinct values and their counts
# for 'Original language' in the combined frame.
# NOTE(review): the frame also has an 'Original language ' column (with a
# trailing space); this call inspects the label without it. The recorded
# output shows four rows with no language and the value 'Italian?' --
# verify both against the source data.
columnDistribution(
    combinedStories_df,
    'Original language',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (Original language)
Number of elements: 524


Unnamed: 0,_id,Original language
0,63738b5eb6bab71ffc679aab,Finnish
1,636c14b53f72b3416c26d32b,Finnish
2,63738b67fb9758706e727885,English
3,63738b71ec6863140217f37a,Finnish
4,63738b75b6bab71ffc679ab9,Finnish
...,...,...
523,63738b7a4d6d36651d600e22,Finnish
524,636c146622d37d085f130324,Finnish
525,636c14bf3f72b3416c26d331,Finnish
526,63738b6578232564042adf51,English




Rows with empty values in column (Original language)
Number of elements: 4


Unnamed: 0,_id,Original language
181,63738b56fb9758706e72787b,
387,63738b5978ff61337567e276,
400,63738b6f461cf10d8073bc94,
493,63738b704d6d36651d600e1c,




Different values in column (Original language): 6


['Finnish', 'English', 'Danish', 'Spanish', 'German', 'Italian?']


Value distribution: 


Finnish     388
English     132
Danish        1
Spanish       1
German        1
Italian?      1
Name: Original language, dtype: int64





In [172]:
# Report the distribution of 'MFT_Values' in the combined frame; the cells
# hold lists, so columnDistribution explodes them to one row per item
# before counting.
# NOTE(review): the recorded output has both real multi-item lists
# ([harm, care]) and composite single labels ('care, harm'), which are
# counted as different values -- presumably the two sources encode
# multi-label cells differently; confirm and unify.
columnDistribution(
    combinedStories_df,
    'MFT_Values',
    displayColumns=displayColumns,
)

Rows with non-empty values in column (MFT_Values)
Number of elements: 126


Unnamed: 0,_id,MFT_Values
2,63738b67fb9758706e727885,"[harm, care, loyalty]"
10,636c14b222d37d085f130350,[care]
11,636c14a40282857a3c150465,[harm]
13,636c14595d9061195e24abfd,[sanctity]
14,63738b664d6d36651d600e16,[loyalty]
...,...,...
502,63738b5df3d1b15c7d3cd1f7,[loyalty]
506,63738b79461cf10d8073bc9a,"[harm, care]"
511,63738b59ec6863140217f36c,[loyalty]
519,636c14690e2530450149d6cf,[care]




Rows with empty values in column (MFT_Values)
Number of elements: 402


Unnamed: 0,_id,MFT_Values
0,63738b5eb6bab71ffc679aab,[]
1,636c14b53f72b3416c26d32b,[]
3,63738b71ec6863140217f37a,[]
4,63738b75b6bab71ffc679ab9,[]
5,636c14d09b33d34fc4602675,[]
...,...,...
523,63738b7a4d6d36651d600e22,[]
524,636c146622d37d085f130324,[]
525,636c14bf3f72b3416c26d331,[]
526,63738b6578232564042adf51,[]




Dataframe exploded in column (MFT_Values)


Unnamed: 0,_id,MFT_Values
2,63738b67fb9758706e727885,harm
2,63738b67fb9758706e727885,care
2,63738b67fb9758706e727885,loyalty
10,636c14b222d37d085f130350,care
11,636c14a40282857a3c150465,harm
...,...,...
506,63738b79461cf10d8073bc9a,care
511,63738b59ec6863140217f36c,loyalty
519,636c14690e2530450149d6cf,care
520,63738b5af3d1b15c7d3cd1f5,harm




Different values in column (MFT_Values): 9


['harm', 'care', 'loyalty', 'sanctity', 'fairness', 'care, harm', 'liberty', 'oppression', 'care, loyalty']


Value distribution: 


care             57
harm             38
loyalty          25
fairness         19
care, harm        5
liberty           5
sanctity          3
oppression        2
care, loyalty     1
Name: MFT_Values, dtype: int64





In [173]:
# Profile 'EkmanEmotions': non-empty vs. empty rows, exploded list entries, distinct values and their counts.
# NOTE(review): 'surprise, anger' and 'enjoyment, disgust' show up as single values in the output,
# i.e. some list entries seem to be comma-joined labels — confirm whether they should be split.
columnDistribution(combinedStories_df, 'EkmanEmotions', displayColumns = displayColumns)

Rows with non-empty values in column (EkmanEmotions)
Number of elements: 51


Unnamed: 0,_id,EkmanEmotions
2,63738b67fb9758706e727885,[sadness]
20,636c145a6b245e0b9926dbc1,[fear]
24,636c1453f5d07205d51f45ca,[enjoyment]
38,636c14810282857a3c150451,"[surprise, anger]"
46,636c14d122d37d085f130362,[enjoyment]
55,63738b6dec6863140217f378,[enjoyment]
63,636c14ac5d9061195e24ac2d,[enjoyment]
74,63738b604d6d36651d600e12,[enjoyment]
82,63738b56ec6863140217f36a,[anger]
92,636c145b3f72b3416c26d2f7,[disgust]




Rows with empty values in column (EkmanEmotions)
Number of elements: 477


Unnamed: 0,_id,EkmanEmotions
0,63738b5eb6bab71ffc679aab,[]
1,636c14b53f72b3416c26d32b,[]
3,63738b71ec6863140217f37a,[]
4,63738b75b6bab71ffc679ab9,[]
5,636c14d09b33d34fc4602675,[]
...,...,...
523,63738b7a4d6d36651d600e22,[]
524,636c146622d37d085f130324,[]
525,636c14bf3f72b3416c26d331,[]
526,63738b6578232564042adf51,[]




Dataframe exploded in column (EkmanEmotions)


Unnamed: 0,_id,EkmanEmotions
2,63738b67fb9758706e727885,sadness
20,636c145a6b245e0b9926dbc1,fear
24,636c1453f5d07205d51f45ca,enjoyment
38,636c14810282857a3c150451,"surprise, anger"
46,636c14d122d37d085f130362,enjoyment
55,63738b6dec6863140217f378,enjoyment
63,636c14ac5d9061195e24ac2d,enjoyment
74,63738b604d6d36651d600e12,enjoyment
82,63738b56ec6863140217f36a,anger
92,636c145b3f72b3416c26d2f7,disgust




Different values in column (EkmanEmotions): 8


['sadness', 'fear', 'enjoyment', 'surprise, anger', 'anger', 'disgust', 'enjoyment, disgust', 'surprise']


Value distribution: 


enjoyment             24
disgust               11
sadness                6
fear                   6
anger                  3
surprise               3
enjoyment, disgust     2
surprise, anger        1
Name: EkmanEmotions, dtype: int64





In [174]:
# Profile 'originalEmotions'. The values are dicts, so the exploded frame and the counts
# shown in the output are over the dict keys (emotion names), not over the scores.
# NOTE(review): the distinct values include a 'False' key — looks like a data artefact; confirm.
columnDistribution(combinedStories_df, 'originalEmotions', displayColumns = displayColumns)

Rows with non-empty values in column (originalEmotions)
Number of elements: 339


Unnamed: 0,_id,originalEmotions
0,63738b5eb6bab71ffc679aab,"{'Joy': 1, 'Anticipation': 1}"
2,63738b67fb9758706e727885,"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'..."
3,63738b71ec6863140217f37a,"{'Serenity': 1.9041194300000002, 'Interest': 6..."
4,63738b75b6bab71ffc679ab9,"{'Anger': 1, 'Trust': 2}"
6,636c14cee6c018559c4a401b,{'Joy': 1}
...,...,...
521,636c14770e2530450149d6d7,"{'Interest': 0.63377804, 'Joy': 0.6065651}"
523,63738b7a4d6d36651d600e22,"{'Anger': 1, 'Fear': 1, 'Sadness': 2, 'Disgust..."
524,636c146622d37d085f130324,{'Interest': 0.54682213}
526,63738b6578232564042adf51,"{'Interest': 1, 'Surprise': 1, 'Trust': 2}"




Rows with empty values in column (originalEmotions)
Number of elements: 189


Unnamed: 0,_id,originalEmotions
1,636c14b53f72b3416c26d32b,{}
5,636c14d09b33d34fc4602675,{}
10,636c14b222d37d085f130350,{}
11,636c14a40282857a3c150465,{}
15,636c149cb4028608e54d7407,{}
...,...,...
516,636c145c5d9061195e24abff,{}
517,636c14a80e2530450149d6f3,{}
519,636c14690e2530450149d6cf,{}
522,636c14beb4028608e54d741b,{}




Dataframe exploded in column (originalEmotions)


Unnamed: 0,_id,originalEmotions
0,63738b5eb6bab71ffc679aab,Joy
0,63738b5eb6bab71ffc679aab,Anticipation
2,63738b67fb9758706e727885,Sadness
2,63738b67fb9758706e727885,Joy
2,63738b67fb9758706e727885,Fear
...,...,...
526,63738b6578232564042adf51,Surprise
526,63738b6578232564042adf51,Trust
527,63738b55619d8873905ef6f8,Interest
527,63738b55619d8873905ef6f8,Serenity




Different values in column (originalEmotions): 13


['Joy', 'Anticipation', 'Sadness', 'Fear', 'Interest', 'Serenity', 'Love', 'False', 'Anger', 'Trust', 'Surprise', 'Disgust', 'Disapproval']


Value distribution: 


Interest        204
Joy              95
Surprise         69
Anticipation     66
Trust            61
Sadness          51
Fear             29
Anger            28
Love             25
Disgust          21
Disapproval      14
Serenity          9
False             1
Name: originalEmotions, dtype: int64





In [175]:
# 'emotions' column: combination of originalEmotions, plutchik_emotions and EkmanEmotions.
# Profile it the same way as the other columns (the counts in the output are over the dict keys).
columnDistribution(combinedStories_df, 'emotions', displayColumns = displayColumns)

Rows with non-empty values in column (emotions)
Number of elements: 352


Unnamed: 0,_id,emotions
0,63738b5eb6bab71ffc679aab,"{'anger': 0, 'anticipation': 1, 'disgust': 0, ..."
2,63738b67fb9758706e727885,"{'anger': 0, 'anticipation': 0.25, 'disgust': ..."
3,63738b71ec6863140217f37a,"{'anger': 0, 'anticipation': 0.142857142857142..."
4,63738b75b6bab71ffc679ab9,"{'anger': 1.158730158730158, 'anticipation': 0..."
6,636c14cee6c018559c4a401b,{'joy': 1}
...,...,...
521,636c14770e2530450149d6d7,"{'interest': 0.63377804, 'joy': 0.6065651}"
523,63738b7a4d6d36651d600e22,"{'anger': 1, 'anticipation': 0, 'disgust': 1.6..."
524,636c146622d37d085f130324,{'interest': 0.54682213}
526,63738b6578232564042adf51,"{'anger': 0, 'anticipation': 0, 'disgust': 0, ..."




Rows with empty values in column (emotions)
Number of elements: 176


Unnamed: 0,_id,emotions
1,636c14b53f72b3416c26d32b,{}
5,636c14d09b33d34fc4602675,{}
10,636c14b222d37d085f130350,{}
11,636c14a40282857a3c150465,{}
15,636c149cb4028608e54d7407,{}
...,...,...
516,636c145c5d9061195e24abff,{}
517,636c14a80e2530450149d6f3,{}
519,636c14690e2530450149d6cf,{}
522,636c14beb4028608e54d741b,{}




Dataframe exploded in column (emotions)


Unnamed: 0,_id,emotions
0,63738b5eb6bab71ffc679aab,anger
0,63738b5eb6bab71ffc679aab,anticipation
0,63738b5eb6bab71ffc679aab,disgust
0,63738b5eb6bab71ffc679aab,fear
0,63738b5eb6bab71ffc679aab,joy
...,...,...
527,63738b55619d8873905ef6f8,sadness
527,63738b55619d8873905ef6f8,surprise
527,63738b55619d8873905ef6f8,trust
527,63738b55619d8873905ef6f8,interest




Different values in column (emotions): 13


['anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust', 'interest', 'serenity', 'love', 'false', 'disapproval']


Value distribution: 


interest        204
joy             177
surprise        156
sadness         151
anticipation    148
trust           148
fear            136
disgust         135
anger           134
love             25
disapproval      14
serenity          9
false             1
Name: emotions, dtype: int64





In [176]:
# Profile 'sentiment': the values are dicts, so the counts in the output are over
# the dict keys (the sentiment labels), not over the scores.
columnDistribution(combinedStories_df, 'sentiment', displayColumns = displayColumns)

Rows with non-empty values in column (sentiment)
Number of elements: 403


Unnamed: 0,_id,sentiment
0,63738b5eb6bab71ffc679aab,{'Positive': 1}
2,63738b67fb9758706e727885,{'Positive': 0.42857142857142805}
3,63738b71ec6863140217f37a,{'Positive': 1.703720699999999}
4,63738b75b6bab71ffc679ab9,{'Positive': 0.5}
5,636c14d09b33d34fc4602675,{'Positive': 0.91532284}
...,...,...
522,636c14beb4028608e54d741b,{'Negative': 0.9876988999999999}
523,63738b7a4d6d36651d600e22,{'Negative': 1}
524,636c146622d37d085f130324,{'Positive': 0.9398848}
526,63738b6578232564042adf51,{'Positive': 1}




Rows with empty values in column (sentiment)
Number of elements: 125


Unnamed: 0,_id,sentiment
1,636c14b53f72b3416c26d32b,{}
6,636c14cee6c018559c4a401b,{}
10,636c14b222d37d085f130350,{}
11,636c14a40282857a3c150465,{}
22,636c14496b245e0b9926dbb7,{}
...,...,...
507,636c14bbb4028608e54d7419,{}
517,636c14a80e2530450149d6f3,{}
518,636c14af5d9061195e24ac2f,{}
519,636c14690e2530450149d6cf,{}




Dataframe exploded in column (sentiment)


Unnamed: 0,_id,sentiment
0,63738b5eb6bab71ffc679aab,Positive
2,63738b67fb9758706e727885,Positive
3,63738b71ec6863140217f37a,Positive
4,63738b75b6bab71ffc679ab9,Positive
5,636c14d09b33d34fc4602675,Positive
...,...,...
522,636c14beb4028608e54d741b,Negative
523,63738b7a4d6d36651d600e22,Negative
524,636c146622d37d085f130324,Positive
526,63738b6578232564042adf51,Positive




Different values in column (sentiment): 3


['Positive', 'Neutral', 'Negative']


Value distribution: 


Positive    309
Negative     54
Neutral      40
Name: sentiment, dtype: int64





# DMH User responses

This dataset lacks interaction data (other than a few emotions associated with stories). Therefore, it is more suitable for the recommender.

In [134]:
# Read the user-responses JSON export located in `folder` and show the resulting dataframe.
filename = 'DB130_DMH - Pop-up_VR_Museum - User Responses'
userResponses_df = pd.read_json(f"{folder}/{filename}.json")
userResponses_df

Unnamed: 0,_id,User ID,Start timestamp,Selected language,Selected age,End-user community,Selected avatar,Game experience,Objects available for selection,Categories selected,...,Emotional reaction to stories,Gameplay time,_datasetid,_timestamp,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second
0,638f4b4032859c045a6e737c,6572085,11/29/2022 18:47:58,FIN,1946-1964,General audience,13081,Never,All objects in the main dataset,"decorative, dress, vase, vehicle,",...,"#41793S1 love,","Gameplay time: 850.9962, End time: 11/29/2022 ...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1670335296,2022,12,6,14,1,36
1,638f4b40979772150849fbbf,2248709,11/29/2022 18:55:20,FIN,1946-1964,General audience,38518,Never,All objects in the main dataset,"furniture, textile, glassware and ceramicware,",...,"#32361S7 love,","Gameplay time: 441.916, End time: 11/29/2022 1...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1670335296,2022,12,6,14,1,36
2,638f4b40142ba269db23bd8c,9919638,11/29/2022 19:04:23,FIN,1928-1945,Senior citizens,41768,Rarely,All objects in the main dataset,"other, kitchenware,",...,*,"Gameplay time: 541.8212, End time: 11/29/2022 ...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1670335296,2022,12,6,14,1,36
3,638f4b3fd90fcc0b682f8408,6434373,11/29/2022 18:25:49,ENG,1997-2012,General audience,13081,Occasionally,All objects in the main dataset,"other,",...,"#44174S4 optimism,","Gameplay time: 432.2325, End time: 11/29/2022 ...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1670335295,2022,12,6,14,1,35
4,638f4b3f27823c1cc16ce96d,1833308,11/29/2022 18:33:45,FIN,1981-1996,General audience,41768,Frequently,All objects in the main dataset,"electronics, other,",...,"#32030S1 optimism,","Gameplay time: 475.3929, End time: 11/29/2022 ...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1670335295,2022,12,6,14,1,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,5/17/2022 12:14:09,ENG,1981-1996,General audience,41768,*,"44165, 41793, C370, 8182, 44185, 44163, 32030,...",,...,*,"Gameplay time: 277.1, End time: 5/17/2022 9:14...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1669643944,2022,11,28,13,59,4
210,6384bea8f85bd1292d692535,13447409,5/17/2022 12:30:13,ENG,1965-1980,General audience,41768,*,"44165, 41793, C370, 8182, 44185, 44163, 32030,...",,...,"#44163S4 joy,","Gameplay time: 232.8556, End time: 5/17/2022 9...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1669643944,2022,11,28,13,59,4
211,6384bea7784d110d88092921,75292986,5/16/2022 22:03:13,ENG,1965-1980,General audience,41768,*,"44165, 41793, C370, 8182, 44185, 44163, 32030,...",,...,*,"Gameplay time: 82.93777, End time: 5/16/2022 7...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1669643943,2022,11,28,13,59,3
212,6384bea75da06d160b66a215,13644563,5/17/2022 10:59:16,ENG,1946-1964,General audience,41768,*,"44165, 41793, C370, 8182, 44185, 44163, 32030,...",,...,"#41793S5 surprise, #44165S9 neutral, #44174S3 ...","Gameplay time: 215.4281, End time: 5/17/2022 7...",468d274c-a6ae-4b62-a01e-3fe76a0865fa,1669643943,2022,11,28,13,59,3


In [135]:
# List the column names available in the user-responses dataframe.
userResponses_df.columns

Index(['_id', 'User ID', 'Start timestamp', 'Selected language',
       'Selected age', 'End-user community', 'Selected avatar',
       'Game experience', 'Objects available for selection',
       'Categories selected', 'Objects selected', 'Objects listened to',
       'Objects immersed in', 'Objects collected', 'Stories listened to',
       'Emotional reaction to stories', 'Gameplay time', '_datasetid',
       '_timestamp', '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second'],
      dtype='object')

In [136]:
# Identifier columns passed to every columnDistribution call below, so they are
# displayed next to the column being inspected.
displayColumns = ['_id', 'User ID']

In [137]:
# Profile 'User ID': non-empty vs. empty rows, distinct values and their counts.
columnDistribution(userResponses_df, 'User ID', displayColumns = displayColumns)

Rows with non-empty values in column (User ID)
Number of elements: 214


Unnamed: 0,_id,User ID
0,638f4b4032859c045a6e737c,6572085
1,638f4b40979772150849fbbf,2248709
2,638f4b40142ba269db23bd8c,9919638
3,638f4b3fd90fcc0b682f8408,6434373
4,638f4b3f27823c1cc16ce96d,1833308
...,...,...
209,6384bea891c3ac4f02557f98,37379384
210,6384bea8f85bd1292d692535,13447409
211,6384bea7784d110d88092921,75292986
212,6384bea75da06d160b66a215,13644563




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,User ID




Different values in column (User ID): 214


['6572085', '2248709', '9919638', '6434373', '1833308', '6825977', '8083958', '6471022', '9709897', '1986509', '6920935', '5707354', '4399208', '2272470', '3851791', '1714374', '7517759', '9314686', '6613860', '70298586', '21883814', '81013783', '71073614', '78001245', '92309761', '21276792', '88221800', '59767017', '76333665', '63567615', '21090549', '19973148', '33237407', '64678386', '98401181', '27872752', '48930607', '29132296', '10387842', '26822262', '96109954', '48018686', '26352442', '53699847', '94333904', '32122140', '44760899', '83680546', '50876281', '89464136', '86176123', '59845397', '92663976', '87946854', '84054512', '48328538', '32887849', '33797261', '38001335', '93407191', '11067813', '71170096', '29842510', '99550804', '90007877', '99585316', '29899841', '21647899', '44459391', '46910753', '20435057', '96260326', '45875019', '92345079', '26682255', '43099230', '19936222', '76854885', '26338489', '47307356', '38605152', 

6572085     1
68115046    1
48390518    1
33949935    1
15180122    1
           ..
26682255    1
43099230    1
19936222    1
76854885    1
22817778    1
Name: User ID, Length: 214, dtype: int64





In [138]:
# Profile 'Selected language': non-empty vs. empty rows, distinct values and their counts.
columnDistribution(userResponses_df, 'Selected language', displayColumns = displayColumns)

Rows with non-empty values in column (Selected language)
Number of elements: 214


Unnamed: 0,_id,User ID,Selected language
0,638f4b4032859c045a6e737c,6572085,FIN
1,638f4b40979772150849fbbf,2248709,FIN
2,638f4b40142ba269db23bd8c,9919638,FIN
3,638f4b3fd90fcc0b682f8408,6434373,ENG
4,638f4b3f27823c1cc16ce96d,1833308,FIN
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,ENG
210,6384bea8f85bd1292d692535,13447409,ENG
211,6384bea7784d110d88092921,75292986,ENG
212,6384bea75da06d160b66a215,13644563,ENG




Rows with empty values in column (Selected language)
Number of elements: 0


Unnamed: 0,_id,User ID,Selected language




Different values in column (Selected language): 3


['FIN', 'ENG', 'SWE']


Value distribution: 


ENG    128
FIN     77
SWE      9
Name: Selected language, dtype: int64





In [139]:
# Profile 'Selected age' (the output shows birth-year ranges such as '1981-1996').
columnDistribution(userResponses_df, 'Selected age', displayColumns = displayColumns)

Rows with non-empty values in column (Selected age)
Number of elements: 214


Unnamed: 0,_id,User ID,Selected age
0,638f4b4032859c045a6e737c,6572085,1946-1964
1,638f4b40979772150849fbbf,2248709,1946-1964
2,638f4b40142ba269db23bd8c,9919638,1928-1945
3,638f4b3fd90fcc0b682f8408,6434373,1997-2012
4,638f4b3f27823c1cc16ce96d,1833308,1981-1996
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,1981-1996
210,6384bea8f85bd1292d692535,13447409,1965-1980
211,6384bea7784d110d88092921,75292986,1965-1980
212,6384bea75da06d160b66a215,13644563,1946-1964




Rows with empty values in column (Selected age)
Number of elements: 0


Unnamed: 0,_id,User ID,Selected age




Different values in column (Selected age): 5


['1946-1964', '1928-1945', '1997-2012', '1981-1996', '1965-1980']


Value distribution: 


1981-1996    77
1965-1980    55
1997-2012    32
1946-1964    31
1928-1945    19
Name: Selected age, dtype: int64





In [140]:
# Profile 'End-user community': non-empty vs. empty rows, distinct values and their counts.
columnDistribution(userResponses_df, 'End-user community', displayColumns = displayColumns)

Rows with non-empty values in column (End-user community)
Number of elements: 214


Unnamed: 0,_id,User ID,End-user community
0,638f4b4032859c045a6e737c,6572085,General audience
1,638f4b40979772150849fbbf,2248709,General audience
2,638f4b40142ba269db23bd8c,9919638,Senior citizens
3,638f4b3fd90fcc0b682f8408,6434373,General audience
4,638f4b3f27823c1cc16ce96d,1833308,General audience
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,General audience
210,6384bea8f85bd1292d692535,13447409,General audience
211,6384bea7784d110d88092921,75292986,General audience
212,6384bea75da06d160b66a215,13644563,General audience




Rows with empty values in column (End-user community)
Number of elements: 0


Unnamed: 0,_id,User ID,End-user community




Different values in column (End-user community): 4


['General audience', 'Senior citizens', 'Asylum seekers', 'Finnish language students']


Value distribution: 


General audience             188
Senior citizens               21
Asylum seekers                 4
Finnish language students      1
Name: End-user community, dtype: int64





In [141]:
# Profile 'Selected avatar': non-empty vs. empty rows, distinct values and their counts.
columnDistribution(userResponses_df, 'Selected avatar', displayColumns = displayColumns)

Rows with non-empty values in column (Selected avatar)
Number of elements: 214


Unnamed: 0,_id,User ID,Selected avatar
0,638f4b4032859c045a6e737c,6572085,13081
1,638f4b40979772150849fbbf,2248709,38518
2,638f4b40142ba269db23bd8c,9919638,41768
3,638f4b3fd90fcc0b682f8408,6434373,13081
4,638f4b3f27823c1cc16ce96d,1833308,41768
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,41768
210,6384bea8f85bd1292d692535,13447409,41768
211,6384bea7784d110d88092921,75292986,41768
212,6384bea75da06d160b66a215,13644563,41768




Rows with empty values in column (Selected avatar)
Number of elements: 0


Unnamed: 0,_id,User ID,Selected avatar




Different values in column (Selected avatar): 3


['13081', '38518', '41768']


Value distribution: 


41768    118
38518     78
13081     18
Name: Selected avatar, dtype: int64





In [142]:
# Profile 'Game experience'. Rows holding the '*' placeholder are reported as empty,
# because columnDistribution treats '*' like a missing value.
columnDistribution(userResponses_df, 'Game experience', displayColumns = displayColumns)

Rows with non-empty values in column (Game experience)
Number of elements: 28


Unnamed: 0,_id,User ID,Game experience
0,638f4b4032859c045a6e737c,6572085,Never
1,638f4b40979772150849fbbf,2248709,Never
2,638f4b40142ba269db23bd8c,9919638,Rarely
3,638f4b3fd90fcc0b682f8408,6434373,Occasionally
4,638f4b3f27823c1cc16ce96d,1833308,Frequently
5,638f4b3db805de7202206d95,6825977,Frequently
6,638f4b3e44e74923bd35af5d,8083958,Occasionally
7,638f4b3e7f4a2174a9474f7a,6471022,Rarely
8,638f4b3db276e757f0449993,9709897,Occasionally
9,638f4b3d3069d52f4670e383,1986509,Never




Rows with empty values in column (Game experience)
Number of elements: 186


Unnamed: 0,_id,User ID,Game experience
26,6384bee3946068660a33bed8,88221800,*
29,6384bee275fb044cb00a7d88,63567615,*
30,6384bee291c3ac4f02557fbd,21090549,*
31,6384bee2f85bd1292d69255a,19973148,*
32,6384bee1784d110d88092946,33237407,*
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,*
210,6384bea8f85bd1292d692535,13447409,*
211,6384bea7784d110d88092921,75292986,*
212,6384bea75da06d160b66a215,13644563,*




Different values in column (Game experience): 5


['Never', 'Rarely', 'Occasionally', 'Frequently', 'All the time']


Value distribution: 


Rarely          8
Occasionally    6
Frequently      5
All the time    5
Never           4
Name: Game experience, dtype: int64





In [143]:
# Profile 'Objects selected'.
# NOTE(review): the values are comma-separated strings (e.g. '44174, 38467, '), so the
# distribution in the output counts whole selections rather than individual object ids.
columnDistribution(userResponses_df, 'Objects selected', displayColumns = displayColumns)

Rows with non-empty values in column (Objects selected)
Number of elements: 214


Unnamed: 0,_id,User ID,Objects selected
0,638f4b4032859c045a6e737c,6572085,"41768, 41821, 30074, 27303, 16101, 13081, 4179..."
1,638f4b40979772150849fbbf,2248709,"9321, 9226, 41581, 32361, 32361, 10026, 44181,"
2,638f4b40142ba269db23bd8c,9919638,"44174, 35971, 32030, B561, AM1699,"
3,638f4b3fd90fcc0b682f8408,6434373,"44174, 38467,"
4,638f4b3f27823c1cc16ce96d,1833308,"41377, 38467, 44167, 35971, 32030,"
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,"32030, 44174, C370, 44163, 44176, 41793, 8182,"
210,6384bea8f85bd1292d692535,13447409,"41793, 44163,"
211,6384bea7784d110d88092921,75292986,"44165, 41793, C370, 8182, 44185, 44163, 32030,"
212,6384bea75da06d160b66a215,13644563,"32030, 8182, 41793, , 44165, 44163, 44174,"




Rows with empty values in column (Objects selected)
Number of elements: 0


Unnamed: 0,_id,User ID,Objects selected




Different values in column (Objects selected): 207


['41768, 41821, 30074, 27303, 16101, 13081, 41793, 11780, ', '9321, 9226, 41581, 32361, 32361, 10026, 44181, ', '44174, 35971, 32030, B561, AM1699, ', '44174, 38467, ', '41377, 38467, 44167, 35971, 32030, ', '41768, 44185, 11780, ', 'AM1699, C370, ', '9226, ', '41581, 32361, 30074, 38518, 41768, 38267, 11780, 10026, 44169, C370, ', '41581, 32361, 13154, ', 'B561, C370, 38518, 41768, 38267, 9249, 44163, 41816, 44175, 44185, ', '27303, 41821, C370, 16101, 44165, 35971, 44174, 38467, 44167, 32361, ', '11780, 13081, 41768, 13154, 30074, 44176, 44180, ', 'B561, 44174, 41768, ', '32361, 41581, 11780, 7762, 44181, ', '41768, ', '16101, 41793, 11780, ', '44176, 44175, 44178, 44182, 44176, 44185, 44175, 9321, 44163, 11780, ', '44168, C370, 8182, 8182, 44173, 13154, 41379, ', '32030, 44168, 32361, 11780, 41821,  44167, ', '44165, 9226, 32361, 11780, ', '44163, 44178, 44185, 11780, 44165, ', '11780, 44168, 44176, 41821,  32361, ', '44165, 922

41793,                                                     4
41793, 44165,                                              2
41793, 8182,                                               2
44174, 41793,                                              2
41821, 44176,                                              2
                                                          ..
32030, 41821, 44163, , 41793, 44185, C370,                 1
44165, 44163, 32030, 8182, 41821, 41793, 44174,            1
C370, 44174, 44185, 32030, 8182, 41821, 41793,             1
44174, 32030, 41821, 41793, 8182, 44185, C370,             1
41821, 32030, 8182, 41793, 44176, 44165, 44163, 44174,     1
Name: Objects selected, Length: 207, dtype: int64





In [144]:
# Profile 'Objects listened to'. Both '' and the '*' placeholder are reported as empty.
# NOTE(review): the values are comma-separated strings, so the distribution in the output
# counts whole strings rather than individual object ids.
columnDistribution(userResponses_df, 'Objects listened to', displayColumns = displayColumns)

Rows with non-empty values in column (Objects listened to)
Number of elements: 204


Unnamed: 0,_id,User ID,Objects listened to
0,638f4b4032859c045a6e737c,6572085,"41768, 41821, 41793, 11780,"
1,638f4b40979772150849fbbf,2248709,"9226, 32361,"
2,638f4b40142ba269db23bd8c,9919638,"35971, B561,"
3,638f4b3fd90fcc0b682f8408,6434373,44174
4,638f4b3f27823c1cc16ce96d,1833308,32030
...,...,...,...
209,6384bea891c3ac4f02557f98,37379384,"44174, 41793, 8182,"
210,6384bea8f85bd1292d692535,13447409,"41793, 44163, 44163, 44163,"
211,6384bea7784d110d88092921,75292986,"41793, 44185,"
212,6384bea75da06d160b66a215,13644563,"41793, 44165, 44174,"




Rows with empty values in column (Objects listened to)
Number of elements: 10


Unnamed: 0,_id,User ID,Objects listened to
11,638f4b3c142ba269db23bd89,5707354,*
17,638f4b393069d52f4670e380,9314686,*
21,6384bee52f430f43b94580bb,81013783,
47,6384bedc91c3ac4f02557fb9,83680546,*
48,6384bedcf85bd1292d692556,50876281,*
70,6384bed575fb044cb00a7d80,20435057,
117,6384bec6f85bd1292d692548,87299459,
133,6384bec15da06d160b66a226,65760442,
155,6384beba784d110d8809292e,69758629,
173,6384beb45da06d160b66a21e,66320104,




Different values in column (Objects listened to): 162


['41768, 41821, 41793, 11780, ', '9226, 32361, ', '35971, B561, ', '44174, ', '32030, ', '11780, ', 'C370, ', '9226, ', '11780, C370, ', '32361, ', '44185, ', '44176, ', '41768, ', '44168, C370, 8182, 8182, ', '32030, 44168, 41821,  ', '11780, 44168, 32361, ', '44165, 44165, 9226, 9226, 8182 2615, 8182 2615, 8182 2615, 8182 2615, 8182 2615, 8182 2615, 8182 2615, 41768, 41768, 41768, ', '32030, 11780, ', '41768, 9226, 32030, 44176, 32361, 44174, 41793, ', '44174, 44163, ', 'C370, 41768, ', '44163, 44185, 44174, 41821, ', '44168, 44185, 11780, 41768, 32361, 32361, 41821, 44174, 44176, ', '11780, 41821, ', '44174, 8182, ', '44163, 44174, 32361, 44185, 44176, ', '44163, C370, ', '44185, 32030, 44174, ', '44185, 44185, 11780, 32030, ', 'C370, 44168, 44176, 32361, 41793, 41768, 44165, 44163, 41821, ', '44185, 41793, ', '41793, 41768, ', '11780, 32361, ', '41793, 41821, 41768, ', '11780, 44165, ', '8182, 11780, ', '44163, 32361, ', '41

44165,                                              6
44174,                                              6
11780,                                              5
41793,                                              5
C370,                                               4
                                                   ..
8182, 44165, 32030,                                 1
8182, 44185, 32030,                                 1
44163, 44174, 32030, C370,                          1
44163, 44176, C370, 44165, 32030, 44185, 41821,     1
41821, 41793, 44165, 44174,                         1
Name: Objects listened to, Length: 162, dtype: int64





In [145]:
# Profile 'Stories listened to'. Rows holding the '*' placeholder are reported as empty.
# NOTE(review): the values are comma-separated strings of story ids (e.g. '#41768S3, #41793S1, '),
# so the distribution in the output counts whole strings rather than individual stories.
columnDistribution(userResponses_df, 'Stories listened to', displayColumns = displayColumns)

Rows with non-empty values in column (Stories listened to)
Number of elements: 131


Unnamed: 0,_id,User ID,Stories listened to
0,638f4b4032859c045a6e737c,6572085,"#41768S3, #41793S1, #751188S1,"
1,638f4b40979772150849fbbf,2248709,"#32361S7,"
3,638f4b3fd90fcc0b682f8408,6434373,"#44174S4,"
4,638f4b3f27823c1cc16ce96d,1833308,"#32030S1,"
6,638f4b3e44e74923bd35af5d,8083958,"#C370S3,"
...,...,...,...
207,6384bea919e7bd5cc766af42,49216289,"#44165S3, #41793S5,"
208,6384bea875fb044cb00a7d62,83889225,"#44185S3,"
210,6384bea8f85bd1292d692535,13447409,"#44163S4, #44163S5,"
212,6384bea75da06d160b66a215,13644563,"#41793S5, #44165S9, #44174S3,"




Rows with empty values in column (Stories listened to)
Number of elements: 83


Unnamed: 0,_id,User ID,Stories listened to
2,638f4b40142ba269db23bd8c,9919638,*
5,638f4b3db805de7202206d95,6825977,*
11,638f4b3c142ba269db23bd89,5707354,*
14,638f4b3b32859c045a6e7379,3851791,*
16,638f4b3a7f4a2174a9474f77,7517759,*
...,...,...,...
198,6384beace14c1316ba55e9be,27233685,*
201,6384beab75fb044cb00a7d66,17651139,*
203,6384beaab44a0e511634db26,18115977,*
209,6384bea891c3ac4f02557f98,37379384,*




Different values in column (Stories listened to): 97


['#41768S3, #41793S1, #751188S1, ', '#32361S7, ', '#44174S4, ', '#32030S1, ', '#C370S3, ', '#9226S1, ', '#C370S2, ', '#32361S14, ', '#44185S3, ', '#44176S15, ', '#44174S3, ', '#41768S7, ', '#44168S1, #C370S3, #8182S6, #8182S1, ', '#32030S4, #44168S4, ', '#44168S1, #32361S3, ', '#44165S5, #9226S1, #8182S4, #8182S6, #8182S4, #8182S4, #8182S6, #8182S4, #41768S2, #41768S1, #41768S5, ', '#44176S9, #32361S8, #44174S3, #41793S5, ', '#44163S5, ', '#41768S2, ', '#C370S2, #41768S5, ', '#32361S8, #44176S15, ', '#44163S4, #C370S2, ', '#44176S3, ', '#44185S3, #32030S4, #44174S3, ', '#44185S2, #751188S1, #32030S1, ', '#32361S12, #41768S3, ', '#41793S5, ', '#32361S12, ', '#44165S10, ', '#44163S4, #32361S13, ', '#41768S2, #44176S13, ', '#44163S4, ', '#8182S5, #44163S4, #44174S4, #C370S2, #C370S1, ', '#44165S3, ', '#44165S9, ', '#44174S3, #C370S1, ', '#44163S4, #44185S3, ', '#44165S6, ', '#44185S2, #8182S4, ', '#41821S4, ', '#44176S8, ', '#44176S

#41793S5,                                          12
#44176S15,                                          4
#44185S3,                                           4
#8182S4,                                            4
#8182S6,                                            4
                                                   ..
#44174S3, #C370S1,                                  1
#44165S9,                                           1
#8182S5, #44163S4, #44174S4, #C370S2, #C370S1,      1
#44163S4,                                           1
#41821S3, #41793S5, #44165S9, #44174S3,             1
Name: Stories listened to, Length: 97, dtype: int64





In [146]:
# Profile 'Emotional reaction to stories'. Rows holding the '*' placeholder are reported as empty.
# NOTE(review): each value is a comma-separated string of '<story id> <emotion>' pairs
# (e.g. '#41793S1 love, '), so the distribution in the output counts whole strings, not single reactions.
columnDistribution(userResponses_df, 'Emotional reaction to stories', displayColumns = displayColumns)

Rows with non-empty values in column (Emotional reaction to stories)
Number of elements: 118


Unnamed: 0,_id,User ID,Emotional reaction to stories
0,638f4b4032859c045a6e737c,6572085,"#41793S1 love,"
1,638f4b40979772150849fbbf,2248709,"#32361S7 love,"
3,638f4b3fd90fcc0b682f8408,6434373,"#44174S4 optimism,"
4,638f4b3f27823c1cc16ce96d,1833308,"#32030S1 optimism,"
6,638f4b3e44e74923bd35af5d,8083958,"#C370S3 delight,"
...,...,...,...
207,6384bea919e7bd5cc766af42,49216289,"#41793S5 surprise,"
208,6384bea875fb044cb00a7d62,83889225,"#44185S3 joy,"
210,6384bea8f85bd1292d692535,13447409,"#44163S4 joy,"
212,6384bea75da06d160b66a215,13644563,"#41793S5 surprise, #44165S9 neutral, #44174S3 ..."




Rows with empty values in column (Emotional reaction to stories)
Number of elements: 96


Unnamed: 0,_id,User ID,Emotional reaction to stories
2,638f4b40142ba269db23bd8c,9919638,*
5,638f4b3db805de7202206d95,6825977,*
7,638f4b3e7f4a2174a9474f7a,6471022,*
11,638f4b3c142ba269db23bd89,5707354,*
14,638f4b3b32859c045a6e7379,3851791,*
...,...,...,...
198,6384beace14c1316ba55e9be,27233685,*
201,6384beab75fb044cb00a7d66,17651139,*
203,6384beaab44a0e511634db26,18115977,*
209,6384bea891c3ac4f02557f98,37379384,*




Different values in column (Emotional reaction to stories): 103


['#41793S1 love, ', '#32361S7 love, ', '#44174S4 optimism, ', '#32030S1 optimism, ', '#C370S3 delight, ', '#C370S2 delight, ', '#32361S14 optimism, ', '#44185S3 love, ', '#44176S15 optimism, ', '#44174S3 hope, ', '#41768S7 delight, ', '#44168S1 delight, #C370S3 love, #8182S6 joy, #8182S1 love, ', '#44168S4 neutral, ', '#32361S3 joy, ', '#44165S7 joy, #8182S4 surprise, #41768S2 joy, ', '#44176S9 joy, #32361S8 joy, #44174S3 surprise, #41793S5 joy, ', '#44163S5 surprise, ', '#41768S2 surprise, ', '#C370S2, #41768S5, ', '#32361S9 sadness, #44176S15 joy, ', '#44185S3 joy, ', '#44163S4 joy, #C370S2 neutral, ', '#44176S3 joy, ', '#44185S3 joy, #32030S4 joy, #44174S3 joy, ', '#32361S12 surprise, #41768S3 joy, ', '#41793S5 neutral, ', '#41793S5 joy, ', '#44165S10 neutral, ', '#32361S13 joy, ', '#41768S2 joy, #44176S13 neutral, ', '#44163S4 joy, ', '#41793S5 surprise, ', '#8182S5 surprise, #44163S4 contempt, #44174S4 contempt, #

#41793S5 surprise,                                                                             5
#41793S5 neutral,                                                                              3
#44185S3 joy,                                                                                  3
#8182S4 joy,                                                                                   3
#44163S4 joy,                                                                                  2
                                                                                              ..
#44174S3 joy, #C370S1 surprise,                                                                1
#44165S3 joy,                                                                                  1
#8182S5 surprise, #44163S4 contempt, #44174S4 contempt, #C370S2 contempt, #C370S2 neutral,     1
#41768S2 joy, #44176S13 neutral,                                                               1
#41821S3 joy, #41793S5 surpris





# **Artworks interacted with**

In [147]:
def getNonEmptyDataframe(df, column):
    """Return the rows of ``df`` whose value in ``column`` is considered non-empty.

    A value counts as empty when it is missing (NaN/None), has length 0
    (empty string, list or dict), or is the literal string "*".

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, so the caller's frame is never modified.
    column : str
        Name of the column to test.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the copy. If ``column`` was numeric, it is
        returned converted to strings (needed for the ``.str`` accessor).
    """
    df = df.copy()

    # Capture missing values BEFORE any string conversion: astype(str) can
    # turn NaN into the literal text "nan", which would then pass every
    # emptiness test below and be reported as a real value.
    is_missing = df[column].isna()

    # The .str accessor only works on string/object data, so numeric
    # columns are converted first.
    if is_numeric_dtype(df[column]):
        df[column] = df[column].astype(str)

    values = df[column]
    has_value = (
        (values.str.len() != 0)
        & (values != "*")
        & (values != "")
        & ~is_missing
    )
    return df.loc[has_value]
    

In [148]:
"""
df: pandas dataframe
idObjectColumn: column with the artwork ids
interactionColumns: type of interaction (column name) that must be non-empty for that artwork id
""" 
def interactedArtworks(df, idObjectColumn, interactionColumns = [], artworks = 5):
    """Display the artworks with the most non-empty interactions.

    Rows are kept only when ``idObjectColumn`` and every column listed in
    ``interactionColumns`` hold a non-empty value (as decided by
    ``getNonEmptyDataframe``). The surviving rows are displayed, followed by
    a table of the ``artworks`` most frequent ids and their row counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is copied and never modified.
    idObjectColumn : str
        Column holding the artwork ids.
    interactionColumns : list of str, optional
        Interaction columns that must be non-empty for a row to count.
        The default list is only read, never mutated.
    artworks : int, optional
        Number of top artworks to show (default 5, the previously
        hard-coded value).

    Returns
    -------
    None
        Results are only displayed. Nothing is returned on purpose, so that
        a call placed as the last expression of a cell does not render the
        table a second time.
    """
    validColumns = [idObjectColumn, *interactionColumns]

    # Valid interactions: progressively drop rows that are empty in any
    # of the required columns.
    rows_df = df.copy()
    for column in validColumns:
        rows_df = getNonEmptyDataframe(rows_df, column)

    display(rows_df)

    # Value distribution: count rows per artwork id and keep the top ones.
    distribution_rows_df = (
        rows_df[idObjectColumn]
        .value_counts()
        .rename_axis(idObjectColumn)
        .reset_index(name = 'interactions')
        .head(artworks)
    )
    print("non-empty interactions: " + str(interactionColumns))
    print("Value distribution - " + str(artworks) + " most interacted artworks: ")
    display(distribution_rows_df)
    print("\n")
    
    
    

In [149]:
# Most interacted artworks among rows that carry a non-empty MFT_Values entry.
columns = ["MFT_Values"]
interactedArtworks(
    combinedStories_df,
    "Object name",
    interactionColumns=columns,
)

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,@id,emotions,sentiment,toxicity,entities,Original language.1,MFT_Values,EkmanEmotions,Comment ID (#),_updated
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S11,English,...,63738b67fb9758706e727885,"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'...",{'Positive': 0.42857142857142805},[],{'dbr:Coast': {'@types': ['http://dbpedia.org/...,English,"[harm, care, loyalty]",[sadness],,
10,636c14b222d37d085f130350,Smart phone: Nokia 9210 Communicator,41377,25/10/2022,Design Evening,Anonymous,796249,General audience,,English,...,,{},{},[],{'dbr:Mobile_phone': {'@types': ['http://dbped...,English,[care],[],#41377C1,
11,636c14a40282857a3c150465,Rug - Suo,32361,15/12/2021,"Workshop for a group, at Design Museum",Liisa Nummela,480163,Senior citizens,,,...,,{},{},,,Finnish,[harm],[],#32361C3,
13,636c14595d9061195e24abfd,Chair - Pastille,44165,15/03/2022,"Workshop for a group, at Design Museum",Anonymous,958841,Senior citizens,,Finnish,...,,{'Interest': 0.6042757999999999},{'Positive': 0.9146489},[],{'dbr:Easter': {'@types': ['http://dbpedia.org...,Finnish,[sanctity],[],#44165C6,
14,63738b664d6d36651d600e16,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S9,English,...,63738b664d6d36651d600e16,"{'Joy': 1, 'Interest': 2, 'Trust': 1}",{'Positive': 1},[],"{'dbr:Solar_System': {'@types': [''], 'confide...",English,[loyalty],[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,63738b5df3d1b15c7d3cd1f7,Cup - Myrna,44176,19/1/2022,Online workshop,Anonymous,331897,Senior citizens,#44176S2,Finnish,...,63738b5df3d1b15c7d3cd1f7,"{'Interest': 3, 'Disapproval': 1}",{'Positive': 0.5},[],{'dbr:Coffee': {'@types': ['http://dbpedia.org...,Finnish,[loyalty],[],,
506,63738b79461cf10d8073bc9a,Vase - Aalto vase,41793,08/03/2022,Orimattila,Kirsti L,561211,General audience,#41793S1,Finnish,...,63738b79461cf10d8073bc9a,"{'Joy': 2, 'Fear': 1, 'Sadness': 1, 'Anger': 1...",{'Positive': 0.33333333333333304},[],{'dbr:Vase': {'@types': ['http://dbpedia.org/o...,Finnish,"[harm, care]","[enjoyment, disgust, sadness]",,
511,63738b59ec6863140217f36c,Chair - Pastille,44165,17/3/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#44165S12,Finnish,...,63738b59ec6863140217f36c,"{'Anticipation': 1, 'Joy': 1, 'Trust': 1, 'Sur...",{'Positive': 1.348176566666666},[],{'dbr:Adult_(band)': {'@types': ['http://dbped...,Finnish,[loyalty],[],,
519,636c14690e2530450149d6cf,Cup: Iris,44175,25/10/2022,Design Evening,Anonymous,846698,General audience,,,...,,{},{},,,Finnish,[care],[],#44175C1,


non-empty interactions: ['MFT_Values']
Value distribution - 5 most interacted artworks: 


Unnamed: 0,Object name,interactions
0,Cup - Myrna,10
1,Rug - Suo,9
2,Decorative object - Pässi (Ram),8
3,Chair - Pastille,8
4,Cutlery - Savonia,7






In [150]:
# Most interacted artworks among rows that carry a non-empty emotions entry.
columns = ["emotions"]
interactedArtworks(
    combinedStories_df,
    "Object name",
    interactionColumns=columns,
)

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,@id,emotions,sentiment,toxicity,entities,Original language.1,MFT_Values,EkmanEmotions,Comment ID (#),_updated
0,63738b5eb6bab71ffc679aab,Cup - Myrna,44176,15/3/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#44176S3,Finnish,...,63738b5eb6bab71ffc679aab,"{'Joy': 1, 'Anticipation': 1}",{'Positive': 1},[],{'dbr:Moped': {'@types': ['http://www.w3.org/2...,Finnish,[],[],,
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S11,English,...,63738b67fb9758706e727885,"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'...",{'Positive': 0.42857142857142805},[],{'dbr:Coast': {'@types': ['http://dbpedia.org/...,English,"[harm, care, loyalty]",[sadness],,
3,63738b71ec6863140217f37a,Rug - Suo,32361,17/3/2022,Lahti,Kristiina Haara,139120,Senior citizens,#32361S7,Finnish,...,63738b71ec6863140217f37a,"{'Serenity': 1.9041194300000002, 'Interest': 6...",{'Positive': 1.703720699999999},[],{'dbr:Carpet': {'@types': ['http://www.w3.org/...,Finnish,[],[],,
4,63738b75b6bab71ffc679ab9,Scissors - Fiskars,44174,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#44174S4,Finnish,...,63738b75b6bab71ffc679ab9,"{'Anger': 1, 'Trust': 2}",{'Positive': 0.5},[],[],Finnish,[],[],,
6,636c14cee6c018559c4a401b,"Vase, A. W. Finch, blue",16101,30/08/2022,Design Evening,Anonymous,384393,General audience,,Finnish,...,,{'Joy': 1},{},[],[],Finnish,[],[],#16101C2,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
521,636c14770e2530450149d6d7,Dress: Monrepos (fabric: Keidas),30074,25/10/2022,Design Evening,Anonymous,856559,General audience,,English,...,,"{'Interest': 0.63377804, 'Joy': 0.6065651}",{'Positive': 0.81786275},[],[],English,[],[],#30074C1,
523,63738b7a4d6d36651d600e22,Vase - Aalto vase,41793,17/03/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#41793S5,Finnish,...,63738b7a4d6d36651d600e22,"{'Anger': 1, 'Fear': 1, 'Sadness': 2, 'Disgust...",{'Negative': 1},[],{'dbr:Aalto_Vase': {'@types': ['http://dbpedia...,Finnish,[],[],,
524,636c146622d37d085f130324,Creamer – MK,AM1699,27/09/2022,Design Evening,Anonymous,734629,General audience,,Finnish,...,,{'Interest': 0.54682213},{'Positive': 0.9398848},[],[],Finnish,[],[],#AM1699C8,
526,63738b6578232564042adf51,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S7,English,...,63738b6578232564042adf51,"{'Interest': 1, 'Surprise': 1, 'Trust': 2}",{'Positive': 1},[],[],English,[],[],,


non-empty interactions: ['emotions']
Value distribution - 5 most interacted artworks: 


Unnamed: 0,Object name,interactions
0,Chair - Pastille,21
1,Rug - Suo,20
2,Decorative object - Pässi (Ram),20
3,Moped - Solifer export,17
4,Coffee pot - Pehtoori,15






In [151]:
# Most interacted artworks among rows that carry a non-empty sentiment entry.
columns = ["sentiment"]
interactedArtworks(
    combinedStories_df,
    "Object name",
    interactionColumns=columns,
)

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,@id,emotions,sentiment,toxicity,entities,Original language.1,MFT_Values,EkmanEmotions,Comment ID (#),_updated
0,63738b5eb6bab71ffc679aab,Cup - Myrna,44176,15/3/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#44176S3,Finnish,...,63738b5eb6bab71ffc679aab,"{'Joy': 1, 'Anticipation': 1}",{'Positive': 1},[],{'dbr:Moped': {'@types': ['http://www.w3.org/2...,Finnish,[],[],,
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S11,English,...,63738b67fb9758706e727885,"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'...",{'Positive': 0.42857142857142805},[],{'dbr:Coast': {'@types': ['http://dbpedia.org/...,English,"[harm, care, loyalty]",[sadness],,
3,63738b71ec6863140217f37a,Rug - Suo,32361,17/3/2022,Lahti,Kristiina Haara,139120,Senior citizens,#32361S7,Finnish,...,63738b71ec6863140217f37a,"{'Serenity': 1.9041194300000002, 'Interest': 6...",{'Positive': 1.703720699999999},[],{'dbr:Carpet': {'@types': ['http://www.w3.org/...,Finnish,[],[],,
4,63738b75b6bab71ffc679ab9,Scissors - Fiskars,44174,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#44174S4,Finnish,...,63738b75b6bab71ffc679ab9,"{'Anger': 1, 'Trust': 2}",{'Positive': 0.5},[],[],Finnish,[],[],,
5,636c14d09b33d34fc4602675,"Vase, A. W. Finch, blue",16101,29/09/2022,Maunula,Anonymous,647498,Senior citizens,,Finnish,...,,{},{'Positive': 0.91532284},[],[],Finnish,[],[],#16101C6,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522,636c14beb4028608e54d741b,Subway bench,41816,27/09/2022,Design Evening,Anonymous,568806,General audience,,Italian?,...,,{},{'Negative': 0.9876988999999999},[],[],Italian?,[],[],#41816C17,
523,63738b7a4d6d36651d600e22,Vase - Aalto vase,41793,17/03/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#41793S5,Finnish,...,63738b7a4d6d36651d600e22,"{'Anger': 1, 'Fear': 1, 'Sadness': 2, 'Disgust...",{'Negative': 1},[],{'dbr:Aalto_Vase': {'@types': ['http://dbpedia...,Finnish,[],[],,
524,636c146622d37d085f130324,Creamer – MK,AM1699,27/09/2022,Design Evening,Anonymous,734629,General audience,,Finnish,...,,{'Interest': 0.54682213},{'Positive': 0.9398848},[],[],Finnish,[],[],#AM1699C8,
526,63738b6578232564042adf51,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S7,English,...,63738b6578232564042adf51,"{'Interest': 1, 'Surprise': 1, 'Trust': 2}",{'Positive': 1},[],[],English,[],[],,


non-empty interactions: ['sentiment']
Value distribution - 5 most interacted artworks: 


Unnamed: 0,Object name,interactions
0,Chair - Pastille,22
1,Rug - Suo,20
2,Decorative object - Pässi (Ram),20
3,Moped - Solifer export,18
4,Subway bench,17






In [152]:
# Most interacted artworks among rows where MFT_Values, emotions and
# sentiment are all non-empty at the same time.
columns = ["MFT_Values", "emotions", "sentiment"]
interactedArtworks(
    combinedStories_df,
    "Object name",
    interactionColumns=columns,
)

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,@id,emotions,sentiment,toxicity,entities,Original language.1,MFT_Values,EkmanEmotions,Comment ID (#),_updated
2,63738b67fb9758706e727885,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S11,English,...,63738b67fb9758706e727885,"{'Sadness': 2, 'Joy': 2, 'Fear': 2, 'Interest'...",{'Positive': 0.42857142857142805},[],{'dbr:Coast': {'@types': ['http://dbpedia.org/...,English,"[harm, care, loyalty]",[sadness],,
13,636c14595d9061195e24abfd,Chair - Pastille,44165,15/03/2022,"Workshop for a group, at Design Museum",Anonymous,958841,Senior citizens,,Finnish,...,,{'Interest': 0.6042757999999999},{'Positive': 0.9146489},[],{'dbr:Easter': {'@types': ['http://dbpedia.org...,Finnish,[sanctity],[],#44165C6,
14,63738b664d6d36651d600e16,Decorative object - Pässi (Ram),41768,07/10/2022,"Workshop for a group, at Design Museum",Sofi,512341,Asylum seekers,#41768S9,English,...,63738b664d6d36651d600e16,"{'Joy': 1, 'Interest': 2, 'Trust': 1}",{'Positive': 1},[],"{'dbr:Solar_System': {'@types': [''], 'confide...",English,[loyalty],[],,
17,636c14a10282857a3c150463,Pot - Timo Sarpaneva,C370,19/01/2022,Online workshop,Olli,446753,Senior citizens,,Finnish,...,,"{'Disgust': 1, 'Joy': 1, 'Trust': 1, 'Interest...",{'Positive': 1.8093564999999998},[],[],Finnish,[care],[],#C370C1,
21,636c14883f72b3416c26d311,Jug – 1610,22769,29/09/2022,Maunula,Anonymous,592912,Senior citizens,,Finnish,...,,{'Interest': 2},{'Positive': 1},[],[],Finnish,[care],[],#16101C8,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,63738b60ec6863140217f370,Cup - Myrna,44176,8/3/2022,Orimattila,Riitta Katriina Louhensalo,788016,General audience,#44176S10,Finnish,...,63738b60ec6863140217f370,"{'Surprise': 1, 'Interest': 5, 'Disapproval': 1}",{'Positive': 0.7142857142857141},[],{'dbr:Coffee': {'@types': ['http://dbpedia.org...,Finnish,"[sanctity, care]",[],,
502,63738b5df3d1b15c7d3cd1f7,Cup - Myrna,44176,19/1/2022,Online workshop,Anonymous,331897,Senior citizens,#44176S2,Finnish,...,63738b5df3d1b15c7d3cd1f7,"{'Interest': 3, 'Disapproval': 1}",{'Positive': 0.5},[],{'dbr:Coffee': {'@types': ['http://dbpedia.org...,Finnish,[loyalty],[],,
506,63738b79461cf10d8073bc9a,Vase - Aalto vase,41793,08/03/2022,Orimattila,Kirsti L,561211,General audience,#41793S1,Finnish,...,63738b79461cf10d8073bc9a,"{'Joy': 2, 'Fear': 1, 'Sadness': 1, 'Anger': 1...",{'Positive': 0.33333333333333304},[],{'dbr:Vase': {'@types': ['http://dbpedia.org/o...,Finnish,"[harm, care]","[enjoyment, disgust, sadness]",,
511,63738b59ec6863140217f36c,Chair - Pastille,44165,17/3/2022,Lahti,Riitta Katriina Louhensalo,788016,Senior citizens,#44165S12,Finnish,...,63738b59ec6863140217f36c,"{'Anticipation': 1, 'Joy': 1, 'Trust': 1, 'Sur...",{'Positive': 1.348176566666666},[],{'dbr:Adult_(band)': {'@types': ['http://dbped...,Finnish,[loyalty],[],,


non-empty interactions: ['MFT_Values', 'emotions', 'sentiment']
Value distribution - 5 most interacted artworks: 


Unnamed: 0,Object name,interactions
0,Decorative object - Pässi (Ram),7
1,Rug - Suo,7
2,Chair - Pastille,7
3,Cup - Myrna,7
4,Coffee pot - Pehtoori,6




