In [155]:
import pandas as pd
import json
from IPython.display import display

In [191]:
def print_df(df, header = "df"):
    """Print a titled summary of ``df`` and render it.

    Parameters
    ----------
    df : object supporting ``len`` that ``display`` can render
        The table (or slice of a table) to show.
    header : str, default "df"
        Title line printed before the element count.
    """
    size_line = "Number of elements: {}".format(len(df))
    print(header)
    print(size_line)
    # Rich rendering of the object itself, followed by blank lines as a visual separator.
    display(df)
    print("\n")

In [206]:
def columnDistribution(df, column, displayColumns=None):
    """Print a summary of how the values of ``column`` are distributed in ``df``.

    The report contains, in order: the rows with a non-empty value, the rows
    with an empty value, the exploded rows (only when the first non-empty
    value is a list or dict), the distinct values, and the value counts.

    A value is treated as empty when it is missing (``isna``), is the empty
    string, has an empty string representation, or equals the placeholder
    ``"*"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to inspect.
    column : str
        Name of the column whose values are analysed.
    displayColumns : list of str, optional
        Extra columns shown next to ``column`` in the row listings.
        ``column`` is added at the end if it is not already included.
        The caller's list is never modified.

    Returns
    -------
    None
        Everything is printed / displayed.
    """
    # Work on a private copy: the previous mutable default (``[]``) was appended
    # to in place, so columns leaked from one call to the next and lists passed
    # by the caller were silently modified.
    shown_columns = list(displayColumns) if displayColumns is not None else []
    if column not in shown_columns:
        shown_columns.append(column)

    # Single emptiness mask; the non-empty rows are its exact complement.
    values = df[column]
    is_empty = (
        (values.astype(str).str.len() == 0)
        | (values == "*")
        | (values == "")
        | values.isna()
    )

    # With value
    rows_df = df.loc[~is_empty]
    print_df(rows_df[shown_columns], "Rows with non-empty values in column (" + column + ")")

    # Empty value
    emptyRows_df = df.loc[is_empty]
    print_df(emptyRows_df[shown_columns], "Rows with empty values in column (" + column + ")")

    # Unique values
    # If the value is a list (or dict), explode it first so each element gets its own row.
    # Only the first non-empty value is inspected to decide this.
    explode_rows_df = rows_df.copy()
    if len(rows_df) > 0 and isinstance(rows_df[column].iloc[0], (list, dict)):
        explode_rows_df = explode_rows_df.explode(column)

        # Exploded dataframe
        print("Dataframe exploded in column " + "(" + column + ")")
        display(explode_rows_df[shown_columns])
        print("\n")

    unique_values = pd.unique(explode_rows_df[column])
    print("Different values in column " + "(" + column + ")" + ": " + str(len(unique_values)))
    print("\n")
    print(list(unique_values))
    print("\n")

    # Value distribution
    distribution_rows_df = explode_rows_df[column].value_counts()
    print("Value distribution: ")
    display(distribution_rows_df)
    print("\n")

    # Duplicated values (currently disabled):
    # duplicated_df = explode_rows_df[explode_rows_df.duplicated(subset=[column], keep=False)]
    # print("Duplicated values in column " + "(" + column + ")")
    # display(duplicated_df)



## DMH - DATA

In [158]:
# Base directory of the dataset exports and its sub-directory with the parsed files.
folder = 'data'
parsedFolder = "/".join([folder, 'parsed'])

**Artwork data**

- Title: DMH - Pop_up_VR_Museum - Objects classification - English
- Description: Classification of all objects from Design Museum Helsinki's permanent collection that are currently in the Pop-up VR Museum. The information about these objects is stored in this dataset in English.
- UUID: 0daa0287-d7f4-4f03-a068-95f43afcc347
- URL: https://spice.kmi.open.ac.uk/dataset/details/111
- Number of items: 63

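Each JSON entry includes the following fields (no ontologies are used). Note that the parsed file loaded below uses different labels for some of them (see the `objects_df.columns` output):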
- _id
- Object_name
- Object
- Special name
- Object number
- Designer
- Production_date
- Collection
- Manufacturer
- Dimension in cm: n x m x p format
- Weight in kg
- Material
- Colour
- Timestamp

In [159]:
# Load the parsed export of the objects-classification dataset (DB111) and show it.
filename = 'DB111_DMH - Pop_up_VR_Museum - Objects classification - English'
objects_df = pd.read_json('{}/{}.json'.format(parsedFolder, filename))
objects_df

Unnamed: 0,_id,tittle,Object,Special name,id,author,Production date,Collection,Manufacturer,Dimension in cm,...,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,_updated,year,ColourRGB,image
0,634f19096c70ef5cf152f237,Scandia,CUTLERY,*,44171,"Franck, Kaj",1952,KÃ¤yttÃ¶kokoelma,Hackman Sorsakoski,fork: 17.5 cm (length) x 0.15cm (thickness) k...,...,2022,11,16,11,44,16,1.0,1952.0,"[[109, 114, 120]]",
1,634f1908b7693f159a62e2f6,Savonia,CUTLERY,*,44168,"Babel, Adolf",1967,KÃ¤yttÃ¶kokoelma,Hackman Sorsakoski,fork: 19.5 cm (length) x 0.1 cm (thickness) k...,...,2022,11,16,11,43,14,1.0,1967.0,"[[109, 114, 120]]",
2,636bf322a8e46b2e35302848,Ultima Thule 2332,BOWL,fruit or dessert bowl,14219,"Wirkkala, Tapio",1970,*,"A AhlstrÃ¶m Oy, Iitalan lasitehdas",20x20x9,...,2022,11,9,18,36,18,,1970.0,[],https://designmuseum.api.digimuseum.co/wp-cont...
3,634f1907d8af1b50664089d0,*,JUG,teapot,B561,*,19th century,Antell,*,"16,8 x 14 x 15",...,2022,10,18,22,33,54,1.0,,"[[165, 42, 42], [92, 64, 51]]",https://designmuseum.api.digimuseum.co/wp-cont...
4,634f1903a45b33465c10ceb2,Windbreaker,JACKET,Jacket,44166,"Kellokumpu, Ritva",1984,KÃ¤yttÃ¶kokoelma,Torstai,*,...,2022,10,18,22,31,49,1.0,1984.0,"[[255, 165, 0], [0, 128, 0], [0, 0, 255], [255...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,634f18fee09bfd1d9a04b8ec,Paimio / Armchair 41,CHAIR,armchair,9249,"Aalto, Alvar",1932,*,"Artek, Huonekalutehdas Korhonen",60 x 64 x 80,...,2022,10,18,22,22,6,,1932.0,"[[0, 0, 0], [223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...
60,634f18fed4227a56290cb6c5,Pastille,CHAIR,armchair,44165,"Aarnio, Eero",1968,*,Asko Oy,92 x 92 x 53,...,2022,10,18,22,22,6,,1968.0,"[[255, 255, 0]]",https://designmuseum.api.digimuseum.co/wp-cont...
61,634f18feb7693f159a62e2ef,High chair 616,CHAIR,high chair,9226,"SchultÃ©n, Ben af",1965,*,"Artek Oy, Huonekalutehdas Korhonen Oy ja Norrc...","42,8 x 44 x 77",...,2022,10,18,22,22,6,,1965.0,"[[255, 0, 0], [223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...
62,634f18fd9d480d2bcc4ac79f,Stool 60,CHAIR,stool,44163,"Aalto, Alvar",1933,KÃ¤yttÃ¶kokoelma,Artek,38 x 38 x 44,...,2022,10,18,22,22,5,,1933.0,"[[223, 215, 200]]",https://designmuseum.api.digimuseum.co/wp-cont...


In [160]:
# Column labels available in the parsed objects table.
objects_df.columns

Index(['_id', 'tittle', 'Object', 'Special name', 'id', 'author',
       'Production date', 'Collection', 'Manufacturer', 'Dimension in cm',
       'Weight in kg', 'Materials', 'Colour', '_datasetid', '_timestamp',
       '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second', '_updated',
       'year', 'ColourRGB', 'image'],
      dtype='object')

In [161]:
# Distribution of the `tittle` column, listing `id` next to each row.
columnDistribution(df=objects_df, column='tittle', displayColumns=['id'])

Rows with non-empty values in column (tittle)
Number of elements: 60


Unnamed: 0,id,tittle
0,44171,Scandia
1,44168,Savonia
2,14219,Ultima Thule 2332
4,44166,Windbreaker
5,16850,Rautaristi (Iron Cross)
6,13906,Valkoiset hevoset (White Horses)
7,17876,Liekki (Flame)
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours)
9,32627,Seebra (Zebra)
10,16851,Suopursuja (Marsh Tea)




Rows with empty values in column (tittle)
Number of elements: 4


Unnamed: 0,id,tittle
3,B561,*
34,C370,*
55,17248,*
58,16101,*




Different values in column (tittle): 60


['Scandia', 'Savonia', 'Ultima Thule 2332', 'Windbreaker', 'Rautaristi (Iron Cross)', 'Valkoiset hevoset (White Horses)', 'Liekki (Flame)', 'NeljÃ¤ vÃ¤riÃ¤ (Four Colours)', 'Seebra (Zebra)', 'Suopursuja (Marsh Tea)', 'Raitakaita', 'Suo (Marsh)', 'Simpukka (Sea Shell)', 'Flora (model S)', 'Think and drink', 'Warm', 'Teema (model BAU)', 'Ruska (model S)', 'Katrilli', 'Iris (model U2)', 'Myrna (malli AX)', 'Rice porcelain cup (model FK/VA)', 'i-102 (Aroma 2002)', 'I-lasi 2004', 'Ultima Thule (18 cl)', '1718', '2744 cocktail glass', '1610', 'MK', 'BAU, Muumi', '1618', 'Fiskars scissors', 'Pehtoori 2615', 'Subway bench', 'Garbage Bin', 'Jopo', 'Solifer Export', 'Jerry', 'Mobira Talkman 450', 'Asa Futura', 'C 1810 B', 'Planetaariset Laaksot (Planetary Valleys)', 'Bo boo', 'Nokia 9210 Communicator', 'Monrepos, fabric: Keidas', 'Model 4, fabric: Sormus', 'Lappkojs', 'Fasaani 939.015.91 (Pheasant)', 'Jokapoika, fabric: Piccolo', 'Block', 'Viimeinen at

Scandia                                      1
Savonia                                      1
Pehtoori 2615                                1
Subway bench                                 1
Garbage Bin                                  1
Jopo                                         1
Solifer Export                               1
Jerry                                        1
Mobira Talkman 450                           1
Asa Futura                                   1
C 1810 B                                     1
Planetaariset Laaksot (Planetary Valleys)    1
Bo boo                                       1
Nokia 9210 Communicator                      1
Monrepos, fabric: Keidas                     1
Model 4, fabric: Sormus                      1
Lappkojs                                     1
Fasaani 939.015.91 (Pheasant)                1
Jokapoika, fabric: Piccolo                   1
Block                                        1
Viimeinen ateria (The Last Meal)             1
PÃ¤ssi (Ram) 





In [162]:
# Distribution of the `author` column, listing `id` and `tittle` next to each row.
columnDistribution(df=objects_df, column='author', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (author)
Number of elements: 63


Unnamed: 0,id,tittle,author
0,44171,Scandia,"Franck, Kaj"
1,44168,Savonia,"Babel, Adolf"
2,14219,Ultima Thule 2332,"Wirkkala, Tapio"
4,44166,Windbreaker,"Kellokumpu, Ritva"
5,16850,Rautaristi (Iron Cross),"KeinÃ¤nen-Baeckmann, Aino"
...,...,...,...
59,9249,Paimio / Armchair 41,"Aalto, Alvar"
60,44165,Pastille,"Aarnio, Eero"
61,9226,High chair 616,"SchultÃ©n, Ben af"
62,44163,Stool 60,"Aalto, Alvar"




Rows with empty values in column (author)
Number of elements: 1


Unnamed: 0,id,tittle,author
3,B561,*,*




Different values in column (author): 48


['Franck, Kaj', 'Babel, Adolf', 'Wirkkala, Tapio', 'Kellokumpu, Ritva', 'KeinÃ¤nen-Baeckmann, Aino', 'Sotavalta, Impi', 'Gallen-Kallela, Akseli', 'Simberg-EhrstrÃ¶m, Uhra', 'Brummer, Eva', 'Raitio, A. W.', 'LeppÃ¤nen, Helena', 'Puotila, Ritva', 'Metsovaara, Marjatta', 'ProcopÃ©, Ulla  Tomula, Esteri', 'Rintaniemi, PÃ¤ivi', 'AlfstrÃ¶m, Tony,  Keaney, Brian', 'ProcopÃ©, Ulla', 'Tarna, Tauno', 'Finch, Alfred William', 'Osol, Olga', 'Holzer-Kjellberg, Friedl', 'Sarpaneva, Timo', 'Hopea, Saara', 'Jansson, Tove  Franck, Kaj  Slotte-Elevant, Tove (decoration)', 'BÃ¤ckstrÃ¶m, Olof,  LindÃ©n, Olavi', 'Nurmesniemi, Antti', 'Rajalin, BÃ¶rje,  Nurmesniemi, Antti', 'Creadesign Oy,   KÃ¤hÃ¶nen, Hannu', 'Hellman,   Rislakki, Eero (colours)', 'Lindh, Richard', 'Rislakki, Eero', 'Pitkonen, Jorma', 'Saura, Samuli', 'Karjalainen, Heikki,  JÃ¤ntti, Mikko', 'WeckstrÃ¶m, BjÃ¶rn', 'Ratia, Ristomatti (model of the bag),  Wakisaka, Katsuji (print)', 'Johansson, Panu'

Franck, Kaj                                                         6
Sarpaneva, Timo                                                     3
Finch, Alfred William                                               3
Aalto, Alvar                                                        3
Nurmesniemi, Vuokko                                                 2
Hopea, Saara                                                        2
Nurmesniemi, Antti                                                  2
Wirkkala, Tapio                                                     2
Aarikka, Kaija                                                      1
Saura, Samuli                                                       1
Hellman,   Rislakki, Eero (colours)                                 1
Lindh, Richard                                                      1
SchultÃ©n, Ben af                                                   1
Rislakki, Eero                                                      1
Pitkonen, Jorma     





In [163]:
# Distribution of the `Collection` column, listing `id` and `tittle` next to each row.
columnDistribution(df=objects_df, column='Collection', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (Collection)
Number of elements: 24


Unnamed: 0,id,tittle,Collection
0,44171,Scandia,KÃ¤yttÃ¶kokoelma
1,44168,Savonia,KÃ¤yttÃ¶kokoelma
3,B561,*,Antell
4,44166,Windbreaker,KÃ¤yttÃ¶kokoelma
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours),OpetusministeriÃ¶n ryijykokoelma
9,32627,Seebra (Zebra),OpetusministeriÃ¶n ryijykokoelma
11,44184,Raitakaita,KÃ¤yttÃ¶kokoelma
12,32361,Suo (Marsh),OpetusministeriÃ¶n ryijykokoelma
14,44181,Flora (model S),KÃ¤yttÃ¶kokoelma
15,44182,Think and drink,KÃ¤yttÃ¶kokoelma




Rows with empty values in column (Collection)
Number of elements: 40


Unnamed: 0,id,tittle,Collection
2,14219,Ultima Thule 2332,*
5,16850,Rautaristi (Iron Cross),*
6,13906,Valkoiset hevoset (White Horses),*
7,17876,Liekki (Flame),*
10,16851,Suopursuja (Marsh Tea),*
13,33288,Simpukka (Sea Shell),*
23,7762,i-102 (Aroma 2002),*
24,ILM7606,I-lasi 2004,*
25,14218,Ultima Thule (18 cl),*
26,10026,1718,*




Different values in column (Collection): 5


['KÃ¤yttÃ¶kokoelma', 'Antell', 'OpetusministeriÃ¶n ryijykokoelma', 'Marimekko', 'Vuokko']


Value distribution: 


KÃ¤yttÃ¶kokoelma                    17
OpetusministeriÃ¶n ryijykokoelma     3
Marimekko                            2
Antell                               1
Vuokko                               1
Name: Collection, dtype: int64





In [164]:
# Distribution of the `Manufacturer` column, listing `id` and `tittle` next to each row.
columnDistribution(df=objects_df, column='Manufacturer', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (Manufacturer)
Number of elements: 63


Unnamed: 0,id,tittle,Manufacturer
0,44171,Scandia,Hackman Sorsakoski
1,44168,Savonia,Hackman Sorsakoski
2,14219,Ultima Thule 2332,"A AhlstrÃ¶m Oy, Iitalan lasitehdas"
4,44166,Windbreaker,Torstai
5,16850,Rautaristi (Iron Cross),Oy Neovius Ab
...,...,...,...
59,9249,Paimio / Armchair 41,"Artek, Huonekalutehdas Korhonen"
60,44165,Pastille,Asko Oy
61,9226,High chair 616,"Artek Oy, Huonekalutehdas Korhonen Oy ja Norrc..."
62,44163,Stool 60,Artek




Rows with empty values in column (Manufacturer)
Number of elements: 1


Unnamed: 0,id,tittle,Manufacturer
3,B561,*,*




Different values in column (Manufacturer): 47


['Hackman Sorsakoski', 'A AhlstrÃ¶m Oy, Iitalan lasitehdas', 'Torstai', 'Oy Neovius Ab', 'HumalajÃ¤rvi, Kerttu', 'Suomen KÃ¤sityÃ¶n ystÃ¤vÃ¤t /  Asikainen, Ann-Mari ja Lindberg, Merja', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Sillfors, Pirkko', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Ahlblad, Margit', 'Hackman Designor', 'Suomen KÃ¤sityÃ¶n YstÃ¤vÃ¤t /  Hartikainen, Anneli', 'Oy Finnrya Ab', 'Arabia', 'Amfora', 'Tonfisk', 'Sarvis Oy', 'Ab Iris Oy', 'Iittala', 'A AhlstrÃ¶m Oy, Iittalan lasitehdas', 'NuutajÃ¤rven lasi', 'Fiskars', 'WÃ¤rtsilÃ¤ Oyj Abp, JÃ¤rvenpÃ¤Ã¤n emali', 'W. Rosenlew & Co, Porin konepaja', 'Artekno Oy', 'SÃ¤kkivÃ¤line company', 'Helkama', 'Wilh Bensow Oy', 'HuhtamÃ¤ki Oyj, PyrkijÃ¤ Oy', 'Mobira Oy', 'ASA Radio Oy Turku', 'Fiskars Oyj Abp', 'Lapponia Jewelry Oy', 'Marimekko', 'Nokia Oyj', 'Vuokko Oy', 'Kotilieden Aitta', 'Marimekko Oyj', 'Design House Stockholm', 'Arabia / Bryk, Rut', 'Aarikka Oy', 'Arabia / Salmenhaara, Kyllikki', 'Lilj

Arabia                                                                   7
NuutajÃ¤rven lasi                                                        5
Ab Iris Oy                                                               3
Hackman Sorsakoski                                                       2
Oy Neovius Ab                                                            2
Marimekko                                                                2
Iittala                                                                  2
Marimekko Oyj                                                            1
ASA Radio Oy Turku                                                       1
Fiskars Oyj Abp                                                          1
Lapponia Jewelry Oy                                                      1
Nokia Oyj                                                                1
Vuokko Oy                                                                1
Kotilieden Aitta         





In [165]:
# Distribution of the `Materials` column (list-valued, so it is exploded before counting).
columnDistribution(df=objects_df, column='Materials', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (Materials)
Number of elements: 64


Unnamed: 0,id,tittle,Materials
0,44171,Scandia,[metal]
1,44168,Savonia,[metal]
2,14219,Ultima Thule 2332,[glass]
3,B561,*,[stoneware]
4,44166,Windbreaker,[polyesterivanu]
...,...,...,...
59,9249,Paimio / Armchair 41,[birch]
60,44165,Pastille,[fiberglass]
61,9226,High chair 616,"[solidbirch, plywood]"
62,44163,Stool 60,[fabric]




Rows with empty values in column (Materials)
Number of elements: 0


Unnamed: 0,id,tittle,Materials




Different values in column (Materials): 30


['metal', 'glass', 'stoneware', 'polyesterivanu', 'wool', 'cotton', 'porcelain', 'ceramics', 'wood', 'plastic', 'enamel', 'melamine', 'castiron', 'teak', 'fiberglass', 'leather', 'reinforcedplastic', 'castaluminium', 'silver', 'marquis', 'printedcotton', 'faience', 'pine', 'redclay', 'birchplywood', 'pinelaminate(seat)', 'birch', 'solidbirch', 'plywood', 'fabric']


Value distribution: 


glass                 11
metal                  9
wool                   9
ceramics               8
cotton                 7
plastic                6
stoneware              3
fiberglass             2
pine                   2
porcelain              2
wood                   2
redclay                2
teak                   2
birchplywood           1
solidbirch             1
pinelaminate(seat)     1
birch                  1
printedcotton          1
plywood                1
faience                1
leather                1
marquis                1
silver                 1
castaluminium          1
reinforcedplastic      1
castiron               1
melamine               1
enamel                 1
polyesterivanu         1
fabric                 1
Name: Materials, dtype: int64





In [166]:
# Distribution of the `Colour` column (list-valued, so it is exploded before counting).
columnDistribution(df=objects_df, column='Colour', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (Colour)
Number of elements: 64


Unnamed: 0,id,tittle,Colour
0,44171,Scandia,[metal]
1,44168,Savonia,[metal]
2,14219,Ultima Thule 2332,[colourless glass]
3,B561,*,"[light brown, dark brown]"
4,44166,Windbreaker,"[orange, green, blue, pink]"
...,...,...,...
59,9249,Paimio / Armchair 41,"[black, birch]"
60,44165,Pastille,[yellow]
61,9226,High chair 616,"[red, birch]"
62,44163,Stool 60,[birch]




Rows with empty values in column (Colour)
Number of elements: 0


Unnamed: 0,id,tittle,Colour




Different values in column (Colour): 44


['metal', 'colourless glass', 'light brown', 'dark brown', 'orange', ' green', ' blue', ' pink', 'violet', ' yellowish', 'white', ' red', ' brown', '?', 'black', ' white', 'brown', ' light pastel shade', 'blue', 'different tones of red brown', 'red', ' aniline', ' yellow', 'yellow', ' wood', ' gold', 'green gray', ' violet', 'rubin red', ' purple', 'gray', ' black', 'clear', 'green', 'silver', ' orange', 'wood', 'blue tones', ' several colours', ' blue-green', 'rio brown', ' birch', 'birch', 'pine']


Value distribution: 


 white                          10
red                              7
white                            6
brown                            6
black                            5
blue                             5
 black                           5
 green                           5
 red                             4
orange                           4
yellow                           4
metal                            3
wood                             3
 blue                            3
 brown                           3
colourless glass                 3
 violet                          2
green                            2
silver                           2
 birch                           2
gray                             2
?                                2
birch                            1
 orange                          1
 blue-green                      1
blue tones                       1
clear                            1
 several colours                 1
rio brown           





In [167]:
# Distribution of the `image` column, listing `id` and `tittle` next to each row.
columnDistribution(df=objects_df, column='image', displayColumns=['id', 'tittle'])

Rows with non-empty values in column (image)
Number of elements: 42


Unnamed: 0,id,tittle,image
2,14219,Ultima Thule 2332,https://designmuseum.api.digimuseum.co/wp-cont...
3,B561,*,https://designmuseum.api.digimuseum.co/wp-cont...
20,44175,Iris (model U2),https://designmuseum.api.digimuseum.co/wp-cont...
23,7762,i-102 (Aroma 2002),https://designmuseum.api.digimuseum.co/wp-cont...
24,ILM7606,I-lasi 2004,https://designmuseum.api.digimuseum.co/wp-cont...
25,14218,Ultima Thule (18 cl),https://designmuseum.api.digimuseum.co/wp-cont...
26,10026,1718,https://designmuseum.api.digimuseum.co/wp-cont...
27,12595,2744 cocktail glass,https://designmuseum.api.digimuseum.co/wp-cont...
28,22769,1610,https://designmuseum.api.digimuseum.co/wp-cont...
29,AM1699,MK,https://designmuseum.api.digimuseum.co/wp-cont...




Rows with empty values in column (image)
Number of elements: 22


Unnamed: 0,id,tittle,image
0,44171,Scandia,
1,44168,Savonia,
4,44166,Windbreaker,
5,16850,Rautaristi (Iron Cross),
6,13906,Valkoiset hevoset (White Horses),
7,17876,Liekki (Flame),
8,32628,NeljÃ¤ vÃ¤riÃ¤ (Four Colours),
9,32627,Seebra (Zebra),
10,16851,Suopursuja (Marsh Tea),
11,44184,Raitakaita,




Different values in column (image): 39


['https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-017298_wirkkala_ultima_thule-2000x1621.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/10/wsi-imageoptim-b561-2000x1500.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/08/wsi-imageoptim-b548-2000x2665.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025419-2000x1333.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-dmpm_20150103_2785-2000x3000.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025608__franck_p-1-1327x1823.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-franck-1610-lh-1667x2379.jpeg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/05/kermakko-mk-am1699-320x320.jpg', 'https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/09/wsi

https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-017298_wirkkala_ultima_thule-2000x1621.jpg                                                  2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-025419-2000x1333.jpg                                                                        2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/02/wsi-imageoptim-dmpm_20150103_2785-2000x3000.jpg                                                            2
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/04/wsi-imageoptim-b548-2000x2665.jpg                                                                          1
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/03/wsi-imageoptim-22320118103827303-320x495.jpg                                                               1
https://designmuseum.api.digimuseum.co/wp-content/uploads/2017/09/wsi-imageoptim-38518-899x1299.jpg                          





# DMH Stories

In [174]:
# Load the DEGARI export of the audio-recorded stories and show it.
# NOTE(review): the rendered table for this cell shows a column `0` holding dict
# records next to `_id`; presumably the records are nested and may need
# flattening before use - confirm.
filename = 'DMH_127-AudioRecorded-DEGARI'
audioStoryDegari_df = pd.read_json('{}/{}.json'.format(folder, filename))
audioStoryDegari_df

Unnamed: 0,0,_id,_datasetid,_timestamp,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,_updated
0,"{'_id': '63738b60fb9758706e727881', 'Object na...",63738b60fb9758706e727881,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
1,"{'_id': '63738b5b28b056289c6b6f10', 'Object na...",63738b5b28b056289c6b6f10,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
2,"{'_id': '63738b65461cf10d8073bc8e', 'Object na...",63738b65461cf10d8073bc8e,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
3,"{'_id': '63738b634d6d36651d600e14', 'Object na...",63738b634d6d36651d600e14,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101413,2022,11,22,7,16,53,True
4,"{'_id': '63738b79461cf10d8073bc9a', 'Object na...",63738b79461cf10d8073bc9a,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101412,2022,11,22,7,16,52,True
...,...,...,...,...,...,...,...,...,...,...,...
117,"{'_id': '63738b64b6bab71ffc679aaf', 'Object na...",63738b64b6bab71ffc679aaf,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101390,2022,11,22,7,16,30,True
118,"{'_id': '63738b6bb6bab71ffc679ab3', 'Object na...",63738b6bb6bab71ffc679ab3,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True
119,"{'_id': '63738b6f619d8873905ef709', 'Object na...",63738b6f619d8873905ef709,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True
120,"{'_id': '63738b55619d8873905ef6f8', 'Object na...",63738b55619d8873905ef6f8,86e6f3e5-6647-4107-b98b-bbde5cf9ebf5,1669101389,2022,11,22,7,16,29,True


In [175]:
# Load the semantic-annotation dataset (DB134) for the transcribed audio stories and show it.
filename = 'DB134_DMH - Pop-up_VR_Museum - Semantic Annotation for Transcribed Audio-recorded Stories'
audioStoryEmotions_df = pd.read_json('{}/{}.json'.format(folder, filename))
audioStoryEmotions_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,emotions,sentiment,toxicity,entities,_updated
0,63738b53b6bab71ffc679aa4,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S3,Finnish,...,12,2,14,50,32,"{'Sadness': 2, 'Interest': 3, 'Joy': 1, 'Anger...",{'Positive': 0.33333333333333304},[],"{'dbr:Screw': {'@types': [''], 'confidence': 0...",
1,63738b52ec6863140217f367,Bicycle - Jopo,11780,15/03/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#751188S1,Finnish,...,12,2,14,50,32,"{'Joy': 2, 'Interest': 1, 'Anticipation': 1, '...",{'Positive': 1},[],{'dbr:Bicycle': {'@types': ['http://dbpedia.or...,
2,63738b53fb9758706e727878,Canister - Jerry,32030,22/03/2022,Laajasalo,Maria Levander,789889,Senior citizens,#32030S1,Finnish,...,12,2,14,50,31,{'Interest': 1},{'Positive': 1},[],{'dbr:Gasoline': {'@types': ['http://dbpedia.o...,
3,63738b53f3d1b15c7d3cd1f0,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S2,Finnish,...,12,2,14,50,31,"{'Joy': 1, 'Disapproval': 1, 'Interest': 2, 'S...",{'Positive': 0.6000000000000001},[],{'dbr:Fishing_net': {'@types': ['http://www.w3...,
4,63738b5578ff61337567e273,Ceramic tile - Viimeinen Ateria,35971,21/09/2022,"Workshop for a group, at Design Museum",Saifuddin,119809,Finnish language students,#35971S2,Finnish,...,12,2,14,50,30,"{'Interest': 2, 'Joy': 1}",{'Positive': 1},[],[],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,63738b7aec6863140217f380,Vase - Aalto vase,41793,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#41793S6,Finnish,...,12,2,14,49,49,"{'Fear': 1, 'Joy': 2, 'Interest': 2, 'Love': 1}",{'Positive': 0.7142857142857141},[],{'dbr:Flea': {'@types': ['http://dbpedia.org/o...,
117,63738b5c78ff61337567e278,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,923304,Senior citizens,#8182S7,Finnish,...,12,2,14,49,48,{'Interest': 2},{'Positive': 1},[],"{'dbr:Coffeemaker': {'@types': [''], 'confiden...",1.0
118,63738b5c78232564042adf4b,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S6,Finnish,...,12,2,14,49,48,{'Interest': 1},{'Positive': 1},[],"{'dbr:Coffeemaker': {'@types': [''], 'confiden...",1.0
119,63738b7bfb9758706e727891,*,B561,21/09/2022,"Workshop for a group, at Design Museum",Hamed Malek Rashed,317892,Finnish language students,#B561S1,English,...,12,2,14,49,48,"{'Anticipation': 1, 'Interest': 1}",{'Positive': 1},[],{'dbr:Teapot': {'@types': ['http://www.w3.org/...,


In [176]:
# Load the values-and-emotions dataset (DB132) for the transcribed audio stories and show it.
filename = 'DB132_DMH - Pop-up_VR_Museum - Values and Emotions for Transcribed Audio-recorded Stories'
audioStoryValues_df = pd.read_json('{}/{}.json'.format(folder, filename))
audioStoryValues_df

Unnamed: 0,_id,Object name,Object number,Date of recording,Context/event/workshop,Contributor name,User ID,End-user community,Story ID(#),Original language,...,_timestamp_year,_timestamp_month,_timestamp_day,_timestamp_hour,_timestamp_minute,_timestamp_second,MFT_Values,EkmanEmotions,Original language.1,_updated
0,63738b53b6bab71ffc679aa4,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S3,Finnish,...,2022,12,1,21,22,58,[],[],,
1,63738b52ec6863140217f367,Bicycle - Jopo,11780,15/03/2022,"Workshop for a group, at Design Museum",Eeva-Kaarina Öyry,751188,Senior citizens,#751188S1,Finnish,...,2022,12,1,21,22,58,[],[enjoyment],,
2,63738b54461cf10d8073bc83,Canister – Jerry,32030,27/09/2022,Design Evening,Lauri,213442,General audience,#32030S5,Finnish,...,2022,12,1,21,22,57,[],[],,
3,63738b53fb9758706e727878,Canister - Jerry,32030,22/03/2022,Laajasalo,Maria Levander,789889,Senior citizens,#32030S1,Finnish,...,2022,12,1,21,22,57,[fairness],[],,
4,63738b53f3d1b15c7d3cd1f0,Canister - Jerry,32030,15/12/2021,"Workshop for a group, at Design Museum",Päivi Castren-Kortekangas,789838,Senior citizens,#32030S2,Finnish,...,2022,12,1,21,22,57,"[harm, fairness]",[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,63738b7aec6863140217f380,Vase - Aalto vase,41793,17/03/2022,Lahti,Terttu Valonen,677821,Senior citizens,#41793S6,Finnish,...,2022,12,1,21,22,17,[care],[],,
118,63738b5c78232564042adf4b,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S6,Finnish,...,2022,12,1,21,22,16,[harm],[],,1.0
119,63738b7bfb9758706e727891,*,B561,21/09/2022,"Workshop for a group, at Design Museum",Hamed Malek Rashed,317892,Finnish language students,#B561S1,English,...,2022,12,1,21,22,16,[harm],[],,
120,63738b5b619d8873905ef6fd,Coffee pot - Pehtoori,8182,15/3/2022,"Workshop for a group, at Design Museum",Anonymous,501851,Senior citizens,#8182S5,Finnish,...,2022,12,1,21,22,15,[care],[],,1.0


In [179]:
# Stories present in audioStoryValues_df that have no match (by `_id`) in audioStoryEmotions_df.
df = audioStoryValues_df.loc[:, ['_id', 'Story ID(#)']]
df2 = audioStoryEmotions_df.loc[:, ['_id', 'Story ID(#)']]

# Left join with an indicator column: rows flagged 'left_only' exist only in the values dataset.
audioStoryMerged_df = pd.merge(
    df,
    df2.drop_duplicates(),
    how='left',
    on=['_id'],
    indicator=True,
)
missing_df = audioStoryMerged_df[audioStoryMerged_df['_merge'] == 'left_only']
missing_df

Unnamed: 0,_id,Story ID(#)_x,Story ID(#)_y,_merge
45,63738b62619d8873905ef701,#44175S1,,left_only


In [180]:
# Column labels available in the values-and-emotions stories table.
audioStoryValues_df.columns

Index(['_id', 'Object name', 'Object number', 'Date of recording',
       'Context/event/workshop', 'Contributor name', 'User ID',
       'End-user community', 'Story ID(#)', 'Original language',
       'Audio recording file name', 'Start timestamp', 'End timestamp',
       'Finnish translation', 'English translation', 'Swedish translation',
       'Other language translation', 'Additional info', '_datasetid',
       '_timestamp', '_timestamp_year', '_timestamp_month', '_timestamp_day',
       '_timestamp_hour', '_timestamp_minute', '_timestamp_second',
       'MFT_Values', 'EkmanEmotions', 'Original language ', '_updated'],
      dtype='object')

**Column distribution (shared columns)**

In [181]:
# Distribution of the `Object name` column, listing `_id` and `Story ID(#)` next to each row.
columnDistribution(df=audioStoryValues_df, column='Object name', displayColumns=['_id', 'Story ID(#)'])

Rows with non-empty values in column (Object name)
Number of elements: 121


Unnamed: 0,_id,Story ID(#),Object name
0,63738b53b6bab71ffc679aa4,#32030S3,Canister - Jerry
1,63738b52ec6863140217f367,#751188S1,Bicycle - Jopo
2,63738b54461cf10d8073bc83,#32030S5,Canister – Jerry
3,63738b53fb9758706e727878,#32030S1,Canister - Jerry
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Canister - Jerry
...,...,...,...
116,63738b7a4d6d36651d600e22,#41793S5,Vase - Aalto vase
117,63738b7aec6863140217f380,#41793S6,Vase - Aalto vase
118,63738b5c78232564042adf4b,#8182S6,Coffee pot - Pehtoori
120,63738b5b619d8873905ef6fd,#8182S5,Coffee pot - Pehtoori




Rows with empty values in column (Object name)
Number of elements: 1


Unnamed: 0,_id,Story ID(#),Object name
119,63738b7bfb9758706e727891,#B561S1,*




Different values in column (Object name): 22


['Canister - Jerry', 'Bicycle - Jopo', 'Canister – Jerry', 'Ceramic tile - Viimeinen Ateria', 'Chair - Pastille', 'Coffee pot - Pehtoori', 'Chair: Pirkka', 'Cup - Myrna', 'Cutlery - Savonia', 'Decorative object - Pässi (Ram)', 'Cup: Iris', 'High chair 616', 'Moped - Solifer export', 'Mug - Moomin mug', 'Pot - Timo Sarpaneva', 'Rug - Suo', 'Scissors - Fiskars', 'Shirt - Jokapoika', 'Smart phone: Nokia 9210 Communicator', 'Stool - Stool 60', 'Stool - Sauna Stool', 'Vase - Aalto vase']


Value distribution: 


Rug - Suo                               16
Decorative object - Pässi (Ram)         16
Cup - Myrna                             15
Chair - Pastille                        12
Moped - Solifer export                  10
Coffee pot - Pehtoori                    9
Vase - Aalto vase                        6
Stool - Stool 60                         5
Shirt - Jokapoika                        4
Scissors - Fiskars                       4
Canister - Jerry                         4
Cutlery - Savonia                        4
Mug - Moomin mug                         3
Pot - Timo Sarpaneva                     3
Ceramic tile - Viimeinen Ateria          2
Canister – Jerry                         2
Bicycle - Jopo                           1
Cup: Iris                                1
Chair: Pirkka                            1
Smart phone: Nokia 9210 Communicator     1
Stool - Sauna Stool                      1
High chair 616                           1
Name: Object name, dtype: int64





In [193]:
# Profile the "Object number" column (non-empty vs. empty rows, distinct values,
# value counts), showing _id / Story ID(#) next to each row.
columnDistribution(
    audioStoryValues_df,
    "Object number",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (Object number)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),Object number
0,63738b53b6bab71ffc679aa4,#32030S3,32030
1,63738b52ec6863140217f367,#751188S1,11780
2,63738b54461cf10d8073bc83,#32030S5,32030
3,63738b53fb9758706e727878,#32030S1,32030
4,63738b53f3d1b15c7d3cd1f0,#32030S2,32030
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,41793
118,63738b5c78232564042adf4b,#8182S6,8182
119,63738b7bfb9758706e727891,#B561S1,B561
120,63738b5b619d8873905ef6fd,#8182S5,8182




Rows with empty values in column (Object number)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),Object number




Different values in column (Object number): 22


['32030', '11780', '35971', '44165', '8182', '44164', '44176', '44168', '41768', '44175', '9226', '32271', '44185', 'C370', '32361', '44174', '41821', '41377', '44163', '9321', '41793', 'B561']


Value distribution: 


41768    16
32361    16
44176    15
44165    12
32271    10
8182      9
41793     6
32030     6
44163     5
44168     4
44174     4
41821     4
44185     3
C370      3
35971     2
44175     1
9226      1
11780     1
44164     1
41377     1
9321      1
B561      1
Name: Object number, dtype: int64





In [194]:
# Profile the "Contributor name" column (non-empty vs. empty rows, distinct
# values, value counts), showing _id / Story ID(#) next to each row.
columnDistribution(
    audioStoryValues_df,
    "Contributor name",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (Contributor name)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),Contributor name
0,63738b53b6bab71ffc679aa4,#32030S3,Päivi Castren-Kortekangas
1,63738b52ec6863140217f367,#751188S1,Eeva-Kaarina Öyry
2,63738b54461cf10d8073bc83,#32030S5,Lauri
3,63738b53fb9758706e727878,#32030S1,Maria Levander
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Päivi Castren-Kortekangas
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Terttu Valonen
118,63738b5c78232564042adf4b,#8182S6,Anonymous
119,63738b7bfb9758706e727891,#B561S1,Hamed Malek Rashed
120,63738b5b619d8873905ef6fd,#8182S5,Anonymous




Rows with empty values in column (Contributor name)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),Contributor name




Different values in column (Contributor name): 31


['Päivi Castren-Kortekangas', 'Eeva-Kaarina Öyry', 'Lauri', 'Maria Levander', 'Saifuddin', 'Anonymous', 'Mirja. J', 'Olli', 'Riitta Katriina Louhensalo', 'Ana Rodrigues', 'Anita Salmela', 'Elmeri Härkönen', 'Leena Lappalainen', 'Terttu Valonen', 'Marja-Liisa', 'Tuuli Tiihonen', 'Fish', 'Pauliina Härkönen', 'Sofi', 'Helena', 'Veikko Luoti', 'Kaarina Oinonen', 'Kristiina Haara', 'Leena Riitinen', 'Marke Kaskimies', 'Pia Hintikka', 'Sanjeewa Pawantha', 'Anneli Luoti', 'Kirsti L', 'Liisa Kemppi', 'Hamed Malek Rashed']


Value distribution: 


Anonymous                     18
Sofi                          11
Riitta Katriina Louhensalo     9
Eeva-Kaarina Öyry              8
Leena Lappalainen              7
Terttu Valonen                 7
Ana Rodrigues                  6
Marke Kaskimies                5
Fish                           5
Leena Riitinen                 4
Olli                           4
Päivi Castren-Kortekangas      3
Kirsti L                       3
Kristiina Haara                3
Helena                         3
Tuuli Tiihonen                 3
Mirja. J                       3
Marja-Liisa                    2
Anita Salmela                  2
Saifuddin                      2
Maria Levander                 2
Pia Hintikka                   2
Lauri                          2
Pauliina Härkönen              1
Elmeri Härkönen                1
Veikko Luoti                   1
Kaarina Oinonen                1
Sanjeewa Pawantha              1
Anneli Luoti                   1
Liisa Kemppi                   1
Hamed Malek Rashed             1
Name: Contributor name, dtype: int64





In [195]:
# Profile the "User ID" column (non-empty vs. empty rows, distinct values,
# value counts), showing _id / Story ID(#) next to each row.
columnDistribution(
    audioStoryValues_df,
    "User ID",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (User ID)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),User ID
0,63738b53b6bab71ffc679aa4,#32030S3,789838
1,63738b52ec6863140217f367,#751188S1,751188
2,63738b54461cf10d8073bc83,#32030S5,213442
3,63738b53fb9758706e727878,#32030S1,789889
4,63738b53f3d1b15c7d3cd1f0,#32030S2,789838
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,677821
118,63738b5c78232564042adf4b,#8182S6,501851
119,63738b7bfb9758706e727891,#B561S1,317892
120,63738b5b619d8873905ef6fd,#8182S5,501851




Rows with empty values in column (User ID)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),User ID




Different values in column (User ID): 41


[789838, 751188, 213442, 789889, 119809, 315375, 234555, 845666, 921848, 344521, 788016, 111222, 147199, 924889, 331897, 594472, 677821, 434969, 626889, 388721, 185280, 254668, 451211, 512341, 964084, 436574, 501851, 766245, 523452, 139120, 766242, 652225, 981129, 133333, 221852, 631141, 111333, 561211, 608474, 317892, 923304]


Value distribution: 


512341    11
788016     9
751188     8
677821     7
594472     7
111222     6
501851     5
254668     5
652225     5
344521     4
766242     4
561211     3
139120     3
436574     3
388721     3
789838     3
845666     3
981129     2
234555     2
626889     2
921848     2
119809     2
789889     2
213442     2
147199     2
315375     2
608474     1
111333     1
631141     1
317892     1
221852     1
133333     1
185280     1
523452     1
766245     1
964084     1
451211     1
434969     1
331897     1
924889     1
923304     1
Name: User ID, dtype: int64





In [196]:
# Profile the "End-user community" column (non-empty vs. empty rows, distinct
# values, value counts), showing _id / Story ID(#) next to each row.
columnDistribution(
    audioStoryValues_df,
    "End-user community",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (End-user community)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),End-user community
0,63738b53b6bab71ffc679aa4,#32030S3,Senior citizens
1,63738b52ec6863140217f367,#751188S1,Senior citizens
2,63738b54461cf10d8073bc83,#32030S5,General audience
3,63738b53fb9758706e727878,#32030S1,Senior citizens
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Senior citizens
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Senior citizens
118,63738b5c78232564042adf4b,#8182S6,Senior citizens
119,63738b7bfb9758706e727891,#B561S1,Finnish language students
120,63738b5b619d8873905ef6fd,#8182S5,Senior citizens




Rows with empty values in column (End-user community)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),End-user community




Different values in column (End-user community): 4


['Senior citizens', 'General audience', 'Finnish language students', 'Asylum seekers']


Value distribution: 


Senior citizens              87
General audience             20
Asylum seekers               12
Finnish language students     3
Name: End-user community, dtype: int64





In [197]:
# Profile the "Original language" column (non-empty vs. empty rows, distinct
# values, value counts), showing _id / Story ID(#) next to each row.
# NOTE(review): the frame's column index also contains 'Original language '
# (with a trailing space); the rows reported empty here may carry their value
# in that sibling column — verify against the source data.
columnDistribution(
    audioStoryValues_df,
    "Original language",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (Original language)
Number of elements: 118


Unnamed: 0,_id,Story ID(#),Original language
0,63738b53b6bab71ffc679aa4,#32030S3,Finnish
1,63738b52ec6863140217f367,#751188S1,Finnish
2,63738b54461cf10d8073bc83,#32030S5,Finnish
3,63738b53fb9758706e727878,#32030S1,Finnish
4,63738b53f3d1b15c7d3cd1f0,#32030S2,Finnish
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,Finnish
118,63738b5c78232564042adf4b,#8182S6,Finnish
119,63738b7bfb9758706e727891,#B561S1,English
120,63738b5b619d8873905ef6fd,#8182S5,Finnish




Rows with empty values in column (Original language)
Number of elements: 4


Unnamed: 0,_id,Story ID(#),Original language
9,63738b56fb9758706e72787b,#44165S3,
20,63738b5978ff61337567e276,#44165S10,
83,63738b6f461cf10d8073bc94,#32361S2,
86,63738b704d6d36651d600e1c,#32361S6,




Different values in column (Original language): 2


['Finnish', 'English']


Value distribution: 


Finnish    99
English    19
Name: Original language, dtype: int64





**Column distribution (extra columns)**

In [199]:
# Profile the list-valued "MFT_Values" column; the helper explodes list cells so
# each label is counted on its own.
# NOTE: an empty list ([]) is not matched by the helper's emptiness test, so
# such rows are reported as non-empty and show up as NaN after the explode.
columnDistribution(
    audioStoryValues_df,
    "MFT_Values",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (MFT_Values)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),MFT_Values
0,63738b53b6bab71ffc679aa4,#32030S3,[]
1,63738b52ec6863140217f367,#751188S1,[]
2,63738b54461cf10d8073bc83,#32030S5,[]
3,63738b53fb9758706e727878,#32030S1,[fairness]
4,63738b53f3d1b15c7d3cd1f0,#32030S2,"[harm, fairness]"
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,[care]
118,63738b5c78232564042adf4b,#8182S6,[harm]
119,63738b7bfb9758706e727891,#B561S1,[harm]
120,63738b5b619d8873905ef6fd,#8182S5,[care]




Rows with empty values in column (MFT_Values)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),MFT_Values




Different values in column (MFT_Values): 7


[nan, 'fairness', 'harm', 'liberty', 'care', 'loyalty', 'sanctity']


Value distribution: 


care        27
harm        22
loyalty     19
fairness    18
liberty      2
sanctity     1
Name: MFT_Values, dtype: int64





In [201]:
# Profile the list-valued "EkmanEmotions" column; the helper explodes list cells
# so each emotion label is counted on its own.
# NOTE: an empty list ([]) is not matched by the helper's emptiness test, so
# such rows are reported as non-empty and show up as NaN after the explode.
columnDistribution(
    audioStoryValues_df,
    "EkmanEmotions",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (EkmanEmotions)
Number of elements: 122


Unnamed: 0,_id,Story ID(#),EkmanEmotions
0,63738b53b6bab71ffc679aa4,#32030S3,[]
1,63738b52ec6863140217f367,#751188S1,[enjoyment]
2,63738b54461cf10d8073bc83,#32030S5,[]
3,63738b53fb9758706e727878,#32030S1,[]
4,63738b53f3d1b15c7d3cd1f0,#32030S2,[]
...,...,...,...
117,63738b7aec6863140217f380,#41793S6,[]
118,63738b5c78232564042adf4b,#8182S6,[]
119,63738b7bfb9758706e727891,#B561S1,[]
120,63738b5b619d8873905ef6fd,#8182S5,[]




Rows with empty values in column (EkmanEmotions)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),EkmanEmotions




Different values in column (EkmanEmotions): 7


[nan, 'enjoyment', 'surprise', 'fear', 'sadness', 'anger', 'disgust']


Value distribution: 


enjoyment    12
disgust       6
fear          3
sadness       3
surprise      2
anger         2
Name: EkmanEmotions, dtype: int64





In [207]:
# Profile the dict-valued "emotions" column of the emotions frame; the helper
# explodes each dict, so the distribution counts the dict keys (emotion names).
# NOTE(review): the saved output contains a 'False' key and a NaN entry —
# presumably artefacts of the upstream annotation data; verify.
columnDistribution(
    audioStoryEmotions_df,
    "emotions",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (emotions)
Number of elements: 121


Unnamed: 0,_id,Story ID(#),emotions
0,63738b53b6bab71ffc679aa4,#32030S3,"{'Sadness': 2, 'Interest': 3, 'Joy': 1, 'Anger..."
1,63738b52ec6863140217f367,#751188S1,"{'Joy': 2, 'Interest': 1, 'Anticipation': 1, '..."
2,63738b53fb9758706e727878,#32030S1,{'Interest': 1}
3,63738b53f3d1b15c7d3cd1f0,#32030S2,"{'Joy': 1, 'Disapproval': 1, 'Interest': 2, 'S..."
4,63738b5578ff61337567e273,#35971S2,"{'Interest': 2, 'Joy': 1}"
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,"{'Fear': 1, 'Joy': 2, 'Interest': 2, 'Love': 1}"
117,63738b5c78ff61337567e278,#8182S7,{'Interest': 2}
118,63738b5c78232564042adf4b,#8182S6,{'Interest': 1}
119,63738b7bfb9758706e727891,#B561S1,"{'Anticipation': 1, 'Interest': 1}"




Rows with empty values in column (emotions)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),emotions




Different values in column (emotions): 14


['Sadness', 'Interest', 'Joy', 'Anger', 'Anticipation', 'Trust', 'Disapproval', 'Surprise', 'Love', 'Serenity', 'Fear', nan, 'Disgust', 'False']




Unnamed: 0,_id,Story ID(#),emotions
0,63738b53b6bab71ffc679aa4,#32030S3,Sadness
0,63738b53b6bab71ffc679aa4,#32030S3,Interest
0,63738b53b6bab71ffc679aa4,#32030S3,Joy
0,63738b53b6bab71ffc679aa4,#32030S3,Anger
1,63738b52ec6863140217f367,#751188S1,Joy
...,...,...,...
117,63738b5c78ff61337567e278,#8182S7,Interest
118,63738b5c78232564042adf4b,#8182S6,Interest
119,63738b7bfb9758706e727891,#B561S1,Anticipation
119,63738b7bfb9758706e727891,#B561S1,Interest


Value distribution: 


Interest        73
Joy             40
Anticipation    40
Trust           35
Surprise        35
Sadness         22
Anger           16
Fear            15
Love            12
Disgust          8
Disapproval      6
Serenity         2
False            1
Name: emotions, dtype: int64





In [208]:
# Profile the dict-valued "sentiment" column of the emotions frame; the helper
# explodes each dict, so the distribution counts the sentiment labels (keys),
# not the numeric scores stored as values.
columnDistribution(
    audioStoryEmotions_df,
    "sentiment",
    displayColumns=["_id", "Story ID(#)"],
)

Rows with non-empty values in column (sentiment)
Number of elements: 121


Unnamed: 0,_id,Story ID(#),sentiment
0,63738b53b6bab71ffc679aa4,#32030S3,{'Positive': 0.33333333333333304}
1,63738b52ec6863140217f367,#751188S1,{'Positive': 1}
2,63738b53fb9758706e727878,#32030S1,{'Positive': 1}
3,63738b53f3d1b15c7d3cd1f0,#32030S2,{'Positive': 0.6000000000000001}
4,63738b5578ff61337567e273,#35971S2,{'Positive': 1}
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,{'Positive': 0.7142857142857141}
117,63738b5c78ff61337567e278,#8182S7,{'Positive': 1}
118,63738b5c78232564042adf4b,#8182S6,{'Positive': 1}
119,63738b7bfb9758706e727891,#B561S1,{'Positive': 1}




Rows with empty values in column (sentiment)
Number of elements: 0


Unnamed: 0,_id,Story ID(#),sentiment




Different values in column (sentiment): 4


['Positive', 'Neutral', 'Negative', nan]




Unnamed: 0,_id,Story ID(#),sentiment
0,63738b53b6bab71ffc679aa4,#32030S3,Positive
1,63738b52ec6863140217f367,#751188S1,Positive
2,63738b53fb9758706e727878,#32030S1,Positive
3,63738b53f3d1b15c7d3cd1f0,#32030S2,Positive
4,63738b5578ff61337567e273,#35971S2,Positive
...,...,...,...
116,63738b7aec6863140217f380,#41793S6,Positive
117,63738b5c78ff61337567e278,#8182S7,Positive
118,63738b5c78232564042adf4b,#8182S6,Positive
119,63738b7bfb9758706e727891,#B561S1,Positive


Value distribution: 


Positive    86
Neutral     13
Negative    13
Name: sentiment, dtype: int64



