In [1]:
import os
import pandas as pd
import numpy as np

# Data Preparation

In [2]:
# attempt to get all the files within the balnton_data directory
path = r"./blanton_data/"
directories = os.listdir(path)

# this is the master dataframe
df_master = pd.DataFrame()

# these are the transaction databases, stored as indexes in a dictionary
# dictionary of dictionaries of arrays
transaction_database = {
    'emotional' : {},
    'aesthetic' : {},
    'either' : {}
}

valid_cols = ['accession_#', 'artist_sort_name', 'artist_life_dates',
           'artist_nationality', 'title', 'creation_date', 'medium', 'credit_line',
           'dimensions', 'student_id', 'emotional_reaction', 'aesthetically_pleasing']

In [3]:
def update_transaction_database(student_df):
    # get the student_id to create a new entry in the transaction_database
    student_id = student_df.at[0, 'student_id']
    
    # create new entry in database
    transaction_database['emotional'][student_id] = []
    transaction_database['aesthetic'][student_id] = []
    transaction_database['either'][student_id] = []
    
    for index, row in student_df.iterrows():
        is_emotional = row['emotional_reaction'] == 1
        is_aesthetic = row['aesthetically_pleasing'] == 1
        
        if is_emotional:
            transaction_database['emotional'][student_id].append(row['accession_#'])
        if is_aesthetic:
            transaction_database['aesthetic'][student_id].append(row['accession_#'])
        if is_emotional or is_aesthetic:
            transaction_database['either'][student_id].append(row['accession_#'])

In [4]:
def clean_temp(file, df_temp):
    # lowercase and ignore whitespace for consistency
    df_temp.columns = map(str.lower, df_temp.columns)
    new_columns = []
    for column in df_temp.columns:
        new_column = column.strip().replace(" ", "_")
        new_columns.append(new_column)
    df_temp.columns = new_columns

    # correct bad spelling
    if 'asthetically_pleasing' in df_temp.columns:
        df_temp = df_temp.rename(columns={'asthetically_pleasing' : 'aesthetically_pleasing'})

    # abhi's case - student_id missing
    if 'student_id' not in df_temp.columns:
        gen_id = file.split('_')[0]
        df_temp['student_id'] = gen_id
    return df_temp.copy()

In [5]:
for file in directories:
    rel_path = path + file
    if file.endswith('.xlsx'):
        df_temp = pd.read_excel(rel_path)
    
        df_temp = clean_temp(file, df_temp)
        
        # student_id, emotional_reaction, and aesthetically_pleasing
        # are not always there, filter out the ones that don't have one of these columns
        is_valid = 'emotional_reaction' in df_temp.columns and 'aesthetically_pleasing' in df_temp.columns
        
        if(is_valid):
            
            # removed the 'unnamed' feilds
            df_temp = df_temp[valid_cols]
            
            # add an 'either' column
            # df_temp['either'] = np.where((df_temp['emotional_reaction'] | df_temp['aesthetically_pleasing'] > 0), 1, 0)
            df_temp['either'] = df_temp['emotional_reaction'].astype(int) + df_temp['aesthetically_pleasing'].astype(int)
            
            update_transaction_database(df_temp)

            df_master = df_master.append(df_temp, ignore_index = True)

# sort by student_id to make things easier
df_master = df_master.sort_values(by=['student_id'])
df_master
        

Unnamed: 0,accession_#,artist_sort_name,artist_life_dates,artist_nationality,title,creation_date,medium,credit_line,dimensions,student_id,emotional_reaction,aesthetically_pleasing,either
986,2002.9,"Raimondi, Marcantonio","(Argini, Italy, circa 1470 or 1482 - circa 152...",Italian,"The Flagellation, from the Small Passion, afte...",circa 1512,Engraving,"Blanton Museum of Art, The University of Texas...",12.4 cm x 9.8 cm (4 7/8 in. x 3 7/8 in.),AnaW4804,0,0,0
987,1982.5,"Gavarni, Paul","(Paris, 1804 - 1866, Paris)",French,Voyez le restant de la vente! [See the rest of...,1839,Lithograph,"Blanton Museum of Art, The University of Texas...",29.9 cm x 21 cm (11 3/4 in. x 8 1/4 in.),AnaW4804,0,0,0
985,1982.869.1/4,"Chávez Morado, José","(Silao, Mexico, 1909 – 2002, Guanajuato, Mexico)",Mexican,"El amor y el crimen [Love and Crime], from the...",1936,Linocut,"Blanton Museum of Art, The University of Texas...",23.5 cm x 19 cm (9 1/4 in. x 7 1/2 in.),AnaW4804,1,0,1
984,1991.66.19/40B,"Piranesi, Giovanni Battista","(Mogliano (Treviso), Italy, 1720 - 1778, Rome)",Italian,Dimostrazione in grande di alcune delle parti ...,1756,Etching,"Blanton Museum of Art, The University of Texas...",52.8 cm x 69.7 cm (20 13/16 in. x 27 7/16 in.),AnaW4804,0,0,0
983,2014.65,"Frank, Natalie","(Austin, Texas, 1980 - )",American,Rapunzel I,2011-2014,Gouache and chalk pastel on paper,"Blanton Museum of Art, The University of Texas...",76.2 cm x 55.9 cm (30 in. x 22 in.),AnaW4804,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1764,2000.14,"Carrière, Eugène","(Gournay (Seine-et-Oise), 1849 - 1906, Paris)",French,Marguerite Carrière,,Lithograph from three stones on chine collé,"Blanton Museum of Art, The University of Texas...",59.4 cm x 46.8 cm (23 3/8 in. x 18 7/16 in.),viswa,1,0,1
1765,1989.103.6/13,"Kirchner, Ernst Ludwig","(Aschaffenburg, Germany, 1880 - 1938, Frauenki...",German,"David Müller, from Bauhaus Drucke - Neue Europ...",1919,Woodcut,"Blanton Museum of Art, The University of Texas...",48.3 cm x 40.7 cm (19 in. x 16 in.),viswa,0,0,0
1766,1980.32,Anonymous,,Greek-Attic,Black-figure Neck Amphora of Panathenaic Shape...,circa 540 BCE,Terracotta,"Blanton Museum of Art, The University of Texas...",27.5 cm (10 13/16 in.),viswa,0,0,0
1768,1995.177,"Rethel, Alfred","(Diepenbend (Aachen), 1816 - 1859, Düsseldorf)",German,Der Tod als Freund [Death as a Friend],1851,Wood engraving (by Richard Julius Jungtow),"Blanton Museum of Art, The University of Texas...",30.2 x 27 cm (11 7/8 x 10 5/8 in.),viswa,1,1,2


In [6]:
transaction_database

{'emotional': {'sra2398': ['2004.114',
   '1979.30',
   '1977.112',
   '2007.42',
   '2017.277',
   '2014.65',
   '2017.444',
   '2018.89',
   '2004.129.1/3-3/3',
   '1982.1017',
   '1998.71',
   '2006.94',
   '2017.965',
   '1991.276',
   '1982.1340',
   '1989.24',
   'G1968.69',
   '2009.13',
   'P1967.1.12/20',
   '1992.253.1/35',
   '1992.253.32/35',
   '1999.10',
   '2010.100',
   '2017.1168',
   '2004.82',
   '1981.31',
   '1991.418',
   '1997.133',
   '2017.1447',
   '1995.24',
   '2017.429',
   '2016.1',
   '2018.195',
   '1991.248',
   '1985.58.7/10',
   '1992.253.8/35',
   '2005.151',
   '2002.1668',
   'G1972.8.2'],
  'akp2597': [2014.94,
   '2017.361',
   '1992.253.27/35',
   '1992.253.35/35',
   '1991.187',
   '2002.1359',
   '2017.30',
   '2014.1',
   '2017.93',
   '1982.1264',
   '2017.1010',
   '2017.304',
   '1986.333',
   'P1969.11.2',
   '2017.605.9',
   '2017.1447',
   '2017.1148',
   '1998.156',
   '2017.995',
   '2017.487',
   '1998.278',
   '2004.110',
   'G1974.

# Data Analysis
## Questions to Answer:
- What artist was the most likely to elicit an emotional reaction?
- What artist was the most likely to elicit an aesthetically pleasing reaction?
- What pieces of art were the most widely judged to have any reaction?
- What student had the most “1” reactions – find the art lover
- What student had the most “0” reactions – find the art grinch


In [7]:
"""
returns a dictionary tallying the count of a certain attrbute 
for each unique item defined in category

df - dataframe
category - distinct items
tally_by - column to count number of distinct items by
target - target value to add +1 to the tally
"""
def get_count(df, category, tally_by, target, by_one):
    count = {}
    for index, row in df.iterrows():
        
        inc = 1
        if by_one == False:
            inc = row[tally_by]
        
        if row[tally_by] >= target:
            if row[category] not in count:
                count[row[category]] = inc
                
            else:
                count[row[category]] += inc
    return count

In [8]:
def max_count(count):
    return max(count, key=count.get)
def min_count(count):
    return min(count, key=count.get)

In [9]:
# What artist was the most likely to elicit an emotional reaction?
emo_artist = get_count(df_master, 'artist_sort_name', 'emotional_reaction', 1, True)

# What artist was the most likely to elicit an aesthetically pleasing reaction?
aes_artist = get_count(df_master, 'artist_sort_name', 'aesthetically_pleasing', 1, True)

# What pieces of art were the most widely judged to have any reaction?
reaction_art = get_count(df_master, 'title', 'either', 1, True)

# What student had the most “1” reactions – find the art lover
# What student had the most “0” reactions – find the art grinch
lover_student = get_count(df_master, 'student_id', 'either', 1, False)


In [10]:
print("What artist was the most likely to elicit an emotional reaction?")
print(max_count(emo_artist))
print()
print("What artist was the most likely to elicit an aesthetically pleasing reaction?")
print(max_count(aes_artist))
print()
print("What pieces of art were the most widely judged to have any reaction?")
print(max_count(reaction_art))
print()
print("What student had the most “1” reactions – find the art lover")
print(max_count(lover_student))
print()
print("What student had the most “0” reactions – find the art grinch")
print(min_count(lover_student))


What artist was the most likely to elicit an emotional reaction?
Camnitzer, Luis

What artist was the most likely to elicit an aesthetically pleasing reaction?
Anonymous

What pieces of art were the most widely judged to have any reaction?
Untitled

What student had the most “1” reactions – find the art lover
st33578

What student had the most “0” reactions – find the art grinch
ich
