In [1]:
import csv
import pprint as pp

In [2]:
def first_five(dataset):
    """Print the first five rows of ``dataset``, one row per line.

    Fewer lines are printed when the dataset holds fewer than five rows,
    and nothing is printed for an empty dataset. Returns ``None``.
    """
    preview = dataset[:5]
    for line in map(str, preview):
        print(line)

In [3]:
def read_csv(file_name, header=True):
    """Read a UTF-8 encoded CSV file into lists of string fields.

    Parameters
    ----------
    file_name : str or path-like
        Path of the CSV file to read.
    header : bool, default True
        When true, the first row is split off and returned separately.

    Returns
    -------
    tuple(list, list) or list
        ``(first_row, remaining_rows)`` when ``header`` is true, otherwise
        the list of all rows. An empty file yields ``([], [])`` or ``[]``.
    """
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded line breaks are read back unchanged.
    with open(file_name, encoding='utf8', newline='') as fd:
        all_data = list(csv.reader(fd))

    if header:
        # an empty file has no first row; return an empty header instead of
        # failing with IndexError
        first_row = all_data[0] if all_data else []
        return first_row, all_data[1:]
    return all_data

In [4]:
def remove_parentheses(index, dataset):
    """Strip every '(' and ')' from one column of ``dataset``, in place.

    ``index`` selects the column; the string stored at that position in
    each row is overwritten with its cleaned version. Returns ``None``.
    """
    strip_parens = str.maketrans('', '', '()')

    for record in dataset:
        record[index] = record[index].translate(strip_parens)

In [5]:
def update_unknown_or_title(index, unknown, dataset):
    """Normalise one column of ``dataset`` in place.

    A non-empty value at position ``index`` is replaced by its
    ``str.title()`` form; an empty value is replaced by ``unknown``.
    Returns ``None``.
    """
    for record in dataset:
        current = record[index]
        record[index] = current.title() if current else unknown

In [6]:
def clean_and_convert_date(index, dataset):
    """Remove parentheses from one column and convert its values to ``int``.

    The dataset is modified in place. Empty values are left untouched, and
    values that are already ``int`` are skipped so that re-running the
    notebook cell on an already cleaned dataset is a harmless no-op.

    Raises
    ------
    ValueError
        If a non-empty value is not an integer once '(' and ')' are removed.
    """
    strip_parens = str.maketrans('', '', '()')

    for row in dataset:
        value = row[index]
        # nothing to do for empty cells or cells converted by an earlier run
        if not value or isinstance(value, int):
            continue
        row[index] = int(value.translate(strip_parens))

In [7]:
def remove_character(date):
    """Return ``date`` with the unwanted date-annotation characters removed.

    The characters dropped are parentheses, the letters c/C and s/S,
    periods, apostrophes and spaces; everything else is kept in order.
    """
    unwanted = "()cC.sS' "
    return date.translate(str.maketrans('', '', unwanted))

In [8]:
def process_date(index, dataset):
    """Convert one date column of ``dataset`` to integer years, in place.

    Each non-empty string value is first passed through
    ``remove_character``. If the result contains '-', it is treated as a
    range of years and replaced by the average of its parts, rounded with
    the built-in ``round`` (e.g. '1927-1940' becomes 1934). Otherwise it is
    converted with ``int``.

    Empty values are left untouched, and values that are already ``int`` are
    skipped so that re-running the notebook cell on an already processed
    dataset is a harmless no-op.

    Raises
    ------
    ValueError
        If a cleaned value, or any part of a range, is not an integer.
    """
    for row in dataset:
        date = row[index]
        # nothing to do for empty cells or cells converted by an earlier run
        if not date or isinstance(date, int):
            continue

        date = remove_character(date)
        if '-' in date:
            years = [int(part) for part in date.split('-')]
            row[index] = round(sum(years) / len(years))
        else:
            row[index] = int(date)

In [9]:
def frequency_table(index, dataset):
    """Build a frequency table for a given column.

    Returns a dict that maps each distinct value found at position
    ``index`` to the number of rows holding it; keys appear in the order
    in which the values are first encountered.
    """
    counts = {}

    for record in dataset:
        key = record[index]
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1

    return counts

In [10]:
# load the dataset: `header` receives the first row of the file, `moma` all remaining rows
header, moma = read_csv('artworks.csv')

In [11]:
# number of data rows (the first row of the file was split off by read_csv)
print(len(moma))

16725


In [12]:
# preview the raw rows: columns 2-5 are still wrapped in parentheses
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', '(American)', '(1947)', '(2013)', '(Female)', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', '(Spanish)', '(1916)', '(2007)', '(Male)', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', '(French)', '(1870)', '(1943)', '(Male)', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', '(French)', '(1861)', '(1944)', '(Male)', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', '(French)', '(1857)', '(1927)', '(Male)', '1903', 'Photography']


In [13]:
remove_parentheses(2, moma) # removing parentheses from 'Nationality' column (updates moma in place)

In [14]:
# check: column 2 ('Nationality') no longer contains parentheses
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', '(1947)', '(2013)', '(Female)', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', '(1916)', '(2007)', '(Male)', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', '(1870)', '(1943)', '(Male)', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', '(1861)', '(1944)', '(Male)', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', '(1857)', '(1927)', '(Male)', '1903', 'Photography']


In [15]:
remove_parentheses(5, moma) # removing parentheses from 'Gender' column (updates moma in place)

In [16]:
# check: column 5 ('Gender') no longer contains parentheses
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', '(1947)', '(2013)', 'Female', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', '(1916)', '(2007)', 'Male', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', '(1870)', '(1943)', 'Male', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', '(1861)', '(1944)', 'Male', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', '(1857)', '(1927)', 'Male', '1903', 'Photography']


In [17]:
# title-case the 'Nationality' column; empty values become 'Nationality Unknown' (updates moma in place)
update_unknown_or_title(2, 'Nationality Unknown', moma)

In [18]:
# check: the first five nationalities were already title-cased, so these rows look unchanged
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', '(1947)', '(2013)', 'Female', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', '(1916)', '(2007)', 'Male', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', '(1870)', '(1943)', 'Male', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', '(1861)', '(1944)', 'Male', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', '(1857)', '(1927)', 'Male', '1903', 'Photography']


In [19]:
# title-case the 'Gender' column; empty values become 'Gender Unknown/Other' (updates moma in place)
update_unknown_or_title(5, 'Gender Unknown/Other', moma)

In [20]:
# check: the first five genders were already title-cased, so these rows look unchanged
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', '(1947)', '(2013)', 'Female', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', '(1916)', '(2007)', 'Male', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', '(1870)', '(1943)', 'Male', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', '(1861)', '(1944)', 'Male', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', '(1857)', '(1927)', 'Male', '1903', 'Photography']


In [21]:
clean_and_convert_date(3, moma) # removing parentheses and converting to int, 'BeginDate' column (in place; empty values stay as '')

In [22]:
# check: column 3 ('BeginDate') now holds ints
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', 1947, '(2013)', 'Female', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', 1916, '(2007)', 'Male', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', 1870, '(1943)', 'Male', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', 1861, '(1944)', 'Male', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', 1857, '(1927)', 'Male', '1903', 'Photography']


In [23]:
clean_and_convert_date(4, moma) # removing parentheses and converting to int, 'EndDate' column (in place; empty values stay as '')

In [24]:
# check: column 4 ('EndDate') now holds ints
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', 1947, 2013, 'Female', '1986', 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', 1916, 2007, 'Male', '1978', 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', 1870, 1943, 'Male', '1889-1911', 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', 1861, 1944, 'Male', '1927-1940', 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', 1857, 1927, 'Male', '1903', 'Photography']


In [25]:
process_date(6, moma) # 'Date' column: strip stray characters, convert to int, replace year ranges by their average (in place)

In [26]:
# check: column 6 ('Date') now holds ints; a range such as '1889-1911' has become 1900
first_five(moma)

['Dress MacLeod from Tartan Sets', 'Sarah Charlesworth', 'American', 1947, 2013, 'Female', 1986, 'Prints & Illustrated Books']
['Duplicate of plate from folio 11 verso (supplementary suite, plate 4) from ARDICIA', 'Pablo Palazuelo', 'Spanish', 1916, 2007, 'Male', 1978, 'Prints & Illustrated Books']
['Tailpiece (page 55) from SAGESSE', 'Maurice Denis', 'French', 1870, 1943, 'Male', 1900, 'Prints & Illustrated Books']
['Headpiece (page 129) from LIVRET DE FOLASTRIES, À JANOT PARISIEN', 'Aristide Maillol', 'French', 1861, 1944, 'Male', 1934, 'Prints & Illustrated Books']
['97 rue du Bac', 'Eugène Atget', 'French', 1857, 1927, 'Male', 1903, 'Photography']


In [27]:
# we have the year in which each artist was born (column 3)
# and the year in which the artwork was created (column 6);
# from these we compute the artist's age when the artwork was created.
# 0 is stored as a placeholder when either year is missing: the cleaning
# functions leave empty cells as '', which cannot be subtracted

artists_ages = list()
for row in moma:
    birth_year = row[3]
    art_year = row[6]

    if birth_year and isinstance(art_year, int):
        artists_ages.append(art_year - birth_year)
    else:
        artists_ages.append(0)

In [28]:
# some computed ages are very low or negative (0 marks a missing birth year)
# to summarize, every age of 20 or less is categorized as 'Unknown'
# NOTE(review): an age of exactly 20 also becomes 'Unknown' - confirm this is intended

final_ages = [age if age > 20 else 'Unknown' for age in artists_ages]

In [29]:
# for simplification, ages are converted to decades
# ex: an age of 24 is reported as '20s'; 'Unknown' entries are kept as they are

decades = list()
for final_age in final_ages:
    if final_age == 'Unknown':
        label = final_age
    else:
        # round the age down to the start of its decade, e.g. 24 -> 20
        label = str(final_age // 10 * 10) + 's'
    decades.append(label)

In [30]:
# count how many artworks fall into each decade label

decades_frequency = {}
for decade_label in decades:
    decades_frequency.setdefault(decade_label, 0)
    decades_frequency[decade_label] += 1

In [31]:
# keys are strings, so pprint sorts them as text: '100s' and '110s' come before '20s'
pp.pprint(decades_frequency)

{'100s': 3,
 '110s': 3,
 '20s': 1856,
 '30s': 4722,
 '40s': 4081,
 '50s': 2434,
 '60s': 1357,
 '70s': 559,
 '80s': 364,
 '90s': 253,
 'Unknown': 1093}


In [32]:
# count the rows per distinct value of column 1 (the artist name)
artists_freq = frequency_table(1, moma)

In [33]:
# NOTE(review): this prints one line per distinct artist, which is a very long output;
# consider displaying only the most frequent artists instead
pp.pprint(artists_freq)

{'A. G. Fronzoni': 6,
 'A. Karra': 1,
 'A. M. Cassandre': 9,
 'A. Paramonov': 1,
 'A. Radishchev': 1,
 'A. Strachov': 1,
 'A.R. Penck (Ralf Winkler)': 18,
 'Aaron Curry': 1,
 'Aaron Fink': 1,
 'Aaron Morse': 2,
 'Aaron Siskind': 19,
 'Abbas Kiarostami': 1,
 'Abby Leigh': 1,
 'Abel Barroso': 1,
 'Abelardo Morell': 2,
 'Abigail Child': 1,
 'Abraham Cruzvillegas': 1,
 'Abraham Shterenberg': 1,
 'Abraham Walkowitz': 19,
 'Abram Games': 7,
 'Abram Krol': 3,
 'Achille Castiglioni': 2,
 'Achille Perilli': 1,
 'Ad Reinhardt': 5,
 'Adam Bartos': 1,
 'Adam Fuss': 1,
 'Adam Shankman': 1,
 'Adja Yunkers': 3,
 'Adolf Abel': 1,
 'Adolf De Meyer': 1,
 'Adolf Dehn': 1,
 'Adolph Gottlieb': 2,
 'Adolphe Braun': 1,
 'Adolpho Wildt': 1,
 'Adrian Allinson': 1,
 'Adrian Piper': 14,
 'Adrian Siegel': 2,
 'Adrian Wiszniewski': 3,
 'Adriana Maraz-Bernik': 1,
 'Adrián Villar Rojas': 1,
 'Adão Pinheiro': 2,
 'Aenne Biermann': 2,
 'Agam (Yaacov Agam)': 1,
 'Agnes Denes': 3,
 'Agnes Lyall': 1,
 'Agnes Martin': 3,


 'Christopher Le Brun': 1,
 'Christopher Martin Hofstetter': 1,
 'Christopher P. James': 1,
 'Christopher Richard Wynne Nevinson': 3,
 'Christopher Williams': 8,
 'Christopher Wilmarth': 8,
 'Christopher Wool': 19,
 'Chryssa': 8,
 'Chuck Close': 9,
 'Chuck Jones': 6,
 'Chuyên Bùi Thac': 1,
 'Cindy Bernard': 1,
 'Cindy Sherman': 18,
 'Cindy van den Bremen': 1,
 'Claes Oldenburg': 12,
 'Clare Strand': 1,
 'Clarence H. White': 8,
 'Clarence John Laughlin': 4,
 'Clarence Kennedy': 2,
 'Claude Chabrol': 1,
 'Claude Flight': 1,
 'Claude Parent': 1,
 'Claudette Schreuders': 1,
 'Claudia Andujar': 2,
 'Claudia Comte': 1,
 'Claudio Cesar': 1,
 'Claudio Perna': 1,
 'Clement Cowles': 1,
 'Cleve Gray': 1,
 'Clifford and Rosemary Ellis': 1,
 'Clinton Adams': 1,
 'Clinton Hill': 2,
 'Clorindo Testa': 1,
 'Clyde Geronimi': 2,
 'Colin Lanceley': 3,
 'Colin Self': 1,
 'Colleen Frances Kenyon': 2,
 'Connie Rasinski': 1,
 'Conrad Felixmüller': 1,
 'Conrad Marca-Relli': 1,
 'Constant (Constant Anton Nieuw

 'Grace A. Snyder': 1,
 'Grace Hartigan': 8,
 'Graciela Carnevale': 1,
 'Graciela Iturbide': 3,
 'Graham Nickson': 2,
 'Graham Sutherland': 2,
 'Grant Mudford': 2,
 'Grapus': 1,
 'Graziella Urbinati': 2,
 'Green': 1,
 'Greg Colson': 3,
 'Greg Goldberg': 1,
 'Gregory Amenoff': 1,
 'Gregory Hoblit': 1,
 'Gregory Masurovsky': 7,
 'Grenville Davey': 1,
 'Greta Von Nessen': 1,
 'Grete Stern': 1,
 'Grisha Bruskin': 3,
 'Gugelot Institute': 1,
 'Guido van der Werve': 1,
 'Guillermo Bermúdez': 1,
 'Guillermo Kuitca': 4,
 'Guillermo Meza': 1,
 'Gundula Schulze-Eldowy': 1,
 'Gunta Stölzl': 13,
 'Gunvor Nelson': 1,
 'Gustav Klimt': 2,
 'Gustav Klutsis': 4,
 'Gustav Metzger': 3,
 'Gustav Schenk': 1,
 'Gustave Doré': 22,
 'Gustave Le Gray': 6,
 'Gustave Singier': 2,
 'Guy Ben-Ner': 1,
 'Guy Bourdin': 1,
 'Guy Harloff': 1,
 'Guy Maddin': 1,
 'Guy Peellaert': 1,
 'Guy Ritchie': 1,
 'Guy Tillim': 1,
 'Guy de Cointet': 2,
 'Gwen John': 1,
 'György Kepes': 3,
 'György Sándor Ligeti': 1,
 'Gérard Duchène

 'John Houck': 1,
 'John Hunter': 1,
 'John Huston': 1,
 'John J. Cu Roi': 1,
 'John Kane': 1,
 'John Langdon': 1,
 'John Levee': 3,
 'John Lurie': 3,
 'John Maeda': 1,
 'John Mansbridge': 1,
 'John Marin': 1,
 'John McGreer': 1,
 'John McKay': 1,
 'John McLaughlin': 3,
 'John Milisenda': 1,
 "John O'Reilly": 1,
 'John Piper': 6,
 'John Ross': 1,
 'John Schott': 5,
 'John Scofield': 1,
 'John Shaw': 1,
 'John Sloan': 20,
 'John Smith': 1,
 'John Storrs': 2,
 'John Szarkowski': 6,
 'John Taylor Arms': 1,
 'John Thomson': 40,
 'John Vachon': 2,
 'John W. Church': 1,
 'John Walker': 7,
 'John Waters': 1,
 'John Wesley': 1,
 'John William Carnell': 1,
 "Jon T. O'Neal": 1,
 'Jon Widman': 1,
 'Jonas J. Fendell': 1,
 'Jonas Wood': 5,
 'Jonathan Borofsky': 1,
 'Jonathan Horowitz': 1,
 'Jonathan Lasker': 1,
 'Jonathan Monk': 1,
 'Jonathas de Andrade': 1,
 'Joon-ho Bong': 1,
 'Joost Schmidt': 1,
 'Jordi Secall Roure': 1,
 'Jorge Castillo': 10,
 'Jorge Macchi': 1,
 'Jorge Pardo': 9,
 'Jorge Ribal

 'Miguel Angel Rojas': 1,
 'Miguel Covarrubias': 1,
 'Miguel Rio Branco': 6,
 'Miguel Rodrigo Mazuré': 1,
 'Miguel Vilá': 1,
 'Mihajlo Arsovski': 4,
 'Mihály Biró': 1,
 'Mike Disfarmer': 1,
 'Mike Kelley': 2,
 'Mike Nichols': 1,
 'Mike Smith': 1,
 'Mikhael Subotzky': 1,
 'Mikhail Larionov': 4,
 'Milan Knížák': 23,
 'Mildred Thompson': 1,
 'Milos Forman': 1,
 'Milton Brooks': 1,
 'Milton Elting Hebald': 4,
 'Milton Glaser': 1,
 'Milton Hirschl': 1,
 'Mimmo Paladino': 5,
 'Ming Smith': 3,
 'Minna Citron': 2,
 'Minnie Evans': 1,
 'Minor White': 6,
 'Mira Schendel': 8,
 'Mircea Cantor': 1,
 'Miriam Cahn': 1,
 'Miriam Schapiro': 1,
 'Mirko Ilić': 5,
 'Miro Svolik': 1,
 'Miroslav Sutej': 2,
 'Miroslaw Balka': 1,
 'Misch Kohn': 3,
 'Mitch Epstein': 2,
 'Mogens Lassen': 1,
 'Moisei Fradkin': 1,
 'Mona Hatoum': 2,
 'Monica Bonvicini': 2,
 'Monica Majoli': 1,
 'Monika Baer': 1,
 'Monika Von Boch': 1,
 'Monte Cazazza': 1,
 'Monteiro Filho': 1,
 'Moon Hoon': 2,
 'Moris (Israel Meza Moreno)': 1,
 '

 'Ruth Bessoudo Courvoisier': 1,
 'Ruth Eckstein': 1,
 'Ruth Reeves': 4,
 'Ruth Root': 1,
 'Ruth Vollmer': 1,
 'Ruvim Mazel': 1,
 'Ryan McGinness': 4,
 'Ryoji Akiyama': 2,
 'Ryoko Aoki': 3,
 'Ryuichi Yamashiro': 2,
 'Rémy Zaugg': 1,
 'S. Craig Zahler': 1,
 'S. L. Levitsky': 1,
 'SEO (Seo Soo-Kyoung)': 2,
 'Saara Hopea': 3,
 'Sabine Weiss': 1,
 'Saburo Murakami': 1,
 'Sadamasa Motonaga': 1,
 'Sadie Benning': 2,
 'Sage Sohier': 2,
 'Sahlan Momo': 2,
 'Saint-Edme': 1,
 'Sally Osborn': 2,
 'Salvador Dalí': 10,
 'Sam Contis': 1,
 'Sam Francis': 5,
 'Sam Gilliam': 1,
 'Sam Kaner': 1,
 'Sam Lucente': 3,
 'Sam Watters': 1,
 'Sameer Makarius': 2,
 'Samuel Fosso': 1,
 'Sandeep Mukherjee': 1,
 'Sandra Cinto': 1,
 'Sandra Vásquez de la Horra': 1,
 'Sandro Chia': 3,
 'Sang-ok Shin': 1,
 'Sanja Iveković': 3,
 'Santiago Cucullu': 4,
 'Sara Facio': 1,
 'Sara Petty': 1,
 'Sarah Charlesworth': 1,
 'Sarah Grilo': 1,
 'Sarah Morris': 2,
 'Sarah Sze': 1,
 'Sari Dienes': 1,
 'Saul Bass': 2,
 'Saul Steinberg

 'Yoriaki Matsudaira': 2,
 'Yoshi Wada': 1,
 'Yoshihiro Kimura': 1,
 'Yoshio Hayakawa': 1,
 'Yoshio Watanabe': 1,
 'Yoshito Takahashi': 1,
 'Yoshitomo Nara': 21,
 'Yoshitoshi Mori': 1,
 'Yoshiyuki Tomino': 1,
 'Yousuf Karsh': 1,
 'Yozo Hamaguchi': 1,
 'Yrjö Kukkapuro': 1,
 'Yto Barrada': 1,
 'Yu Li': 1,
 'Yui Kugimiya': 1,
 'Yuken Teruya': 2,
 'Yuki Kimura': 2,
 'Yukihisa Isobe': 2,
 'Yukinori Yanagi': 3,
 'Yusaku Kamekura': 1,
 'Yusuf Arakkal': 1,
 'Yutaka Matsuzawa': 21,
 'Yves José Zimmerman': 1,
 'Yves Klein': 3,
 'Yves Tanguy': 17,
 'Yvonne Jacquette': 1,
 'Yvonne Rainer': 1,
 'Yılmaz Güney': 1,
 'Zaha Hadid': 1,
 'Zak Smith': 1,
 'Zanele Muholi': 1,
 'Zao Wou-ki': 4,
 'Zarina': 4,
 'Zbigniew Makowski': 1,
 'Zbigniew Rybczynski': 1,
 'Zdenek Rossmann': 6,
 'Zdenĕk Ziegler': 1,
 'Zeke Berman': 4,
 'Zero (Hans Schleger)': 3,
 'Zhang Dali': 1,
 'Zhang Ke': 1,
 'Zhou Xiaohu': 1,
 'Zigmunds Priede': 1,
 'Zinovii Gorbovets': 1,
 'Zoe Leonard': 7,
 'Zofia Rydet': 1,
 'Zoltan Kemeny': 1,


In [34]:
# count the rows per distinct value of column 5 ('Gender')
gender_freq = frequency_table(5, moma)

In [35]:
# 'Gender Unknown/Other' is the replacement written for rows whose gender was empty
pp.pprint(gender_freq)

{'Female': 2443, 'Gender Unknown/Other': 791, 'Male': 13491}


In [36]:
# count the rows per distinct value of column 2 ('Nationality')
nationality_freq = frequency_table(2, moma)

In [37]:
# 'Nationality Unknown' is the replacement written for rows whose nationality was empty
pp.pprint(nationality_freq)

{'American': 7444,
 'Angolan': 1,
 'Argentine': 82,
 'Australian': 46,
 'Austrian': 100,
 'Bahamian': 1,
 'Belgian': 190,
 'Brazilian': 102,
 'British': 748,
 'Bulgarian': 2,
 'Cambodian': 1,
 'Cameroonian': 3,
 'Canadian': 113,
 'Canadian Inuit': 2,
 'Chilean': 77,
 'Chinese': 26,
 'Colombian': 107,
 'Costa Rican': 8,
 'Croatian': 27,
 'Cuban': 36,
 'Czech': 115,
 'Danish': 67,
 'Dutch': 203,
 'Ecuadorian': 3,
 'Egyptian': 5,
 'Filipino': 1,
 'Finnish': 32,
 'French': 3042,
 'Georgian': 6,
 'German': 1141,
 'Greek': 7,
 'Guatemalan': 11,
 'Haitian': 4,
 'Hungarian': 24,
 'Icelandic': 2,
 'Indian': 34,
 'Iranian': 4,
 'Irish': 6,
 'Israeli': 52,
 'Italian': 405,
 'Japanese': 299,
 'Kenyan': 1,
 'Korean': 17,
 'Latvian': 5,
 'Lebanese': 1,
 'Luxembourgish': 5,
 'Malian': 2,
 'Mexican': 169,
 'Moroccan': 2,
 'Mozambican': 1,
 'Nationality Unknown': 627,
 'New Zealander': 3,
 'Nigerian': 2,
 'Norwegian': 13,
 'Pakistani': 5,
 'Palestinian': 1,
 'Panamanian': 2,
 'Peruvian': 15,
 'Polish':