# Input capabilities

In [30]:
import pandas as pd

# Load a small sample of the artwork CSV: only the first 50 rows and the
# listed columns are read, and the 'id' column becomes the row index.
df = pd.read_csv(
    'data/artwork.csv',
    usecols=['id', 'artist', 'title', 'medium', 'year', 'height', 'width'],
    index_col='id',
    nrows=50,
)
# Preview the first five rows.
df.head()

Unnamed: 0_level_0,artist,title,medium,year,width,height
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1035,"Blake, Robert",A Figure Bowing before a Seated Old Man with h...,"Watercolour, ink, chalk and graphite on paper....",,394.0,419.0
1036,"Blake, Robert","Two Drawings of Frightened Figures, Probably f...",Graphite on paper,,311.0,213.0
1037,"Blake, Robert",The Preaching of Warning. Verso: An Old Man En...,Graphite on paper. Verso: graphite on paper,1785.0,343.0,467.0
1038,"Blake, Robert",Six Drawings of Figures with Outstretched Arms,Graphite on paper,,318.0,394.0
1039,"Blake, William",The Circle of the Lustful: Francesca da Rimini...,Line engraving on paper,1826.0,243.0,335.0


In [16]:
# Persist the loaded frame as a pickle file so it can be reused later.
# NOTE: pickle files should only ever be loaded from trusted sources.
df.to_pickle('data/artwork.pickle')

# Indexing and filtering

In [17]:
# Select a single column by its label; the result is a Series that keeps
# the frame's 'id' index.
df['artist']

id
1035    Blake, Robert
1036    Blake, Robert
Name: artist, dtype: object

In [18]:
# Distinct values of the 'artist' column, returned as an array.
df['artist'].unique()

array(['Blake, Robert'], dtype=object)

In [21]:
# Boolean mask: True where the artist is exactly 'Blake, William'.
s = df['artist'].eq('Blake, William')
# Tally how many rows match (True) and how many do not (False).
s.value_counts()

True     45
False     5
Name: artist, dtype: int64

In [25]:
# selecting by label: .loc takes index labels and column names,
# not integer positions
df.loc[1035, 'artist'] # 1035 is a value of the 'id' index, not a row number

'Blake, Robert'

In [26]:
# Select by integer position: first row, first column.
df.iloc[0, 0]

'Blake, Robert'

In [27]:
# First row by position, across all columns (returned as a Series).
df.iloc[0]

artist                                        Blake, Robert
title     A Figure Bowing before a Seated Old Man with h...
medium    Watercolour, ink, chalk and graphite on paper....
year                                                    NaN
Name: 1035, dtype: object

In [28]:
# Positional slice: the first two rows and the first two columns
# (slice end points are exclusive).
df.iloc[:2, :2]

Unnamed: 0_level_0,artist,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1035,"Blake, Robert",A Figure Bowing before a Seated Old Man with h...
1036,"Blake, Robert","Two Drawings of Frightened Figures, Probably f..."


In [31]:
# Column arithmetic: multiply 'height' by 'width' element-wise, producing
# one value per 'id'.
# NOTE(review): this displays every row; consider appending .head() to keep
# the output short.
df['height'] * df['width']

id
1035    165086.0
1036     66243.0
1037    160181.0
1038    125292.0
1039     81405.0
1040     81120.0
1041     80828.0
1042     83640.0
1043     80735.0
1044     82620.0
1045     80240.0
1046     27600.0
1047     29747.0
1048     30141.0
1049     30400.0
1050     30096.0
1051     30294.0
1052     30294.0
1053     29850.0
1054     30096.0
1055     30096.0
1056     29944.0
1057     30200.0
1058     29898.0
1059     28650.0
1060     30200.0
1061     27900.0
1062     30200.0
1063     29700.0
1064     29550.0
1065     30049.0
1066     29204.0
1067    189832.0
1068     15272.0
1069     10864.0
1070     10864.0
1071      9085.0
1072      7680.0
1073      7680.0
1074    130950.0
1075     81291.0
1076     24766.0
1077    214625.0
1078    120600.0
1079    147030.0
1080    119232.0
1081    153906.0
1082    187960.0
1083    213350.0
1183     33598.0
dtype: float64

In [33]:
# Sort the 'width' column in ascending order and show the five smallest values.
df['width'].sort_values().head()

id
1068     92.0
1070    112.0
1069    112.0
1071    115.0
1073    120.0
Name: width, dtype: float64

In [35]:
# Convert a column to a numeric dtype with pd.to_numeric and show the first
# three values. 'width' is already displayed as float64 in the earlier output,
# so no visible change is expected here.
pd.to_numeric(df['width']).head(3)

id
1035    394.0
1036    311.0
1037    343.0
Name: width, dtype: float64

# Operations on groups

In [38]:
# Split the frame into one sub-frame per distinct artist.
grouped = df.groupby('artist')
# Peek at the first (name, sub-frame) pair only, if there is one.
first_group = next(iter(grouped), None)
if first_group is not None:
    name, group_df = first_group
    print(name)
    print(group_df)

Blake, Robert
             artist                                              title  \
id                                                                       
1035  Blake, Robert  A Figure Bowing before a Seated Old Man with h...   
1036  Blake, Robert  Two Drawings of Frightened Figures, Probably f...   
1038  Blake, Robert     Six Drawings of Figures with Outstretched Arms   

                                                 medium    year  width  height  
id                                                                              
1035  Watercolour, ink, chalk and graphite on paper....     NaN  394.0   419.0  
1036                                  Graphite on paper     NaN  311.0   213.0  
1037        Graphite on paper. Verso: graphite on paper  1785.0  343.0   467.0  
1038                                  Graphite on paper     NaN  318.0   394.0  
