# Normalizing Data


In [7]:
import pandas as pd
# Load the artwork sample CSV; the path is relative to the notebook's working directory.
data = pd.read_csv('data/artwork_sample.csv')

In [8]:
# Inspect column dtypes: only the numeric (int64/float64) columns are candidates for normalization.
data.dtypes


id                      int64
accession_number       object
artist                 object
artistRole             object
artistId                int64
title                  object
dateText               object
medium                 object
creditLine             object
year                  float64
acquisitionYear         int64
dimensions             object
width                   int64
height                  int64
depth                 float64
units                  object
inscription           float64
thumbnailCopyright    float64
thumbnailUrl           object
url                    object
dtype: object

In [9]:
# Normalization only applies to numeric columns, e.g. year, width, height, depth.
data["height"].mean()  # works: height is an int64 column


351.5

In [10]:
# data.artist.mean()  # does not work: artist is an object (string) column, so a mean is undefined.

In [11]:
# Raw height values, before any scaling.
data["height"]


0    419
1    213
2    467
3    394
4    335
5    338
6    334
7    340
8    335
9    340
Name: height, dtype: int64

In [12]:
# Smallest height; this value maps to 0 under min-max scaling.
data["height"].min()


213

In [13]:
# Largest height; this value maps to 1 under min-max scaling.
data["height"].max()


467

In [14]:
# Mean height; the centre that standardization subtracts from every value.
data["height"].mean()


351.5

In [15]:
# Sample standard deviation (pandas defaults to ddof=1); the divisor used by standardization.
data["height"].std()


66.81857692455162

In [16]:
# Variance of the heights, i.e. the square of the standard deviation.
data["height"].var()


4464.722222222223

In [17]:
# Short alias for the column that the scaling cells operate on.
height = data["height"]

In [18]:
# Standardization (z-score): centre on the mean, then divide by the sample standard deviation.
norm = height.sub(height.mean()).div(height.std())


In [19]:
# Standardized heights: each value is the distance from the mean in standard deviations.
norm


0    1.010198
1   -2.072777
2    1.728561
3    0.636051
4   -0.246937
5   -0.202040
6   -0.261903
7   -0.172108
8   -0.246937
9   -0.172108
Name: height, dtype: float64

In [20]:
# Min-max normalization: rescale so the smallest height becomes 0 and the largest becomes 1.
minmax = height.sub(height.min()).div(height.max() - height.min())
minmax


0    0.811024
1    0.000000
2    1.000000
3    0.712598
4    0.480315
5    0.492126
6    0.476378
7    0.500000
8    0.480315
9    0.500000
Name: height, dtype: float64

In [21]:
# Sanity check: the smallest min-max scaled value should be 0.
minmax.min()


0.0

In [22]:
# Sanity check: the largest min-max scaled value should be 1.
minmax.max()


1.0

In [23]:
# Show the full frame as loaded.
data


Unnamed: 0,id,accession_number,artist,artistRole,artistId,title,dateText,medium,creditLine,year,acquisitionYear,dimensions,width,height,depth,units,inscription,thumbnailCopyright,thumbnailUrl,url
0,1035,A00001,"Blake, Robert",artist,38,A Figure Bowing before a Seated Old Man with h...,date not known,"Watercolour, ink, chalk and graphite on paper....",Presented by Mrs John Richmond 1922,,1922,support: 394 x 419 mm,394,419,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-a-fi...
1,1036,A00002,"Blake, Robert",artist,38,"Two Drawings of Frightened Figures, Probably f...",date not known,Graphite on paper,Presented by Mrs John Richmond 1922,,1922,support: 311 x 213 mm,311,213,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-two-...
2,1037,A00003,"Blake, Robert",artist,38,The Preaching of Warning. Verso: An Old Man En...,?c.1785,Graphite on paper. Verso: graphite on paper,Presented by Mrs John Richmond 1922,1785.0,1922,support: 343 x 467 mm,343,467,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...
3,1038,A00004,"Blake, Robert",artist,38,Six Drawings of Figures with Outstretched Arms,date not known,Graphite on paper,Presented by Mrs John Richmond 1922,,1922,support: 318 x 394 mm,318,394,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-six-...
4,1039,A00005,"Blake, William",artist,39,The Circle of the Lustful: Francesca da Rimini...,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 243 x 335 mm,243,335,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...
5,1040,A00006,"Blake, William",artist,39,Ciampolo the Barrator Tormented by the Devils,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 240 x 338 mm,240,338,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-ciam...
6,1041,A00007,"Blake, William",artist,39,The Baffled Devils Fighting,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 242 x 334 mm,242,334,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...
7,1042,A00008,"Blake, William",artist,39,The Six-Footed Serpent Attacking Agnolo Brunel...,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 246 x 340 mm,246,340,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...
8,1043,A00009,"Blake, William",artist,39,The Serpent Attacking Buoso Donati,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 241 x 335 mm,241,335,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...
9,1044,A00010,"Blake, William",artist,39,The Pit of Disease: The Falsifiers,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,1919,image: 243 x 340 mm,243,340,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...


In [24]:
# Attach both scaled versions of height as new columns (this mutates `data` in place).
# NOTE(review): the column names spell "standarized" (sic); they are left unchanged here
# because renaming them would change the frame's schema.
data['minmax_standarized_height'] = minmax  # min-max scaled values
data['standarized_height'] = norm  # z-score values


In [25]:
# Display the frame again; the two scaled-height columns now appear as the last columns.
data


Unnamed: 0,id,accession_number,artist,artistRole,artistId,title,dateText,medium,creditLine,year,...,width,height,depth,units,inscription,thumbnailCopyright,thumbnailUrl,url,minmax_standarized_height,standarized_height
0,1035,A00001,"Blake, Robert",artist,38,A Figure Bowing before a Seated Old Man with h...,date not known,"Watercolour, ink, chalk and graphite on paper....",Presented by Mrs John Richmond 1922,,...,394,419,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-a-fi...,0.811024,1.010198
1,1036,A00002,"Blake, Robert",artist,38,"Two Drawings of Frightened Figures, Probably f...",date not known,Graphite on paper,Presented by Mrs John Richmond 1922,,...,311,213,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-two-...,0.0,-2.072777
2,1037,A00003,"Blake, Robert",artist,38,The Preaching of Warning. Verso: An Old Man En...,?c.1785,Graphite on paper. Verso: graphite on paper,Presented by Mrs John Richmond 1922,1785.0,...,343,467,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,1.0,1.728561
3,1038,A00004,"Blake, Robert",artist,38,Six Drawings of Figures with Outstretched Arms,date not known,Graphite on paper,Presented by Mrs John Richmond 1922,,...,318,394,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-six-...,0.712598,0.636051
4,1039,A00005,"Blake, William",artist,39,The Circle of the Lustful: Francesca da Rimini...,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,243,335,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,0.480315,-0.246937
5,1040,A00006,"Blake, William",artist,39,Ciampolo the Barrator Tormented by the Devils,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,240,338,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-ciam...,0.492126,-0.20204
6,1041,A00007,"Blake, William",artist,39,The Baffled Devils Fighting,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,242,334,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,0.476378,-0.261903
7,1042,A00008,"Blake, William",artist,39,The Six-Footed Serpent Attacking Agnolo Brunel...,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,246,340,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,0.5,-0.172108
8,1043,A00009,"Blake, William",artist,39,The Serpent Attacking Buoso Donati,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,241,335,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,0.480315,-0.246937
9,1044,A00010,"Blake, William",artist,39,The Pit of Disease: The Falsifiers,"1826–7, reprinted 1892",Line engraving on paper,Purchased with the assistance of a special gra...,1826.0,...,243,340,,mm,,,http://www.tate.org.uk/art/images/work/A/A00/A...,http://www.tate.org.uk/art/artworks/blake-the-...,0.5,-0.172108
