In [1]:
import pandas as pd
import numpy as np

In [None]:
# A DataFrame is a two-dimensional, series-like structure: an ordered collection of columns, each of which may hold a different type of value (numeric, boolean, string).
"""A dataframe may also be understood as a dict of series, where the keys are the
column names and the values are the series that will form the columns of the dataframe. 
Furthermore, all elements in each series are mapped according to an array of labels, called the index."""



In [2]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a
# column name and each list supplies that column's values.
data = dict(
    color=['blue', 'green', 'yellow', 'red', 'white'],
    object=['ball', 'pen', 'pencil', 'paper', 'mug'],
    price=[1.2, 1.0, 0.6, 0.9, 1.7],
)
frame = pd.DataFrame(data)
frame

Unnamed: 0,color,object,price
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


In [4]:
# To keep only some of the columns, list them in the columns argument.
frame = pd.DataFrame(data, columns = ['object','price'])
frame

Unnamed: 0,object,price
0,ball,1.2
1,pen,1.0
2,pencil,0.6
3,paper,0.9
4,mug,1.7


In [8]:
# Row labels can be supplied as well, through the index argument.
frame = pd.DataFrame(
    data,
    columns=['object', 'price'],
    index=['one', 'two', 'three', 'four', 'five'],
)
frame

Unnamed: 0,object,price
one,ball,1.2
two,pen,1.0
three,pencil,0.6
four,paper,0.9
five,mug,1.7


In [9]:
# Build a DataFrame from a 4x4 NumPy array, naming both the rows and the columns.
frame3 = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame3

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [10]:
# Selecting elements: .columns holds the column labels.
frame.columns

Index(['object', 'price'], dtype='object')

In [11]:
# .index holds the row labels.
frame.index

Index(['one', 'two', 'three', 'four', 'five'], dtype='object')

In [12]:
# .values returns the contents of the frame as a NumPy array.
frame.values

array([['ball', 1.2],
       ['pen', 1.0],
       ['pencil', 0.6],
       ['paper', 0.9],
       ['mug', 1.7]], dtype=object)

In [17]:
# Contents of a single column, selected by name; the result is a Series.
frame['price']

one      1.2
two      1.0
three    0.6
four     0.9
five     1.7
Name: price, dtype: float64

In [18]:
frame.price # the same column, accessed as an attribute of the DataFrame

one      1.2
two      1.0
three    0.6
four     0.9
five     1.7
Name: price, dtype: float64

In [27]:
# iloc accesses a particular row by its integer position.
frame.iloc[2]

object    pencil
price        0.6
Name: three, dtype: object

In [28]:
frame.loc['three'] # loc takes the row's index label instead, when the index has labels

object    pencil
price        0.6
Name: three, dtype: object

In [29]:
frame.iloc[[2,4]] # returns the rows at positions 2 and 4

Unnamed: 0,object,price
three,pencil,0.6
five,mug,1.7


In [30]:
# Select several rows at once by passing a list of index labels to loc.
frame.loc[['three', 'five']]

Unnamed: 0,object,price
three,pencil,0.6
five,mug,1.7


In [32]:
# Row slicing: a slice inside plain [] selects rows, here the first three.
frame[0:3]

Unnamed: 0,object,price
one,ball,1.2
two,pen,1.0
three,pencil,0.6


In [34]:
# To read a single value, select the column by name and then the row by position.
# The index of this frame holds string labels, so a bare integer key
# (frame['object'][2]) only works through the positional fallback of
# Series.__getitem__, which recent pandas versions deprecate. Using .iloc makes
# the positional lookup explicit and returns the same value.
frame['object'].iloc[2]

'pencil'

In [37]:
# Assign names to the two axes of the frame.
frame.columns.name = 'item' # name for the set of columns
frame.index.name = 'id' # name for the set of index labels
frame

item,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1
one,ball,1.2
two,pen,1.0
three,pencil,0.6
four,paper,0.9
five,mug,1.7


In [38]:
frame['new'] = [3.0,1.3,2.2,0.8,1.1] # add a new column, one value per row
frame

item,object,price,new
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,ball,1.2,3.0
two,pen,1.0,1.3
three,pencil,0.6,2.2
four,paper,0.9,0.8
five,mug,1.7,1.1


In [39]:
# Element-wise membership test: True where a cell equals one of the listed values.
frame.isin([1.0, 'pen'])

item,object,price,new
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,False,False,False
two,True,True,False
three,False,False,False
four,False,False,False
five,False,False,False


In [40]:
# Indexing with the boolean frame from isin keeps the matching cells and
# turns every other cell into NaN.
frame[frame.isin([1.0, 'pen'])]

item,object,price,new
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,,,
two,pen,1.0,
three,,,
four,,,
five,,,


In [41]:
del frame['new'] # delete the 'new' column from the frame in place
frame

item,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1
one,ball,1.2
two,pen,1.0
three,pencil,0.6
four,paper,0.9
five,mug,1.7


In [51]:
# Filtering: keep only the rows whose price is below 1.
frame[frame['price'] < 1]


item,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1
three,pencil,0.6
four,paper,0.9


In [52]:
# Build a DataFrame from a nested dict: the outer keys become the columns and
# the inner keys become the index. An inner key missing from one of the
# columns (2011 for 'red') shows up as NaN.
nestdict = {
    'red': {2012: 22, 2013: 33},
    'white': {2011: 13, 2012: 22, 2013: 16},
    'blue': {2011: 17, 2012: 27, 2013: 18},
}
frame2 = pd.DataFrame(nestdict)
frame2

Unnamed: 0,red,white,blue
2012,22.0,22,27
2013,33.0,16,18
2011,,13,17


In [53]:
# Transpose the DataFrame: rows become columns and columns become rows.
frame2.T

Unnamed: 0,2012,2013,2011
red,22.0,33.0,
white,22.0,16.0,13.0
blue,27.0,18.0,17.0
