### DataFrame definition: It's a two-dimensional object that contains data in tabular form. It's possible to add new rows and columns

### Importing pandas and numpy to use DataFrame

In [0]:
# Importing pandas, numpy and creating a alias
import pandas as pd
import numpy as np

### Creating DataFrames

In [0]:
# Using DataFrame() function to create a DataFrame (empty)
df1 = pd.DataFrame()
print(df1)
type(df1)

Empty DataFrame
Columns: []
Index: []
Out[34]: pandas.core.frame.DataFrame

In [0]:
# Creating a dictionary to create a non-empty DataFrame
dict1 = {'id': [1, 2, 3, 4, 5],
         'name': ['Matheus', 'Rodrigo', 'Rafael', 'Gabriela', 'Giovana']}
print(dict1)

df2 = pd.DataFrame(data = dict1)
df2

{'id': [1, 2, 3, 4, 5], 'name': ['Matheus', 'Rodrigo', 'Rafael', 'Gabriela', 'Giovana']}


Unnamed: 0,id,name
0,1,Matheus
1,2,Rodrigo
2,3,Rafael
3,4,Gabriela
4,5,Giovana


In [0]:
# Creating a DataFrame with custom index
df3 = pd.DataFrame(data = dict1, index = [29, 1, 0, 205, 88])
df3

Unnamed: 0,id,name
29,1,Matheus
1,2,Rodrigo
0,3,Rafael
205,4,Gabriela
88,5,Giovana


In [0]:
# Creating a Series with pandas Series() function
series1 = pd.Series([1, 2, 3])
print(series1)
type(series1)

series2 = pd.Series(['a', 'b', 'c'])
print(series2)

0    1
1    2
2    3
dtype: int64
0    a
1    b
2    c
dtype: object


In [0]:
# Creating a DataFrame with Series
df4 = pd.DataFrame({'collmun1': series1,
                    'collmun2': series2,})
df4

Unnamed: 0,collmun1,collmun2
0,1,a
1,2,b
2,3,c


In [0]:
# Creating an array with numpy array() function
array1 = np.array([[1, 2, 3],
                   ['Curitiba', 'São Paulo', 'Rio de Janeiro'],
                   ['PR', 'SP', 'RJ']])
print(array1)
type(array1)

[['1' '2' '3']
 ['Curitiba' 'São Paulo' 'Rio de Janeiro']
 ['PR' 'SP' 'RJ']]
Out[41]: numpy.ndarray

In [0]:
# Creating a DataFrame with numpy array, using transpose() function to transpose the array
df5 = pd.DataFrame(data = array1.transpose(),
                   index = ['row1', 'row2', 'row3'],
                   columns = ['id', 'city', 'state'])
df5

Unnamed: 0,id,city,state
row1,1,Curitiba,PR
row2,2,São Paulo,SP
row3,3,Rio de Janeiro,RJ


In [0]:
# Creating a matrix with numpy matrix() function
matrix1 = np.matrix([[1, 2, 3],
                   ['Curitiba', 'São Paulo', 'Rio de Janeiro'],
                   ['PR', 'SP', 'RJ']])
print(matrix1)
type(matrix1)

[['1' '2' '3']
 ['Curitiba' 'São Paulo' 'Rio de Janeiro']
 ['PR' 'SP' 'RJ']]
Out[47]: numpy.matrix

In [0]:
# Creating a DataFrame with numpy matrix, using transpose() function to transpose the matrix
df6 = pd.DataFrame(data = matrix1.transpose(),
                   index = ['row1', 'row2', 'row3'],
                   columns = ['id', 'city', 'state'])
df6

Unnamed: 0,id,city,state
row1,1,Curitiba,PR
row2,2,São Paulo,SP
row3,3,Rio de Janeiro,RJ


### Selecting DataFrames

In [0]:
# Ploting DataFrame3 for example
df3

Unnamed: 0,id,name
29,1,Matheus
1,2,Rodrigo
0,3,Rafael
205,4,Gabriela
88,5,Giovana


In [0]:
# Adding a new column to DataFrame3
df3['id_plus_5'] = df3['id'] + 5
df3

Unnamed: 0,id,name,id_plus_5
29,1,Matheus,6
1,2,Rodrigo,7
0,3,Rafael,8
205,4,Gabriela,9
88,5,Giovana,10


In [0]:
# Adding "index" column to DataFrame3
df3['index'] = 0
df3

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0
1,2,Rodrigo,7,0
0,3,Rafael,8,0
205,4,Gabriela,9,0
88,5,Giovana,10,0


In [0]:
# Updating index values of DataFrame3
df3.index = [29, 0, 1, 2222, 68]
df3

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0
0,2,Rodrigo,7,0
1,3,Rafael,8,0
2222,4,Gabriela,9,0
68,5,Giovana,10,0


In [0]:
# Printing the column "name" of DataFrame3
print(df3 ['name'])

29       Matheus
0        Rodrigo
1         Rafael
2222    Gabriela
68       Giovana
Name: name, dtype: object


In [0]:
# Simple way to print the column "name" of DataFrame3 (Be careful with reserved words)
df3.name

Out[60]: 29       Matheus
0        Rodrigo
1         Rafael
2222    Gabriela
68       Giovana
Name: name, dtype: object

In [0]:
# Selecting the columns "name" and "index" of DataFrame3
df3[['name', 'index']]

Unnamed: 0,name,index
29,Matheus,0
0,Rodrigo,0
1,Rafael,0
2222,Gabriela,0
68,Giovana,0


In [0]:
# Selecting even values of column "id" of DataFrame3
df3[df3.id % 2 == 0]

Unnamed: 0,id,name,id_plus_5,index
0,2,Rodrigo,7,0
2222,4,Gabriela,9,0


In [0]:
# Selecting values of column "name" of DataFrame3 that contains 'R'
df3[df3.name.str.contains('R')]

Unnamed: 0,id,name,id_plus_5,index
0,2,Rodrigo,7,0
1,3,Rafael,8,0


In [0]:
# Selecting values of column "name" of DataFrame3 that contains 'R' and id = 3
df3[(df3.name.str.contains('R')) & (df3.id == 3)]

Unnamed: 0,id,name,id_plus_5,index
1,3,Rafael,8,0


In [0]:
# Selecting values of column "name" of DataFrame3 that contains 'R' or id = 5
df3[(df3.name.str.contains('R')) | (df3.id == 5)]

Unnamed: 0,id,name,id_plus_5,index
0,2,Rodrigo,7,0
1,3,Rafael,8,0
68,5,Giovana,10,0


In [0]:
# Using loc() function to select the row with index = 0
df3.loc[0]

Out[70]: id                 2
name         Rodrigo
id_plus_5          7
index              0
Name: 0, dtype: object

In [0]:
#Using iloc() function to select the row by its position
df3.iloc[0:1]

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0


In [0]:
# Using loc() function to select the rows with index = 0 and 1
df3.loc[0:1]

Unnamed: 0,id,name,id_plus_5,index
0,2,Rodrigo,7,0
1,3,Rafael,8,0


In [0]:
# Using iloc() function to select the rows by their position
df3.iloc[0:2]

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0
0,2,Rodrigo,7,0


In [0]:
# Using loc() function to select the rows with index between (contains) 68 and 1, in the reverse order (-1)
df3.loc[68:1:-1]

Unnamed: 0,id,name,id_plus_5,index
68,5,Giovana,10,0
2222,4,Gabriela,9,0
1,3,Rafael,8,0


In [0]:
# Using iloc() function to select the rows with index between (contains) 68 and 1, in the reverse order and going 2 by 2 (-2)
df3.loc[68:1:-2]

Unnamed: 0,id,name,id_plus_5,index
68,5,Giovana,10,0
1,3,Rafael,8,0


In [0]:
# Selecting a column using loc() function
df3.loc[:, 'name']

Out[80]: 29       Matheus
0        Rodrigo
1         Rafael
2222    Gabriela
68       Giovana
Name: name, dtype: object

In [0]:
# Selecting a column using iloc() function
df3.iloc[:, 1]

Out[81]: 29       Matheus
0        Rodrigo
1         Rafael
2222    Gabriela
68       Giovana
Name: name, dtype: object

In [0]:
# Selecting a value by column name and index value
df3.loc[2222, 'name']

Out[82]: 'Gabriela'

In [0]:
# Selecting a value by column and index position
df3.iloc[3, 1]

Out[87]: 'Gabriela'

In [0]:
# Updating a value by column name and index value
df3.loc[2222, 'name'] = "Marcela"
df3

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0
0,2,Rodrigo,7,0
1,3,Rafael,8,0
2222,4,Marcela,9,0
68,5,Giovana,10,0


In [0]:
# Updating a value by column and index position
df3.iloc[3, 1] = "Gabriela"
df3

Unnamed: 0,id,name,id_plus_5,index
29,1,Matheus,6,0
0,2,Rodrigo,7,0
1,3,Rafael,8,0
2222,4,Gabriela,9,0
68,5,Giovana,10,0


In [0]:
# Selecting the last row of the DataFrame3
df3.iloc[-1]

Out[91]: id                 5
name         Giovana
id_plus_5         10
index              0
Name: 68, dtype: object

In [0]:
# Selecting a specific value of DataFrame3 by its position
df3.iloc[-1, -2]

Out[95]: 10

In [0]:
# Selecting 2 rows with loc() function
df3.loc[[2222, 0]]

Unnamed: 0,id,name,id_plus_5,index
2222,4,Gabriela,9,0
0,2,Rodrigo,7,0


In [0]:
# Selecting 2 rows with iloc() function
df3.iloc[[1, 3]]

Unnamed: 0,id,name,id_plus_5,index
0,2,Rodrigo,7,0
2222,4,Gabriela,9,0
