### Carregando o conjunto de dados

In [7]:
import pandas as pd
from sklearn.datasets import load_iris

# Fetch the Iris dataset object; its data, target, feature_names and
# target_names attributes are used throughout the notebook.
data = load_iris()

# Measurements become the columns, and the integer class label is appended
# as an extra "encoded_target" column.
iris_df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    encoded_target=data.target
)

In [8]:
# Normalise the column labels: remove the "(cm)" unit, trim the trailing
# blank it leaves behind, and replace the remaining spaces with underscores.
iris_df.columns = (
    iris_df.columns.str.replace("(cm)", "", regex=False)
    .str.rstrip()
    .str.replace(" ", "_", regex=False)
)

In [9]:
# Display the assembled frame: four measurement columns plus encoded_target.
iris_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [10]:
# Confirm the cleaned labels (unit suffix removed, spaces replaced by underscores).
iris_df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'encoded_target'],
      dtype='object')

### Explorando o conjunto de dados

In [11]:
# Structural summary: row count, per-column non-null counts, dtypes and memory usage.
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sepal_length    150 non-null    float64
 1   sepal_width     150 non-null    float64
 2   petal_length    150 non-null    float64
 3   petal_width     150 non-null    float64
 4   encoded_target  150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
iris_df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


### Projeção, Deleção e Junção

### Projeção

In [13]:
# Label-based lookup: the row whose index label is 0, returned as a Series.
# The integer encoded_target shows up as 0.0 because the resulting Series
# has a single float64 dtype.
iris_df.loc[0]

sepal_length      5.1
sepal_width       3.5
petal_length      1.4
petal_width       0.2
encoded_target    0.0
Name: 0, dtype: float64

In [14]:
# describe() is indexed by statistic name, so .loc['mean'] extracts the
# per-column means as a Series.
iris_df.describe().loc['mean']

sepal_length      5.843333
sepal_width       3.057333
petal_length      3.758000
petal_width       1.199333
encoded_target    1.000000
Name: mean, dtype: float64

In [15]:
# Position-based lookup: the second row (position 1), regardless of its label.
iris_df.iloc[1]

sepal_length      4.9
sepal_width       3.0
petal_length      1.4
petal_width       0.2
encoded_target    0.0
Name: 1, dtype: float64

In [16]:
# Position-based scalar lookup: row position 1, column position 2 (petal_length).
iris_df.iloc[1,2]

1.4

### Deleção

In [17]:
# Returns a new frame without the row labelled 0; the result is not assigned,
# so iris_df itself keeps all of its rows.
iris_df.drop([0])

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [18]:
# query() selects the rows with sepal_length >= 4.5 and their index labels are
# dropped, so only the rows with sepal_length below 4.5 remain.
iris_df.drop(iris_df.query("sepal_length >= 4.5").index)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
8,4.4,2.9,1.4,0.2,0
13,4.3,3.0,1.1,0.1,0
38,4.4,3.0,1.3,0.2,0
42,4.4,3.2,1.3,0.2,0


In [19]:
# Same filter as the previous cell, then renumber the rows from 0.
# reset_index() keeps the previous labels in a new "index" column.
iris_df.drop(iris_df.query("sepal_length >= 4.5").index).reset_index()

Unnamed: 0,index,sepal_length,sepal_width,petal_length,petal_width,encoded_target
0,8,4.4,2.9,1.4,0.2,0
1,13,4.3,3.0,1.1,0.1,0
2,38,4.4,3.0,1.3,0.2,0
3,42,4.4,3.2,1.3,0.2,0


### Junção

In [20]:
# Two small string Series, each with the default 0..1 index, used by the
# concat examples that follow.
s1 = pd.Series(["a", "b"])
s2 = pd.Series(["c", "d"])

In [21]:
# axis=1: place the Series side by side as columns, aligned on their index.
pd.concat([s1, s2], axis=1)

Unnamed: 0,0,1
0,a,c
1,b,d


In [22]:
# axis=0: stack the Series end to end. Each one keeps its own labels, so the
# resulting index repeats 0, 1.
pd.concat([s1, s2], axis=0)

0    a
1    b
0    c
1    d
dtype: object

### Com merge

In [23]:
# Two frames that share a "value" column but name their key columns differently.
df1 = pd.DataFrame({"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]})
df2 = pd.DataFrame({"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]})

# Merge on lkey == rkey. "foo" occurs twice on each side, so it contributes
# 2 x 2 = 4 rows; the clashing "value" columns are told apart by the suffixes.
# Left as the cell's last expression (instead of print) so the notebook
# renders the result with its rich table display.
df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right"))

  lkey  value_left rkey  value_right
0  foo           1  foo            5
1  foo           1  foo            8
2  foo           5  foo            5
3  foo           5  foo            8
4  bar           2  bar            6
5  baz           3  baz            7


### Com join

In [24]:
# Caller frame: six rows, each with a key and a payload column A.
df = pd.DataFrame(
    dict(
        key=["K0", "K1", "K2", "K3", "K4", "K5"],
        A=["A0", "A1", "A2", "A3", "A4", "A5"],
    )
)

# Second frame: three rows, each with a key and a payload column B.
other = pd.DataFrame(dict(key=["K0", "K1", "K2"], B=["B0", "B1", "B2"]))

# join() lines the frames up by row index, not by the "key" column. Both
# frames carry a "key" column, so suffixes are needed to keep the two apart.
df.join(other=other, how="left", lsuffix="_caller", rsuffix="_other")

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2
3,K3,A3,,
4,K4,A4,,
5,K5,A5,,


In [25]:
# how="right": keep the index labels of `other` (0..2), so only the first
# three rows of df appear in the result.
df.join(other, lsuffix="_caller", rsuffix="_other", how="right")

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2


### Agregações

In [26]:
# First five rows, as a reminder of the data before grouping.
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [27]:
# Non-null count of every column within each encoded_target class; the
# grouping key becomes the index of the result.
iris_df.groupby(by=['encoded_target']).count()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
encoded_target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,50,50,50,50
1,50,50,50,50
2,50,50,50,50


In [28]:
# Per-class mean of each measurement column.
iris_df.groupby(by=['encoded_target']).mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
encoded_target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,5.006,3.428,1.462,0.246
1,5.936,2.77,4.26,1.326
2,6.588,2.974,5.552,2.026


In [29]:
# reset_index() turns the encoded_target grouping key back into a regular column.
iris_df.groupby(by=['encoded_target']).count().reset_index()

Unnamed: 0,encoded_target,sepal_length,sepal_width,petal_length,petal_width
0,0,50,50,50,50
1,1,50,50,50,50
2,2,50,50,50,50


### Função Apply

In [30]:
# Arithmetic on a DataFrame is applied element-wise to every column, so
# encoded_target is doubled along with the measurements.
# NOTE(review): a later cell adds the string column "especie" to iris_df;
# re-running this cell after that one would also operate on that column.
iris_df * 2

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target
0,10.2,7.0,2.8,0.4,0
1,9.8,6.0,2.8,0.4,0
2,9.4,6.4,2.6,0.4,0
3,9.2,6.2,3.0,0.4,0
4,10.0,7.2,2.8,0.4,0
...,...,...,...,...,...
145,13.4,6.0,10.4,4.6,4
146,12.6,5.0,10.0,3.8,4
147,13.0,6.0,10.4,4.0,4
148,12.4,6.8,10.8,4.6,4


In [31]:
# Class names shipped with the dataset; mapear_especie indexes into this
# sequence with the integer label.
target_names = data.target_names

In [32]:
def mapear_especie(x, nomes=None):
    """Translate an encoded class label into its species name.

    Parameters
    ----------
    x : int
        Position of the wanted name in the name sequence.
    nomes : sequence, optional
        Names indexed by label. When omitted, the notebook-level
        ``target_names`` is looked up at call time, which is the original
        behaviour of this function.

    Returns
    -------
    The element of the name sequence at position ``x``.
    """
    # Fall back to the notebook global only when no explicit sequence is
    # given, so the function can also be used (and checked) on its own.
    if nomes is None:
        nomes = target_names
    return nomes[x]

In [33]:
# Series.apply calls the function once per label, so mapear_especie can be
# passed directly; wrapping it in a lambda adds nothing.
iris_df["especie"] = iris_df["encoded_target"].apply(mapear_especie)

In [34]:
# Display the frame again, now including the decoded "especie" column.
iris_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,encoded_target,especie
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,virginica
146,6.3,2.5,5.0,1.9,2,virginica
147,6.5,3.0,5.2,2.0,2,virginica
148,6.2,3.4,5.4,2.3,2,virginica
