![](./image/pandas-logo.png)

# Python Pandas: Tips & Tricks

Oleh Channel YouTube [Indonesia Belajar](https://www.youtube.com/IndonesiaBelajarKomputer)

## \#01: Menyertakan Prefix dan Suffix pada seluruh Kolom Data Frame

### Import Modules

In [1]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions, one per line
print(pd.__version__, np.__version__, sep='\n')

1.0.0
1.18.1


### Persiapan Data Frame

In [2]:
# Frame dimensions and the column labels A..E
n_rows, n_cols = 5, 5
cols = tuple('ABCDE')

# Random integers from 1 to 9 (the upper bound 10 is exclusive); no seed is
# set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,2,1,9,3,6
1,1,7,7,9,7
2,7,7,5,4,6
3,9,3,3,2,9
4,1,1,4,9,3


### Menyertakan Prefix Kolom

In [3]:
# Returns a new frame whose column labels all start with 'kolom_'
df.add_prefix(prefix='kolom_')

Unnamed: 0,kolom_A,kolom_B,kolom_C,kolom_D,kolom_E
0,2,1,9,3,6
1,1,7,7,9,7
2,7,7,5,4,6
3,9,3,3,2,9
4,1,1,4,9,3


### Menyertakan Suffix Kolom

In [4]:
# Returns a new frame whose column labels all end with '_field'
df.add_suffix(suffix='_field')

Unnamed: 0,A_field,B_field,C_field,D_field,E_field
0,2,1,9,3,6
1,1,7,7,9,7
2,7,7,5,4,6
3,9,3,3,2,9
4,1,1,4,9,3


## \#02: Filtering atau penyaringan baris (rows) pada Data Frame

### Import Modules

In [5]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions, one per line
print(pd.__version__, np.__version__, sep='\n')

1.0.0
1.18.1


### Persiapan Data Frame

In [6]:
# Frame dimensions and the column labels A..E
n_rows, n_cols = 10, 5
cols = tuple('ABCDE')

# Random integers from 1 to 4 (the upper bound 5 is exclusive); the small
# range guarantees repeated values to filter on. No seed is set, so the
# values differ on every run.
df = pd.DataFrame(
    data=np.random.randint(low=1, high=5, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,3,4,2,1,4
1,4,1,3,4,4
2,1,1,4,4,2
3,2,3,3,2,1
4,4,1,4,4,1
5,3,4,4,3,1
6,1,3,3,2,1
7,3,4,1,4,3
8,1,2,3,1,1
9,1,2,3,1,2


### Filtering dengan operator logika `|` dan `&`

In [7]:
# Keep rows where column A equals 1 OR 3; each comparison needs its own
# parentheses because `|` binds tighter than `==`.
df.loc[(df['A'] == 1) | (df['A'] == 3)]

Unnamed: 0,A,B,C,D,E
0,3,4,2,1,4
2,1,1,4,4,2
5,3,4,4,3,1
6,1,3,3,2,1
7,3,4,1,4,3
8,1,2,3,1,1
9,1,2,3,1,2


### Filtering dengan fungsi `isin()`

In [8]:
# Same selection as the `|` version: rows whose column A is one of the listed values
df.loc[df['A'].isin([1, 3])]

Unnamed: 0,A,B,C,D,E
0,3,4,2,1,4
2,1,1,4,4,2
5,3,4,4,3,1
6,1,3,3,2,1
7,3,4,1,4,3
8,1,2,3,1,1
9,1,2,3,1,2


### Mengenal operator negasi `~`

In [9]:
# `~` inverts the boolean mask: rows whose column A is NOT 1 or 3
df.loc[~df['A'].isin([1, 3])]

Unnamed: 0,A,B,C,D,E
1,4,1,3,4,4
3,2,3,3,2,1
4,4,1,4,4,1


## \#03: Konversi tipe data String ke Numerik pada kolom Data Frame

### Import Modules

In [16]:
import pandas as pd

# Report the pandas version in use
print(f"{pd.__version__}")

1.0.0


### Persiapan Data Frame

In [35]:
# Both columns hold strings; col1 deliberately contains the non-numeric
# value 'teks'.
data = dict(
    col1=['1', '2', '3', 'teks'],
    col2=['1', '2', '3', '4'],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,col1,col2
0,1,1
1,2,2
2,3,3
3,teks,4


In [12]:
# Inspect the dtype of every column (both hold strings at this point)
df.dtypes

col1    object
col2    object
dtype: object

### Konversi tipe data dengan fungsi `astype()`

In [30]:
# Convert only col2 to integer; col1 is left out of the mapping and stays as-is.
df_x = df.astype(dtype={'col2': 'int'})
df_x

Unnamed: 0,col1,col2
0,1,1
1,2,2
2,3,3
3,teks,4


In [31]:
# Inspect the column dtypes of the converted frame
df_x.dtypes

col1    object
col2     int64
dtype: object

### Konversi tipe data numerik dengan fungsi `to_numeric()`

In [36]:
# Convert every column to numeric; with errors='coerce' values that cannot be
# parsed become NaN instead of raising.
df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

Unnamed: 0,col1,col2
0,1.0,1
1,2.0,2
2,3.0,3
3,NaN,4


## \#04: Pemilihan/seleksi kolom Data Frame berdasarkan tipe data

### Import Modules

In [39]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions, one per line
print(pd.__version__, np.__version__, sep='\n')

1.0.0
1.18.1


### Persiapan Data Frame

In [74]:
n_rows = 5
n_cols = 2
cols = ['bil_pecahan', 'bil_bulat']

# Random integers from 1 to 19 (the upper bound 20 is exclusive); no seed is
# set, so the values differ on every run.
df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)),
                  columns=cols)

# Hourly timestamps starting at 2000-01-01. The private helper
# `pd.util.testing.makeDateIndex` is deprecated since pandas 1.0 and is not
# available in pandas 2.x, so the public `pd.date_range` is used instead.
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='h')
df = df.reset_index()  # move the timestamps into a regular column named 'index'
df['teks'] = list('ABCDE')
df['bil_pecahan'] = df['bil_pecahan'].astype('float')
df

Unnamed: 0,index,bil_pecahan,bil_bulat,teks
0,2000-01-01 00:00:00,4.0,16,A
1,2000-01-01 01:00:00,18.0,2,B
2,2000-01-01 02:00:00,7.0,3,C
3,2000-01-01 03:00:00,14.0,3,D
4,2000-01-01 04:00:00,16.0,1,E


In [72]:
# Inspect the dtype of every column before selecting columns by dtype
df.dtypes

index          datetime64[ns]
bil_pecahan           float64
bil_bulat               int64
teks                   object
dtype: object

### Memilih kolom bertipe data numerik

In [78]:
# 'number' matches every numeric dtype (integer and float columns alike)
df.select_dtypes(include=['number'])

Unnamed: 0,bil_pecahan,bil_bulat
0,4.0,16
1,18.0,2
2,7.0,3
3,14.0,3
4,16.0,1


In [79]:
# Only floating-point columns
df.select_dtypes(include=['float'])

Unnamed: 0,bil_pecahan
0,4.0
1,18.0
2,7.0
3,14.0
4,16.0


In [80]:
# Only integer columns
df.select_dtypes(include=['int'])

Unnamed: 0,bil_bulat
0,16
1,2
2,3
3,3
4,1


### Memilih kolom bertipe data string atau `object`

In [81]:
# String columns are stored with the `object` dtype in this frame
df.select_dtypes(include=['object'])

Select string columns


Unnamed: 0,teks
0,A
1,B
2,C
3,D
4,E


### Memilih kolom bertipe data `datetime`

In [84]:
# Only datetime columns
df.select_dtypes(include=['datetime'])

Unnamed: 0,index
0,2000-01-01 00:00:00
1,2000-01-01 01:00:00
2,2000-01-01 02:00:00
3,2000-01-01 03:00:00
4,2000-01-01 04:00:00


### Memilih kolom dengan kombinasi tipe data

In [86]:
# Several dtypes can be requested at once by passing a list
df.select_dtypes(['number', 'object'])

Unnamed: 0,bil_pecahan,bil_bulat,teks
0,4.0,16,A
1,18.0,2,B
2,7.0,3,C
3,14.0,3,D
4,16.0,1,E


## \#05: Membalik urutan baris dan kolom pada Data Frame

### Import Modules

In [88]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions, one per line
print(pd.__version__, np.__version__, sep='\n')

1.0.0
1.18.1


### Persiapan Data Frame

In [89]:
# Frame dimensions and the column labels A..E
n_rows, n_cols = 5, 5
cols = tuple('ABCDE')

# Random integers from 1 to 9 (the upper bound 10 is exclusive); no seed is
# set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,2,8,7,8,7
1,4,7,9,4,1
2,4,8,9,9,1
3,8,8,2,4,5
4,3,3,7,6,3


### Membalik urutan kolom

In [91]:
# All rows, columns taken with step -1 (last column first)
df.iloc[:, ::-1]

Unnamed: 0,E,D,C,B,A
0,7,8,7,8,2
1,1,4,9,7,4
2,1,9,9,8,4
3,5,4,2,8,8
4,3,6,7,3,3


### Membalik urutan baris

In [95]:
# Rows taken with step -1 (last row first); the original index labels are kept
df.iloc[::-1]

Unnamed: 0,A,B,C,D,E
4,3,3,7,6,3
3,8,8,2,4,5
2,4,8,9,9,1
1,4,7,9,4,1
0,2,8,7,8,7


### Membalik urutan baris dan melakukan penyesuaian ulang `index`

In [97]:
# Reverse the rows, then renumber the index from 0; drop=True discards the
# old index instead of adding it as a column.
df.iloc[::-1].reset_index(drop=True)

Unnamed: 0,A,B,C,D,E
0,3,3,7,6,3
1,8,8,2,4,5
2,4,8,9,9,1
3,4,7,9,4,1
4,2,8,7,8,7
