![](./image/pandas-logo.png)

# Python Pandas: Tips & Tricks

Oleh Channel YouTube [Indonesia Belajar](https://www.youtube.com/IndonesiaBelajarKomputer)

## \#01: Menyertakan Prefix dan Suffix pada seluruh Kolom Data Frame

### Import Modules

In [None]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

### Persiapan Data Frame

In [None]:
# Demo frame for this tip: 5 rows x 5 columns (A-E) filled with random
# integers drawn from 1 up to (but not including) 10.
n_rows, n_cols = 5, 5
cols = tuple('ABCDE')

random_values = np.random.randint(1, 10, size=(n_rows, n_cols))
df = pd.DataFrame(random_values, columns=cols)
df

In [None]:
# tuple() over a string yields one element per character; this is how the
# column labels in `cols` were built.
tuple('ABCDE')

### Menyertakan Prefix Kolom

In [None]:
# Display the frame with 'kolom_' prepended to every column label.
# The result is not assigned, so `df` keeps its original labels.
df.add_prefix('kolom_')

### Menyertakan Suffix Kolom

In [None]:
# Display the frame with '_field' appended to every column label.
# The result is not assigned, so `df` keeps its original labels.
df.add_suffix('_field')

## \#02: Pemilihan baris (rows selection) pada Data Frame

### Import Modules

In [None]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

### Persiapan Data Frame

In [None]:
# Demo frame for this tip: 10 rows x 5 columns (A-E) of random integers
# from 1 up to (but not including) 5, so values repeat often enough to filter on.
cols = tuple('ABCDE')
n_rows = 10
n_cols = 5

df = pd.DataFrame(
    np.random.randint(low=1, high=5, size=(n_rows, n_cols)),
    columns=cols,
)
df

### Selection dengan operator logika `|`

In [None]:
# Rows where column A equals 1 or 3. Each comparison needs its own
# parentheses because `|` binds more tightly than `==`.
df[(df['A'] == 1) | (df['A'] == 3)]

### Selection dengan fungsi `isin()`

In [None]:
# Same selection as the `|` version, written as a membership test against
# a list of accepted values.
df[df['A'].isin([1, 3])]

### Mengenal operator negasi `~`

In [None]:
# `~` inverts the boolean mask: rows where column A is neither 1 nor 3.
df[~df['A'].isin([1, 3])]

## \#03: Konversi tipe data String ke Numerik pada kolom Data Frame

### Import Modules

In [None]:
import pandas as pd

print(pd.__version__)

### Persiapan Data Frame

In [None]:
# Two columns of strings: col2 holds only digit strings, while col1 also
# contains the non-numeric entry 'teks'.
data = dict(
    col1=['1', '2', '3', 'teks'],
    col2=['1', '2', '3', '4'],
)

df = pd.DataFrame(data)
df

In [None]:
# Inspect the dtype of each column; both were built from string values.
df.dtypes

### Konversi tipe data dengan fungsi `astype()`

In [None]:
# Cast only col2 to int via a {column: dtype} mapping. col1 is left out of
# the mapping and is therefore not converted.
df_x = df.astype({'col2':'int'})
df_x

In [None]:
# Check the column dtypes of the converted copy.
df_x.dtypes

### Konversi tipe data numerik dengan fungsi `to_numeric()`

In [None]:
# Run pd.to_numeric over every column. errors='coerce' turns values that
# cannot be parsed as numbers (such as 'teks') into NaN instead of raising.
df.apply(pd.to_numeric, errors='coerce')

## \#04: Pemilihan kolom (columns selection) pada Data Frame berdasarkan tipe data

### Import Modules

In [1]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

1.0.3
1.18.1


### Persiapan Data Frame

In [2]:
# Build a small demo frame holding one float, one integer, one datetime and
# one text column, so select_dtypes() has one column of each kind to pick.
n_rows = 5
n_cols = 2
cols = ['bil_pecahan', 'bil_bulat']

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)),
                  columns=cols)
# Cast one of the two integer columns to float so both numeric kinds exist.
df['bil_pecahan'] = df['bil_pecahan'].astype('float')

# pd.util.testing.makeDateIndex() is deprecated (it triggers a FutureWarning
# on pandas 1.0) and is no longer available in pandas 2.x. pd.date_range()
# is the public API and yields the same hourly index starting at 2000-01-01.
# The lowercase 'h' alias is used because newer pandas deprecates 'H'.
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='h')
# reset_index() moves the unnamed datetime index into a column named 'index'.
df = df.reset_index()

df['teks'] = list('ABCDE')

df

  import pandas.util.testing


Unnamed: 0,index,bil_pecahan,bil_bulat,teks
0,2000-01-01 00:00:00,14.0,12,A
1,2000-01-01 01:00:00,16.0,3,B
2,2000-01-01 02:00:00,13.0,13,C
3,2000-01-01 03:00:00,13.0,5,D
4,2000-01-01 04:00:00,14.0,14,E


In [3]:
# List the dtype of each column: one datetime, one float, one integer and
# one object (text) column.
df.dtypes

index          datetime64[ns]
bil_pecahan           float64
bil_bulat               int64
teks                   object
dtype: object

### Memilih kolom bertipe data numerik

In [4]:
# 'number' matches every numeric column, here both the float and the
# integer column.
df.select_dtypes(include='number')

Unnamed: 0,bil_pecahan,bil_bulat
0,14.0,12
1,16.0,3
2,13.0,13
3,13.0,5
4,14.0,14


In [5]:
# Narrow the selection to floating-point columns only.
df.select_dtypes(include='float')

Unnamed: 0,bil_pecahan
0,14.0
1,16.0
2,13.0
3,13.0
4,14.0


In [6]:
# Narrow the selection to integer columns only.
df.select_dtypes(include='int')

Unnamed: 0,bil_bulat
0,12
1,3
2,13
3,5
4,14


### Memilih kolom bertipe data string atau `object`

In [7]:
# Text columns are stored with the 'object' dtype in this frame, so that is
# the dtype to ask for when selecting strings.
df.select_dtypes(include='object')

Unnamed: 0,teks
0,A
1,B
2,C
3,D
4,E


### Memilih kolom bertipe data `datetime`

In [8]:
# Select the datetime column (the former index).
df.select_dtypes(include='datetime')

Unnamed: 0,index
0,2000-01-01 00:00:00
1,2000-01-01 01:00:00
2,2000-01-01 02:00:00
3,2000-01-01 03:00:00
4,2000-01-01 04:00:00


### Memilih kolom dengan kombinasi tipe data

In [9]:
# `include` also accepts a list: keep columns matching any listed dtype,
# here the numeric columns plus the text column.
df.select_dtypes(include=['number', 'object'])

Unnamed: 0,bil_pecahan,bil_bulat,teks
0,14.0,12,A
1,16.0,3,B
2,13.0,13,C
3,13.0,5,D
4,14.0,14,E


## \#05: Membalik urutan baris dan kolom pada Data Frame

### Import Modules

In [None]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

### Persiapan Data Frame

In [None]:
# Demo frame for this tip: a 5 x 5 grid (columns A-E) of random integers
# from 1 up to (but not including) 10.
cols = tuple('ABCDE')
n_rows = n_cols = 5

df = pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(n_rows, n_cols)),
    columns=cols,
)
df

### Membalik urutan kolom

In [None]:
# Keep every row (`:`) and step through the columns backwards (`::-1`),
# which reverses the column order.
df.loc[:, ::-1]

### Membalik urutan baris

In [None]:
# Step through the rows backwards. Each row keeps its original index
# label, so the index now runs in descending order.
df.loc[::-1]

### Membalik urutan baris dan melakukan penyesuaian ulang `index`

In [None]:
# Reverse the rows, then renumber them from 0. drop=True discards the old
# index instead of adding it back as a column.
df.loc[::-1].reset_index(drop = True)

## \#06: Mengganti nama (label) kolom pada Data Frame

### Import Modules

In [None]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

### Persiapan Data Frame

In [None]:
# Demo frame for this tip: random integers from 1 up to (but not including)
# 10 in a 5 x 5 frame whose columns are labelled A-E.
n_rows, n_cols, cols = 5, 5, tuple('ABCDE')

frame_shape = (n_rows, n_cols)
df = pd.DataFrame(np.random.randint(1, 10, frame_shape), columns=cols)
df

### Mengganti nama (label) kolom pada Data Frame

In [None]:
# Rename one column with an {old: new} mapping; columns missing from the
# mapping keep their labels. The result is displayed, not assigned to `df`.
df.rename(columns={'C':'Hobi'})

In [None]:
# Several columns can be renamed in a single call by adding more entries
# to the mapping.
df.rename(columns={'A':'Nama', 'B':'Alamat', 'D':'Kota'})

## \#07: Menghapus (drop) missing values (`NaN`)

### Import Modules

In [None]:
import pandas as pd

print(pd.__version__)

### Persiapan Data Frame

In [None]:
# pd.util.testing.makeMissingDataframe() is deprecated and is no longer
# available in pandas 2.x, so the demo frame is built explicitly instead.
# It is modelled on what that helper produced: 30 rows of random floats in
# columns A-D, string row labels, and roughly 10% of the cells set to NaN.
import numpy as np

values = np.random.randn(30, 4)
# Blank out each cell with probability ~0.1 to create the missing values.
values[np.random.rand(*values.shape) > 0.9] = np.nan
labels = ['row_{:02d}'.format(i) for i in range(len(values))]

# reset_index() turns the row labels into a regular column named 'index'.
df = pd.DataFrame(values, columns=list('ABCD'), index=labels).reset_index()
df.head()

In [None]:
# Rename the 'index' column (created by reset_index()) to 'Z' and keep the
# renamed frame.
df = df.rename(columns={'index':'Z'})
df.head()

In [None]:
# Keep a deep copy so that later cells can restore the frame after `df`
# has been overwritten by a dropna() result.
df_backup = df.copy(deep=True)

### Menghapus (drop) setiap kolom yang mengandung missing values

In [None]:
# Drop every column that contains at least one missing value.
df = df.dropna(axis='columns') 
df.head()

### Menghapus (drop) setiap baris yang mengandung missing values

In [None]:
# Restore the original frame from the backup, then drop every row that
# contains at least one missing value.
df = df_backup.copy(deep=True)
df = df.dropna(axis='rows')
df.head()

### Persentase missing values untuk tiap kolom

In [None]:
# Restore the original frame. isna() flags each missing cell as True, and
# the mean of those flags is the share of missing values per column
# (a fraction between 0 and 1, not yet multiplied by 100).
df = df_backup.copy(deep=True)
df.isna().mean()

### Menghapus (drop) setiap kolom yang mengandung missing values berdasarkan threshold

In [None]:
# `thresh` is the minimum number of non-missing values a column needs in
# order to be kept. Using 90% of the row count drops the columns in which
# more than about 10% of the values are missing.
threshold = len(df) * 0.9
df = df.dropna(thresh=threshold, axis='columns')
df.head()