#13: Konversi nilai numerik ke dalam sejumlah kategori

In [1]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions (one per line) so the outputs below
# can be tied to a known environment.
print(pd.__version__, np.__version__, sep='\n')

2.3.3
2.3.4


Persiapan Data Frame

In [2]:
# Build a single-column frame of random ages to be bucketed in the next cell.
n_rows, n_cols = 10, 1
cols = ('usia',)

# randint's upper bound is exclusive, so the generated ages fall in 1..98.
df = pd.DataFrame(
    data=np.random.randint(low=1, high=99, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,usia
0,23
1,64
2,87
3,49
4,17
5,98
6,68
7,36
8,10
9,64


Pengelompokan nilai numerik ke dalam beberapa kategori menggunakan cut()

In [3]:
# Bucket each age into a labelled group. Intervals are closed on the right
# (pandas' default, spelled out here): (0, 18] -> 'anak', (18, 65] -> 'dewasa',
# (65, 99] -> 'manula'. An age of exactly 18 is therefore still 'anak'.
df['kelompok_usia'] = pd.cut(
    x=df['usia'],
    bins=[0, 18, 65, 99],
    right=True,
    labels=['anak', 'dewasa', 'manula'],
)
df

Unnamed: 0,usia,kelompok_usia
0,23,dewasa
1,64,dewasa
2,87,manula
3,49,dewasa
4,17,anak
5,98,manula
6,68,manula
7,36,dewasa
8,10,anak
9,64,dewasa


#14: Menggabungkan (merge) dua Data Frame

In [4]:
import pandas as pd
import numpy as np

# Report the pandas and NumPy versions (one per line) used for the merge examples.
print(pd.__version__, np.__version__, sep='\n')

2.3.3
2.3.4


Persiapan Data Frame

In [5]:
# Build a 5x5 frame of random integers; the two frames merged later are
# derived from it by dropping different rows.
n_rows, n_cols = 5, 5
cols = ('A', 'B', 'C', 'D', 'E')

# randint's upper bound is exclusive, so the values fall in 1..19.
df = pd.DataFrame(
    data=np.random.randint(low=1, high=20, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,2,13,19,9,18
1,7,12,12,10,14
2,15,17,5,6,7
3,7,4,8,10,8
4,7,1,13,6,8


In [6]:
# First merge operand: an independent copy of df without the rows labelled 1 and 4.
df1 = df.copy(deep=True).drop(index=[1, 4])
df1

Unnamed: 0,A,B,C,D,E
0,2,13,19,9,18
2,15,17,5,6,7
3,7,4,8,10,8


In [7]:
# Second merge operand: an independent copy of df without the rows labelled 0 and 3.
df2 = df.copy(deep=True).drop(index=[0, 3])
df2

Unnamed: 0,A,B,C,D,E
1,7,12,12,10,14
2,15,17,5,6,7
4,7,1,13,6,8


Menggabungkan dua Data Frame

In [8]:
# Inner join: with no `on=` given, merge joins on every column the two frames
# share, so only rows whose values match in all of those columns are kept.
df_inner = df1.merge(right=df2, how='inner')
df_inner

Unnamed: 0,A,B,C,D,E
0,15,17,5,6,7


In [9]:
# Outer join: keeps the union of rows from both frames; rows present in both
# (matched on all shared columns) appear only once.
df_outer = df1.merge(right=df2, how='outer')
df_outer

Unnamed: 0,A,B,C,D,E
0,2,13,19,9,18
1,7,1,13,6,8
2,7,4,8,10,8
3,7,12,12,10,14
4,15,17,5,6,7


#15: Memecah nilai string dari suatu kolom ke dalam beberapa kolom baru

In [10]:
import pandas as pd

# Report the pandas version used for the string-splitting examples.
print(f'{pd.__version__}')

2.3.3


Persiapan Data Frame

In [11]:
# Sample records: a full name and a "city, province" birthplace per person.
data = {
    'nama': [
        'Didi Kempot',
        'Glenn Fredly',
        'Mbah Surip',
    ],
    'tempat_kelahiran': [
        'Surakarta, Jawa Tengah',
        'Jakarta, DKI Jakarta',
        'Mojokerto, Jawa Timur',
    ],
}
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,nama,tempat_kelahiran
0,Didi Kempot,"Surakarta, Jawa Tengah"
1,Glenn Fredly,"Jakarta, DKI Jakarta"
2,Mbah Surip,"Mojokerto, Jawa Timur"


Memecah nama depan dan nama belakang

In [12]:
# Split each full name at the FIRST space only (n=1) so the result never has
# more than two columns. Without the limit, a name with three or more words
# expands into extra columns and the two-column assignment below fails.
# Everything after the first space is kept together as the last name.
df[['nama_depan', 'nama_belakang']] = df['nama'].str.split(' ', n=1, expand=True)
df

Unnamed: 0,nama,tempat_kelahiran,nama_depan,nama_belakang
0,Didi Kempot,"Surakarta, Jawa Tengah",Didi,Kempot
1,Glenn Fredly,"Jakarta, DKI Jakarta",Glenn,Fredly
2,Mbah Surip,"Mojokerto, Jawa Timur",Mbah,Surip


Memecah nama kota dan propinsi

In [13]:
# Split "city, province" at the first comma and swallow the whitespace around it.
# A bare ',' separator would leave the leading space in the province
# (' Jawa Tengah'), which silently breaks later equality checks and group-bys.
df[['kota', 'propinsi']] = df['tempat_kelahiran'].str.split(
    r'\s*,\s*', n=1, expand=True, regex=True
)
df

Unnamed: 0,nama,tempat_kelahiran,nama_depan,nama_belakang,kota,propinsi
0,Didi Kempot,"Surakarta, Jawa Tengah",Didi,Kempot,Surakarta,Jawa Tengah
1,Glenn Fredly,"Jakarta, DKI Jakarta",Glenn,Fredly,Jakarta,DKI Jakarta
2,Mbah Surip,"Mojokerto, Jawa Timur",Mbah,Surip,Mojokerto,Jawa Timur


#16: Menata ulang Data Frame dengan multiple indexes menggunakan unstack()

In [14]:
import pandas as pd

# Report the pandas version used for the unstack() examples.
print(f'{pd.__version__}')

2.3.3


Persiapan Data Frame

In [15]:
# Load the Titanic passenger data from the local data directory and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer='./data/titanicfull.csv')
df.head(n=5)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S


Data Frame dengan multiple indexes dari hasil grouping

In [16]:
# Mean of `survived` per (sex, pclass) pair. Selecting the column with a list
# keeps the result a DataFrame, indexed by a two-level (sex, pclass) MultiIndex.
(
    df
    .groupby(by=['sex', 'pclass'])[['survived']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,pclass,Unnamed: 2_level_1
female,1,0.965278
female,2,0.886792
female,3,0.490741
male,1,0.340782
male,2,0.146199
male,3,0.15213


Menata ulang Data Frame dengan multiple indexes

In [17]:
# Same per-(sex, pclass) mean, but pivot the innermost index level (pclass)
# into columns so each sex becomes one row.
(
    df
    .groupby(by=['sex', 'pclass'])['survived']
    .mean()
    .unstack(level='pclass')
)

pclass,1,2,3
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.965278,0.886792,0.490741
male,0.340782,0.146199,0.15213
