#45: Memadukan loc dan iloc untuk melakukan seleksi data

In [1]:
import pandas as pd

print(pd.__version__)

2.3.3


Persiapan Data Frame

In [2]:
# Read the Titanic CSV from the local data folder, then preview the first rows.
titanic_path = './data/titanicfull.csv'
df = pd.read_csv(titanic_path)
df.head(5)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S


Memadukan loc dan iloc untuk melakukan seleksi data

In [3]:
# Step 1 (position-based): take the rows at positions 15..19 with iloc.
rows_by_position = df.iloc[15:20, :]
# Step 2 (label-based): keep the columns from 'name' through 'age' with loc.
rows_by_position.loc[:, 'name':'age']

Unnamed: 0,name,sex,age
15,"Baumann, Mr. John D",male,
16,"Baxter, Mr. Quigg Edmond",male,24.0
17,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0
18,"Bazzani, Miss. Albina",female,32.0
19,"Beattie, Mr. Thomson",male,36.0


In [4]:
# Step 1 (label-based): keep the columns from 'name' through 'age' with loc.
cols_by_label = df.loc[:, 'name':'age']
# Step 2 (position-based): take the rows at positions 15..19 with iloc.
cols_by_label.iloc[15:20, :]

Unnamed: 0,name,sex,age
15,"Baumann, Mr. John D",male,
16,"Baxter, Mr. Quigg Edmond",male,24.0
17,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0
18,"Bazzani, Miss. Albina",female,32.0
19,"Beattie, Mr. Thomson",male,36.0


#46: Seleksi weekdays dan weekends pada data deret waktu (time series)

In [5]:
import pandas as pd
import numpy as np

print(pd.__version__)
print(np.__version__)

2.3.3
2.3.4


Persiapan Data Frame

In [6]:
# Fixed seed so the random frame (and every result derived from it) is
# reproducible when the notebook is restarted and run from the top.
SEED = 42
n_rows = 365
n_cols = 2
cols = ['col1', 'col2']

rng = np.random.default_rng(SEED)

# Random integers in [1, 20): the upper bound is exclusive.
df = pd.DataFrame(rng.integers(1, 20, size=(n_rows, n_cols)), columns=cols)

# Daily DatetimeIndex starting on 2024-01-01. With 365 periods the last
# date is 2024-12-30, because 2024 is a leap year (366 days).
df.index = pd.date_range(start='2024-01-01', periods=n_rows, freq='D')

print(df)

            col1  col2
2024-01-01    11    13
2024-01-02    13    10
2024-01-03     4    11
2024-01-04    11     6
2024-01-05     8    15
...          ...   ...
2024-12-26    16    13
2024-12-27     7     5
2024-12-28     3    11
2024-12-29     1    14
2024-12-30     4     3

[365 rows x 2 columns]


Seleksi weekdays dan weekends

In [7]:
# dayofweek numbers the days Monday=0 ... Sunday=6, so values below 5
# are Monday through Friday.
is_weekday = df.index.dayofweek < 5
weekdays_df = df[is_weekday]
weekdays_df.head(7)

Unnamed: 0,col1,col2
2024-01-01,11,13
2024-01-02,13,10
2024-01-03,4,11
2024-01-04,11,6
2024-01-05,8,15
2024-01-08,11,7
2024-01-09,3,14


In [8]:
# dayofweek numbers the days Monday=0 ... Sunday=6, so values of 5 and
# above are Saturday and Sunday.
is_weekend = df.index.dayofweek >= 5
weekends_df = df[is_weekend]
weekends_df.head(7)

Unnamed: 0,col1,col2
2024-01-06,19,14
2024-01-07,19,13
2024-01-13,10,15
2024-01-14,10,3
2024-01-20,4,13
2024-01-21,13,13
2024-01-27,13,2


#47: Deteksi dan penanganan kolom dengan tipe data beragam (mixed data types)

In [9]:
import pandas as pd

print(pd.__version__)

2.3.3


Persiapan Data Frame

In [10]:
names = ['bejo', 'tejo', 'wati', 'tiwi', 'cecep']
# The 'ipk' values deliberately mix int, str and float so that the column
# ends up holding several Python types at once.
ipk_values = [2, '3', 3, 2.75, '3.25']

d = {'nama': names, 'ipk': ipk_values}
df = pd.DataFrame(data=d)
df

Unnamed: 0,nama,ipk
0,bejo,2.0
1,tejo,3.0
2,wati,3.0
3,tiwi,2.75
4,cecep,3.25


Deteksi dan penanganan kolom dengan tipe data beragam (mixed data types)

In [11]:
# Show the dtype pandas assigned to each column. An `object` dtype only says
# the column stores generic Python objects; it does not reveal whether the
# individual values share a single type.
df.dtypes

nama    object
ipk     object
dtype: object

In [12]:
# Look up the Python type of every individual value in the 'ipk' column.
ipk_column = df['ipk']
ipk_column.map(type)

0      <class 'int'>
1      <class 'str'>
2      <class 'int'>
3    <class 'float'>
4      <class 'str'>
Name: ipk, dtype: object

In [13]:
# Tally how many values of each Python type the 'ipk' column contains;
# more than one row in the result means the column has mixed types.
ipk_types = df['ipk'].map(type)
ipk_types.value_counts()

ipk
<class 'int'>      2
<class 'str'>      2
<class 'float'>    1
Name: count, dtype: int64

In [14]:
# Cast every value in 'ipk' (ints, floats and numeric strings alike) to float
# and write the converted Series back to the same column.
df['ipk'] = df['ipk'].astype(float)

In [15]:
# Re-count the Python types found in 'ipk' to verify the result of the cast.
types_after_cast = df['ipk'].map(type)
types_after_cast.value_counts()

ipk
<class 'float'>    5
Name: count, dtype: int64

#48: Mengenal Cumulative Count dengan cumcount()

In [16]:
import pandas as pd

print(pd.__version__)

2.3.3


Persiapan Data Frame

In [17]:
# One entry per sale: who sold it ('penjual') and what was sold ('barang').
sellers = ['bejo', 'tejo', 'wati', 'bejo', 'cecep', 'tejo', 'wati', 'bejo']
items = ['monitor', 'monitor', 'keyboard', 'mouse', 'keyboard', 'monitor', 'laptop', 'monitor']

d = {'penjual': sellers, 'barang': items}
df = pd.DataFrame(data=d)
df

Unnamed: 0,penjual,barang
0,bejo,monitor
1,tejo,monitor
2,wati,keyboard
3,bejo,mouse
4,cecep,keyboard
5,tejo,monitor
6,wati,laptop
7,bejo,monitor


Mengenal Cumulative Count dengan cumcount()

In [18]:
# cumcount() numbers the rows inside each 'penjual' group starting at 0;
# adding 1 turns that into a 1-based running count per seller.
rows_per_seller = df.groupby('penjual')
df['count_tiap_penjual'] = rows_per_seller.cumcount().add(1)
df

Unnamed: 0,penjual,barang,count_tiap_penjual
0,bejo,monitor,1
1,tejo,monitor,1
2,wati,keyboard,1
3,bejo,mouse,2
4,cecep,keyboard,1
5,tejo,monitor,2
6,wati,laptop,2
7,bejo,monitor,3


In [19]:
# cumcount() numbers the rows inside each 'barang' group starting at 0;
# adding 1 turns that into a 1-based running count per item.
rows_per_item = df.groupby('barang')
df['count_tiap_barang'] = rows_per_item.cumcount().add(1)
df

Unnamed: 0,penjual,barang,count_tiap_penjual,count_tiap_barang
0,bejo,monitor,1,1
1,tejo,monitor,1,2
2,wati,keyboard,1,1
3,bejo,mouse,2,1
4,cecep,keyboard,1,2
5,tejo,monitor,2,3
6,wati,laptop,2,1
7,bejo,monitor,3,4


In [20]:
# Group on the (penjual, barang) pair so the 1-based running count restarts
# for every distinct seller/item combination.
pair_keys = ['penjual', 'barang']
rows_per_pair = df.groupby(pair_keys)
df['count_pasangan_kolom'] = rows_per_pair.cumcount().add(1)
df

Unnamed: 0,penjual,barang,count_tiap_penjual,count_tiap_barang,count_pasangan_kolom
0,bejo,monitor,1,1,1
1,tejo,monitor,1,2,1
2,wati,keyboard,1,1,1
3,bejo,mouse,2,1,1
4,cecep,keyboard,1,2,1
5,tejo,monitor,2,3,2
6,wati,laptop,2,1,1
7,bejo,monitor,3,4,2
