In [1]:
# Copyright © 2022 Indonesia AI. All Rights Reserved.
# contact@aiforindonesia.org

### Membuat DataFrame

In [43]:
import pandas as pd

# Build a small example DataFrame from a dict that maps each column name
# to the list of values for that column (one entry per row).
df = pd.DataFrame(
    {
        "Nama": ["Ahmad", "Joko", "Adi"],
        "Umur": [12, 13, 15],
        "Kelas": [6, 7, 8],
    }
)

In [44]:
# Show the column labels of the DataFrame (returned as a pandas Index).
df.columns

Index(['Nama', 'Umur', 'Kelas'], dtype='object')

In [45]:
# Rename the "Nama" column through the public API.
# Writing into `df.columns.values[...]` mutates the backing array of an Index,
# which pandas treats as immutable: cached label lookups can go stale and the
# array may be read-only, so that pattern is not reliable.
# `rename` returns a new DataFrame and ignores labels that are absent, so
# re-running this cell is harmless.
df = df.rename(columns={"Nama": "Nama Singkat"})

In [46]:
# Display the DataFrame to check the header of the first column.
df

Unnamed: 0,Nama Singkat,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Adi,15,8


In [47]:
# Column-oriented input: each key becomes a column, each list its values.
data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}

# No index is given, so rows receive the default integer labels 0, 1, 2.
df = pd.DataFrame(data=data)
df

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


### Mengakses elemen pada DataFrame

In [61]:
# iloc selects purely by integer position.
# Rows: positions 0 and 1 (the stop of a positional slice is excluded).
# Columns: positions 0 and 1, given as an explicit list.
df.iloc[:2, [0, 1]]

Unnamed: 0,calories,duration
0,420,50
1,380,40


In [62]:
# Same selection using positional slices on both axes:
# the first two rows and the first two columns (stop positions are excluded).
df.iloc[:2, :2]

Unnamed: 0,calories,duration
0,420,50
1,380,40


In [63]:
# A bare ":" keeps every row; the column slice keeps the first two columns.
df.iloc[:, :2]

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


In [66]:
# LOC: label-based indexing.
# Unlike iloc, label slices include BOTH endpoints: rows labelled 0 through 2
# (three rows) and every column up to and including 'calories'.
df.loc[0:2, :'calories']

Unnamed: 0,calories
0,420
1,380
2,390


In [67]:
# Rows labelled 0 through 2 (inclusive) and an explicit list of column labels.
df.loc[0:2, ['calories', 'duration']]

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


In [11]:
df['calories'] # access a single column (returns a Series)

0    420
1    380
2    390
Name: calories, dtype: int64

In [12]:
# Access one cell by row label and column label with a single .loc lookup.
# This avoids chained indexing (df[col][row]), which first builds an
# intermediate Series and is unsafe if later reused for assignment.
df.loc[0, 'calories']

420

In [13]:
df[['calories']] # access a column with a list of labels (returns a DataFrame)

Unnamed: 0,calories
0,420
1,380
2,390


In [14]:
# Select rows 0 and 1 of the 'calories' column in one .loc call.
# List selectors on both axes keep the result a DataFrame, and a single
# lookup avoids chaining two separate indexing operations.
df.loc[[0, 1], ['calories']]

Unnamed: 0,calories
0,420
1,380


### Memberi nama pada Index

In [15]:
# Same workout data as before, stored column by column.
data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}

# Passing `index` replaces the default 0..n-1 row labels with custom ones.
df = pd.DataFrame(data=data, index=["day1", "day2", "day3"])

# print() renders the frame as plain text.
print(df)

      calories  duration
day1       420        50
day2       380        40
day3       390        45


In [16]:
# A bare expression at the end of a cell uses the notebook's rich table display.
df

Unnamed: 0,calories,duration
day1,420,50
day2,380,40
day3,390,45


### Membaca File CSV dengan DataFrame

In [17]:
# Load data.csv from the current working directory into a DataFrame.
df = pd.read_csv('./data.csv')
# print() gives the plain-text rendering; long frames are elided in the middle.
print(df)

     Duration  Pulse  Maxpulse  Calories
0          60    110       130     409.1
1          60    117       145     479.0
2          60    103       135     340.0
3          45    109       175     282.4
4          45    117       148     406.0
..        ...    ...       ...       ...
164        60    105       140     290.8
165        60    110       145     300.0
166        60    115       145     310.2
167        75    120       150     320.4
168        75    125       150     330.4

[169 rows x 4 columns]


In [18]:
# Rich table display of the loaded frame (also elided in the middle).
df

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
...,...,...,...,...
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


### Melihat DataFrame

In [19]:
# Preview the first rows (5 when no count is given).
df.head()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0


In [20]:
# Preview the last rows (5 when no count is given).
df.tail()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [21]:
# Pass a count to preview a different number of leading rows (here 10).
df.head(10)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
5,60,102,127,300.0
6,60,110,136,374.0
7,45,104,134,253.3
8,30,109,133,195.1
9,60,98,124,269.0


In [22]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(169, 4)

In [23]:
# Data type of each column.
df.dtypes

Duration      int64
Pulse         int64
Maxpulse      int64
Calories    float64
dtype: object

### Membaca Informasi pada DataFrame

In [24]:
# Print a summary: index range, per-column non-null counts, dtypes and memory use.
# A non-null count below the number of entries indicates missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Duration  169 non-null    int64  
 1   Pulse     169 non-null    int64  
 2   Maxpulse  169 non-null    int64  
 3   Calories  164 non-null    float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB


In [25]:
# Descriptive statistics per numeric column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
count,169.0,169.0,169.0,164.0
mean,63.846154,107.461538,134.047337,375.790244
std,42.299949,14.510259,16.450434,266.379919
min,15.0,80.0,100.0,50.3
25%,45.0,100.0,124.0,250.925
50%,60.0,105.0,131.0,318.6
75%,60.0,111.0,141.0,387.6
max,300.0,159.0,184.0,1860.4


In [26]:
# Central-tendency measures for the Calories column, shown as one tuple.
# mode() returns a Series of the most frequent value(s), so [0] picks the
# first entry of that Series.
(
    df["Calories"].mean(),
    df["Calories"].median(),
    df["Calories"].mode()[0],
)

(375.79024390243916, 318.6, 300.0)

In [27]:
# Pairwise correlation between the columns (pandas' default method is Pearson).
# Values near 1 or -1 indicate a strong linear relationship; near 0, a weak one.
df.corr()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
Duration,1.0,-0.155408,0.009403,0.922717
Pulse,-0.155408,1.0,0.786535,0.025121
Maxpulse,0.009403,0.786535,1.0,0.203813
Calories,0.922717,0.025121,0.203813,1.0


---