# DataFrame

### Membuat DataFrame

In [1]:
import pandas as pd

In [2]:
# Build a small three-row table from a mapping of column name -> column values.
df = pd.DataFrame(
    data={
        "Nama": ["Ahmad", "Joko", "Ado"],
        "Umur": [12, 13, 15],
        "Kelas": [6, 7, 8],
    }
)

In [3]:
df

Unnamed: 0,Nama,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Ado,15,8


In [4]:
df.columns

Index(['Nama', 'Umur', 'Kelas'], dtype='object')

In [5]:
# Rename the first column through the public API. Index objects are meant to be
# immutable; assigning into `df.columns.values` mutates the backing array behind
# pandas' back and can leave label lookups (e.g. df["Nama Depan"]) out of sync.
# `rename` returns a new frame, and re-running this cell is harmless because a
# missing source label is ignored by default.
df = df.rename(columns={"Nama": "Nama Depan"})

In [6]:
df

Unnamed: 0,Nama Depan,Umur,Kelas
0,Ahmad,12,6
1,Joko,13,7
2,Ado,15,8


In [7]:
# Column-oriented input: each key becomes a column, each list holds that column's rows.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}

In [8]:
df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


## Mengakses elemen pada DataFrame

In [10]:
# iloc: positional indexing. The row slice 0:2 excludes its stop (rows 0 and 1),
# and the list [0, 1] selects the columns at positions 0 and 1.
df.iloc[0:2, [0,1]]

Unnamed: 0,calories,duration
0,420,50
1,380,40


In [11]:
# Loc: Label indexing

In [12]:
# Label slices in .loc include BOTH endpoints: rows labelled 0 through 2,
# and every column from the first one up to and including 'calories'.
df.loc[0:2, :'calories']

Unnamed: 0,calories
0,420
1,380
2,390


In [13]:
df.loc[0:2, ['calories', 'duration']]

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


In [14]:
df['calories']

0    420
1    380
2    390
Name: calories, dtype: int64

In [15]:
# Fetch one cell with a single label-based lookup (row label 0, column 'calories')
# instead of chained indexing (df[col][row]), which first materialises the whole
# column and is ambiguous about whether 0 is a label or a position.
df.loc[0, 'calories']

420

In [17]:
# Select rows 0 and 1 of the 'calories' column in one .loc call. Passing the column
# as a list keeps the result a DataFrame, and this avoids the intermediate frame
# created by chaining df[[...]] with .loc[...].
df.loc[[0, 1], ['calories']]

Unnamed: 0,calories
0,420
1,380


### Memberi nama pada index

In [18]:
# Same column-oriented input as before, redefined so this section stands on its own.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}

In [19]:
# Supply explicit row labels; the list needs one label per row of `data`.
df = pd.DataFrame(
    data,
    index=['data1', 'data2', 'data3'],
)

In [20]:
print(df)

       calories  duration
data1       420        50
data2       380        40
data3       390        45


In [21]:
# Positional slicing ignores the string row labels; stops are exclusive, so this is
# the first three rows and the first column (returned as a one-column DataFrame).
df.iloc[:3, :1]

Unnamed: 0,calories
data1,420
data2,380
data3,390


### Membaca file CSV dengan DataFrame

In [22]:
# Load the CSV; the path is resolved relative to the current working directory.
# NOTE: this rebinds `df`, replacing the small hand-built frame used in earlier cells.
df = pd.read_csv('./data.csv')

In [23]:
type(df)

pandas.core.frame.DataFrame

### Melihat DataFrame

In [24]:
df.head()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0


In [25]:
df.tail()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4
168,75,125,150,330.4


In [26]:
df.head(10)

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
5,60,102,127,300.0
6,60,110,136,374.0
7,45,104,134,253.3
8,30,109,133,195.1
9,60,98,124,269.0


In [27]:
df.shape

(169, 4)

In [28]:
df.dtypes

Duration      int64
Pulse         int64
Maxpulse      int64
Calories    float64
dtype: object

### Membaca informasi pada DataFrame

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Duration  169 non-null    int64  
 1   Pulse     169 non-null    int64  
 2   Maxpulse  169 non-null    int64  
 3   Calories  164 non-null    float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB


In [30]:
df.describe()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
count,169.0,169.0,169.0,164.0
mean,63.846154,107.461538,134.047337,375.790244
std,42.299949,14.510259,16.450434,266.379919
min,15.0,80.0,100.0,50.3
25%,45.0,100.0,124.0,250.925
50%,60.0,105.0,131.0,318.6
75%,60.0,111.0,141.0,387.6
max,300.0,159.0,184.0,1860.4


In [31]:
# 'Calories' has missing values (164 non-null out of 169 rows per df.info()).
# mean() skips NaN by default, so this is the average of the values that are present.
df['Calories'].mean()

375.79024390243904

In [32]:
df['Calories'].median()

318.6

In [33]:
# mode() returns a Series because several values can tie for most frequent
# (results are sorted ascending); [0] picks the first of them.
df['Calories'].mode()[0]

300.0