In [None]:
import numpy as np
import pandas as pd

# Series Objects

In [None]:
# Build a Series from a plain list; no index is given, so pandas numbers the rows itself.
revenues = pd.Series(
    data=[555, 6000, 9999],
)
revenues  # displays each index label next to its value

0     555
1    6000
2    9999
dtype: int64

In [None]:
revenues.values  # show the values held by the Series

array([ 555, 6000, 9999])

In [None]:
revenues.index  # show the Series' index (a default RangeIndex here)

RangeIndex(start=0, stop=3, step=1)

In [None]:
# Series with explicit string labels as its index instead of the default 0..n-1.
city_revenues = pd.Series(
    data=[4200, 8000, 6500],
    index=["Amsterdam", "Toronto", "Tokyo"],
)
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [None]:
city_revenues.index  # the index now holds the city labels passed at construction

Index(['Amsterdam', 'Toronto', 'Tokyo'], dtype='object')

In [None]:
# A dict can be passed straight to pd.Series:
# its keys become the index labels and its values become the data.
city_employee_count = pd.Series(
    data={
        "Amsterdam": 5,
        "Tokyo": 8,
    },
)
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [None]:
"Tokyo" in city_employee_count  # membership test is done against the index labels

True

In [None]:
"New York" in city_employee_count  # not one of the index labels, so this is False

False

In [None]:
city_employee_count  # re-display the employee-count Series

Amsterdam    5
Tokyo        8
dtype: int64

In [None]:
city_revenues  # re-display the revenue Series

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

# DataFrame

In [None]:
# A DataFrame is a collection of Series, one per column. The index labels of
# every Series are kept as row labels; a city missing from one Series shows
# NaN in that column.
city_data = pd.DataFrame(
    data={"revenue": city_revenues, "employee_count": city_employee_count},
)
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [None]:
city_data.index  # row labels of the DataFrame

In [None]:
city_data.values  # the cell values without row/column labels

In [None]:
city_data.axes  # both axes at once: the row index and the column index

In [None]:
city_data.axes[0]  # axis 0: the row index

In [None]:
city_data.axes[1]  # axis 1: the column index

In [None]:
city_data.keys()  # list the column labels

In [None]:
city_data.columns  # alternative to .keys() for getting the column labels

# Mengakses Elemen Series

In [None]:
# Re-create the revenue Series so the element-access examples below start from a known state.
city_revenues = pd.Series(
    index=["Amsterdam", "Toronto", "Tokyo"],
    data=[4200, 8000, 6500],
)
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [None]:
city_revenues["Amsterdam"]  # look up a value by its index label

4200

In [None]:
# Fetch the first element by position. .iloc is used because a plain [0] on a
# string-labelled Series relies on a deprecated "integer key means position" fallback.
city_revenues.iloc[0]

4200

In [None]:
city_revenues[:2]  # positional slice: the first two entries, stopping before Tokyo (position 2)

Amsterdam    4200
Toronto      8000
dtype: int64

In [None]:
city_revenues[1:]  # positional slice: everything from position 1 onward

Toronto    8000
Tokyo      6500
dtype: int64

In [None]:
city_revenues["Amsterdam":]  # label slice starting at "Amsterdam"

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [None]:
city_revenues[:"Tokyo"]  # label slice; the end label "Tokyo" is included in the result

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

.loc dan .iloc

.loc: mengarah ke label index

.iloc: mengarah ke nomor index [0,1,...]

In [None]:
# Integer labels that deliberately differ from the positions 0..4,
# so label-based and position-based lookups give different answers.
colors = pd.Series(
    data=["red", "purple", "blue", "green", "yellow"],
    index=[1, 2, 3, 5, 8],
)

In [None]:
colors.loc[1]  # label 1 -> the first element

'red'

In [None]:
colors.iloc[1]  # position 1 -> the second element

'purple'

In [None]:
colors.loc[3:8]  # label slice; the end label 8 is included

3      blue
5     green
8    yellow
dtype: object

In [None]:
colors.iloc[1:3]  # positional slice; the end position 3 is excluded

2    purple
3      blue
dtype: object

# Mengakses DataFrame

In [None]:
city_data  # display the DataFrame built earlier

Unnamed: 0,revenues,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [None]:
# Select one column; the result is a Series that keeps the row index.
# The column was created under the name "revenue", so that is the key to use.
city_data["revenue"]

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenues, dtype: int64

In [None]:
# Alternative: attribute-style access to the same column.
# The attribute name must match the column name exactly ("revenue").
city_data.revenue

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenues, dtype: int64

In [None]:
# Build a DataFrame from a list of dicts: one dict per row,
# with the dict keys becoming the column names.
toys = pd.DataFrame(
    data=[
        {"name": "ball", "shape": "sphere"},
        {"name": "Rubik's cube", "shape": "cube"},
    ],
)

#keys jadi kolom, value jadi isi dari tiap kolom

In [None]:
toys  # display the DataFrame

Unnamed: 0,name,shape
0,ball,sphere
1,Rubik's cube,cube


In [None]:
toys["shape"]  # bracket access returns the "shape" column

0    sphere
1      cube
Name: shape, dtype: object

In [None]:
toys.shape  # attribute access gives the DataFrame's (rows, columns) size, not the "shape" column

(2, 2)

In [None]:
city_data.loc["Amsterdam"]  # select a row by its index label

revenues          4200.0
employee_count       5.0
Name: Amsterdam, dtype: float64

In [None]:
city_data.iloc[0]  # select a row by its position

revenues          4200.0
employee_count       5.0
Name: Amsterdam, dtype: float64

In [None]:
city_data.loc["Amsterdam":"Tokyo"]  # label slice of rows; both end labels are included

Unnamed: 0,revenues,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0


In [None]:
# Rows from "Amsterdam" through "Tokyo", restricted to the "revenue" column
# (the name the column was created with).
city_data.loc["Amsterdam":"Tokyo", "revenue"]

Amsterdam    4200
Tokyo        6500
Name: revenues, dtype: int64

In [None]:
# Intentional error demo: .loc on a DataFrame takes at most two indexers
# (rows, columns). Passing the column labels as separate arguments gives three,
# which raises IndexingError. The error is caught and printed so that
# Restart & Run All can continue past this cell.
try:
    city_data.loc["Amsterdam":"Tokyo", "revenue", "employee_count"]
except pd.errors.IndexingError as err:
    print(f"{type(err).__name__}: {err}")

IndexingError: ignored

In [None]:
# Solution: pass the column labels as a single list, so .loc receives exactly
# two indexers (rows, columns). Column names match those used at creation.
city_data.loc["Amsterdam":"Tokyo", ["revenue", "employee_count"]]

Unnamed: 0,revenues,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0


In [None]:
# .sum() adds up every value in the column; the column is named "revenue".
city_data.revenue.sum()

18700

In [None]:
# Alternative: the same column total using bracket access ("revenue" is the column's name).
city_data["revenue"].sum()

18700

# Menggabungkan Multiple DataFrame

In [None]:
# Two extra cities with the same columns as city_data, ready to be appended to it.
further_city_data = pd.DataFrame(
    data={
        "revenue": [7000, 3400],
        "employee_count": [2, 2],
    },
    index=["New York", "Barcelona"],
)

In [None]:
further_city_data  # display the additional cities

Unnamed: 0,revenue,employee_count
New York,7000,2
Barcelona,3400,2


In [None]:
city_data  # display the original cities for comparison before combining

Unnamed: 0,revenues,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [None]:
# Combine the two DataFrames with pd.concat: the rows of further_city_data
# are appended below those of city_data.
all_city_data = pd.concat(
    objs=[city_data, further_city_data],
    sort=False,
)

In [None]:
all_city_data  # display the combined DataFrame

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,
New York,7000,2.0
Barcelona,3400,2.0


In [None]:
# Country and capital flag (1 = capital, 0 = not) per city, indexed by city name.
city_countries = pd.DataFrame(
    data={
        "country": ["Holland", "Japan", "Holland", "Canada", "Spain"],
        "capital": [1, 1, 0, 0, 0],
    },
    index=["Amsterdam", "Tokyo", "Rotterdam", "Toronto", "Barcelona"],
)

In [None]:
city_countries  # display the city -> country table

Unnamed: 0,country,capital
Amsterdam,Holland,1
Tokyo,Japan,1
Rotterdam,Holland,0
Toronto,Canada,0
Barcelona,Spain,0


In [None]:
# Concatenate along the column axis: the columns of both DataFrames are placed
# side by side and rows are matched on their index labels.
cities = pd.concat(
    objs=[all_city_data, city_countries],
    axis="columns",
    sort=False,
)

In [None]:
cities  # display the column-wise combination; cities absent from one table show NaN

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200.0,5.0,Holland,1.0
Tokyo,6500.0,8.0,Japan,1.0
Toronto,8000.0,,Canada,0.0
New York,7000.0,2.0,,
Barcelona,3400.0,2.0,Spain,0.0
Rotterdam,,,Holland,0.0


In [None]:
# join="inner" keeps only the index labels that are present in every DataFrame.
# Tip: prefer sort=False with pd.concat when the order does not matter, because
# sort=True adds sorting work and makes the call slower.
pd.concat(
    objs=[all_city_data, city_countries],
    axis="columns",
    join="inner",
    sort=True,
)

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200,5.0,Holland,1
Barcelona,3400,2.0,Spain,0
Tokyo,6500,8.0,Japan,1
Toronto,8000,,Canada,0


In [None]:
# Country-level facts, indexed by country name so they can be matched
# against the "country" column of the cities table.
countries = pd.DataFrame(
    data={
        "population_millions": [17, 127, 37],
        "continent": ["Europe", "Asia", "North America"],
    },
    index=["Holland", "Japan", "Canada"],
)

In [None]:
countries  # display the country table

Unnamed: 0,population_millions,continent
Holland,17,Europe
Japan,127,Asia
Canada,37,North America


In [None]:
cities  # display the cities table again before merging it with countries

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200.0,5.0,Holland,1.0
Tokyo,6500.0,8.0,Japan,1.0
Toronto,8000.0,,Canada,0.0
New York,7000.0,2.0,,
Barcelona,3400.0,2.0,Spain,0.0
Rotterdam,,,Holland,0.0


Menggabungkan DataFrame dengan suatu kondisi

In [None]:
# pd.merge: match each row's "country" value in `cities` against the index of
# `countries` (the country names). right_index=True already names the key on
# the right-hand side, so the contradictory right_on="continent" was removed.
pd.merge(cities, countries, left_on="country", right_index=True)

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent
Amsterdam,4200.0,5.0,Holland,1.0,17,Europe
Rotterdam,,,Holland,0.0,17,Europe
Tokyo,6500.0,8.0,Japan,1.0,127,Asia
Toronto,8000.0,,Canada,0.0,37,North America
