# Data Manipulation
Data manipulation prepares data so that it is easier for machines to analyze. Here are the steps to perform data manipulation using Python:

- Import libraries that will be needed.

In [None]:
import pandas as pd
import numpy as np

Pandas has two main objects: the Series and the DataFrame.

## 1. Series Object
A Series is one-dimensional. It has no column names because it holds only a single column of values, and each value is associated with an index label.

In [None]:
data = [0.25, 0.50, 0.75, 1]

- Converting data into series.

In [None]:
data = pd.Series(data)

In [None]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

- Convert from series to array.

In [None]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

- Display the index.
  
   The default index is a range whose start point is inclusive and whose stop point is exclusive.

In [None]:
data.index

RangeIndex(start=0, stop=4, step=1)

- A range can be converted to a list (this is not related to the Series above; note that the stop value 10 is excluded from the result).

In [None]:
list(range(1,10))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

- Display the data again and confirm that it is a Series.

In [None]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [None]:
data[2]

0.75

- Implicit index is the default index (0,1,2,3)
- We can define the index. When we define the index, it is called an explicit index.
- When defining an index, the number of index labels must equal the number of data values.

In [None]:
# Explicit index: one string label is supplied for each of the four values.
data = pd.Series(
    [0.25, 0.50, 0.75, 1],
    index=['a', 'b', 'c', 'd'],
)

In [None]:
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [None]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [None]:
data.index

Index(['a', 'b', 'c', 'd'], dtype='object')

- Access a value by its explicit index label

In [None]:
data['a']

0.25

This is data selection

Even if we have created an explicit index, we can still access a value by its implicit (positional) index. In recent pandas versions, `.iloc` is the supported way to do this.

In [None]:
# Implicit (positional) index: select the 4th element with .iloc.
# Plain data[3] on a Series with string labels depends on a positional
# fallback that pandas has deprecated, so the position is requested explicitly.
data.iloc[3]

1.0

When the explicit index is made of integers, it can be confused with the implicit index. In that case, `data[key]` looks the key up in the explicit index only.

In [None]:
data_2 = pd.Series(
    [0.25, 0.50, 0.75, 1],
    index=[2, 5, 3, 7],
)

# Here 2 is an explicit index label, not a position.

In [None]:
data_2

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [None]:
data_2[2]

0.25

In [None]:
data_2[0]

# Key error occurs because there is no index 0.

KeyError: ignored

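Loc & Iloc

`loc` selects by explicit index label, while `iloc` selects by implicit integer position. Using them removes the ambiguity that arises when the labels are themselves integers.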


In [None]:
data_2=pd.Series([0.25,0.5,0.75,1],index=[2,5,3,7])

In [None]:
data_2

In [None]:
data_2[2:3]

In [None]:
data_2.loc[7]

In [None]:
data_2.iloc[2]

Create DataFrame

In [None]:
dict_populasi={'Jakarta':750,'Bogor':490,'Depok':350,'Tangerang':270,'Bekasi':670}

In [None]:
dict_populasi

In [None]:
populasi=pd.Series(dict_populasi)

In [None]:
populasi

In [None]:
populasi.loc['Depok']

In [None]:
populasi.iloc[2]

In [None]:
dict_luas={'Jakarta':737,'Bogor':325,'Depok':247,'Tangerang':333,'Bekasi':444}

In [None]:
luas=pd.Series(dict_luas)

In [None]:
luas

In [None]:
daerah=pd.DataFrame({'pop':populasi,'luas':luas})

In [None]:
daerah

In [None]:
# Single label-based lookup (row 'Jakarta', column 'luas') instead of
# chained indexing, which first builds the whole 'luas' column.
daerah.loc['Jakarta', 'luas']

In [None]:
# NOTE: `pop` is also the name of a DataFrame method, so attribute access
# returns that method rather than the 'pop' column; use daerah['pop'] to
# get the column.
daerah.pop

In [None]:
daerah['pop']

Change Column Names

In [None]:
daerah=pd.DataFrame({'Populasi':populasi,'Luas':luas})

In [None]:
daerah

Add Columns to DataFrame

In [None]:
daerah['Pop/Area']=daerah['Populasi']/daerah['Luas']

In [None]:
daerah

Add New Rows to DataFrame

In [None]:
daerah.tambah=pd.DataFrame({'Bandung':[151,148,0.1]})

In [None]:
daerah.tambah

In [None]:
daerah.tambah=daerah.tambah.T

In [None]:
daerah.tambah.columns=daerah.columns

In [None]:
daerah.tambah

In [None]:
pd.concat([daerah,daerah.tambah])