### Pandas 기본

In [5]:
import pandas as pd

# Column-oriented sample records: each key is a column name and each list
# holds that column's values for the three people.
data = {
    'id': [1, 2, 3],
    'name': ['choi', 'jung', 'lee'],
    'age': [23, 26, 28],
    'assets': [150.4, 123.4, 88.88],
    'job': ['student', 'CEO', 'Dad'],
}

In [6]:
# Build a DataFrame from the dict: keys become columns, rows get a default integer index.
df = pd.DataFrame(data=data)
df

Unnamed: 0,id,name,age,assets,job
0,1,choi,23,150.4,student
1,2,jung,26,123.4,CEO
2,3,lee,28,88.88,Dad


In [7]:
# Passing columns= fixes the column order of the resulting table;
# passing index= supplies the row labels to use.
# 'hobby' is not a key of data, so that column comes out empty.

df = pd.DataFrame(
    data,
    index=['one', 'two', 'three'],
    columns=['id', 'name', 'job', 'age', 'assets', 'hobby'],
)
df

Unnamed: 0,id,name,job,age,assets,hobby
one,1,choi,student,23,150.4,
two,2,jung,CEO,26,123.4,
three,3,lee,Dad,28,88.88,


In [8]:
# Assigning a scalar to a column through indexing fills every row
# with that value in a single step.

df['hobby'] = 'reading books'
df

Unnamed: 0,id,name,job,age,assets,hobby
one,1,choi,student,23,150.4,reading books
two,2,jung,CEO,26,123.4,reading books
three,3,lee,Dad,28,88.88,reading books


In [13]:
# Assigning an array to a column through indexing sets the whole column in a single step.

import numpy as np

# np.arange(3) supplies the values 0, 1, 2, which replace the previous ages.
df['age'] = np.arange(3)
df

Unnamed: 0,id,name,job,age,assets,hobby
one,1,choi,student,0,150.4,reading books
two,2,jung,CEO,1,123.4,reading books
three,3,lee,Dad,2,88.88,reading books


In [16]:
# Add a row: assigning a list to a new label through .loc appends it to the DataFrame.
# NOTE(review): the id is passed as the string '4' while the existing ids are integers,
# and the row label 4 is an int while the other labels are strings — confirm this is intended.

df.loc[4] = ['4','hong','freelancer',3, 60.5,'gangnam']
df

Unnamed: 0,id,name,job,age,assets,hobby
one,1,choi,student,0,150.4,reading books
two,2,jung,CEO,1,123.4,reading books
three,3,lee,Dad,2,88.88,reading books
4,4,hong,freelancer,3,60.5,gangnam


In [17]:
# Delete a row: drop returns a new DataFrame without the row labelled 4.
# The result is only displayed, not assigned back, so df itself keeps that row.

df.drop(index=4)

Unnamed: 0,id,name,job,age,assets,hobby
one,1,choi,student,0,150.4,reading books
two,2,jung,CEO,1,123.4,reading books
three,3,lee,Dad,2,88.88,reading books


In [20]:
# Transpose: swap rows and columns, so row labels become column headers.

df.transpose()

Unnamed: 0,one,two,three,4
id,1,2,3,4
name,choi,jung,lee,hong
job,student,CEO,Dad,freelancer
age,0,1,2,3
assets,150.4,123.4,88.88,60.5
hobby,reading books,reading books,reading books,gangnam


In [21]:
# Extract the DataFrame's contents as a NumPy array (one inner array per row).

df.values

array([[1, 'choi', 'student', 0, 150.4, 'reading books'],
       [2, 'jung', 'CEO', 1, 123.4, 'reading books'],
       [3, 'lee', 'Dad', 2, 88.88, 'reading books'],
       ['4', 'hong', 'freelancer', 3, 60.5, 'gangnam']], dtype=object)

In [22]:
# Values of a single column as a NumPy array.
df['name'].values

array(['choi', 'jung', 'lee', 'hong'], dtype=object)

### Pandas - 인덱스 정렬 

In [24]:
import numpy as np
import pandas as pd

# Series holding 0..9 whose index labels run in descending order, 10 down to 1.
se = pd.Series(range(10), index=list(range(10, 0, -1)))
# Name the index so the label 'index' is shown above it when displayed.
se.index.name = 'index'

se

index
10    0
9     1
8     2
7     3
6     4
5     5
4     6
3     7
2     8
1     9
dtype: int64

In [26]:
# sort_index: order the entries by their index labels (ascending by default).
# The sorted result is assigned back, so se is now ordered by label 1..10.

se = se.sort_index(ascending=True)
se

index
1     9
2     8
3     7
4     6
5     5
6     4
7     3
8     2
9     1
10    0
dtype: int64

In [27]:
# Rebuild the sample data for the sorting examples.
# Passing ascending=False to a sort call yields descending order.

import pandas as pd

data = {
    'id': [1, 2, 3],
    'name': ['choi', 'jung', 'lee'],
    'age': [23, 26, 28],
    'assets': [150.4, 123.4, 88.88],
    'job': ['student', 'CEO', 'Dad'],
}


In [28]:
# Build a DataFrame from the dict: keys become columns, rows get a default integer index.
df = pd.DataFrame(data=data)
df

Unnamed: 0,id,name,age,assets,job
0,1,choi,23,150.4,student
1,2,jung,26,123.4,CEO
2,3,lee,28,88.88,Dad


In [31]:
# Sort rows by index label; ascending=False gives descending order.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,id,name,age,assets,job
2,3,lee,28,88.88,Dad
1,2,jung,26,123.4,CEO
0,1,choi,23,150.4,student


In [34]:
# With axis=1 ('columns') the column labels are sorted instead of the row index.
# In descending order the columns run from 'name' down to 'age'.

df.sort_index(axis='columns', ascending=False)

Unnamed: 0,name,job,id,assets,age
0,choi,student,1,150.4,23
1,jung,CEO,2,123.4,26
2,lee,Dad,3,88.88,28


In [35]:
# sort_values: order rows by the values of a column, ascending by default
# (available on both Series and DataFrame).

df.sort_values(by='assets', ascending=True)

Unnamed: 0,id,name,age,assets,job
2,3,lee,28,88.88,Dad
1,2,jung,26,123.4,CEO
0,1,choi,23,150.4,student


In [38]:
# Overwrite the age column with new values
    
df['age'] = [10, 20, 10]

# Two rows tie on age 10; the tie is broken by the second key, assets
# (both keys ascending).
df.sort_values(by=['age', 'assets'], ascending=True)

Unnamed: 0,id,name,age,assets,job
2,3,lee,10,88.88,Dad
0,1,choi,10,150.4,student
1,2,jung,20,123.4,CEO
