# Pandas [DataFrame]
* 여러 개의 Series가 모여서 행과 열을 이룬 데이터
* Series 또는 Dictionary를 활용하여 DataFrame 생성 가능

In [3]:
import pandas as pd

## Series를 이용한 DataFrame 생성 방법

In [13]:
# Per-country figures as Series with the default 0..3 integer index:
# `country` holds population counts, `country_gdp` holds GDP values.
country = pd.Series(data=[5180, 12718, 141500, 32676])
country_gdp = pd.Series(
    data=[1409250000, 516700000, 169320000, 2041280000],
)

In [14]:
# Print both Series; the first is followed by " \n" so a blank line
# separates the two printouts.
print(f"{country} \n")
print(country_gdp)

0      5180
1     12718
2    141500
3     32676
dtype: int64 

0    1409250000
1     516700000
2     169320000
3    2041280000
dtype: int64


In [15]:
# Combine the two Series into one DataFrame; each dictionary key becomes
# a column name.
df_country = pd.DataFrame(
    data={"gdp": country_gdp, "population": country},
)

In [16]:
# Display the DataFrame built from the two Series.
df_country

Unnamed: 0,gdp,population
0,1409250000,5180
1,516700000,12718
2,169320000,141500
3,2041280000,32676


In [18]:
# Caveat: when the Series passed to DataFrame do not share the same index,
# the rows are matched by index label, not by position. The result uses the
# union of all labels, and cells with no matching label are left as NaN.
# Here `country` gets string labels, unlike the integer-indexed GDP Series.
country = pd.Series(
    data=[5180, 12718, 141500, 32676],
    index=['korea', 'japan', 'china', 'usa'],
    name="country",
)

In [19]:
# Rebuild the DataFrame now that `country` carries string labels while
# `country_gdp` still has the default integer index.
df_country = pd.DataFrame(
    data={"gdp": country_gdp, "population": country},
)

In [20]:
# Display the result: the row index is the union of both Series' labels,
# and every cell without a matching label is NaN.
df_country

Unnamed: 0,gdp,population
0,1409250000.0,
1,516700000.0,
2,169320000.0,
3,2041280000.0,
china,,141500.0
japan,,12718.0
korea,,5180.0
usa,,32676.0


## Dictionary를 활용한 DataFrame 생성 방법

In [1]:
# Column-oriented source data: each key is a column name and each list
# holds that column's values, one entry per country.
dict_country = {
    "country": ["china", "japan", "korea", "usa"],
    "gdp": [1409250000, 516700000, 169320000, 2041280000],
    "population": [141500, 12718, 5180, 32676],
}

In [4]:
# Build the DataFrame directly from the dictionary of column lists.
df_country = pd.DataFrame(data=dict_country)

In [6]:
# Display the DataFrame; each dictionary key became a column.
df_country

Unnamed: 0,country,gdp,population
0,china,1409250000,141500
1,japan,516700000,12718
2,korea,169320000,5180
3,usa,2041280000,32676


In [7]:
# Use the 'country' column as the row index; set_index returns a new
# DataFrame, so rebind the name to keep the result.
df_country = df_country.set_index(keys='country')

In [8]:
# Display the DataFrame, now indexed by country.
df_country

Unnamed: 0_level_0,gdp,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
china,1409250000,141500
japan,516700000,12718
korea,169320000,5180
usa,2041280000,32676


In [26]:
# Inspect DataFrame attributes.
# shape: (number of rows, number of columns).
df_country.shape

(4, 2)

In [28]:
# size: total number of cells (rows x columns).
df_country.size

8

In [29]:
# ndim: number of dimensions (axes) of the data.
df_country.ndim

2

In [30]:
# values: the cell data as a NumPy array, without the row/column labels.
df_country.values

array([[1409250000,     141500],
       [ 516700000,      12718],
       [ 169320000,       5180],
       [2041280000,      32676]], dtype=int64)

In [31]:
# index: the row labels (here the country names) and the index's name.
df_country.index

Index(['china', 'japan', 'korea', 'usa'], dtype='object', name='country')

In [32]:
# columns: the column labels.
df_country.columns

Index(['gdp', 'population'], dtype='object')

In [33]:
# Attributes can also be assigned: give the column axis and the row index
# their own display names.
df_country.columns.name = '정보'
df_country.index.name = '나라'

In [34]:
# Display the DataFrame with the renamed index and column axes.
df_country

정보,gdp,population
나라,Unnamed: 1_level_1,Unnamed: 2_level_1
china,1409250000,141500
japan,516700000,12718
korea,169320000,5180
usa,2041280000,32676
