# *Pandas(Panel data)*

# 

# 👉🏻 *DataFrame Setting*

### *Cell 너비 설정*

In [1]:
import pandas as pd

# Widen the per-cell character limit so long values are not cut off with '...'.
# The canonical option key is used; a dotted shorthand would only resolve
# through pandas' regex matching of option names.
pd.set_option('display.max_colwidth', 500)

In [2]:
# Single 'open' column holding five very wide integer literals
# (presumably chosen so the rendered cells are long — TODO confirm intent).
data = {'open': [102000000000000000000000000000000000000, 202000000000000000000000000000000000000, 302000000000000000000000000000000000000, 402000000000000000000000000000000000000, 502000000000000000000000000000000000000],}

In [3]:
# Build a one-column frame from `data` and display it as the cell result.
df = pd.DataFrame(data)
df

Unnamed: 0,open
0,102000000000000000000000000000000000000
1,202000000000000000000000000000000000000
2,302000000000000000000000000000000000000
3,402000000000000000000000000000000000000
4,502000000000000000000000000000000000000


### *최대 열 개수 설정*

In [13]:
import pandas as pd

# Show at most 3 columns; wider frames are rendered with a '...' placeholder.
# The canonical option key is used; a dotted shorthand would only resolve
# through pandas' regex matching of option names.
pd.set_option('display.max_columns', 3)

In [14]:
# Four price columns with five rows each; the first value of every column is
# fractional.
data = dict(
    open=[10200.2, 30000, 30020, 41200, 12000],
    high=[20100.4, 12400, 20000, 50050, 21200],
    low=[15000.6, 20000, 40000, 20300, 41020],
    close=[32000.2, 20040, 23400, 50000, 12400],
)
df = pd.DataFrame(data)

In [15]:
# Preview the first rows; the frame has four columns, so with the 3-column
# display limit the middle columns are shown as '...'.
df.head()

Unnamed: 0,open,...,close
0,10200.2,...,32000.2
1,30000.0,...,20040.0
2,30020.0,...,23400.0
3,41200.0,...,50000.0
4,12000.0,...,12400.0


### *최대 행 개수 설정*

In [16]:
import pandas as pd
import numpy as np

In [17]:
# 200 consecutive integers in a single column. No display.max_rows override
# is set in this section, so the frame is rendered with the current row limit.
row_values = np.arange(200)
df = pd.DataFrame(row_values)
df

Unnamed: 0,0
0,0
1,1
2,2
3,3
4,4
...,...
195,195
196,196
197,197
198,198


In [18]:
# The same 200 integers laid out as 100 rows x 2 columns.
grid = np.arange(200).reshape(100, 2)
df = pd.DataFrame(grid)
df

Unnamed: 0,0,1
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9
...,...,...
95,190,191
96,192,193
97,194,195
98,196,197


### *텍스트 정렬*

In [19]:
import pandas as pd

In [20]:
# Column name -> five sample prices, built from (name, values) pairs so the
# column order stays open, high, low, close.
data = dict([
    ('open', [10200.2, 30000, 30020, 41200, 12000]),
    ('high', [20100.4, 12400, 20000, 50050, 21200]),
    ('low', [15000.6, 20000, 40000, 20300, 41020]),
    ('close', [32000.2, 20040, 23400, 50000, 12400]),
])

In [21]:
# Build the frame whose rendering is styled in the next cell.
df = pd.DataFrame(data)

In [22]:
# Right-align every data cell. The key must be the exact CSS property name
# 'text-align'; a misspelled property is emitted as-is and ignored by the
# browser, leaving the cells unaligned.
dfStyler = df.style.set_properties(**{'text-align': 'right'})
# Right-align the header cells (<th>) too. The call is the last expression of
# the cell, so the styled table is rendered as the cell output.
dfStyler.set_table_styles([dict(selector='th',
                                props=[('text-align', 'right')])])

Unnamed: 0,open,high,low,close
0,10200.2,20100.4,15000.6,32000.2
1,30000.0,12400.0,20000.0,20040.0
2,30020.0,20000.0,40000.0,23400.0
3,41200.0,50050.0,20300.0,50000.0
4,12000.0,21200.0,41020.0,12400.0


### *소수점 자릿수 설정*

In [23]:
# Render floats with exactly two digits after the decimal point.
pd.set_option('display.float_format', '{:.2f}'.format)

In [24]:
# Re-display the frame after setting the float format.
df

Unnamed: 0,open,...,close
0,10200.2,...,32000.2
1,30000.0,...,20040.0
2,30020.0,...,23400.0
3,41200.0,...,50000.0
4,12000.0,...,12400.0


# 

# 👉🏻 *column 순서 지정 & index 부여*

In [25]:
import pandas as pd

In [8]:
# Integer price samples, added one column at a time in the order
# open, high, low, close.
data = {}
data['open'] = [10200, 30000, 30020, 41200, 12000]
data['high'] = [20100, 12400, 20000, 50050, 21200]
data['low'] = [15000, 20000, 40000, 20300, 41020]
data['close'] = [32000, 20040, 23400, 50000, 12400]

In [None]:
# Row labels and explicit column order for the frame.
index = ['ㄱ', 'ㄴ', 'ㄷ', 'ㄹ', 'ㅁ']
column_order = ['open', 'high', 'low', 'close']
# `data` is rebound from the source mapping to the resulting DataFrame;
# later cells read it under this name.
data = pd.DataFrame(data=data, index=index, columns=column_order)
print(type(data))

In [6]:
# Preview the frame; rows are labelled by the custom index.
data.head()

Unnamed: 0,open,high,low,close
ㄱ,10200,20100,15000,32000
ㄴ,30000,12400,20000,20040
ㄷ,30020,20000,40000,23400
ㄹ,41200,50050,20300,50000
ㅁ,12000,21200,41020,12400


In [7]:
# Attribute access and bracket access both yield the 'open' column;
# print each result followed by its type.
for open_column in (data.open, data['open']):
    print(open_column)
    print(type(open_column))

ㄱ    10200
ㄴ    30000
ㄷ    30020
ㄹ    41200
ㅁ    12000
Name: open, dtype: int64
<class 'pandas.core.series.Series'>
ㄱ    10200
ㄴ    30000
ㄷ    30020
ㄹ    41200
ㅁ    12000
Name: open, dtype: int64
<class 'pandas.core.series.Series'>


# 

# 👉🏻 *Series → DataFrame*

In [1]:
import pandas as pd

In [2]:
# Two lists of different lengths (3 and 4 items).
li1, li2 = [1, 2, 3], [1, 2, 3, 4]

In [3]:
# Wrap each list in a Series keyed by its column name, then combine the
# Series into one frame.
d = {label: pd.Series(values)
     for label, values in [('one', li1), ('two', li2)]}
df = pd.DataFrame(data=d)

In [5]:
# Add a column from a plain list, one value per existing row.
df['three'] = ['일', '이', '삼', '시']

In [8]:
# Add a column from a 3-element Series; values are matched on the index, so
# the row without a matching label is left empty (NaN).
df['four'] = pd.Series([10,20,30])

In [9]:
# Preview the combined frame, including the gaps left by the shorter inputs.
df.head()

Unnamed: 0,one,two,three,four
0,1.0,1,일,10.0
1,2.0,2,이,20.0
2,3.0,3,삼,30.0
3,,4,시,


# 

# 👉🏻 *drop, del*

In [1]:
import pandas as pd
import pickle

In [2]:
# Read the pickled frame from the data directory (path is relative to the
# notebook's working directory).
# NOTE(review): unpickling can execute arbitrary code; only load files from a
# trusted source.
with open('../data/sample.pickle', mode='rb') as pickle_file:
    df = pickle.load(pickle_file)

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,id,predict
0,1,"증강현실, 미디어"
1,2,"경제, 투자"
2,3,etc
3,4,"통신, 광고, 서버"
4,5,"투자, 직업, 경제"


### ①

In [4]:
# Remove the 'id' column. With the positional form, axis=0 targets rows and
# axis=1 targets columns; `columns=` states the same intent explicitly.
# Rebinding `df` instead of mutating in place, together with errors='ignore',
# keeps the cell safe to re-run once 'id' is already gone.
df = df.drop(columns='id', errors='ignore')

### ②

In [4]:
# `del` removes the column from the frame in place. The guard avoids a
# KeyError when 'id' has already been removed (e.g. by the drop() cell when
# the notebook is run top to bottom).
if 'id' in df.columns:
    del df['id']

In [5]:
# Display the frame to confirm that only 'predict' remains.
df

Unnamed: 0,predict
0,"증강현실, 미디어"
1,"경제, 투자"
2,etc
3,"통신, 광고, 서버"
4,"투자, 직업, 경제"
...,...
99995,etc
99996,"커머스, 식품"
99997,etc
99998,etc


# 

# 👉🏻 *indexing*

In [1]:
import pandas as pd

In [2]:
# Pair each column name with its five sample prices.
price_columns = ['open', 'high', 'low', 'close']
price_values = [
    [10200, 30000, 30020, 41200, 12000],
    [20100, 12400, 20000, 50050, 21200],
    [15000, 20000, 40000, 20300, 41020],
    [32000, 20040, 23400, 50000, 12400],
]
data = dict(zip(price_columns, price_values))

In [3]:
# Build the frame used by the indexing examples (default integer index).
df = pd.DataFrame(data)

### ① *열 인덱싱*

In [4]:
# Passing a list of column names selects those columns as a DataFrame.
print(df[['open', 'high']])

    open   high
0  10200  20100
1  30000  12400
2  30020  20000
3  41200  50050
4  12000  21200


### ② *행 인덱싱*

In [5]:
# .loc[1] looks up the row labelled 1 and returns it as a Series.
print(df.loc[1])

open     30000
high     12400
low      20000
close    20040
Name: 1, dtype: int64


In [17]:
# Label-based slice: the end label 2 is included, so three rows come back.
print(df.loc[:2])

    open   high    low  close
0  10200  20100  15000  32000
1  30000  12400  20000  20040
2  30020  20000  40000  23400


In [7]:
# .iloc[1] picks the row at integer position 1 (the same row as label 1 here,
# because the frame uses the default 0..4 index).
print(df.iloc[1])

open     30000
high     12400
low      20000
close    20040
Name: 1, dtype: int64


In [16]:
# Position-based slice: the end position 2 is excluded, so two rows come back.
print(df.iloc[:2])

    open   high    low  close
0  10200  20100  15000  32000
1  30000  12400  20000  20040


In [18]:
# Plain slicing on the frame selects rows by position; the result matches
# df.iloc[:2].
print(df[:2])

    open   high    low  close
0  10200  20100  15000  32000
1  30000  12400  20000  20040
