# 데이터 재구성(재구조화)

## 1. 정돈된 데이터

### 1.1. stack

In [9]:
import pandas as pd
import numpy as np

In [13]:
# Load the fruit table from CSV, using the 'State' column as the row index.
fruit = pd.read_csv('data/fruit.csv',index_col='State')
# Display the resulting DataFrame.
fruit

Unnamed: 0_level_0,Apple,Orange,Banana
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Texas,12,10,40
Arizona,9,7,12
Florida,0,14,190


In [14]:
# stack() moves the column labels into an inner index level, giving a long
# Series indexed by (State, fruit name).
fruit.stack()

State          
Texas    Apple      12
         Orange     10
         Banana     40
Arizona  Apple       9
         Orange      7
         Banana     12
Florida  Apple       0
         Orange     14
         Banana    190
dtype: int64

In [16]:
# reset_index() turns the (State, fruit name) index levels back into ordinary
# columns; the unnamed level and the values get the default names
# 'level_1' and 0.
fruit_tidy = fruit.stack().reset_index()
fruit_tidy

Unnamed: 0,State,level_1,0
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


In [18]:
# Replace the default column names with descriptive ones.
fruit_tidy.columns = ['state','fruit','weight']
fruit_tidy

Unnamed: 0,state,fruit,weight
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


### 1.2. melt

<img src='https://t1.daumcdn.net/cfile/tistory/25177F4E5863D58A0C' alt='melt'/>

In [23]:
# Re-read the CSV without index_col so that 'State' stays a regular column
# and the rows get the default integer index.
fruit = pd.read_csv('data/fruit.csv')
fruit

Unnamed: 0,State,Apple,Orange,Banana
0,Texas,12,10,40
1,Arizona,9,7,12
2,Florida,0,14,190


In [24]:
# melt() keeps id_vars as identifier columns and unpivots value_vars into
# rows; the label/value columns get the default names 'variable' and 'value'.
fruit.melt(id_vars=['State'], value_vars=['Apple','Orange','Banana'])

Unnamed: 0,State,variable,value
0,Texas,Apple,12
1,Arizona,Apple,9
2,Florida,Apple,0
3,Texas,Orange,10
4,Arizona,Orange,7
5,Florida,Orange,14
6,Texas,Banana,40
7,Arizona,Banana,12
8,Florida,Banana,190


In [29]:
# var_name / value_name replace the default 'variable' / 'value' column names.
fruit.melt(id_vars=['State'], value_vars=['Apple','Orange','Banana'], var_name='Fruit', value_name='weight')

Unnamed: 0,State,Fruit,weight
0,Texas,Apple,12
1,Arizona,Apple,9
2,Florida,Apple,0
3,Texas,Orange,10
4,Arizona,Orange,7
5,Florida,Orange,14
6,Texas,Banana,40
7,Arizona,Banana,12
8,Florida,Banana,190


### 1.3. 스택된 데이터 되돌리기

In [31]:
# 'State' is a regular column here, so stack() includes it next to the
# numeric columns and the resulting Series has dtype object.
fruit.stack()

0  State       Texas
   Apple          12
   Orange         10
   Banana         40
1  State     Arizona
   Apple           9
   Orange          7
   Banana         12
2  State     Florida
   Apple           0
   Orange         14
   Banana        190
dtype: object

In [32]:
fruit.stack().unstack()
# unstack() spreads the inner index level back out into columns (widens the table)

Unnamed: 0,State,Apple,Orange,Banana
0,Texas,12,10,40
1,Arizona,9,7,12
2,Florida,0,14,190


In [33]:
# Keep the tidy (long) form of the table in a variable so it can be reshaped
# back to wide form.
fruit_tidy2 = fruit.melt(id_vars=['State'], value_vars=['Apple','Orange','Banana'], var_name='Fruit', value_name='weight')
fruit_tidy2

Unnamed: 0,State,Fruit,weight
0,Texas,Apple,12
1,Arizona,Apple,9
2,Florida,Apple,0
3,Texas,Orange,10
4,Arizona,Orange,7
5,Florida,Orange,14
6,Texas,Banana,40
7,Arizona,Banana,12
8,Florida,Banana,190


In [34]:
# pivot() reverses the melt: one row per State, one column per Fruit, with
# the cells filled from 'weight'.
fruit_tidy2.pivot(index='State',columns='Fruit', values='weight')

Fruit,Apple,Banana,Orange
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arizona,9,12,7
Florida,0,190,14
Texas,12,40,10


In [35]:
# Show the stacked Series again as the starting point for unstack(level=...).
fruit.stack()

0  State       Texas
   Apple          12
   Orange         10
   Banana         40
1  State     Arizona
   Apple           9
   Orange          7
   Banana         12
2  State     Florida
   Apple           0
   Orange         14
   Banana        190
dtype: object

In [37]:
# level=0 unstacks the outer index level (the row numbers), so each original
# row becomes a column: the table comes out transposed.
fruit.stack().unstack(level=0)

Unnamed: 0,0,1,2
State,Texas,Arizona,Florida
Apple,12,9,0
Orange,10,7,14
Banana,40,12,190


In [38]:
# level=1 unstacks the inner level (the former column labels), which restores
# the original wide layout.
fruit.stack().unstack(level=1)

Unnamed: 0,State,Apple,Orange,Banana
0,Texas,12,10,40
1,Arizona,9,7,12
2,Florida,0,14,190


### 1.4. groupby 후 unstacking

In [1]:
def square(x):
    """Return ``x`` raised to the second power."""
    squared = x ** 2
    return squared


# Print the squares of 1, 2 and 3, one per line.
for N in (1, 2, 3):
    print(N, '의 제곱은', square(N))


1 의 제곱은 1
2 의 제곱은 4
3 의 제곱은 9


# Magic Commands (매직 명령어)


## %run : 외부 코드 실행

In [5]:
def square(x):
    """Return the square of ``x`` (``x`` to the power of 2)."""
    squared = x ** 2
    return squared


# Show the squares of the integers 1 through 3.
for N in (1, 2, 3):
    print(N, '의 제곱은', square(N))

1 의 제곱은 1
2 의 제곱은 4
3 의 제곱은 9


In [6]:
%run data/myscript.py

1 의 제곱은 1
2 의 제곱은 4
3 의 제곱은 9


In [9]:
square(6)

36

- 머신러닝 등에서 자주 쓰는 함수는 스크립트 파일로 저장해 두고, 필요할 때 `%run`으로 불러와서 사용하면 된다.

## 코드 실행 시간 측정

In [21]:
%timeit L = [n ** 2 for n in range(1000)]

926 µs ± 46.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [24]:
%timeit
L =[]
for n in range(1000):
    L.append(n**2)

In [23]:
?xmode?

## %history : 이전에 실행한 명령 보기

In [27]:
%history -n 

   1:
def square(x):
    return x ** 2

for N in range(1,4):
    print(N, '의 제곱은', square(N))
   2: # Magic Commander
   3: %run : 외부코드 실행
   4: ## %run : 외부코드 실행
   5:
def square(x):
    return x ** 2

for N in range(1,4):
    print(N, '의 제곱은', square(N))
   6: %run data/myscript.py
   7: square(5)
   8: square(6)
   9: square(6)
  10: - 함수로 저장함 머신러닝, 쓸때는 꺼내서쓰면되니까 그렇게 하면됨.
  11: ## 코드 실행 시간 측정
  12: %timeit L = [for n in range(1000)]
  13: %timeit L = [for n in range(1000)]
  14: %timeit L = [n 0**2 for n in range(1000)]
  15: %timeit L = [n 0 ** 2 for n in range(1000)]


## automagic : % 없이도 매직 함수 사용 가능

In [30]:
pwd

'C:\\Users\\1pc\\Desktop\\4.28파이썬및 빅데이터'