# Pandas

In [None]:
import numpy as np
import pandas as pd

In [None]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

# Series

In [None]:
s1 = pd.Series([1, 3, 5, np.nan, 6, 8])
s1

In [None]:
s1.index

In [None]:
s1.values

#### Series는 내부적으로 numpy 배열을 사용하지만, dtype이 object인 경우에는 원소마다 데이터 타입이 달라도 된다.

In [None]:
s1 = pd.Series([np.nan, np.inf, 0, 1, 2, 3, 'a', 'b', 'c'])
s1

In [None]:
s1.values

## index 설정하기

In [None]:
index_seq = range(10, 20)
seq_data = range(10)
s2 = pd.Series(data=seq_data, index=index_seq)
s2

In [None]:
index_date = ['2018-10-17', '2018-10-18', '2018-10-19', '2018-10-20']
s3 = pd.Series([200, 195, np.nan, 205], index=index_date)
s3

In [None]:
s3.index

## 데이터 연산

In [None]:
# Two integer Series built straight from ranges; s1 has six elements and
# s2 has five, so they only partly share index labels.
s1 = pd.Series(range(10, 70, 10))
s2 = pd.Series(range(1, 6))



In [None]:
s1

In [None]:
s2

In [None]:
s1+s2

In [None]:
s1-s2

In [None]:
s1*s2

In [None]:
s1/s2

In [None]:
s1**s2

In [None]:
s1//s2

In [None]:
s1%s2

## Dictionary로 시리즈 입력

In [None]:
# Building a Series from a dict: each key becomes an index label and each
# dict value becomes the corresponding Series value.
dict_data = {'국어': 100, '영어': 95, '수학': 80}
s5 = pd.Series(data=dict_data)
print(s5)

## 날짜 자동 생성 - date_range 

In [None]:
pd.date_range('2019-05-25', '2019-05-28')

In [None]:
pd.date_range(start='2019-05-25', end='2019-05-28')

In [None]:
pd.date_range(start='2019-05-25', periods=4)

In [None]:
pd.date_range(start='2019-05-25 10:00:00', periods=6)

### 날짜 데이터 포맷 (yyyy-mm-dd)

In [None]:
# The same start date is written year-first with three different
# separators ('-', '.', '/') and handed to date_range; the repr of each
# resulting index is printed for comparison.
date_index1 = pd.date_range(start='2019-05-25', periods=4)
date_index2 = pd.date_range(start='2019.05.25', periods=4)
date_index3 = pd.date_range(start='2019/05/25', periods=4)
for label, index in (('date_index1: ', date_index1),
                     ('date_index2: ', date_index2),
                     ('date_index3: ', date_index3)):
    print(label, repr(index))


In [None]:
pd.date_range

In [None]:
# Month-first spellings of the start date, again with three different
# separators ('/', '-', '.'); the repr of each resulting index is printed.
date_index4 = pd.date_range(start='05/25/2019', periods=4)
date_index5 = pd.date_range(start='05-25-2019', periods=4)
date_index6 = pd.date_range(start='05.25.2019', periods=4)

for label, index in (('date_index4: ', date_index4),
                     ('date_index5: ', date_index5),
                     ('date_index6: ', date_index6)):
    print(label, repr(index))


### date_range의 time series frequency (freq 인자)

In [None]:
# Eight timestamps spaced two days apart (freq='2D').
pd.date_range(start="2019-05-25", periods=8, freq='2D')

In [None]:
# Eight timestamps on the business-day frequency (freq='B').
pd.date_range(start="2019-05-25", periods=8, freq='B')

In [None]:
# Eight timestamps spaced three weeks apart (freq='3W').
pd.date_range(start="2019-05-25", periods=8, freq='3W')

In [None]:
# Month-end frequency (freq='M').
# NOTE(review): newer pandas releases reportedly rename this alias to 'ME';
# confirm against the pandas version this notebook targets.
pd.date_range(start="2019-05-25", periods=8, freq='M')

In [None]:
# Business month frequency (freq='BM').
# NOTE(review): newer pandas releases reportedly rename this alias to 'BME';
# confirm against the pandas version this notebook targets.
pd.date_range(start="2019-05-25", periods=8, freq='BM')

In [None]:
# Business month start frequency (freq='BMS').
pd.date_range(start="2019-05-25", periods=8, freq='BMS')

In [None]:
# Quarterly frequency (freq='Q').
# NOTE(review): newer pandas releases reportedly rename this alias to 'QE';
# confirm against the pandas version this notebook targets.
pd.date_range(start="2019-05-25", periods=8, freq='Q')

In [None]:
# Yearly frequency (freq='A'); the start value also carries a time of day.
# NOTE(review): newer pandas releases reportedly rename this alias to 'YE';
# confirm against the pandas version this notebook targets.
pd.date_range(start="2019-05-25 10:10", periods=8, freq='A')

In [None]:
# Eight timestamps one hour apart, starting at 10:10.
# The lowercase alias 'h' is used because recent pandas deprecates the
# uppercase 'H' spelling, while 'h' is accepted by older releases as well.
pd.date_range(start="2019-05-25 10:10", periods=8, freq='h')

In [None]:
# Eight timestamps on the minute frequency (freq='min').

pd.date_range(start="2019-05-25 10:10", periods=8, freq='min')
# Presumably the same call using the older 'T' alias for minutes:
# pd.date_range(start="2019-05-25 10:10", periods=8, freq='T')

In [None]:
# Eight timestamps ten seconds apart, starting at 10:10.
# The lowercase alias '10s' is used because recent pandas deprecates the
# uppercase 'S' spelling, while 's' is accepted by older releases as well.

pd.date_range(start="2019-05-25 10:10", periods=8, freq='10s')

# DataFrame

Series는 1차원 데이터만 다룰 수 있다.  
Excel 표와 같은 2차원 데이터를 처리하려면 DataFrame을 사용한다.


In [None]:
df = pd.DataFrame([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
df

In [None]:
type(df)

In [None]:
# A 3x3 grid holding 10, 20, ..., 90, wrapped in a DataFrame without
# explicit row or column labels.
np_array = np.arange(10, 100, 10).reshape((3, 3))
df1 = pd.DataFrame(data=np_array)
df1

## index와 column

In [None]:
df.index

In [None]:
df1.index

In [None]:
# index 지정
df1.index = pd.date_range('2019-05-05', periods=3)
df1

In [None]:
df1.index

In [None]:
# column 지정
df1.columns = ['A', 'B', 'C']
df1

In [None]:
# Pass the row index and column names directly to the constructor instead
# of assigning them after the frame is created.
# NOTE(review): newer pandas releases reportedly rename the 'M' alias to
# 'ME'; confirm against the pandas version this notebook targets.
df2 = pd.DataFrame(np.arange(10, 100, 10).reshape(3,3), 
                  index=pd.date_range('2019-03-05', periods=3, freq='M'), 
                  columns=['A', 'B', 'C'])
df2

## 딕셔너리로 DataFrame 만들기

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build a 10-row table from a dict of equal-length lists: each key becomes
# a column name and each list becomes that column's data.
# The squares are computed directly from the range; no index is needed.
table_data = {'연도': list(range(2010, 2020)),
              '매출액': [v**2 for v in range(1, 11)],
              '종업원 수': list(range(2, 30, 3))}


df = pd.DataFrame(table_data)
df

Unnamed: 0,연도,매출액,종업원 수
0,2010,1,2
1,2011,4,5
2,2012,9,8
3,2013,16,11
4,2014,25,14
5,2015,36,17
6,2016,49,20
7,2017,64,23
8,2018,81,26
9,2019,100,29


## Head & tail

In [4]:
df

Unnamed: 0,연도,매출액,종업원 수
0,2010,1,2
1,2011,4,5
2,2012,9,8
3,2013,16,11
4,2014,25,14
5,2015,36,17
6,2016,49,20
7,2017,64,23
8,2018,81,26
9,2019,100,29


In [5]:
df.head()

Unnamed: 0,연도,매출액,종업원 수
0,2010,1,2
1,2011,4,5
2,2012,9,8
3,2013,16,11
4,2014,25,14


In [6]:
df.head(3)

Unnamed: 0,연도,매출액,종업원 수
0,2010,1,2
1,2011,4,5
2,2012,9,8


In [7]:
df.tail()

Unnamed: 0,연도,매출액,종업원 수
5,2015,36,17
6,2016,49,20
7,2017,64,23
8,2018,81,26
9,2019,100,29


In [8]:
df.tail(3)

Unnamed: 0,연도,매출액,종업원 수
7,2017,64,23
8,2018,81,26
9,2019,100,29


### index, columns, values

In [9]:
df

Unnamed: 0,연도,매출액,종업원 수
0,2010,1,2
1,2011,4,5
2,2012,9,8
3,2013,16,11
4,2014,25,14
5,2015,36,17
6,2016,49,20
7,2017,64,23
8,2018,81,26
9,2019,100,29


In [10]:
df.index

RangeIndex(start=0, stop=10, step=1)

In [11]:
df.columns

Index(['연도', '매출액', '종업원 수'], dtype='object')

In [12]:
df.values

array([[2010,    1,    2],
       [2011,    4,    5],
       [2012,    9,    8],
       [2013,   16,   11],
       [2014,   25,   14],
       [2015,   36,   17],
       [2016,   49,   20],
       [2017,   64,   23],
       [2018,   81,   26],
       [2019,  100,   29]], dtype=int64)

## 데이터 연산

In [13]:
table_data1 = {
    'A': np.arange(1, 6),
    'B': np.arange(10, 60, 10),
    'C': np.arange(100, 600, 100),
}
df1 = pd.DataFrame(table_data1)
df1

Unnamed: 0,A,B,C
0,1,10,100
1,2,20,200
2,3,30,300
3,4,40,400
4,5,50,500


In [14]:
table_data2 = {
    'A': [6, 7, 8],
    'B': [60, 70, 80],
    'C': [600, 700, 800],
}
df2 = pd.DataFrame(table_data2)
df2

Unnamed: 0,A,B,C
0,6,60,600
1,7,70,700
2,8,80,800


In [15]:
# 덧셈
df1+df2

Unnamed: 0,A,B,C
0,7.0,70.0,700.0
1,9.0,90.0,900.0
2,11.0,110.0,1100.0
3,,,
4,,,


In [16]:
# 뺄셈
df1-df2

Unnamed: 0,A,B,C
0,-5.0,-50.0,-500.0
1,-5.0,-50.0,-500.0
2,-5.0,-50.0,-500.0
3,,,
4,,,


In [None]:
df1*df2

In [None]:
df1/df2

In [None]:
df1**df2

In [None]:
df1%df2

## 통계분석을 위한 method

In [17]:
df1 = pd.DataFrame({'봄': [256.5, 264.3, 215.9, 223.2, 312.8],
                    '여름': [770.6, 567.5, 599.8, 387.1, 446.2],
                    '가을': [363.5, 231.2, 293.1, 247.7, 381.6],
                    '겨울': [139.3, 59.9, 76.9, 109.1, 108.1]},
                    index=['2012', '2013', '2014', '2015', '2016'])
df1

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9
2015,223.2,387.1,247.7,109.1
2016,312.8,446.2,381.6,108.1


### sum / mean

In [19]:
df1.sum()

봄     1272.7
여름    2771.2
가을    1517.1
겨울     493.3
dtype: float64

In [20]:
df1.sum(axis=1)

2012    1529.9
2013    1122.9
2014    1185.7
2015     967.1
2016    1248.7
dtype: float64

In [21]:
df1.mean()

봄     254.54
여름    554.24
가을    303.42
겨울     98.66
dtype: float64

In [22]:
df1.mean(axis=1)

2012    382.475
2013    280.725
2014    296.425
2015    241.775
2016    312.175
dtype: float64

### min / max

In [23]:
df1.min()

봄     215.9
여름    387.1
가을    231.2
겨울     59.9
dtype: float64

In [24]:
df1.min(axis=1)

2012    139.3
2013     59.9
2014     76.9
2015    109.1
2016    108.1
dtype: float64

In [25]:
df1.max()

봄     312.8
여름    770.6
가을    381.6
겨울    139.3
dtype: float64

In [26]:
df1.max(axis=1)

2012    770.6
2013    567.5
2014    599.8
2015    387.1
2016    446.2
dtype: float64

### std / var

In [27]:
df1.std()

봄      38.628267
여름    148.888895
가을     67.358496
겨울     30.925523
dtype: float64

In [28]:
df1.std(axis=1)

2012    274.472128
2013    211.128782
2014    221.150739
2015    114.166760
2016    146.548658
dtype: float64

In [29]:
df1.var()

봄      1492.143
여름    22167.903
가을     4537.167
겨울      956.388
dtype: float64

In [30]:
df1.var(axis=1)

2012    75334.949167
2013    44575.362500
2014    48907.649167
2015    13034.049167
2016    21476.509167
dtype: float64

### cumsum / cumprod

In [31]:
df1.cumsum()

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,520.8,1338.1,594.7,199.2
2014,736.7,1937.9,887.8,276.1
2015,959.9,2325.0,1135.5,385.2
2016,1272.7,2771.2,1517.1,493.3


In [32]:
df1.cumsum(axis=1)

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,1027.1,1390.6,1529.9
2013,264.3,831.8,1063.0,1122.9
2014,215.9,815.7,1108.8,1185.7
2015,223.2,610.3,858.0,967.1
2016,312.8,759.0,1140.6,1248.7


In [33]:
df1.cumprod()

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,67792.95,437315.5,84041.2,8344.07
2014,14636500.0,262301800.0,24632480.0,641659.0
2015,3266866000.0,101537000000.0,6101464000.0,70005000.0
2016,1021876000000.0,45305830000000.0,2328319000000.0,7567540000.0


In [34]:
df1.cumprod(axis=1)

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,197658.9,71849010.0,10008570000.0
2013,264.3,149990.25,34677750.0,2077197000.0
2014,215.9,129496.82,37955520.0,2918779000.0
2015,223.2,86400.72,21401460.0,2334899000.0
2016,312.8,139571.36,53260430.0,5757453000.0


### describe()

In [35]:
df1.describe()

Unnamed: 0,봄,여름,가을,겨울
count,5.0,5.0,5.0,5.0
mean,254.54,554.24,303.42,98.66
std,38.628267,148.888895,67.358496,30.925523
min,215.9,387.1,231.2,59.9
25%,223.2,446.2,247.7,76.9
50%,256.5,567.5,293.1,108.1
75%,264.3,599.8,363.5,109.1
max,312.8,770.6,381.6,139.3


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
연도       10 non-null int64
매출액      10 non-null int64
종업원 수    10 non-null int64
dtypes: int64(3)
memory usage: 368.0 bytes


## 인덱싱, 슬라이싱

In [37]:
df1

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9
2015,223.2,387.1,247.7,109.1
2016,312.8,446.2,381.6,108.1


### Column으로 접근

In [38]:
df1['봄']

2012    256.5
2013    264.3
2014    215.9
2015    223.2
2016    312.8
Name: 봄, dtype: float64

In [39]:
df1['여름']

2012    770.6
2013    567.5
2014    599.8
2015    387.1
2016    446.2
Name: 여름, dtype: float64

### Row로 접근

In [40]:
df1[1:3]

Unnamed: 0,봄,여름,가을,겨울
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9


In [41]:
df1[:2]

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9


In [42]:
df1[:'2013']

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9


In [44]:
df1.loc['2013']

봄     264.3
여름    567.5
가을    231.2
겨울     59.9
Name: 2013, dtype: float64

In [45]:
df1.loc['2013']

봄     264.3
여름    567.5
가을    231.2
겨울     59.9
Name: 2013, dtype: float64

In [46]:
df1.loc['2013':'2016']

Unnamed: 0,봄,여름,가을,겨울
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9
2015,223.2,387.1,247.7,109.1
2016,312.8,446.2,381.6,108.1


### 결합

In [None]:
df1['봄'][:3]

In [None]:
df1['여름'][:'2014']

In [50]:
df1.loc['2015':, '봄':"가을"]

Unnamed: 0,봄,여름,가을
2015,223.2,387.1,247.7
2016,312.8,446.2,381.6


In [None]:
df1.loc['2016']

### 데이터프레임 전치

In [51]:
df1

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9
2015,223.2,387.1,247.7,109.1
2016,312.8,446.2,381.6,108.1


In [52]:
df1.T

Unnamed: 0,2012,2013,2014,2015,2016
봄,256.5,264.3,215.9,223.2,312.8
여름,770.6,567.5,599.8,387.1,446.2
가을,363.5,231.2,293.1,247.7,381.6
겨울,139.3,59.9,76.9,109.1,108.1


### 열 순서 변경

In [53]:
df1

Unnamed: 0,봄,여름,가을,겨울
2012,256.5,770.6,363.5,139.3
2013,264.3,567.5,231.2,59.9
2014,215.9,599.8,293.1,76.9
2015,223.2,387.1,247.7,109.1
2016,312.8,446.2,381.6,108.1


In [54]:
df1[['겨울', '봄', '여름', '가을']]

Unnamed: 0,겨울,봄,여름,가을
2012,139.3,256.5,770.6,363.5
2013,59.9,264.3,567.5,231.2
2014,76.9,215.9,599.8,293.1
2015,109.1,223.2,387.1,247.7
2016,108.1,312.8,446.2,381.6


### 행 변경

In [55]:
df1.loc[['2016', '2015', '2014']]

Unnamed: 0,봄,여름,가을,겨울
2016,312.8,446.2,381.6,108.1
2015,223.2,387.1,247.7,109.1
2014,215.9,599.8,293.1,76.9


In [56]:
df2 = pd.DataFrame({
    '봄': [302.9, 256.9],
    '여름': [692.6, 1053.6],
    '가을': [307.6, 225.5],
    '겨울': [98.7, 45.6]},
    index=[2010,2011]
)
df2

Unnamed: 0,봄,여름,가을,겨울
2010,302.9,692.6,307.6,98.7
2011,256.9,1053.6,225.5,45.6


## 데이터 통합하기

In [57]:
df1 = pd.DataFrame({'Class1': [95, 92, 98, 100],
                    'Class2': [91, 93, 98, 100]})
df1

Unnamed: 0,Class1,Class2
0,95,91
1,92,93
2,98,98
3,100,100


In [58]:
df2 = pd.DataFrame({'Class1': [76, 88],
                    'Class2': [100, 100]})
df2

Unnamed: 0,Class1,Class2
0,76,100
1,88,100


### 세로방향 통합

In [59]:
# Stack df2 underneath df1, keeping each frame's own row labels.
# pd.concat is used because DataFrame.append is no longer available in
# current pandas; the result is the same.
pd.concat([df1, df2])

Unnamed: 0,Class1,Class2
0,95,91
1,92,93
2,98,98
3,100,100
0,76,100
1,88,100


In [60]:
# Stack df2 underneath df1 and renumber the rows 0..n-1 instead of keeping
# the original labels. pd.concat is used because DataFrame.append is no
# longer available in current pandas.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,Class1,Class2
0,95,91
1,92,93
2,98,98
3,100,100
4,76,100
5,88,100


In [61]:
# Same stacking, but the fresh 0..n-1 row labels come from reset_index
# (drop=True discards the old labels rather than keeping them as a column).
# pd.concat is used because DataFrame.append is no longer available in
# current pandas.
pd.concat([df1, df2]).reset_index(drop=True)

Unnamed: 0,Class1,Class2
0,95,91
1,92,93
2,98,98
3,100,100
4,76,100
5,88,100


#### 컬럼이 서로 다를 때

In [63]:
df3 = pd.DataFrame({'Class1': [80,10], 'Class3': [50, 60]})
df3

Unnamed: 0,Class1,Class3
0,80,50
1,10,60


In [64]:
# Stack frames whose columns only partly overlap: the result carries the
# union of the columns (sorted by name because sort=True) and missing cells
# become NaN. pd.concat is used because DataFrame.append is no longer
# available in current pandas.
pd.concat([df1, df3], ignore_index=True, sort=True)

Unnamed: 0,Class1,Class2,Class3
0,95,91.0,
1,92,93.0,
2,98,98.0,
3,100,100.0,
4,80,,50.0
5,10,,60.0


### 가로방향 통합하기

In [68]:
df1

Unnamed: 0,Class1,Class2
0,95,91
1,92,93
2,98,98
3,100,100


In [69]:
df4 = pd.DataFrame({'Class3': [92,93,94,91,99]})
df4

Unnamed: 0,Class3
0,92
1,93
2,94
3,91
4,99


In [71]:
df1.join(df4, how="inner")

Unnamed: 0,Class1,Class2,Class3
0,95,91,92
1,92,93,93
2,98,98,94
3,100,100,91


In [72]:
df1.join(df4, how="outer")

Unnamed: 0,Class1,Class2,Class3
0,95.0,91.0,92
1,92.0,93.0,93
2,98.0,98.0,94
3,100.0,100.0,91
4,,,99


In [73]:
df1.join(df4, how="left")

Unnamed: 0,Class1,Class2,Class3
0,95,91,92
1,92,93,93
2,98,98,94
3,100,100,91


In [74]:
df1.join(df4, how="right")

Unnamed: 0,Class1,Class2,Class3
0,95.0,91.0,92
1,92.0,93.0,93
2,98.0,98.0,94
3,100.0,100.0,91
4,,,99


#### index 지정시

In [None]:
df1

In [None]:
df1.index = ['a', 'b', 'c', 'd']
df1

In [None]:
df4

In [None]:
df4.index = list('abcde')
df4

In [None]:
df1.join(df4)

In [None]:
df4.join(df1)

### 특정 열을 기준으로 통합하기

In [75]:
import random

# Month labels '1월' .. '4월' used as the key column of the sales tables.
month_list = ['{}월'.format(month) for month in range(1, 5)]

# Four arrays of four random integers each, drawn from [50, 100), one per
# product. No seed is set in this cell, so the values differ between runs.
prod_A, prod_B, prod_C, prod_D = (
    np.random.randint(50, 100, size=4) for _ in range(4)
)

# Sales table for products A and B, one row per month label.
df_A_B = pd.DataFrame({
    '판매월': month_list,
    '제품A': prod_A,
    '제품B': prod_B,
})
df_A_B

Unnamed: 0,판매월,제품A,제품B
0,1월,82,78
1,2월,81,68
2,3월,66,99
3,4월,70,52


In [76]:
df_C_D = pd.DataFrame({'판매월': month_list,
                       '제품C': prod_C,
                       '제품D': prod_D})
df_C_D

Unnamed: 0,판매월,제품C,제품D
0,1월,86,67
1,2월,93,85
2,3월,95,86
3,4월,72,69


### merge

In [77]:
df_A_B.merge(df_C_D)

Unnamed: 0,판매월,제품A,제품B,제품C,제품D
0,1월,82,78,86,67
1,2월,81,68,93,85
2,3월,66,99,95,86
3,4월,70,52,72,69


#### merge - how

In [78]:
df_A_B

Unnamed: 0,판매월,제품A,제품B
0,1월,82,78
1,2월,81,68
2,3월,66,99
3,4월,70,52


In [79]:
# Two arrays of four random integers each, drawn from [50, 100).
prod_E, prod_F = (np.random.randint(50, 100, size=4) for _ in range(2))

# Sales table for products E and F covering the months '3월' .. '6월'.
df_E_F = pd.DataFrame({
    '판매월': ['3월', '4월', '5월', '6월'],
    '제품E': prod_E,
    '제품F': prod_F,
})
df_E_F

Unnamed: 0,판매월,제품E,제품F
0,3월,59,60
1,4월,66,75
2,5월,72,94
3,6월,71,79


In [80]:
df_A_B.merge(df_E_F, how='left')

Unnamed: 0,판매월,제품A,제품B,제품E,제품F
0,1월,82,78,,
1,2월,81,68,,
2,3월,66,99,59.0,60.0
3,4월,70,52,66.0,75.0


In [81]:
df_A_B.merge(df_E_F, how='right')

Unnamed: 0,판매월,제품A,제품B,제품E,제품F
0,3월,66.0,99.0,59,60
1,4월,70.0,52.0,66,75
2,5월,,,72,94
3,6월,,,71,79


In [82]:
df_A_B.merge(df_E_F, how='inner')

Unnamed: 0,판매월,제품A,제품B,제품E,제품F
0,3월,66,99,59,60
1,4월,70,52,66,75


In [83]:
df_A_B.merge(df_E_F, how='outer')

Unnamed: 0,판매월,제품A,제품B,제품E,제품F
0,1월,82.0,78.0,,
1,2월,81.0,68.0,,
2,3월,66.0,99.0,59.0,60.0
3,4월,70.0,52.0,66.0,75.0
4,5월,,,72.0,94.0
5,6월,,,71.0,79.0


In [84]:
df_A_B.merge(df_E_F, how='outer', on="판매월")

Unnamed: 0,판매월,제품A,제품B,제품E,제품F
0,1월,82.0,78.0,,
1,2월,81.0,68.0,,
2,3월,66.0,99.0,59.0,60.0
3,4월,70.0,52.0,66.0,75.0
4,5월,,,72.0,94.0
5,6월,,,71.0,79.0


In [None]:
# df_A_B.join(df_E_F)

# 데이터 파일 읽고 쓰기

## CSV 파일

In [None]:
import os

# Dataset directory, resolved relative to the current working directory.
# The path components are passed separately so os.path.join inserts the
# separator of the running OS; a hard-coded backslash only forms a nested
# path on Windows.
BASE_DIR = os.getcwd()
DATASET_PATH = os.path.join(BASE_DIR, "datasets", "201904")
print(DATASET_PATH)


In [None]:
# Load one day's headerless CSV from the dataset directory.
# on_bad_lines="warn" skips rows with an unexpected number of fields and
# reports them; it is the supported spelling of the removed
# error_bad_lines=False option.
file1 = os.path.join(DATASET_PATH, "2019-04-30.csv")
df = pd.read_csv(file1, header=None, on_bad_lines="warn")
df

In [None]:
# Assign names to the eight unnamed columns of the headerless CSV
# (roughly: collection date, product ID, item ID, item name, product name,
# sale price, discount price, benefit price).
df.columns = ['수집일자', '상품ID', '품목ID', '품목명',  '상품명', '판매가격', '할인가격', "혜택가격"]

# NOTE(review): a notebook cell normally renders only its final expression,
# so the head() preview is presumably discarded here — confirm intent.
df.head()
df.describe()

In [None]:
df2 = df[["수집일자", "상품ID","상품명"]]
df2

In [None]:
df2.to_csv("temp.csv", encoding="cp949")

In [None]:
# temp.csv is written with encoding="cp949", so it has to be decoded with
# the same codec: read_csv defaults to UTF-8, which cannot decode
# cp949-encoded Korean text. Use encoding='utf8' (the default) only for
# files that were saved as UTF-8.
df1 = pd.read_csv('temp.csv', encoding='cp949')

## Excel 파일

In [None]:
# Load the first sheet of an Excel workbook into a DataFrame.
df2 = pd.read_excel('file.xlsx')

# Example: specifying column dtypes while reading
# df2 = pd.read_excel('tmp.xlsx', index_col=0, dtype={'Name': str, 'Value': float})
#
# Example: specifying extra strings to treat as missing values
# pd.read_excel('tmp.xlsx', index_col=0,
#               na_values=['string1', 'string2'])

In [None]:
# NOTE(review): `day` is assigned but never used in this cell; the file
# name below is fixed to 2019-04-30 — confirm whether it was meant to be
# built from `day`.
day = "01"
file1 = os.path.join(DATASET_PATH, "2019-04-30.csv")

# Headerless CSV; on_bad_lines="warn" skips malformed rows and reports
# them (supported spelling of the removed error_bad_lines=False option).
df = pd.read_csv(file1, header=None, on_bad_lines="warn")
df

# 연습문제?

In [None]:
import os

# Dataset directory, resolved relative to the current working directory.
# The path components are passed separately so os.path.join inserts the
# separator of the running OS; a hard-coded backslash only forms a nested
# path on Windows.
BASE_DIR = os.getcwd()
DATASET_PATH = os.path.join(BASE_DIR, "datasets", "201904")



In [None]:
# Seed the combined frame with the first daily file (2019-04-01).
initial_file = os.path.join(DATASET_PATH, "2019-04-01.csv")

# Headerless CSV; on_bad_lines="warn" skips malformed rows and reports
# them (supported spelling of the removed error_bad_lines=False option).
o_df = pd.read_csv(initial_file, header=None, on_bad_lines="warn")
o_df.info()

In [None]:
# Read the remaining daily files (29 days starting 2019-04-02) and stack
# them under the frame already held in o_df.
# The frames are collected in a list and concatenated once at the end:
# DataFrame.append is no longer available in current pandas, and growing a
# frame inside the loop re-copies all accumulated rows on every iteration.
frames = [o_df]
for x in pd.date_range('2019-04-02', periods=29):
    date = x.strftime('%Y-%m-%d')
    filename = "{}.csv".format(date)

    filepath = os.path.join(DATASET_PATH, filename)
    # Headerless CSV; malformed rows are skipped with a warning.
    df = pd.read_csv(filepath, header=None, on_bad_lines="warn")
    frames.append(df)

# Row labels of the individual files are kept as-is (no re-indexing).
o_df = pd.concat(frames)

In [None]:
o_df.info()

In [None]:
o_df.head()

In [None]:
o_df.columns = ['수집일자', '상품ID', '품목ID', '품목명',  '상품명', '판매가격', '할인가격', "혜택가격"]
o_df.head()

In [None]:
o_df.to_csv("완성본.csv", encoding="cp949")

In [None]:
# Make sure the dataset directory exists. exist_ok=True makes the call a
# no-op when the directory is already there and avoids the gap between a
# separate existence check and the creation.
os.makedirs(DATASET_PATH, exist_ok=True)

In [None]:
df = pd.read_csv("완성본.csv", encoding="cp949", index_col=0)


In [None]:
df.head()

In [None]:
df_E_F

In [None]:
# NOTE(review): df1 was last assigned from pd.read_csv, which yields a
# DataFrame; to_frame() is normally a Series method, so this call is
# expected to fail — confirm what was intended.
df1.to_frame()

In [None]:
# NOTE(review): Panel appears to have been removed from recent pandas
# releases; confirm the installed version before running this cell.
pd.Panel()