# 파이썬 리뷰 : 10 minutes to pandas
- 영문 : https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html
- 한글 : https://dataitgirls2.github.io/10minutes2pandas/

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 1. Object Creation (객체 생성)

In [None]:
[1,3,5,np.nan,6,8]

In [None]:
s = pd.Series([1,3,5,np.nan,6,8])

In [None]:
s

In [None]:
dates = pd.date_range('20130101', periods=6)

In [None]:
dates

In [None]:
list('ABCD')

In [None]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))

In [None]:
df

In [None]:
1.

In [None]:
pd.Timestamp('20130102')

In [None]:
list(range(4))

In [None]:
pd.Series(1,index=list(range(4)),dtype='float32')

In [None]:
list(range(4))

In [None]:
# Build a DataFrame from a dict of column specifications; every column here
# is given a different kind of value (scalar, Timestamp, Series, ndarray,
# Categorical, string).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [None]:
df2

In [None]:
df

In [None]:
df.dtypes

In [None]:
df2

In [None]:
df2.dtypes

In [None]:
type(df)

## 2. Viewing Data (데이터 확인하기)

In [None]:
df

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.tail(10)

In [None]:
df.index

In [None]:
df.columns

In [None]:
df.values

In [None]:
df

In [None]:
df.describe()

In [None]:
df2.describe()

In [None]:
df2

In [None]:
df.T

In [None]:
df

In [None]:
df.sort_index(axis=1, ascending=True)

In [None]:
df.sort_values(by='A')

## 3. Selection (선택)

In [None]:
df['A']

In [None]:
df

In [None]:
df[0:3]

In [None]:
df['20130102':'20130104']

In [None]:
df.loc[dates[0]]

In [None]:
dates[0]

In [None]:
df.loc[:,['A','B']]

In [None]:
df.loc[dates[0],['A','B']]

In [None]:
df.loc['20130102':'20130104', ['A','B']]

In [None]:
df.loc['20130102',['A','B']]

In [None]:
df.loc[dates[0],'A']

In [None]:
df.iloc[3]

In [None]:
df

In [None]:
df.iloc[3:5,0:2]

In [None]:
df.iloc[[1,2,4],[0,2]]

In [None]:
df.iloc[1:3,:]

In [None]:
df.iloc[:,1:3]

In [None]:
df.iloc[1,1]

In [None]:
df

In [None]:
df[df['A']>0]

In [None]:
df[df.A > 0]

In [None]:
df[df['A'] < 0]

In [None]:
df[df > 0]

In [None]:
# Plain assignment does not copy: df1 and df are two names for the same
# DataFrame object, so changes made through df1 are visible through df.
df1 = df

In [None]:
df1

In [None]:
df1['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

In [None]:
df1

In [None]:
df

In [None]:
df2 = df.copy()

In [None]:
df2['F'] = ['one', 'one', 'two', 'three', 'four', 'three']

In [None]:
df2

In [None]:
df1

In [None]:
df2['E'].isin(['two','four'])

In [None]:
df2[df2['E'].isin(['two','four'])]

In [None]:
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))

In [None]:
s1

In [None]:
df['F'] = s1

In [None]:
df

In [None]:
df.loc[dates[0],'A'] = 0

In [None]:
df

In [None]:
df.iloc[0,1] = 0

In [None]:
df

In [None]:
df.loc[:,'D']

In [None]:
df.loc[:,'D'] = np.array([5] * len(df))

In [None]:
df

In [None]:
df['E'] = ""

In [None]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))

In [None]:
df

In [None]:
df2 = df.copy()

In [None]:
df2

In [None]:
df2[df2 > 0]

In [None]:
-df2

In [None]:
df2[df2 > 0] = -df2

In [None]:
df2

In [None]:
df

In [None]:
df5 = df2[df2 > 0]

In [None]:
df5.iloc[0,0]

In [None]:
df5

## 4. Missing Data (결측치)

In [None]:
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])

In [None]:
df1

In [None]:
df1.loc[dates[0]:dates[1],'E']

In [None]:
df1.loc[dates[0]:dates[1],'E'] = 1

In [None]:
df1

In [None]:
df1.dropna(how='any')

In [None]:
df1.fillna(value=5)

In [None]:
pd.isna(df1)

## 5. Operation (연산)

In [None]:
df.mean()

In [None]:
df

In [None]:
df.mean(1)

In [None]:
s = pd.Series([1,3,5,np.nan,6,8], index=dates)

In [None]:
s

In [None]:
s.shift(2)

In [None]:
df

In [None]:
df.sub(s, axis='index')

In [None]:
df

In [None]:
df.apply(np.cumsum)

In [None]:
s = pd.Series(np.random.randint(0, 7, size=10))

In [None]:
s

In [None]:
s.value_counts()

In [None]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])

In [None]:
s

In [None]:
s.str.lower()

In [None]:
s.str.upper()

## 6. Merge (병합)

In [None]:
df = pd.DataFrame(np.random.randn(10, 4))

In [None]:
df

In [None]:
# break it into pieces
pieces = [df[:3], df[3:7], df[7:]]

In [None]:
pieces

In [None]:
pieces[2]

In [None]:
pd.concat(pieces)

### Join (결합)

In [None]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})

In [None]:
left

In [None]:
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

In [None]:
right

In [None]:
pd.merge(left, right, on= 'key')

In [None]:
left = pd.DataFrame({'key' : ['foo', 'bar'], 'lval' : [1, 2]})

In [None]:
right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})

In [None]:
left

In [None]:
right

In [None]:
pd.merge(left, right, on= 'key')

### Append (추가)

In [None]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])

In [None]:
df

In [None]:
s = df.iloc[3]

In [None]:
s

In [None]:
# DataFrame.append was removed in pandas 2.0. Turn the row Series into a
# one-row DataFrame and concatenate it; ignore_index=True renumbers the rows.
pd.concat([df, s.to_frame().T], ignore_index=True)

In [None]:
# DataFrame.append was removed in pandas 2.0. Concatenate the row as a
# one-row DataFrame (keeping its original label), then move the index into
# a regular column with reset_index().
pd.concat([df, s.to_frame().T]).reset_index()

## 7. Grouping (그룹화)

In [None]:
df = pd.DataFrame(
    {
        'A' : ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B' : ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C' : np.random.randn(8),
        'D' : np.random.randn(8)
    })

In [None]:
df

In [None]:
# Select the numeric columns explicitly: since pandas 2.0 non-numeric columns
# such as 'B' are no longer dropped silently and would be string-concatenated.
df.groupby('A')[['C', 'D']].sum()

In [None]:
df.groupby(['A','B']).sum()

## 8. Reshaping (변형)

In [None]:
# Pair every first-level label with each second-level label, in order.
tuples = [
    (outer, inner)
    for outer in ('bar', 'baz', 'foo', 'qux')
    for inner in ('one', 'two')
]

In [None]:
tuples

In [None]:
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

In [None]:
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])

In [None]:
df

In [None]:
df2  =  df[:4]

In [None]:
df2

In [None]:
stacked = df2.stack()

In [None]:
stacked

In [None]:
stacked.unstack()

In [None]:
stacked.unstack(1)

In [None]:
stacked.unstack(0)

### Pivot Tables (피봇 테이블)

In [None]:
df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
                   'B' : ['A', 'B', 'C'] * 4,
                   'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D' : np.random.randn(12),
                   'E' : np.random.randn(12)})

In [None]:
df

In [None]:
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

## 9. Time Series (시계열)

In [None]:
# 100 timestamps at one-second intervals. Use the lowercase 's' alias: the
# uppercase 'S' spelling is deprecated since pandas 2.2.
rng = pd.date_range('1/1/2012', periods=100, freq='s')

In [None]:
rng

In [None]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

In [None]:
ts

In [None]:
ts.resample('5Min').sum()

In [None]:
rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')

In [None]:
rng

In [None]:
ts = pd.Series(np.random.randn(len(rng)), rng)

In [None]:
ts

In [None]:
ts_utc = ts.tz_localize('UTC')

In [None]:
ts_utc

In [None]:
ts_utc.tz_convert('US/Eastern')

## 10. Categoricals (범주화)

In [None]:
df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})

In [None]:
df

In [None]:
df["raw_grade"].astype("category")

In [None]:
df["grade"] = df["raw_grade"].astype("category")

In [None]:
# Assigning to .cat.categories was removed in pandas 2.0; rename_categories
# returns a new categorical with the labels replaced positionally
# (a -> very good, b -> good, e -> very bad).
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

In [None]:
df

In [None]:
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])

In [None]:
df

In [None]:
df.sort_values(by="grade")

In [None]:
# Pass observed=False explicitly so that categories with no rows are still
# listed (with size 0); relying on the default emits a FutureWarning on
# recent pandas because that default is changing.
df.groupby("grade", observed=False).size()

## 11. Plotting (그래프)

In [None]:
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))

In [None]:
ts = ts.cumsum()

In [None]:
ts

In [None]:
ts.plot()

In [None]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
                  columns=['A', 'B', 'C', 'D'])  

In [None]:
df = df.cumsum()

In [None]:
# DataFrame.plot() opens its own figure when no ax is given, so a preceding
# plt.figure() only leaves an extra empty figure behind. Plot directly and
# put the legend on the returned axes.
ax = df.plot()
ax.legend(loc='best')

## 12. Getting Data In / Out (데이터 입 / 출력)

In [None]:
df

In [None]:
import os

# to_csv does not create missing parent directories; make sure 'output/'
# exists so this cell also works on a fresh checkout.
os.makedirs('output', exist_ok=True)
df.to_csv('output/foo.csv')

In [None]:
pd.read_csv('output/foo.csv')

In [None]:
# Pass the HDF5 group name as key=...: positional arguments after the path
# are deprecated and become keyword-only in pandas 3.0.
df.to_hdf('output/foo.h5', key='df')

In [None]:
pd.read_hdf('output/foo.h5','df')

In [None]:
df.to_excel('output/foo.xlsx', sheet_name='Sheet1')

In [None]:
pd.read_excel('output/foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])

## 13. Gotchas (주의사항)

In [None]:
pd.Series([False, True, False])

In [None]:
### Raises an error (kept commented out on purpose)
# if pd.Series([False, True, False]):
#     print("I was true")

In [None]:
# An identity comparison against None yields a plain bool, so the Series is
# never asked for its own truth value here.
if pd.Series([False, True, False]) is not None:
    print("I was not None")