In [1]:
import pandas as pd

### 1. 데이터 읽어오기

In [2]:
# 데이터를 읽어올 웹 페이지의 주소
site = 'https://www.basketball-reference.com/leagues/NBA_2018_games.html'

In [3]:
# 지정된 웹 문서를 통해 table 태그를 데이터프레임으로 생성한다.
tables = pd.read_html(site)
tables

[                  Date Start (ET)        Visitor/Neutral  PTS  \
 0    Tue, Oct 17, 2017      8:01p         Boston Celtics   99   
 1    Tue, Oct 17, 2017     10:30p        Houston Rockets  122   
 2    Wed, Oct 18, 2017      7:00p      Charlotte Hornets   90   
 3    Wed, Oct 18, 2017      7:00p          Brooklyn Nets  131   
 4    Wed, Oct 18, 2017      7:00p             Miami Heat  109   
 ..                 ...        ...                    ...  ...   
 99   Mon, Oct 30, 2017     10:30p  Golden State Warriors  141   
 100  Tue, Oct 31, 2017      7:00p       Sacramento Kings   83   
 101  Tue, Oct 31, 2017      7:30p           Phoenix Suns  122   
 102  Tue, Oct 31, 2017      8:00p  Oklahoma City Thunder  110   
 103  Tue, Oct 31, 2017     10:30p        Detroit Pistons   93   
 
               Home/Neutral  PTS.1 Unnamed: 6 Unnamed: 7  Attend.  Notes  
 0      Cleveland Cavaliers    102  Box Score        NaN    20562    NaN  
 1    Golden State Warriors    121  Box Score        NaN

In [4]:
tables[0]

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,Box Score,,20562,
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,Box Score,,19596,
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,Box Score,,20491,
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,Box Score,,15008,
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,Box Score,,18846,
...,...,...,...,...,...,...,...,...,...,...
99,"Mon, Oct 30, 2017",10:30p,Golden State Warriors,141,Los Angeles Clippers,113,Box Score,,19068,
100,"Tue, Oct 31, 2017",7:00p,Sacramento Kings,83,Indiana Pacers,101,Box Score,,12245,
101,"Tue, Oct 31, 2017",7:30p,Phoenix Suns,122,Brooklyn Nets,114,Box Score,,12936,
102,"Tue, Oct 31, 2017",8:00p,Oklahoma City Thunder,110,Milwaukee Bucks,91,Box Score,,16713,


In [5]:
# 저장한다.
# 데이터 프레임 저장시 한글이 포함되어 있다면
# encoding='utf-8-sig' 로 설정을 추천한다.
tables[0].to_csv('data/nba_2018.csv', index=False)

### 데이터 파악하기

In [6]:
df1 = pd.read_csv('data/nba_2018.csv')
df1.head()

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,Box Score,,20562,
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,Box Score,,19596,
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,Box Score,,20491,
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,Box Score,,15008,
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,Box Score,,18846,


In [7]:
# 데이터 프레임 요약정보 확인
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             104 non-null    object 
 1   Start (ET)       104 non-null    object 
 2   Visitor/Neutral  104 non-null    object 
 3   PTS              104 non-null    int64  
 4   Home/Neutral     104 non-null    object 
 5   PTS.1            104 non-null    int64  
 6   Unnamed: 6       104 non-null    object 
 7   Unnamed: 7       2 non-null      object 
 8   Attend.          104 non-null    int64  
 9   Notes            0 non-null      float64
dtypes: float64(1), int64(3), object(6)
memory usage: 8.2+ KB


In [8]:
# 로우와 컬럼의 수
df1.shape

(104, 10)

### 데이터 전처리

In [9]:
# 데이터 프레임의 컬럼 이름을 확인한다.
df1.columns

Index(['Date', 'Start (ET)', 'Visitor/Neutral', 'PTS', 'Home/Neutral', 'PTS.1',
       'Unnamed: 6', 'Unnamed: 7', 'Attend.', 'Notes'],
      dtype='object')

In [10]:
# 컬럼 이름을 변경한다.
df1.columns = ['경기일자','경기시간', '방문팀', '방문팀점수',
               '홈팀', '홈팀점수', 'Box', '연장전여부', '관중수', 'Note']
df1.columns

Index(['경기일자', '경기시간', '방문팀', '방문팀점수', '홈팀', '홈팀점수', 'Box', '연장전여부', '관중수',
       'Note'],
      dtype='object')

In [11]:
# 결측치확인
df1.isna().sum()

경기일자       0
경기시간       0
방문팀        0
방문팀점수      0
홈팀         0
홈팀점수       0
Box        0
연장전여부    102
관중수        0
Note     104
dtype: int64

In [12]:
# 연장전여부 컬럼을 확인한다.
df1['연장전여부'].value_counts()

OT    2
Name: 연장전여부, dtype: int64

In [13]:
# 연장전여부가 결측인 것은 NOT으로 채워준다.
df1['연장전여부'].fillna('NOT', inplace=True)
df1['연장전여부'].value_counts()

NOT    102
OT       2
Name: 연장전여부, dtype: int64

In [14]:
df1.isna().sum()

경기일자       0
경기시간       0
방문팀        0
방문팀점수      0
홈팀         0
홈팀점수       0
Box        0
연장전여부      0
관중수        0
Note     104
dtype: int64

In [15]:
# 날짜 데이터를 원하는 양식으로 변경한다.
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

df1['경기일자'] = pd.to_datetime(df1['경기일자'], format='%a, %b %d, %Y')
df1['경기일자']

0     2017-10-17
1     2017-10-17
2     2017-10-18
3     2017-10-18
4     2017-10-18
         ...    
99    2017-10-30
100   2017-10-31
101   2017-10-31
102   2017-10-31
103   2017-10-31
Name: 경기일자, Length: 104, dtype: datetime64[ns]

In [16]:
# Note 컬럼은 제거
df1_na = df1.drop('Note', axis=1)
df1_na

Unnamed: 0,경기일자,경기시간,방문팀,방문팀점수,홈팀,홈팀점수,Box,연장전여부,관중수
0,2017-10-17,8:01p,Boston Celtics,99,Cleveland Cavaliers,102,Box Score,NOT,20562
1,2017-10-17,10:30p,Houston Rockets,122,Golden State Warriors,121,Box Score,NOT,19596
2,2017-10-18,7:00p,Charlotte Hornets,90,Detroit Pistons,102,Box Score,NOT,20491
3,2017-10-18,7:00p,Brooklyn Nets,131,Indiana Pacers,140,Box Score,NOT,15008
4,2017-10-18,7:00p,Miami Heat,109,Orlando Magic,116,Box Score,NOT,18846
...,...,...,...,...,...,...,...,...,...
99,2017-10-30,10:30p,Golden State Warriors,141,Los Angeles Clippers,113,Box Score,NOT,19068
100,2017-10-31,7:00p,Sacramento Kings,83,Indiana Pacers,101,Box Score,NOT,12245
101,2017-10-31,7:30p,Phoenix Suns,122,Brooklyn Nets,114,Box Score,NOT,12936
102,2017-10-31,8:00p,Oklahoma City Thunder,110,Milwaukee Bucks,91,Box Score,NOT,16713


### 필요한 컬럼만 추출해서 정리한다.

In [17]:
a1 = ['경기일자', '방문팀', '방문팀점수', '홈팀', '홈팀점수']
games_nae = df1_na[a1]
games_nae

Unnamed: 0,경기일자,방문팀,방문팀점수,홈팀,홈팀점수
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,2017-10-18,Miami Heat,109,Orlando Magic,116
...,...,...,...,...,...
99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113
100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101
101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114
102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91


In [20]:
# 경기일자 컬럼을 인덱스로 지정한다.
# set_index : 지정된 컬럼을 인덱스로 지정한다.
# append=True : 이전 인덱스를 유지하고 추가로 인덱스를 설정한다.
games_naes = games_nae.set_index('경기일자', append=True)
games_naes

Unnamed: 0_level_0,Unnamed: 1_level_0,방문팀,방문팀점수,홈팀,홈팀점수
Unnamed: 0_level_1,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,2017-10-18,Miami Heat,109,Orlando Magic,116
...,...,...,...,...,...
99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113
100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101
101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114
102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91


In [22]:
# 인덱스의 이름을 설정한다.
games_naesr = games_naes.rename_axis(['게임', '경기일자'])
games_naesr

Unnamed: 0_level_0,Unnamed: 1_level_0,방문팀,방문팀점수,홈팀,홈팀점수
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,2017-10-18,Miami Heat,109,Orlando Magic,116
...,...,...,...,...,...
99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113
100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101
101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114
102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91


### 데이터 전처리 체이닝
- 작업순서 예시
- 파일에서 데이터를 읽어와 데이터 프레임을 생성
- 컬럼 이름 변경
- 원하는 컬럼만 겨져오기
- 날짜를 원하는 양식으로 만들어 다시 설정
- 경기날짜를 인덱스로 추가
- 인덱스 이름을 재설정

In [30]:
# 변경될 컬럼 이름 정보
a1 = {
    'Date' : '경기일자',
    'Start (ET)' : '경기시간',
    'Visitor/Neutral' : '방문팀',
    'PTS' : '방문팀점수',
    'Home/Neutral' : '홈팀',
    'PTS.1' : '홈팀점수',
    'Unnamed: 6' : 'Box',
    'Unnamed: 7' : '연장전여부', 
    'Attend.' : '관중수',
    'Notes' : 'Notes'
}

In [37]:
games_all = pd.read_csv('data/nba_2018.csv') \
               .rename(columns=a1) \
              [['경기일자', '방문팀', '방문팀점수', '홈팀', '홈팀점수']] \
               .assign(경기일자=lambda x : pd.to_datetime(x['경기일자'], \
                                           format='%a, %b %d, %Y')) \
               .set_index('경기일자', append=True) \
               .rename_axis(['게임', '경기일자'])

games_all.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,방문팀,방문팀점수,홈팀,홈팀점수
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,2017-10-18,Miami Heat,109,Orlando Magic,116


In [42]:
games_all = pd.read_csv('data/nba_2018.csv')
games_all = games_all.rename(columns=a1)
games_all = games_all[['경기일자', '방문팀', '방문팀점수', '홈팀', '홈팀점수']]
games_all['경기일자'] = pd.to_datetime(games_all['경기일자'], format='%a, %b %d, %Y')
games_all.set_index('경기일자', append=True, inplace=True)
games_all.rename_axis(['게임', '경기일자'], inplace=True)
games_all

Unnamed: 0_level_0,Unnamed: 1_level_0,방문팀,방문팀점수,홈팀,홈팀점수
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,2017-10-18,Miami Heat,109,Orlando Magic,116
...,...,...,...,...,...
99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113
100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101
101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114
102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91


### 데이터 조정해서 확장하기

In [43]:
df = pd.DataFrame({
    'A' : ['a1', 'a2', 'a3', 'a4'],
    'B' : ['b1', 'b2', 'b3', 'b4'],
    'C' : [1, 2, 3, 4],
    'D' : [100, 200, 300, 400, ]
})

df

Unnamed: 0,A,B,C,D
0,a1,b1,1,100
1,a2,b2,2,200
2,a3,b3,3,300
3,a4,b4,4,400


In [44]:
df.melt(id_vars='A', value_vars=['B', 'C'])

Unnamed: 0,A,variable,value
0,a1,B,b1
1,a2,B,b2
2,a3,B,b3
3,a4,B,b4
4,a1,C,1
5,a2,C,2
6,a3,C,3
7,a4,C,4


In [48]:
# 인덱스를 다시 설정한다.
a1 = games_all.reset_index()
a1

Unnamed: 0,게임,경기일자,방문팀,방문팀점수,홈팀,홈팀점수
0,0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102
1,1,2017-10-17,Houston Rockets,122,Golden State Warriors,121
2,2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102
3,3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140
4,4,2017-10-18,Miami Heat,109,Orlando Magic,116
...,...,...,...,...,...,...
99,99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113
100,100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101
101,101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114
102,102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91


In [51]:
tidy = pd.melt(a1, id_vars=['게임', '경기일자'], 
                   value_vars=['방문팀', '홈팀'],
                   var_name='팀구분', value_name='팀')
tidy.query('경기일자 == "2017-10-17"')

Unnamed: 0,게임,경기일자,팀구분,팀
0,0,2017-10-17,방문팀,Boston Celtics
1,1,2017-10-17,방문팀,Houston Rockets
104,0,2017-10-17,홈팀,Cleveland Cavaliers
105,1,2017-10-17,홈팀,Golden State Warriors


In [52]:
# 날짜별 경기 수
# 경기 일자 타입을 파악
tidy['경기일자'].dtype

dtype('<M8[ns]')

In [87]:
# diff 함수 테스트
# diff 함수는 현재 값이 이전 값보다 얼마나 등락을 했을지 계산해서 반환한다.
a1 = tidy.query('팀 == "Boston Celtics"')
# 경기 일자를 기존으로 오름차순 정렬한다.
a2 = a1.sort_values('경기일자')
# 날짜 간격을 구한다.
a2['경기일자'].diff()

0        NaT
110   1 days
19    2 days
154   4 days
66    2 days
79    2 days
195   2 days
Name: 경기일자, dtype: timedelta64[ns]

In [101]:
# 각 팀별로 그룹을 묶고 그 안에서 전날보다 몇일 후에 경기를 했는지
# 계산하여 가져온다.
a1 = tidy.groupby('팀')['경기일자'].diff()
# 날자 차이값을 정수로 가져온다.
a2 = a1.dt.days
# 날짜 차이값이 1일 날짜(연속으로 경기가 있었던 날짜)를 가져온다.
tidy[['경기일자', '팀']][a2 == 1]

Unnamed: 0,경기일자,팀
12,2017-10-18,Houston Rockets
28,2017-10-21,Orlando Magic
30,2017-10-21,Golden State Warriors
32,2017-10-21,Detroit Pistons
33,2017-10-21,Portland Trail Blazers
34,2017-10-21,Sacramento Kings
41,2017-10-23,Atlanta Hawks
59,2017-10-25,Indiana Pacers
61,2017-10-25,Utah Jazz
77,2017-10-28,Oklahoma City Thunder


### 각팀의 휴식일을 구해본다.

In [108]:
# 몇일씩 쉬었는지 계산한다.
ab = tidy.groupby('팀')['경기일자'].diff().dt.days - 1
ab

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
203    1.0
204    1.0
205    1.0
206    4.0
207    3.0
Name: 경기일자, Length: 208, dtype: float64

In [109]:
ab.isna().sum()

30

In [111]:
# 경기 일자를 기준으로 정렬한다.
a1 = tidy.sort_values('경기일자')
# 팀을 기준으로 그룹을 묶는다
a2 = a1.groupby('팀')
# 경기일자를 기준으로 차이값을 구한다.
a3 = a2['경기일자'].diff()
# 1을 빼서 휴식일을 구한다.
tidy['휴식일'] = a3.dt.days - 1
# 결측치(첫경기)를 0으로 채워준다.
tidy.fillna(0, inplace=True)
tidy

Unnamed: 0,게임,경기일자,팀구분,팀,휴식일
0,0,2017-10-17,방문팀,Boston Celtics,0.0
1,1,2017-10-17,방문팀,Houston Rockets,0.0
2,2,2017-10-18,방문팀,Charlotte Hornets,0.0
3,3,2017-10-18,방문팀,Brooklyn Nets,0.0
4,4,2017-10-18,방문팀,Miami Heat,0.0
...,...,...,...,...,...
203,99,2017-10-30,홈팀,Los Angeles Clippers,1.0
204,100,2017-10-31,홈팀,Indiana Pacers,1.0
205,101,2017-10-31,홈팀,Brooklyn Nets,1.0
206,102,2017-10-31,홈팀,Milwaukee Bucks,1.0


In [121]:
# index : 인덱스로 사용할 컬럼을 지정한다.
# columns : 피벗 테이블의 컬럼으로 사용할 컬럼을 지정한다.
# values : 컬럼에 들어갈 값

# index 로 지정된것이 똑같은 것들 끼리 묶고 colums에 지정된
# 컬럼에 값으로 피벗테이블의 컬럼이 지정된다.
# values에 지정된 컬럼의 값이 각 컬럼에 담기 된다.

# 예)
# 	   게임	경기일자	팀구분	    팀	          휴식일
# 0	    0	2017-10-17	   방문팀	Boston Celtics	   0.0
# 104	0	2017-10-17 	   홈팀	Cleveland Cavaliers	0.0

# 게임과 경기일자의 조합이 똑같은 것들 끼리 묶인다.
# 그 안에서 팀구분 컬럼에 들어가 있는 값의 종류만큼
# 컬럼이 생성된다. 생성된 컬럼안에는 휴식일 값이 각각
# 들어가게 된다.

by_game = pd.pivot_table(tidy, values='휴식일', 
                         index=['게임', '경기일자'],
                         columns=['팀구분'])
by_game

Unnamed: 0_level_0,팀구분,방문팀,홈팀
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2017-10-17,0.0,0.0
1,2017-10-17,0.0,0.0
2,2017-10-18,0.0,0.0
3,2017-10-18,0.0,0.0
4,2017-10-18,0.0,0.0
...,...,...,...
99,2017-10-30,0.0,1.0
100,2017-10-31,1.0,1.0
101,2017-10-31,2.0,1.0
102,2017-10-31,2.0,1.0


In [122]:
tidy.query('게임 == 0')

Unnamed: 0,게임,경기일자,팀구분,팀,휴식일
0,0,2017-10-17,방문팀,Boston Celtics,0.0
104,0,2017-10-17,홈팀,Cleveland Cavaliers,0.0


In [123]:
# 컬럼 이름 변경
a1 = {
    '방문팀' : '방문팀휴식일',
    '홈팀' : '홈팀휴식일'
}
by_game = by_game.rename(columns=a1)
by_game

Unnamed: 0_level_0,팀구분,방문팀휴식일,홈팀휴식일
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2017-10-17,0.0,0.0
1,2017-10-17,0.0,0.0
2,2017-10-18,0.0,0.0
3,2017-10-18,0.0,0.0
4,2017-10-18,0.0,0.0
...,...,...,...
99,2017-10-30,0.0,1.0
100,2017-10-31,1.0,1.0
101,2017-10-31,2.0,1.0
102,2017-10-31,2.0,1.0


In [125]:
# 원본과 합친다.
df = pd.concat([games_all, by_game], axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,방문팀,방문팀점수,홈팀,홈팀점수,방문팀휴식일,홈팀휴식일
게임,경기일자,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2017-10-17,Boston Celtics,99,Cleveland Cavaliers,102,0.0,0.0
1,2017-10-17,Houston Rockets,122,Golden State Warriors,121,0.0,0.0
2,2017-10-18,Charlotte Hornets,90,Detroit Pistons,102,0.0,0.0
3,2017-10-18,Brooklyn Nets,131,Indiana Pacers,140,0.0,0.0
4,2017-10-18,Miami Heat,109,Orlando Magic,116,0.0,0.0
...,...,...,...,...,...,...,...
99,2017-10-30,Golden State Warriors,141,Los Angeles Clippers,113,0.0,1.0
100,2017-10-31,Sacramento Kings,83,Indiana Pacers,101,1.0,1.0
101,2017-10-31,Phoenix Suns,122,Brooklyn Nets,114,2.0,1.0
102,2017-10-31,Oklahoma City Thunder,110,Milwaukee Bucks,91,2.0,1.0


### Pivot 테이블 연습 예제

In [126]:
df1 = pd.DataFrame({
    '이름' : ['홍길동', '홍길동', '홍길동', '김길동', '최길동', '최길동'],
    '과목' : ['국어', '영어', '수학', '국어', '영어', '수학'],
    '점수' : [100, 90, 80, 99, 88, 77]
})
df1

Unnamed: 0,이름,과목,점수
0,홍길동,국어,100
1,홍길동,영어,90
2,홍길동,수학,80
3,김길동,국어,99
4,최길동,영어,88
5,최길동,수학,77


In [129]:
a1 = pd.pivot_table(df1, index='이름', columns='과목', values='점수')
a1.fillna(0, inplace=True)
a1

과목,국어,수학,영어
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
김길동,99.0,0.0,0.0
최길동,0.0,77.0,88.0
홍길동,100.0,80.0,90.0
