In [1]:
import pandas as pd
pd.options.display.max_columns = 30    # 출력할 최대 컬럼개수. 이보다 많을 경우 줄여서 출력
pd.options.display.max_rows = 100         # 출력할 최대 행수.
pd.options.display.max_colwidth = 100          # 출력할 cell의 최대 글자수.

# DataFrame 개요

-   **표(table-행렬)** 를 다루는 Pandas의 타입.
    -   Database의 Table이나 Excel의 표와 동일한 역할을 한다.
-   분석할 데이터를 가지는 판다스의 가장 핵심적인 클래스이다.
-   **행(row)와 열(column)** 으로 구성되 있다.
-   각 행과 각 열은 식별자를 가지며 Series와 같이 두가지 종류가 있다.
    -   **순번**
        -   양수, 음수 index 두가지를 가진다.
        -   컬럼도 내부적으로는 순번으로 관리되지만 우리가 조회할 때 사용할 수는 없다.
    -   **이름**
        -   명시적으로 지정한 행과 열의 이름을 말한다.
        -   행의 이름은 **index name** 이라고 하고 열의 이름은 **column name**이라고 한다.
        -   index name과 column name은 **중복될 수 있다.**
        -   명시적으로 지정하지 않으면 양수 순번이 index, column 이름으로 설정된다.
-   하나의 행과 하나의 열은 Series로 구성된다.
-   DataFrame 객체는 직접 데이터를 넣어 생성하거나 데이터 셋을 파일(csv, 엑셀, DB 등)로 부터 읽어와 생성한다.

# DataFrame 생성

## 직접 생성

-   `pd.DataFrame(data [, index=None, columns=None])`
    -   data
        -   DataFrame을 구성할 값을 설정
            -   Series, List, ndarray를 담은 2차원 배열
            -   열이름을 key로 컬럼의 값 value로 하는 딕션어리(사전)
    -   index
        -   index명으로 사용할 값 배열로 설정
    -   columns
        -   컬럼명으로 사용할 값 배열로 설정

In [2]:
import pandas as pd
import numpy as np

# 딕셔너리를 이용해서 생성
## key: 컬럼(열) 이름, value: 각 행에 들어갈 값들을 가지는 1차원 자료구조

d = {
    "id": ["id-"+str(i) for i in range(1,6)],
    "korean": [100,50,70,60,90],
    "english": [90,80,100,100,40]
}
grade = pd.DataFrame(d)
print(grade)
grade

     id  korean  english
0  id-1     100       90
1  id-2      50       80
2  id-3      70      100
3  id-4      60      100
4  id-5      90       40


Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


```
index이름(행이름)    id  korean  english   <- column name(컬럼명)
0                         id-1     100       90
1                         id-2      50       80
2                         id-3      70      100
3                         id-4      60      100
4                         id-5      90       40
```

In [3]:
# 2차원 자료구조(리스트,튜블)를 이용
l = [
    [10,20,30,40],
    [100,200,300,400],
    range(1000,4001,1000),
]
df = pd.DataFrame(l)
df

Unnamed: 0,0,1,2,3
0,10,20,30,40
1,100,200,300,400
2,1000,2000,3000,4000


In [4]:
df2 = pd.DataFrame(
                    l,
                    columns=["col1", "col2", "col3", "col4"], # 컬럼명 지정.
                    index=["row1", "row2", "row3"]    # 행이름 지정
)
df2

Unnamed: 0,col1,col2,col3,col4
row1,10,20,30,40
row2,100,200,300,400
row3,1000,2000,3000,4000


## DataFrame에 저장된 값들을 파일에 저장

-   DataFrame객체는 다양한 형식의 파일로 저장할 수 있다.
-   기본구문
    -   **`DataFrame객체.to_저장형식()`**

### CSV 파일로 저장

-   `DataFrame객체.to_csv(파일경로,sep=',', index=True, header=True)`
    -   텍스트 파일로 저장
    -   파일경로: 저장할 파일경로(경로/파일명)
    -   sep : 데이터 구분자
    -   index, header: 인덱스/헤더 저장 여부
-   encoding방식: UTF-8 로 저장된다.

> -   csv (comma separate value)
>     -   표(table)을 text파일에 작성하는 형식
>     -   한행에 한개의 데이터를 입력
>     -   속성값들을 `,` 로 구분
>
> ```csv
> 10,20,30
> 100,10,5
> 1,40,70
> ```

In [5]:
import os
# 디렉토리 생성
os.makedirs("data", exist_ok=True)

In [6]:
grade.to_csv("data/grade1.csv")

In [7]:
# index name은 저장 안하기. (양수 index를 index name으로 사용한 경우)
grade.to_csv("data/grade2.csv", index=False)

In [8]:
# Header name (컬럼명) 을 저장하지 않기
grade.to_csv("data/grade3.csv", 
            index=False, # index name 저장안하기
            header=False # column name 저장안함.
            )
# index name, column name을 저장하지 않는 경우: 자동증가 index로 생성된 경우.

In [9]:
# 인코딩 방식 설정 -> 기본: utf-8
grade.to_csv("data/grade4.csv",encoding="cp949")

In [10]:
# value 구분를 "," 대신 다른 것을 사용하는 경우.
grade.to_csv("data/grade5.csv", sep='\t', index=False)

### 엑셀로 저장

-   `DataFrame객체.to_excel(파일경로, index=True, header=True)`

In [11]:
# !pip install openpyxl

In [12]:
grade.to_excel("data/grade_1.xlsx")

### 기타 형식

In [13]:
grade.to_pickle("data/grade.pickle")

In [14]:
grade.to_html("data/grade.html",index=False)

In [15]:
grade.to_json("data/grade.json")

## 파일로 부터 데이터셋을 읽어와 생성하기

### csv 파일 등 텍스트 파일로 부터 읽어와 생성

-   `pd.read_csv(파일경로, sep=',', header, index_col, na_values)`
    -   **파일경로**
        -   읽어올 파일의 경로
    -   **sep**=","
        -   데이터 구분자.
        -   기본값: 쉼표
    -   **header**=정수
        -   열이름(컬럼이름)으로 사용할 행 지정
        -   기본값: 첫번째 행
        -   None을 설정하면 Header는 없다는 것으로 파일의 첫번째 행부터 값으로 사용하고 컬럼명은 0부터 자동증가하는 값을 붙인다.
    -   **index_col**=정수,컬럼명
        -   index 명으로 사용할 열이름(문자열)이나 열의 순번(정수)을 지정.
        -   생략시 0부터 자동증가하는 값을 붙인다.
    -   **na_values**
        -   읽어올 데이터셋의 값 중 결측치로 처리할 문자열 지정.

In [16]:
# 특정 컬럼을 index name으로 사용
df2 = pd.read_csv("data/grade1.csv",index_col=0)
df2

Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [17]:
df3 = pd.read_csv("data/grade2.csv")
df3

Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [18]:
df4 = pd.read_csv("data/grade2.csv", index_col="id")
df4

Unnamed: 0_level_0,korean,english
id,Unnamed: 1_level_1,Unnamed: 2_level_1
id-1,100,90
id-2,50,80
id-3,70,100
id-4,60,100
id-5,90,40


In [19]:
df5 = pd.read_csv("data/grade3.csv",
                        header=None, 
                        names=["ID", "국어", "영어"]) # 컬럼명 지정. (생략 - 0, 1, 2)
df5

Unnamed: 0,ID,국어,영어
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [20]:
# , 이외의 구분자를 쓴 경우 sep="구분자" 로 지정한다.
df7 = pd.read_csv("data/grade5.csv", sep="\t")
df7

Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [21]:
# 결측치가 있는 dataframe 생성
g = {
    "id": ["id-"+str(i) for i in range(1, 6)], 
    "korean": [100, "", 70, 60, 90],
    "english": [90, 80, 100, 100, ""]
}

gf = pd.DataFrame(g)
gf.to_csv("data/grade6.csv",index=False)

In [22]:
df8 = pd.read_csv("data/grade6.csv")
df8

Unnamed: 0,id,korean,english
0,id-1,100.0,90.0
1,id-2,,80.0
2,id-3,70.0,100.0
3,id-4,60.0,100.0
4,id-5,90.0,


In [23]:
df9 = pd.read_csv("data/grade6.csv", na_values="모름")
df9
# 결측치로 읽는 문자열 - NA, N/A

Unnamed: 0,id,korean,english
0,id-1,100.0,90.0
1,id-2,,80.0
2,id-3,70.0,100.0
3,id-4,60.0,100.0
4,id-5,90.0,


In [24]:
df9.isna()

Unnamed: 0,id,korean,english
0,False,False,False
1,False,True,False
2,False,False,False
3,False,False,False
4,False,False,True


### 기타 다른 형식

In [25]:
# html 
# !pip install lxml

In [26]:
# html
result = pd.read_html("data/grade.html")
print(type(result), len(result))
result[0]

<class 'list'> 1


Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [27]:
url = "https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%EC%95%BC%EA%B5%AC&ackey=taakczwf"
result2 = pd.read_html(url)
print(len(result2))

10


In [28]:
result2[0]

Unnamed: 0,경기시간,경기상태,경기장소,중계채널,중계&기록
0,18:30,롯데박세웅패 2 : 3 LG승치리노스,잠실,-,
1,18:30,KIA김도현패 3 : 21 한화승류현진,대전,-,
2,18:30,키움메르세데스패 1 : 6 SSG승앤더슨,문학,-,
3,18:30,NC손주환승 9 : 4 KT패고영표,수원,-,


In [29]:
result2[1]

Unnamed: 0,경기시간,경기상태,경기장소,중계채널,중계&기록
0,18:30,SSG박시후승 2 : 1 KIA패네일,광주,중계채널,
1,18:30,롯데김원중패 8 : 9 KT승박영현,수원,중계채널,
2,18:30,키움오석주승 4 : 3 삼성패이승민,대구,중계채널,
3,18:30,NC이준혁패 5 : 6 한화승정우주,대전,중계채널,


## 주요 메소드, 속성
- **T** : 행/열을 바꾼다
  
- **head()/tail()** : 데이터 정수 행만큼 조회. 기본값: 5
- **shape, size** : shape 행렬의 수를 튜플로 리턴. size: 원소의 총 개수
- **columns / index** : 열의 이름/index 이름을 조회 및 변경
- **describe()** : 요약 통계량 제공, 수치형: 기술 통계값 / 범주형(문자열): 고유 값 개수 등 빈도수관련 정보
            include나 exclude 매개변수로 특정 타입만 선택가능
- **info()** : 각 열 별 데이터 타입과 결측치 개수를 조회
- **isin([값리스트])** : 데이터 프레임 내 각 원소가 값 리스트에 있는 값과 같으면 True 다르면 False 체크
- **count()** : 열별 결측치를 제외한 원소 개수
- **min(), max(), sum(), mean(), median(), std(), var(), mode(), idxmax(), idxmin()**
- **nunique()** : 열별 고유 값의 개수 조회
- **quantile(q=분위)** : 열별 분위수 계산. q 생략 시 0.5
- **isnull()-notnull() / isna()-notna()** : 열별 결측치 체크, 각 원소의 결측치 여부를 Series로 반환
- **fillna(변환값)** : 결측치를 한 번에 특정 값으로 변환
- **dropna()** : 결측치가 있는 행/열 제거(Series는 결측치인 원소 제거)
- **sort_values(기준열 이름리스트, ascending=True)** : 전달할 열 이름을 기준으로 정렬


## 데이터 프레임의 기본 정보 조회

-   csv 파일 읽기
-   shape
-   info()
-   head()
-   tail()
-   isnull().sum()
    -   컬럼별 null 체크 (sum() 한번 더 하면 총개수)
-   describe() : 숫자형-기술통계값, 범주형-총개수, 고유값들, 최빈값

In [30]:
import pandas as pd
df = pd.read_csv("data/movie.csv")
df.shape # 차원(축:axis)별 size(데이터수) - DataFrame 형태
# (4916: 행수, 28: 열)
# 행수 X 열수 -> 4916 X 28

(4916, 28)

In [31]:
# DataFrame 자체에 대한 정보를 제공
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4916 entries, 0 to 4915
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   color                      4897 non-null   object 
 1   director_name              4814 non-null   object 
 2   num_critic_for_reviews     4867 non-null   float64
 3   duration                   4901 non-null   float64
 4   director_facebook_likes    4814 non-null   float64
 5   actor_3_facebook_likes     4893 non-null   float64
 6   actor_2_name               4903 non-null   object 
 7   actor_1_facebook_likes     4909 non-null   float64
 8   gross                      4054 non-null   float64
 9   genres                     4916 non-null   object 
 10  actor_1_name               4909 non-null   object 
 11  movie_title                4916 non-null   object 
 12  num_voted_users            4916 non-null   int64  
 13  cast_total_facebook_likes  4916 non-null   int64

In [32]:
df.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pirate|singapore,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,Spectre,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,The Dark Knight Rises,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police officer|terrorist plot,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,,,,,12.0,7.1,,0


In [33]:
df.tail()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
4911,Color,Scott Smith,1.0,87.0,2.0,318.0,Daphne Zuniga,637.0,,Comedy|Drama,Eric Mabius,Signed Sealed Delivered,629,2283,Crystal Lowe,2.0,fraud|postal worker|prison|theft|trial,http://www.imdb.com/title/tt3000844/?ref_=fn_tt_tt_1,6.0,English,Canada,,,2013.0,470.0,7.7,,84
4912,Color,,43.0,43.0,,319.0,Valorie Curry,841.0,,Crime|Drama|Mystery|Thriller,Natalie Zea,The Following,73839,1753,Sam Underwood,1.0,cult|fbi|hideout|prison escape|serial killer,http://www.imdb.com/title/tt2071645/?ref_=fn_tt_tt_1,359.0,English,USA,TV-14,,,593.0,7.5,16.0,32000
4913,Color,Benjamin Roberds,13.0,76.0,0.0,0.0,Maxwell Moody,0.0,,Drama|Horror|Thriller,Eva Boehnke,A Plague So Pleasant,38,0,David Chandler,0.0,,http://www.imdb.com/title/tt2107644/?ref_=fn_tt_tt_1,3.0,English,USA,,1400.0,2013.0,0.0,6.3,,16
4914,Color,Daniel Hsia,14.0,100.0,0.0,489.0,Daniel Henney,946.0,10443.0,Comedy|Drama|Romance,Alan Ruck,Shanghai Calling,1255,2386,Eliza Coupe,5.0,,http://www.imdb.com/title/tt2070597/?ref_=fn_tt_tt_1,9.0,English,USA,PG-13,,2012.0,719.0,6.3,2.35,660
4915,Color,Jon Gunn,43.0,90.0,16.0,16.0,Brian Herzlinger,86.0,85222.0,Documentary,John August,My Date with Drew,4285,163,Jon Gunn,0.0,actress name in title|crush|date|four word title|video camera,http://www.imdb.com/title/tt0378407/?ref_=fn_tt_tt_1,84.0,English,USA,PG,1100.0,2004.0,23.0,6.6,1.85,456


In [34]:
# 결측치 확인 -> 컬럼별로 결측치 개수
# df.isna() # cell 별로 확인
# bool(논리값)으로 산술연산(sum() +) -> True: 1, False: 0
##  bool 타입 Series.sum(): True개수, Series.mean(): 전체 데이터 중 True 비율
##  DataFrame객체.집계함수() => 열(컬럼) 단위로 집계
df.isna().sum()

color                         19
director_name                102
num_critic_for_reviews        49
duration                      15
director_facebook_likes      102
actor_3_facebook_likes        23
actor_2_name                  13
actor_1_facebook_likes         7
gross                        862
genres                         0
actor_1_name                   7
movie_title                    0
num_voted_users                0
cast_total_facebook_likes      0
actor_3_name                  23
facenumber_in_poster          13
plot_keywords                152
movie_imdb_link                0
num_user_for_reviews          21
language                      14
country                        5
content_rating               300
budget                       484
title_year                   106
actor_2_facebook_likes        13
imdb_score                     0
aspect_ratio                 326
movie_facebook_likes           0
dtype: int64

In [35]:
df.isnull().mean() # 결측치 비율

color                        0.003865
director_name                0.020749
num_critic_for_reviews       0.009967
duration                     0.003051
director_facebook_likes      0.020749
actor_3_facebook_likes       0.004679
actor_2_name                 0.002644
actor_1_facebook_likes       0.001424
gross                        0.175346
genres                       0.000000
actor_1_name                 0.001424
movie_title                  0.000000
num_voted_users              0.000000
cast_total_facebook_likes    0.000000
actor_3_name                 0.004679
facenumber_in_poster         0.002644
plot_keywords                0.030919
movie_imdb_link              0.000000
num_user_for_reviews         0.004272
language                     0.002848
country                      0.001017
content_rating               0.061025
budget                       0.098454
title_year                   0.021562
actor_2_facebook_likes       0.002644
imdb_score                   0.000000
aspect_ratio

In [36]:
### 기본적인 컬럼별 통계량들을 확인
df.describe()  #수치형 타입(int, float) 컬럼들의 통계량을 묶어줌.

Unnamed: 0,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_1_facebook_likes,gross,num_voted_users,cast_total_facebook_likes,facenumber_in_poster,num_user_for_reviews,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
count,4867.0,4901.0,4814.0,4893.0,4909.0,4054.0,4916.0,4916.0,4903.0,4895.0,4432.0,4810.0,4903.0,4916.0,4590.0,4916.0
mean,137.988905,107.090798,691.014541,631.276313,6494.488491,47644510.0,82644.92,9579.815907,1.37732,267.668846,36547490.0,2002.447609,1621.923516,6.437429,2.222349,7348.294142
std,120.239379,25.286015,2832.954125,1625.874802,15106.986884,67372550.0,138322.2,18164.31699,2.023826,372.934839,100242700.0,12.453977,4011.299523,1.127802,1.40294,19206.016458
min,1.0,7.0,0.0,0.0,0.0,162.0,5.0,0.0,0.0,1.0,218.0,1916.0,0.0,1.6,1.18,0.0
25%,49.0,93.0,7.0,132.0,607.0,5019656.0,8361.75,1394.75,0.0,64.0,6000000.0,1999.0,277.0,5.8,1.85,0.0
50%,108.0,103.0,48.0,366.0,982.0,25043960.0,33132.5,3049.0,1.0,153.0,19850000.0,2005.0,593.0,6.6,2.35,159.0
75%,191.0,118.0,189.75,633.0,11000.0,61108410.0,93772.75,13616.75,2.0,320.5,43000000.0,2011.0,912.0,7.2,2.35,2000.0
max,813.0,511.0,23000.0,23000.0,640000.0,760505800.0,1689764.0,656730.0,43.0,5060.0,4200000000.0,2016.0,137000.0,9.5,16.0,349000.0


In [37]:
df.describe(include=["object"]) #include=['보려는타입지정', ...]

Unnamed: 0,color,director_name,actor_2_name,genres,actor_1_name,movie_title,actor_3_name,plot_keywords,movie_imdb_link,language,country,content_rating
count,4897,4814,4903,4916,4909,4916,4893,4764,4916,4902,4911,4616
unique,2,2397,3030,914,2095,4916,3519,4756,4916,46,65,18
top,Color,Steven Spielberg,Morgan Freeman,Drama,Robert De Niro,Avatar,Steve Coogan,based on novel,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,English,USA,R
freq,4693,26,18,233,48,1,8,4,1,4582,3710,2067


In [38]:
df.describe(include=['int64'])

Unnamed: 0,num_voted_users,cast_total_facebook_likes,movie_facebook_likes
count,4916.0,4916.0,4916.0
mean,82644.92,9579.815907,7348.294142
std,138322.2,18164.31699,19206.016458
min,5.0,0.0,0.0
25%,8361.75,1394.75,0.0
50%,33132.5,3049.0,159.0
75%,93772.75,13616.75,2000.0
max,1689764.0,656730.0,349000.0


In [39]:
df.describe().T
# 행과 열의 위치를 변경. (Transpose, 전치)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
num_critic_for_reviews,4867.0,137.9889,120.2394,1.0,49.0,108.0,191.0,813.0
duration,4901.0,107.0908,25.28602,7.0,93.0,103.0,118.0,511.0
director_facebook_likes,4814.0,691.0145,2832.954,0.0,7.0,48.0,189.75,23000.0
actor_3_facebook_likes,4893.0,631.2763,1625.875,0.0,132.0,366.0,633.0,23000.0
actor_1_facebook_likes,4909.0,6494.488,15106.99,0.0,607.0,982.0,11000.0,640000.0
gross,4054.0,47644510.0,67372550.0,162.0,5019656.25,25043962.0,61108412.75,760505800.0
num_voted_users,4916.0,82644.92,138322.2,5.0,8361.75,33132.5,93772.75,1689764.0
cast_total_facebook_likes,4916.0,9579.816,18164.32,0.0,1394.75,3049.0,13616.75,656730.0
facenumber_in_poster,4903.0,1.37732,2.023826,0.0,0.0,1.0,2.0,43.0
num_user_for_reviews,4895.0,267.6688,372.9348,1.0,64.0,153.0,320.5,5060.0


# 컬럼이름/행이름 조회 및 변경

## 컬럼이름/행이름 조회

-   **DataFrame객체.columns**
    -   컬럼명 조회
    -   컬럼명은 차후 조회를 위해 따로 변수에 저장하는 것이 좋다.
-   **DataFrame객체.index**
    -   행명 조회

In [40]:
df.columns # 컬럼명

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [41]:
df.columns[0]

'color'

In [42]:
df.index # index name

RangeIndex(start=0, stop=4916, step=1)

## 컬럼이름/행이름 변경

-   columns와 index 속성으로는 통째로 바꾸는 것은 가능하나 일부만 선택해서 변경하는 것은 안된다.
    -   `df.columns = ['새이름','새이름', ... , '새이름']`
    -   `df.columns[1] = '새이름'`
        -   이런식으로 개별적으로 변경은 안된다.

In [43]:
grade = pd.read_csv("data/grade2.csv")
grade

Unnamed: 0,id,korean,english
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [44]:
grade.columns

Index(['id', 'korean', 'english'], dtype='object')

In [45]:
# 컬럼명 변경
grade.columns = ["ID","국어","영어"]

In [46]:
# 행이름 변경
grade.index = [f'{i+1}번'for i in range(5)]
grade

Unnamed: 0,ID,국어,영어
1번,id-1,100,90
2번,id-2,50,80
3번,id-3,70,100
4번,id-4,60,100
5번,id-5,90,40


### 컬럼이름/행이름 변경 관련 메소드

-   `DataFrame객체.rename(index=행이름변경설정, columns=열이름변경설정, inplace=False)`
    -   **개별 컬럼이름/행이름 변경** 하는 메소드
    -   변경한 DataFrame을 반환
    -   변경설정: 딕셔너리 사용
        -   {'기존이름':'새이름', ..}
        -   inplace: 원본을 변경할지 여부(boolean)


In [47]:
# 특정(개별) 컬럼명들, 행이름들을 변경. - rename()
## dictionary : {원래이름:바꿀이름}
new_columns = {
    "ID": "학생 이름", 
    "국어": "국어1"
}
new_index = {
    "1번": "일번", 
    "3번": "삼번"
}

grade.rename(index=new_index, columns=new_columns,inplace=True)

-   `DataFrame객체.set_index(컬럼이름, inplace=False)`
    -   특정 컬럼을 행의 index 명으로 사용
    -   열이 index명이 되면서 그 컬럼은 Data Set 에서 제거된다.
-   `DataFrame객체.reset_index(inplace=False)`
    -   index를 첫번째 컬럼으로 복원

In [48]:
# 특정 컬럼을 index로 만들기.  - set_index()
### 컬럼 중 행 식별자 값들을 가진 컬럼을  index로 뺀다. 
grade2 = grade.set_index("학생 이름")
grade2

Unnamed: 0_level_0,국어1,영어
학생 이름,Unnamed: 1_level_1,Unnamed: 2_level_1
id-1,100,90
id-2,50,80
id-3,70,100
id-4,60,100
id-5,90,40


In [49]:
## reset_index()
### 1. index name을 컬럼으로 이동
### 2. index name를 제거하고 양수 index로 변경.
grade2.reset_index()

Unnamed: 0,학생 이름,국어1,영어
0,id-1,100,90
1,id-2,50,80
2,id-3,70,100
3,id-4,60,100
4,id-5,90,40


In [50]:
grade2.reset_index(drop=True)

Unnamed: 0,국어1,영어
0,100,90
1,50,80
2,70,100
3,60,100
4,90,40


# 행과 열의 값 변경

## 특정 행 또는 열 삭제

-   DataFrame객체.drop(columns, index, inplace=False)
    -   columns : 삭제할 열이름 또는 열이름 리스트
    -   index : 삭제할 index명 또는 index 리스트
    -   inplace: 원본을 변경할지 여부(boolean)

In [51]:
grade.drop(index=['일번','삼번'])

Unnamed: 0,학생 이름,국어1,영어
2번,id-2,50,80
4번,id-4,60,100
5번,id-5,90,40


In [52]:
grade.drop(columns=['국어1','학생 이름'])

Unnamed: 0,영어
일번,90
2번,80
삼번,100
4번,100
5번,40


In [53]:
grade.drop(labels="일번", axis=0)
# axis=0: 행, axis=1: 열

Unnamed: 0,학생 이름,국어1,영어
2번,id-2,50,80
삼번,id-3,70,100
4번,id-4,60,100
5번,id-5,90,40


## 열 추가

-   새로운 열을 지정 후 값을 대입하면 새로운 열을 추가할 수 있다.
    -   보통 **파생변수**를 만들 때 사용한다.
-   **열 추가**
    -   `df['새열명'] = 값`
    -   마지막 열로 추가된다.
    -   하나의 값을 대입하면 모든 행에 그 값이 대입된다.
    -   다른 값을 주려면 배열에 담아서 대입한다.
-   **열 삽입**
    -   `df.insert(삽입할 위치 index, 삽입할 열이름, 값)`
-   **파생변수생성**
    -   **기존 열들의 값을 이용해서 만든 열을 파생변수라고 한다.**
    -   벡터화 연산을 이용하여 값 대입한다.
    -   df\['새열이름'\] = 기존 열들을 이용한 연산

In [54]:
grade = pd.read_csv("data/grade2.csv") #, index_col=0)
grade.set_index("id", inplace=True)
grade

Unnamed: 0_level_0,korean,english
id,Unnamed: 1_level_1,Unnamed: 2_level_1
id-1,100,90
id-2,50,80
id-3,70,100
id-4,60,100
id-5,90,40


In [55]:
grade['math'] = 100 # grade['없는컬럼명'] = 값 : 새로운 컬럼을 생성
grade

Unnamed: 0_level_0,korean,english,math
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id-1,100,90,100
id-2,50,80,100
id-3,70,100,100
id-4,60,100,100
id-5,90,40,100


In [56]:
grade['history'] = [100, 90, 95, 80, 70]  # 행수와 동일한 개수의 값들
grade

Unnamed: 0_level_0,korean,english,math,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
id-1,100,90,100,100
id-2,50,80,100,90
id-3,70,100,100,95
id-4,60,100,100,80
id-5,90,40,100,70


In [57]:
# DataFrame['컬럼명'] => 컬럼의 값 조회
grade['math'] = 80 #DF["있는컬럼명"] = 값 : 변경
grade

Unnamed: 0_level_0,korean,english,math,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
id-1,100,90,80,100
id-2,50,80,80,90
id-3,70,100,80,95
id-4,60,100,80,80
id-5,90,40,80,70


In [58]:
grade.insert(1,'korean2',90)

In [59]:
# 컬럼을 중간에 삽입
grade.insert(4,"math2",[90,80,100,65,70])
grade

Unnamed: 0_level_0,korean,korean2,english,math,math2,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
id-1,100,90,90,80,90,100
id-2,50,90,80,80,80,90
id-3,70,90,100,80,100,95
id-4,60,90,100,80,65,80
id-5,90,90,40,80,70,70


In [60]:
# 한개 열(컬럼) 조회 결과 타입 : Series
# 여러개 컬럼 조회 결과 타입 : DataFrame
### 총점 -> 파생변수 (기존 컬럼의 값들을 처리한 결과로 만들어진 컬럼.)
grade['총점'] = grade['korean'] + grade['korean2'] + grade['english'] + grade["math"] + grade["math2"] + grade['history']
grade

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
id-1,100,90,90,80,90,100,550
id-2,50,90,80,80,80,90,470
id-3,70,90,100,80,100,95,535
id-4,60,90,100,80,65,80,475
id-5,90,90,40,80,70,70,440


In [61]:
grade['평균'] = round(grade['총점']/6,2)
grade

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
id-1,100,90,90,80,90,100,550,91.67
id-2,50,90,80,80,80,90,470,78.33
id-3,70,90,100,80,100,95,535,89.17
id-4,60,90,100,80,65,80,475,79.17
id-5,90,90,40,80,70,70,440,73.33


In [62]:
grade2 = grade.drop(columns=['총점','평균'])
total = grade2.sum(axis=1)
avg = round(grade2.mean(axis=1),2)
grade2['total'] = total
grade2['avg'] = avg
grade2

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,total,avg
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
id-1,100,90,90,80,90,100,550,91.67
id-2,50,90,80,80,80,90,470,78.33
id-3,70,90,100,80,100,95,535,89.17
id-4,60,90,100,80,65,80,475,79.17
id-5,90,90,40,80,70,70,440,73.33


<b style='font-size:2.2em'>TODO</b>

-   패스 여부를 boolean값으로 저장하는 파생변수 열을 추가
    -   열이름: 통과여부
    -   평균점수가 80미만이면 False,이상이면 True가 나오도록 처리

In [63]:
grade2['pass'] = grade2['avg'] >= 80
grade2


Unnamed: 0_level_0,korean,korean2,english,math,math2,history,total,avg,pass
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,True
id-2,50,90,80,80,80,90,470,78.33,False
id-3,70,90,100,80,100,95,535,89.17,True
id-4,60,90,100,80,65,80,475,79.17,False
id-5,90,90,40,80,70,70,440,73.33,False


In [64]:
import numpy as np
grade['통과여부'] = np.where(grade['평균'] >= 80, "성공", "실패")
grade

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,성공
id-2,50,90,80,80,80,90,470,78.33,실패
id-3,70,90,100,80,100,95,535,89.17,성공
id-4,60,90,100,80,65,80,475,79.17,실패
id-5,90,90,40,80,70,70,440,73.33,실패


# 행, 열의 값 조회

-   indexer 연산자를 이용한다.
    -   열 조회는 indexer 연산자를 사용한다.
    -   행 조회는 loc-indexer(행이름으로 조회), iloc-indexer(행순번으로조회)를 사용한다.
-   열은 slicing 조회는 안된다.
-   행은 indexing, slicing 모두 지원한다.

## 열의 값 조회

-   **df['열이름']**
    -   열이름의 열 조회
-   **df.열이름**
    -   열이름이 Python 식별자 규칙에 맞으면 `. 표기법` 을 사용할 수 있다.
-   **Fancy indexing**
    -   여러개의 열들을 한번에 조회할 경우 조회할 **열 이름들을 리스트**로 묶어서 전달한다.
-   **주의**
    -   열은 **순번으로는 조회할 수 없다.**
    -   열 조회 indexer에서 슬라이싱을 하면 **행 조회 Slicing이다.**
        -   **만약 indexing이나 slicing을 이용해 열들을 조회하려면 columns 속성을 이용한다.**
            -   `df[df.columns[:3]]`

In [65]:
result = grade["평균"]
result
# 한 개 컬럼 => Series => index name: 행의이름, value: 조회한 컬럼값

id
id-1    91.67
id-2    78.33
id-3    89.17
id-4    79.17
id-5    73.33
Name: 평균, dtype: float64

In [66]:
print(result.name, result.dtype)

평균 float64


In [67]:
# 한번에 여러 컬럼 조회: fancy indexing -> 결과: DataFrame
grade[['평균', '총점']]

Unnamed: 0_level_0,평균,총점
id,Unnamed: 1_level_1,Unnamed: 2_level_1
id-1,91.67,550
id-2,78.33,470
id-3,89.17,535
id-4,79.17,475
id-5,73.33,440


In [68]:
# slicing => df.columns 이용
# grade[0:6]  # 행조회
grade[grade.columns[:6]]

Unnamed: 0_level_0,korean,korean2,english,math,math2,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
id-1,100,90,90,80,90,100
id-2,50,90,80,80,80,90
id-3,70,90,100,80,100,95
id-4,60,90,100,80,65,80
id-5,90,90,40,80,70,70


In [69]:
# 열, 행 -> 컬럼조회는 한번에 못함
grade['korean']['id-2']

np.int64(50)

In [70]:
grade[['korean', 'math']].loc['id-3']  #df.loc[] -> 행이름(index name)으로 조회.

korean    70
math      80
Name: id-3, dtype: int64

<b style='font-size:2.2em'>TODO</b>

-   `data/movie.csv` 을 DataFrame으로 읽은 뒤 다음 문제를 푸세요.

In [71]:
movie = pd.read_csv("data/movie.csv")

In [72]:
movie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4916 entries, 0 to 4915
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   color                      4897 non-null   object 
 1   director_name              4814 non-null   object 
 2   num_critic_for_reviews     4867 non-null   float64
 3   duration                   4901 non-null   float64
 4   director_facebook_likes    4814 non-null   float64
 5   actor_3_facebook_likes     4893 non-null   float64
 6   actor_2_name               4903 non-null   object 
 7   actor_1_facebook_likes     4909 non-null   float64
 8   gross                      4054 non-null   float64
 9   genres                     4916 non-null   object 
 10  actor_1_name               4909 non-null   object 
 11  movie_title                4916 non-null   object 
 12  num_voted_users            4916 non-null   int64  
 13  cast_total_facebook_likes  4916 non-null   int64

In [73]:
# 1. director_name 컬럼의 값들 조회
movie['director_name']

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4             Doug Walker
              ...        
4911          Scott Smith
4912                  NaN
4913     Benjamin Roberds
4914          Daniel Hsia
4915             Jon Gunn
Name: director_name, Length: 4916, dtype: object

In [74]:
# 2. actor_1_name, actor_2_name, actor_3_name 컬럼의 값들
movie[['actor_1_name','actor_2_name','actor_3_name']]

Unnamed: 0,actor_1_name,actor_2_name,actor_3_name
0,CCH Pounder,Joel David Moore,Wes Studi
1,Johnny Depp,Orlando Bloom,Jack Davenport
2,Christoph Waltz,Rory Kinnear,Stephanie Sigman
3,Tom Hardy,Christian Bale,Joseph Gordon-Levitt
4,Doug Walker,Rob Walker,
...,...,...,...
4911,Eric Mabius,Daphne Zuniga,Crystal Lowe
4912,Natalie Zea,Valorie Curry,Sam Underwood
4913,Eva Boehnke,Maxwell Moody,David Chandler
4914,Alan Ruck,Daniel Henney,Eliza Coupe


In [75]:
# 3. 1, 3, 4, 7 번 컬럼 조회('director_name', 'duration', 'director_facebook_likes', 'actor_1_facebook_likes')
movie[movie.columns[[3,1,3,4,7]]]

Unnamed: 0,duration,director_name,duration.1,director_facebook_likes,actor_1_facebook_likes
0,178.0,James Cameron,178.0,0.0,1000.0
1,169.0,Gore Verbinski,169.0,563.0,40000.0
2,148.0,Sam Mendes,148.0,0.0,11000.0
3,164.0,Christopher Nolan,164.0,22000.0,27000.0
4,,Doug Walker,,131.0,131.0
...,...,...,...,...,...
4911,87.0,Scott Smith,87.0,2.0,637.0
4912,43.0,,43.0,,841.0
4913,76.0,Benjamin Roberds,76.0,0.0,0.0
4914,100.0,Daniel Hsia,100.0,0.0,946.0


In [76]:
# 4. 1 ~ 5 번 컬럼 조회('director_name', 'num_critic_for_reviews', 'duration', director_facebook_likes', 'actor_3_facebook_likes')
movie[movie.columns[:6]]

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0
4,,Doug Walker,,,131.0,
...,...,...,...,...,...,...
4911,Color,Scott Smith,1.0,87.0,2.0,318.0
4912,Color,,43.0,43.0,,319.0
4913,Color,Benjamin Roberds,13.0,76.0,0.0,0.0
4914,Color,Daniel Hsia,14.0,100.0,0.0,489.0


### 열조회 기능 메소드들

-   **`select_dtypes(include=[데이터타입,..], exclude=[데이터타입,..])`**
    -   **데이터 타입으로 열 조회**
    -   include : 조회할 열 데이터 타입
    -   exclude : 제외하고 조회할 열 데이터 타입
-   **`filter (items=[], like='', regex='')`**
    -   세가지 열 조회방식을 제공한다.
        -   각 방식을 같이 사용할 수 없다. (한번에 한가지 방식만 사용가능하다.)
    -   파라미터
        -   **items = \[컬럼명들, ..\]**
            -   리스트와 일치하는 열들 조회
            -   이름이 일치 하지 않아도 Error 발생안함.
        -   **like = '부분일치문자열'**
            -   전달한 문자열이 들어간 열들 조회
            -   부분일치 개념
        -   **regex = '정규표현식'**
            -   **정규 표현식을 포함한 컬럼명**으로 조회

In [77]:
grade3 = grade.reset_index()
grade3

Unnamed: 0,id,korean,korean2,english,math,math2,history,총점,평균,통과여부
0,id-1,100,90,90,80,90,100,550,91.67,성공
1,id-2,50,90,80,80,80,90,470,78.33,실패
2,id-3,70,90,100,80,100,95,535,89.17,성공
3,id-4,60,90,100,80,65,80,475,79.17,실패
4,id-5,90,90,40,80,70,70,440,73.33,실패


In [78]:
grade.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, id-1 to id-5
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   korean   5 non-null      int64  
 1   korean2  5 non-null      int64  
 2   english  5 non-null      int64  
 3   math     5 non-null      int64  
 4   math2    5 non-null      int64  
 5   history  5 non-null      int64  
 6   총점       5 non-null      int64  
 7   평균       5 non-null      float64
 8   통과여부     5 non-null      object 
dtypes: float64(1), int64(7), object(1)
memory usage: 572.0+ bytes


In [79]:
grade3.select_dtypes(include=['object','bool'])

Unnamed: 0,id,통과여부
0,id-1,성공
1,id-2,실패
2,id-3,성공
3,id-4,실패
4,id-5,실패


In [80]:
grade3.select_dtypes(exclude=['object','bool'])

Unnamed: 0,korean,korean2,english,math,math2,history,총점,평균
0,100,90,90,80,90,100,550,91.67
1,50,90,80,80,80,90,470,78.33
2,70,90,100,80,100,95,535,89.17
3,60,90,100,80,65,80,475,79.17
4,90,90,40,80,70,70,440,73.33


In [81]:
grade3.filter(like="통과")

Unnamed: 0,통과여부
0,성공
1,실패
2,성공
3,실패
4,실패


In [82]:
grade3.insert(3,'korean3',80)
grade3

Unnamed: 0,id,korean,korean2,korean3,english,math,math2,history,총점,평균,통과여부
0,id-1,100,90,80,90,80,90,100,550,91.67,성공
1,id-2,50,90,80,80,80,80,90,470,78.33,실패
2,id-3,70,90,80,100,80,100,95,535,89.17,성공
3,id-4,60,90,80,100,80,65,80,475,79.17,실패
4,id-5,90,90,80,40,80,70,70,440,73.33,실패


In [83]:
# \d : 숫자 1개   $ 끝.
# ^: 시작, \w: 정수 또는 글자 또는 공백
# {n} : n 글자수
grade3.filter(regex=r"\d$")  # 정수로 끝나는 이름의 컬럼들을 조회

Unnamed: 0,korean2,korean3,math2
0,90,80,90
1,90,80,80
2,90,80,100
3,90,80,65
4,90,80,70


In [84]:
grade3.filter(regex=r"^\w{4}$") # 4글자인 컬럼

Unnamed: 0,math,통과여부
0,80,성공
1,80,실패
2,80,성공
3,80,실패
4,80,실패


In [85]:
grade3.filter(items=["math", "math2", "music"]) # 없는 컬럼 조회시 무시

Unnamed: 0,math,math2
0,80,90
1,80,80
2,80,100
3,80,65
4,80,70


<b style='font-size:2.2em'>TODO</b>

다음은 movie dataframe을 이용해 아래 코드를 작성하시오.

In [86]:
# 1. 정수형(int64) 컬럼만 조회
movie.select_dtypes(include='int64')

Unnamed: 0,num_voted_users,cast_total_facebook_likes,movie_facebook_likes
0,886204,4834,33000
1,471220,48350,0
2,275868,11700,85000
3,1144337,106759,164000
4,8,143,0
...,...,...,...
4911,629,2283,84
4912,73839,1753,32000
4913,38,0,16
4914,1255,2386,660


In [87]:
# 2. 정수형(int64)과 실수형(float64) 타입을 제외한 컬럼들만 조회
movie.select_dtypes(exclude=['int64','float64'])

Unnamed: 0,color,director_name,actor_2_name,genres,actor_1_name,movie_title,actor_3_name,plot_keywords,movie_imdb_link,language,country,content_rating
0,Color,James Cameron,Joel David Moore,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,Wes Studi,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,English,USA,PG-13
1,Color,Gore Verbinski,Orlando Bloom,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,Jack Davenport,goddess|marriage ceremony|marriage proposal|pirate|singapore,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,English,USA,PG-13
2,Color,Sam Mendes,Rory Kinnear,Action|Adventure|Thriller,Christoph Waltz,Spectre,Stephanie Sigman,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,English,UK,PG-13
3,Color,Christopher Nolan,Christian Bale,Action|Thriller,Tom Hardy,The Dark Knight Rises,Joseph Gordon-Levitt,deception|imprisonment|lawlessness|police officer|terrorist plot,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,English,USA,PG-13
4,,Doug Walker,Rob Walker,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens,,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
4911,Color,Scott Smith,Daphne Zuniga,Comedy|Drama,Eric Mabius,Signed Sealed Delivered,Crystal Lowe,fraud|postal worker|prison|theft|trial,http://www.imdb.com/title/tt3000844/?ref_=fn_tt_tt_1,English,Canada,
4912,Color,,Valorie Curry,Crime|Drama|Mystery|Thriller,Natalie Zea,The Following,Sam Underwood,cult|fbi|hideout|prison escape|serial killer,http://www.imdb.com/title/tt2071645/?ref_=fn_tt_tt_1,English,USA,TV-14
4913,Color,Benjamin Roberds,Maxwell Moody,Drama|Horror|Thriller,Eva Boehnke,A Plague So Pleasant,David Chandler,,http://www.imdb.com/title/tt2107644/?ref_=fn_tt_tt_1,English,USA,
4914,Color,Daniel Hsia,Daniel Henney,Comedy|Drama|Romance,Alan Ruck,Shanghai Calling,Eliza Coupe,,http://www.imdb.com/title/tt2070597/?ref_=fn_tt_tt_1,English,USA,PG-13


In [88]:
# 3. actor_1_name, actor_2_name, actor_3_name 컬럼의 값을 조회
movie.filter(regex=r"actor_\d_name")
movie.filter(regex=r"actor_[123]_name") #[123] : 1 or 2 or 3 한글자

Unnamed: 0,actor_2_name,actor_1_name,actor_3_name
0,Joel David Moore,CCH Pounder,Wes Studi
1,Orlando Bloom,Johnny Depp,Jack Davenport
2,Rory Kinnear,Christoph Waltz,Stephanie Sigman
3,Christian Bale,Tom Hardy,Joseph Gordon-Levitt
4,Rob Walker,Doug Walker,
...,...,...,...
4911,Daphne Zuniga,Eric Mabius,Crystal Lowe
4912,Valorie Curry,Natalie Zea,Sam Underwood
4913,Maxwell Moody,Eva Boehnke,David Chandler
4914,Daniel Henney,Alan Ruck,Eliza Coupe


In [89]:
# 4. actor_1_facebook_likes, actor_1_name 컬럼의 값을 조회
movie.filter(like="actor_1")

Unnamed: 0,actor_1_facebook_likes,actor_1_name
0,1000.0,CCH Pounder
1,40000.0,Johnny Depp
2,11000.0,Christoph Waltz
3,27000.0,Tom Hardy
4,131.0,Doug Walker
...,...,...
4911,637.0,Eric Mabius
4912,841.0,Natalie Zea
4913,0.0,Eva Boehnke
4914,946.0,Alan Ruck


In [90]:
# 5. movie가 들어가는 컬럼들을 조회.
movie.filter(like='movie')

Unnamed: 0,movie_title,movie_imdb_link,movie_facebook_likes
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,33000
1,Pirates of the Caribbean: At World's End,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,0
2,Spectre,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,85000
3,The Dark Knight Rises,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,164000
4,Star Wars: Episode VII - The Force Awakens,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,0
...,...,...,...
4911,Signed Sealed Delivered,http://www.imdb.com/title/tt3000844/?ref_=fn_tt_tt_1,84
4912,The Following,http://www.imdb.com/title/tt2071645/?ref_=fn_tt_tt_1,32000
4913,A Plague So Pleasant,http://www.imdb.com/title/tt2107644/?ref_=fn_tt_tt_1,16
4914,Shanghai Calling,http://www.imdb.com/title/tt2070597/?ref_=fn_tt_tt_1,660


## 행 조회

-   **loc** : index 이름으로 조회
-   **iloc** : 행 순번으로 조회

### loc indexer

-   index name으로 조회
-   **`DF.loc[ index이름 ]`**
    -   한 행 조회.
    -   조회할 행 index 이름(레이블) 전달
    -   이름이 문자열이면 " " 문자열표기법으로 전달. 정수이며 정수표기법으로 전달한다.
-   **`DF.loc[ index이름 리스트 ]`**
    -   여러 행 조회.
    -   팬시 인덱스
    -   조회할 행 index 이름(레이블) 리스트 전달
-   **`DF.loc[start index이름 : end index이름: step]`**
    -   슬라이싱 지원
    -   end index 이름의 행까지 포함한다.
-   **`DF.loc[index이름 , 컬럼이름]`**
    -   행과 열 조회
    -   둘다 이름으로 지정해야 함.

In [91]:
# 한 행: Series 반환. index name : 컬럼명
grade.loc['id-1']

korean       100
korean2       90
english       90
math          80
math2         90
history      100
총점           550
평균         91.67
통과여부          성공
Name: id-1, dtype: object

In [92]:
# 여러행: fancy indexing => DataFrame으로 반환
grade.loc[['id-1','id-4','id-2']]

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,성공
id-4,60,90,100,80,65,80,475,79.17,실패
id-2,50,90,80,80,80,90,470,78.33,실패


In [93]:
grade['id-2' : 'id-4']  # loc indexer로 slicing 하면 stop 포함

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-2,50,90,80,80,80,90,470,78.33,실패
id-3,70,90,100,80,100,95,535,89.17,성공
id-4,60,90,100,80,65,80,475,79.17,실패


In [94]:
grade['id-4':'id-1':-1] # 역순 조회

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-4,60,90,100,80,65,80,475,79.17,실패
id-3,70,90,100,80,100,95,535,89.17,성공
id-2,50,90,80,80,80,90,470,78.33,실패
id-1,100,90,90,80,90,100,550,91.67,성공


In [95]:
# 행, 열을 모두 지정 - df.loc[ 행 , 열]
grade.loc['id-2':'id-4', ['korean', 'math', 'history']]    #id-2 ~ id-4 인 사람들의 korean, math, history


Unnamed: 0_level_0,korean,math,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id-2,50,80,90
id-3,70,80,95
id-4,60,80,80


### iloc

-   index(행 순번)으로 조회
-   **`DF.iloc[행번호]`**
    -   한 행 조회.
    -   조회할 행 번호 전달
-   **`DF.iloc[ 행번호 리스트 ]`**
    -   여러 행 조회.
    -   조회할 행 번호 리스트 전달
-   **`DF.iloc[start 행번호: stop 행번호: step]`**
    -   슬라이싱 지원
    -   stop 행번호 포함 안함.
-   **`DF.iloc[행번호 , 열번호]`**
    -   행과 열 조회
    -   행열 모두 순번으로 지정

In [96]:
grade.iloc[1]

korean        50
korean2       90
english       80
math          80
math2         80
history       90
총점           470
평균         78.33
통과여부          실패
Name: id-2, dtype: object

In [97]:
grade.iloc[[0, -2, 1]]

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,성공
id-4,60,90,100,80,65,80,475,79.17,실패
id-2,50,90,80,80,80,90,470,78.33,실패


In [98]:
grade.iloc[1:3] # stop은 포함 안함.
grade.iloc[:3]
grade.iloc[4:1:-1]

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-5,90,90,40,80,70,70,440,73.33,실패
id-4,60,90,100,80,65,80,475,79.17,실패
id-3,70,90,100,80,100,95,535,89.17,성공


In [99]:
# 행, 열
grade.iloc[1, 6]  # 1번 index 행과, 6번 index 컬럼 (행,  열 모두 순번.)
grade.iloc[[0, 3], 2 : 6]

Unnamed: 0_level_0,english,math,math2,history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
id-1,90,80,90,100
id-4,100,80,65,80


<b style='font-size:2.2em'>TODO</b>

-   movie dataframe을 이용해 loc과 iloc관련해 다음을 작성

In [100]:
# 1.  movie_title 컬럼을 index 이름으로 설정.
movie.set_index('movie_title', inplace=True)

In [101]:
movie.head()

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pirate|singapore,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police officer|terrorist plot,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,,,,,12.0,7.1,,0


In [102]:
# 3.  행이름이 Spider-Man 3, The Avengers, Titanic 인 행 조회
movie.loc[['Spider-Man 3','The Avengers', 'Titanic']]

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Spider-Man 3,Color,Sam Raimi,392.0,156.0,0.0,4000.0,James Franco,24000.0,336530303.0,Action|Adventure|Romance,J.K. Simmons,383056,46055,Kirsten Dunst,0.0,sandman|spider man|symbiote|venom|villain,http://www.imdb.com/title/tt0413300/?ref_=fn_tt_tt_1,1902.0,English,USA,PG-13,258000000.0,2007.0,11000.0,6.2,2.35,0
The Avengers,Color,Joss Whedon,703.0,173.0,0.0,19000.0,Robert Downey Jr.,26000.0,623279547.0,Action|Adventure|Sci-Fi,Chris Hemsworth,995415,87697,Scarlett Johansson,3.0,alien invasion|assassin|battle|iron man|soldier,http://www.imdb.com/title/tt0848228/?ref_=fn_tt_tt_1,1722.0,English,USA,PG-13,220000000.0,2012.0,21000.0,8.1,1.85,123000
Titanic,Color,James Cameron,315.0,194.0,0.0,794.0,Kate Winslet,29000.0,658672302.0,Drama|Romance,Leonardo DiCaprio,793059,45223,Gloria Stuart,0.0,artist|love|ship|titanic|wet,http://www.imdb.com/title/tt0120338/?ref_=fn_tt_tt_1,2528.0,English,USA,PG-13,200000000.0,1997.0,14000.0,7.7,2.35,26000


In [103]:
# 4.  행이름 Spectre ~ Robin Hood 까지 범위로 조회
movie.loc['Spectre':'Robin Hood']

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police officer|terrorist plot,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,,,,,12.0,7.1,,0
John Carter,Color,Andrew Stanton,462.0,132.0,475.0,530.0,Samantha Morton,640.0,73058679.0,Action|Adventure|Sci-Fi,Daryl Sabara,212204,1873,Polly Walker,1.0,alien|american civil war|male nipple|mars|princess,http://www.imdb.com/title/tt0401729/?ref_=fn_tt_tt_1,738.0,English,USA,PG-13,263700000.0,2012.0,632.0,6.6,2.35,24000
Spider-Man 3,Color,Sam Raimi,392.0,156.0,0.0,4000.0,James Franco,24000.0,336530303.0,Action|Adventure|Romance,J.K. Simmons,383056,46055,Kirsten Dunst,0.0,sandman|spider man|symbiote|venom|villain,http://www.imdb.com/title/tt0413300/?ref_=fn_tt_tt_1,1902.0,English,USA,PG-13,258000000.0,2007.0,11000.0,6.2,2.35,0
Tangled,Color,Nathan Greno,324.0,100.0,15.0,284.0,Donna Murphy,799.0,200807262.0,Adventure|Animation|Comedy|Family|Fantasy|Musical|Romance,Brad Garrett,294810,2036,M.C. Gainey,1.0,17th century|based on fairy tale|disney|flower|tower,http://www.imdb.com/title/tt0398286/?ref_=fn_tt_tt_1,387.0,English,USA,PG,260000000.0,2010.0,553.0,7.8,1.85,29000
Avengers: Age of Ultron,Color,Joss Whedon,635.0,141.0,0.0,19000.0,Robert Downey Jr.,26000.0,458991599.0,Action|Adventure|Sci-Fi,Chris Hemsworth,462669,92000,Scarlett Johansson,4.0,artificial intelligence|based on comic book|captain america|marvel cinematic universe|superhero,http://www.imdb.com/title/tt2395427/?ref_=fn_tt_tt_1,1117.0,English,USA,PG-13,250000000.0,2015.0,21000.0,7.5,2.35,118000
Harry Potter and the Half-Blood Prince,Color,David Yates,375.0,153.0,282.0,10000.0,Daniel Radcliffe,25000.0,301956980.0,Adventure|Family|Fantasy|Mystery,Alan Rickman,321795,58753,Rupert Grint,3.0,blood|book|love|potion|professor,http://www.imdb.com/title/tt0417741/?ref_=fn_tt_tt_1,973.0,English,UK,PG,250000000.0,2009.0,11000.0,7.5,2.35,10000
Batman v Superman: Dawn of Justice,Color,Zack Snyder,673.0,183.0,0.0,2000.0,Lauren Cohan,15000.0,330249062.0,Action|Adventure|Sci-Fi,Henry Cavill,371639,24450,Alan D. Purwin,0.0,based on comic book|batman|sequel to a reboot|superhero|superman,http://www.imdb.com/title/tt2975590/?ref_=fn_tt_tt_1,3018.0,English,USA,PG-13,250000000.0,2016.0,4000.0,6.9,2.35,197000
Superman Returns,Color,Bryan Singer,434.0,169.0,0.0,903.0,Marlon Brando,18000.0,200069408.0,Action|Adventure|Sci-Fi,Kevin Spacey,240396,29991,Frank Langella,0.0,crystal|epic|lex luthor|lois lane|return to earth,http://www.imdb.com/title/tt0348150/?ref_=fn_tt_tt_1,2367.0,English,USA,PG-13,209000000.0,2006.0,10000.0,6.1,2.35,0


In [104]:
# 5.  행이름이 John Carter 이고 열이름이 director_name 인 값 조회 - John Carter의 감독이름
movie.loc['John Carter','director_name']

'Andrew Stanton'

In [105]:
# 6.  1번행 조회
movie.iloc[0]

color                                                                       Color
director_name                                                       James Cameron
num_critic_for_reviews                                                      723.0
duration                                                                    178.0
director_facebook_likes                                                       0.0
actor_3_facebook_likes                                                      855.0
actor_2_name                                                     Joel David Moore
actor_1_facebook_likes                                                     1000.0
gross                                                                 760505847.0
genres                                            Action|Adventure|Fantasy|Sci-Fi
actor_1_name                                                          CCH Pounder
num_voted_users                                                            886204
cast_total_faceb

In [106]:
# 7.  마지막 행 조회
movie.iloc[-1]

color                                                                                Color
director_name                                                                     Jon Gunn
num_critic_for_reviews                                                                43.0
duration                                                                              90.0
director_facebook_likes                                                               16.0
actor_3_facebook_likes                                                                16.0
actor_2_name                                                              Brian Herzlinger
actor_1_facebook_likes                                                                86.0
gross                                                                              85222.0
genres                                                                         Documentary
actor_1_name                                                                   John August

In [107]:
# 8.  1, 2, 5, 6, 9 번행 조회
movie.iloc[[0,1,4,5,8]]

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pirate|singapore,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,,,,,12.0,7.1,,0
John Carter,Color,Andrew Stanton,462.0,132.0,475.0,530.0,Samantha Morton,640.0,73058679.0,Action|Adventure|Sci-Fi,Daryl Sabara,212204,1873,Polly Walker,1.0,alien|american civil war|male nipple|mars|princess,http://www.imdb.com/title/tt0401729/?ref_=fn_tt_tt_1,738.0,English,USA,PG-13,263700000.0,2012.0,632.0,6.6,2.35,24000
Avengers: Age of Ultron,Color,Joss Whedon,635.0,141.0,0.0,19000.0,Robert Downey Jr.,26000.0,458991599.0,Action|Adventure|Sci-Fi,Chris Hemsworth,462669,92000,Scarlett Johansson,4.0,artificial intelligence|based on comic book|captain america|marvel cinematic universe|superhero,http://www.imdb.com/title/tt2395427/?ref_=fn_tt_tt_1,1117.0,English,USA,PG-13,250000000.0,2015.0,21000.0,7.5,2.35,118000


In [108]:
# 9.  10 ~ 20 행 조회
movie.iloc[10:21]

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Batman v Superman: Dawn of Justice,Color,Zack Snyder,673.0,183.0,0.0,2000.0,Lauren Cohan,15000.0,330249062.0,Action|Adventure|Sci-Fi,Henry Cavill,371639,24450,Alan D. Purwin,0.0,based on comic book|batman|sequel to a reboot|superhero|superman,http://www.imdb.com/title/tt2975590/?ref_=fn_tt_tt_1,3018.0,English,USA,PG-13,250000000.0,2016.0,4000.0,6.9,2.35,197000
Superman Returns,Color,Bryan Singer,434.0,169.0,0.0,903.0,Marlon Brando,18000.0,200069408.0,Action|Adventure|Sci-Fi,Kevin Spacey,240396,29991,Frank Langella,0.0,crystal|epic|lex luthor|lois lane|return to earth,http://www.imdb.com/title/tt0348150/?ref_=fn_tt_tt_1,2367.0,English,USA,PG-13,209000000.0,2006.0,10000.0,6.1,2.35,0
Quantum of Solace,Color,Marc Forster,403.0,106.0,395.0,393.0,Mathieu Amalric,451.0,168368427.0,Action|Adventure,Giancarlo Giannini,330784,2023,Rory Kinnear,1.0,action hero|attempted rape|bond girl|official james bond series|revenge,http://www.imdb.com/title/tt0830515/?ref_=fn_tt_tt_1,1243.0,English,UK,PG-13,200000000.0,2008.0,412.0,6.7,2.35,0
Pirates of the Caribbean: Dead Man's Chest,Color,Gore Verbinski,313.0,151.0,563.0,1000.0,Orlando Bloom,40000.0,423032628.0,Action|Adventure|Fantasy,Johnny Depp,522040,48486,Jack Davenport,2.0,box office hit|giant squid|heart|liar's dice|monster,http://www.imdb.com/title/tt0383574/?ref_=fn_tt_tt_1,1832.0,English,USA,PG-13,225000000.0,2006.0,5000.0,7.3,2.35,5000
The Lone Ranger,Color,Gore Verbinski,450.0,150.0,563.0,1000.0,Ruth Wilson,40000.0,89289910.0,Action|Adventure|Western,Johnny Depp,181792,45757,Tom Wilkinson,1.0,horse|outlaw|texas|texas ranger|train,http://www.imdb.com/title/tt1210819/?ref_=fn_tt_tt_1,711.0,English,USA,PG-13,215000000.0,2013.0,2000.0,6.5,2.35,48000
Man of Steel,Color,Zack Snyder,733.0,143.0,0.0,748.0,Christopher Meloni,15000.0,291021565.0,Action|Adventure|Fantasy|Sci-Fi,Henry Cavill,548573,20495,Harry Lennix,0.0,based on comic book|british actor playing american character|final battle|origin of hero|reboot,http://www.imdb.com/title/tt0770828/?ref_=fn_tt_tt_1,2536.0,English,USA,PG-13,225000000.0,2013.0,3000.0,7.2,2.35,118000
The Chronicles of Narnia: Prince Caspian,Color,Andrew Adamson,258.0,150.0,80.0,201.0,Pierfrancesco Favino,22000.0,141614023.0,Action|Adventure|Family|Fantasy,Peter Dinklage,149922,22697,Damián Alcázar,4.0,brother brother relationship|brother sister relationship|good versus evil|king|narnia,http://www.imdb.com/title/tt0499448/?ref_=fn_tt_tt_1,438.0,English,USA,PG,225000000.0,2008.0,216.0,6.6,2.35,0
The Avengers,Color,Joss Whedon,703.0,173.0,0.0,19000.0,Robert Downey Jr.,26000.0,623279547.0,Action|Adventure|Sci-Fi,Chris Hemsworth,995415,87697,Scarlett Johansson,3.0,alien invasion|assassin|battle|iron man|soldier,http://www.imdb.com/title/tt0848228/?ref_=fn_tt_tt_1,1722.0,English,USA,PG-13,220000000.0,2012.0,21000.0,8.1,1.85,123000
Pirates of the Caribbean: On Stranger Tides,Color,Rob Marshall,448.0,136.0,252.0,1000.0,Sam Claflin,40000.0,241063875.0,Action|Adventure|Fantasy,Johnny Depp,370704,54083,Stephen Graham,4.0,blackbeard|captain|pirate|revenge|soldier,http://www.imdb.com/title/tt1298650/?ref_=fn_tt_tt_1,484.0,English,USA,PG-13,250000000.0,2011.0,11000.0,6.7,2.35,58000
Men in Black 3,Color,Barry Sonnenfeld,451.0,106.0,188.0,718.0,Michael Stuhlbarg,10000.0,179020854.0,Action|Adventure|Comedy|Family|Fantasy|Sci-Fi,Will Smith,268154,12572,Nicole Scherzinger,1.0,alien|criminal|m.i.b.|maximum security prison|prison,http://www.imdb.com/title/tt1409024/?ref_=fn_tt_tt_1,341.0,English,USA,PG-13,225000000.0,2012.0,816.0,6.8,1.85,40000


## Boolean indexing을 이용한 조회

-   원하는 조건을 만족하는 행, 열을 조회한다.

-   **`DataFrame객체[조건], DataFrame객체.loc[조건]`**
    -   조건이 True인 행만 조회
    -   열까지 선택시
        -   `DataFrame객체[조건][열]`
        -   `DataFrame객체.loc[조건, 열]`

> -   논리연산자
>     |논리연산자|설명|
>     |:-:|-|
>     |&|and연산|
>     |\||or연산|
>     |~|not 연산|

> -   논리연산자의 피연산자들은 반드시 ( )로 묶어준다.
> -   파이썬과는 다르게 `and`, `or`, `not` 예약어는 사용할 수 없다.

In [110]:
grade[[True, False, False, False, True]]  # True index의 행이 조회

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,성공
id-5,90,90,40,80,70,70,440,73.33,실패


In [111]:
grade[grade['통과여부'] == '성공']["평균"]

id
id-1    91.67
id-3    89.17
Name: 평균, dtype: float64

In [112]:
grade.loc[grade['통과여부'] == '성공', "평균"]

id
id-1    91.67
id-3    89.17
Name: 평균, dtype: float64

In [114]:
# 통과한 사람중 평균이 90이상인 행 
grade.loc[(grade['통과여부'] == '성공') & (grade['평균'] >= 90)]

Unnamed: 0_level_0,korean,korean2,english,math,math2,history,총점,평균,통과여부
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id-1,100,90,90,80,90,100,550,91.67,성공


<b style='font-size:2.2em'>TODO</b>


In [115]:
# movie dataframe의 index명을 컬럼 설정한다.
movie.reset_index(inplace=True)
movie.head()

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pirate|singapore,http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police officer|terrorist plot,http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1,,,,,,,12.0,7.1,,0


In [116]:
# 1.  상영시간 (duration)이 300 이상인 영화들 조회
movie[movie["duration"] >= 300]

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
1134,Heaven's Gate,Color,Michael Cimino,102.0,325.0,517.0,678.0,Sam Waterston,12000.0,1500000.0,Adventure|Drama|Western,Jeff Bridges,9830,14255,Isabelle Huppert,0.0,1890s|hired gun|immigrant|johnson county war|sheriff,http://www.imdb.com/title/tt0080855/?ref_=fn_tt_tt_1,189.0,English,USA,R,44000000.0,1980.0,849.0,6.8,2.35,1000
1487,"Blood In, Blood Out",Color,Taylor Hackford,12.0,330.0,138.0,672.0,Jesse Borrego,848.0,4496583.0,Crime|Drama,Delroy Lindo,23181,3227,Raymond Cruz,2.0,1970s|1980s|barrio|gang war|mexican,http://www.imdb.com/title/tt0106469/?ref_=fn_tt_tt_1,129.0,English,USA,R,35000000.0,1993.0,674.0,8.0,1.66,6000
1694,Trapped,Color,,16.0,511.0,,51.0,Ingvar Eggert Sigurðsson,147.0,,Crime|Drama|Thriller,Ólafur Darri Ólafsson,2308,307,Björn Hlynur Haraldsson,0.0,coastal town|iceland|police|snowstorm|winter storm,http://www.imdb.com/title/tt3561180/?ref_=fn_tt_tt_1,19.0,Icelandic,Iceland,,,,63.0,8.2,16.0,0
2436,Carlos,Color,,108.0,334.0,,30.0,Nora von Waldstätten,897.0,145118.0,Biography|Crime|Drama|Thriller,Edgar Ramírez,10111,1032,Katharina Schüttler,0.0,opec|pubic hair|revolutionary|terrorism|true crime,http://www.imdb.com/title/tt1321865/?ref_=fn_tt_tt_1,36.0,English,France,Not Rated,,,30.0,7.7,2.35,0
3254,The Legend of Suriyothai,Color,Chatrichalerm Yukol,31.0,300.0,6.0,6.0,Chatchai Plengpanich,7.0,454255.0,Action|Adventure|Drama|History|War,Sarunyu Wongkrachang,1666,32,Mai Charoenpura,3.0,16th century|burmese|invasion|queen|thailand,http://www.imdb.com/title/tt0290879/?ref_=fn_tt_tt_1,47.0,Thai,Thailand,R,400000000.0,2001.0,6.0,6.6,1.85,124


In [117]:
# 2.  상영시간 (duration)이 300 이상인 영화들의 
#       영화제목(movie_title)과 감독이름(director_name) 조회
movie[movie["duration"] >= 300][['movie_title','director_name']]

Unnamed: 0,movie_title,director_name
1134,Heaven's Gate,Michael Cimino
1487,"Blood In, Blood Out",Taylor Hackford
1694,Trapped,
2436,Carlos,
3254,The Legend of Suriyothai,Chatrichalerm Yukol


In [118]:
movie.loc[movie['duration'] >= 300, ["movie_title", "director_name"]]

Unnamed: 0,movie_title,director_name
1134,Heaven's Gate,Michael Cimino
1487,"Blood In, Blood Out",Taylor Hackford
1694,Trapped,
2436,Carlos,
3254,The Legend of Suriyothai,Chatrichalerm Yukol


In [119]:
# 3.  감독이름(director_name)이 'Quentin Tarantino' 의 영화들만 조회
movie[movie["director_name"] == 'Quentin Tarantino']

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
293,Django Unchained,Color,Quentin Tarantino,765.0,165.0,16000.0,265.0,Christoph Waltz,29000.0,162804648.0,Drama|Western,Leonardo DiCaprio,955174,40978,Ato Essandoh,1.0,dynamite|historically inaccurate|ku klux klan|n word|slavery,http://www.imdb.com/title/tt1853728/?ref_=fn_tt_tt_1,1193.0,English,USA,R,100000000.0,2012.0,11000.0,8.5,2.35,199000
582,Inglourious Basterds,Color,Quentin Tarantino,486.0,153.0,16000.0,11000.0,Brad Pitt,13000.0,120523073.0,Adventure|Drama|War,Michael Fassbender,885175,36741,Christoph Waltz,1.0,france|german|nazis|revenge|scalping,http://www.imdb.com/title/tt0361748/?ref_=fn_tt_tt_1,1527.0,English,USA,R,75000000.0,2009.0,11000.0,8.3,2.35,42000
698,The Hateful Eight,Color,Quentin Tarantino,596.0,187.0,16000.0,1000.0,Jennifer Jason Leigh,46000.0,54116191.0,Crime|Drama|Mystery|Thriller|Western,Craig Stark,272839,49912,Zoë Bell,1.0,blizzard|blood vomiting|bounty hunter|murder|shot in the crotch,http://www.imdb.com/title/tt3460252/?ref_=fn_tt_tt_1,1018.0,English,USA,R,44000000.0,2015.0,1000.0,7.9,2.76,114000
846,Kill Bill: Vol. 1,Black and White,Quentin Tarantino,354.0,111.0,16000.0,640.0,Vivica A. Fox,926.0,70098138.0,Action,David Carradine,735784,3983,Chiaki Kuriyama,0.0,bride|coma|japan|revenge|sword,http://www.imdb.com/title/tt0266697/?ref_=fn_tt_tt_1,2105.0,English,USA,R,30000000.0,2003.0,890.0,8.1,2.35,13000
849,Kill Bill: Vol. 2,Black and White,Quentin Tarantino,304.0,137.0,16000.0,348.0,Michael Parks,890.0,66207920.0,Action|Crime|Drama|Thriller,Vivica A. Fox,512749,1959,Michael Bowen,0.0,assassin|assassination|bride|death|vengeance,http://www.imdb.com/title/tt0378194/?ref_=fn_tt_tt_1,935.0,English,USA,R,30000000.0,2004.0,387.0,8.0,2.35,0
2883,Jackie Brown,Color,Quentin Tarantino,140.0,154.0,16000.0,889.0,Sid Haig,22000.0,39647595.0,Crime|Thriller,Robert De Niro,239540,25522,Robert Forster,5.0,arms dealer|atf|bail|money|multiple perspectives,http://www.imdb.com/title/tt0119396/?ref_=fn_tt_tt_1,462.0,English,USA,R,12000000.0,1997.0,1000.0,7.5,1.85,0
3295,Pulp Fiction,Color,Quentin Tarantino,215.0,178.0,16000.0,857.0,Eric Stoltz,13000.0,107930000.0,Crime|Drama,Bruce Willis,1324680,16557,Phil LaMarr,1.0,black comedy|cunnilingus|neo noir|nonlinear timeline|postmodern,http://www.imdb.com/title/tt0110912/?ref_=fn_tt_tt_1,2195.0,English,USA,R,8000000.0,1994.0,902.0,8.9,2.35,45000
4395,Reservoir Dogs,Color,Quentin Tarantino,173.0,99.0,16000.0,455.0,Steve Buscemi,16000.0,2812029.0,Crime|Drama|Thriller,Quentin Tarantino,664719,28994,Chris Penn,0.0,gang|heist|heist gone wrong|nonlinear timeline|robbery,http://www.imdb.com/title/tt0105236/?ref_=fn_tt_tt_1,931.0,English,USA,R,1200000.0,1992.0,12000.0,8.4,2.35,19000


## query() 를 이용한 boolean indexing

-   query(조회조건)
    -   sql의 where 절의 조건 처럼 문자열의 query statement를 이용해 조건으로 조회
    -   boolean index에 비해
        -   장점: 편의성(문자열로 query statement를 만들므로 동적 구문 생성등 다양한 처리가 가능)과 가독성이 좋다.
        -   단점: 속도가 느리다.
-   조회조건 구문
    -   `"컬럼명 연산자 비교값"`
-   외부변수를 이용해 query문의 비교값을 지정할 수 있다.
    -   query 문자열 안에서 @변수명 사용
    -   f string이나 format() 함수를 이용해 query를 만들 수도 있다.

### query 함수 연산자

-   **비교 연산자**
    -   ==, \>, \>=, \<, \<=, !=
-   **결측치 비교**
    -   컬럼.isna(), isnull()
    -   컬럼.notna(), notnull()
-   **논리 연산자**
    -   and, or, not
-   **in 연산자**
    -   in, ==
    -   not in, !=
    -   비교 대상값은 리스트에 넣는다.
-   **Index name으로 검색**
    -   행의 index 이름으로 검색
-   **문자열 부분검색(sql의 like)**
    -   컬럼명.str.contains(문자열): 문자열을 포함하고 있는
    -   컬럼명.str.startswith(문자열): 문자열로 시작하는
    -   컬럼명.str.endswith(문자열): 문자열로 끝나는
    -   **문자열 부분검색을 할 컬럼에 결측치(NaN)이 있으면 안된다.**


In [120]:
import pandas as pd
import numpy as np

data_dict = {
    "name": ["김영수", "박영희", "오준호", "조민경", "박영희", "김영수"],
    "age": [23, 17, 28, 31, 23, 17],
    "email": [
        "kys@gmail.com",
        "pyh@gmail.com",
        "ojh@daum.net",
        "cmk@naver.com",
        "pyh@daum.net",
        np.nan,
    ],
}
df = pd.DataFrame(data_dict)
df

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
2,오준호,28,ojh@daum.net
3,조민경,31,cmk@naver.com
4,박영희,23,pyh@daum.net
5,김영수,17,


In [121]:
# 비교연산
## 나이가 17인 행 조회
# df[df['age'] == 17]
df.query("age == 17")
df.query('age > 25')
df.query('age != 25')

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
2,오준호,28,ojh@daum.net
3,조민경,31,cmk@naver.com
4,박영희,23,pyh@daum.net
5,김영수,17,


In [None]:
df.query("name == '김영수'") # 문자열은 "  ", ' '  감싼다.

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
5,김영수,17,


In [123]:
df.query('email.isna()') # 결측치인 행
df.query('email.notnull()')  #결측치가 아닌 행

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
2,오준호,28,ojh@daum.net
3,조민경,31,cmk@naver.com
4,박영희,23,pyh@daum.net


In [125]:
# 조건 묶어주기 => 논리연산 (and, or, not,  &, | , ~ 다 사용가능. 피연산자 () 안묶어도 됨.) 
df.query('not age > 25')
df.query('~(age > 25)')

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
4,박영희,23,pyh@daum.net
5,김영수,17,


In [127]:
df.query("name == '박영희' and age > 20")
df.query("name == '박영희' & age > 20")

Unnamed: 0,name,age,email
4,박영희,23,pyh@daum.net


In [128]:
df.query('name=="박영희"  or  email.isnull()')
df.query('name=="박영희"  |  email.isnull()')

Unnamed: 0,name,age,email
1,박영희,17,pyh@gmail.com
4,박영희,23,pyh@daum.net
5,김영수,17,


In [132]:
df.query('age in [17,20,23]')
df.query('age == [17, 20, 23, 28]')

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
2,오준호,28,ojh@daum.net
4,박영희,23,pyh@daum.net
5,김영수,17,


In [133]:
df.query('age not in [17, 20, 23]')
df.query('age != [17, 20, 23, 28]')

Unnamed: 0,name,age,email
3,조민경,31,cmk@naver.com


In [134]:
df.query("index >= 3")  # index -> index name으로 조회
df.query("index == [1, 5]")

Unnamed: 0,name,age,email
1,박영희,17,pyh@gmail.com
5,김영수,17,


In [None]:
# 부분일치
df.query("name.str.startswith('김')") # 김 으로 시작
df.query("name.str.endswith('희')")  # 희 로 끝
df.query('name.str.contains("영")')   # 영 을 포함

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
4,박영희,23,pyh@daum.net
5,김영수,17,


In [137]:
# 부분일치 -> 결측치가 있는 행으로 조회하면 예외발생
# df.query("email.str.endswith('com') ")
## 결측치가 아닌 행등 조회하고 이어서 부분일치 조회
df.query("email.notnull()").query("email.str.endswith('com')")

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
1,박영희,17,pyh@gmail.com
3,조민경,31,cmk@naver.com


In [138]:
name = input('조회할 이름:')
df.query("name == @name")

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
5,김영수,17,


In [139]:
f"name == '{name}'"

"name == '김영수'"

In [141]:
df.query(f"name == '{name}'")

Unnamed: 0,name,age,email
0,김영수,23,kys@gmail.com
5,김영수,17,


<b style='font-size:2.2em'>TODO</b>

-   movie dataframe을 이용해 query() 메소드를 사용해서 아래 문제를 푸세요.

In [143]:
# 1. 상영시간이 300분 이상인 영화들만 조회
movie.query("duration >= 300")

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
1134,Heaven's Gate,Color,Michael Cimino,102.0,325.0,517.0,678.0,Sam Waterston,12000.0,1500000.0,Adventure|Drama|Western,Jeff Bridges,9830,14255,Isabelle Huppert,0.0,1890s|hired gun|immigrant|johnson county war|sheriff,http://www.imdb.com/title/tt0080855/?ref_=fn_tt_tt_1,189.0,English,USA,R,44000000.0,1980.0,849.0,6.8,2.35,1000
1487,"Blood In, Blood Out",Color,Taylor Hackford,12.0,330.0,138.0,672.0,Jesse Borrego,848.0,4496583.0,Crime|Drama,Delroy Lindo,23181,3227,Raymond Cruz,2.0,1970s|1980s|barrio|gang war|mexican,http://www.imdb.com/title/tt0106469/?ref_=fn_tt_tt_1,129.0,English,USA,R,35000000.0,1993.0,674.0,8.0,1.66,6000
1694,Trapped,Color,,16.0,511.0,,51.0,Ingvar Eggert Sigurðsson,147.0,,Crime|Drama|Thriller,Ólafur Darri Ólafsson,2308,307,Björn Hlynur Haraldsson,0.0,coastal town|iceland|police|snowstorm|winter storm,http://www.imdb.com/title/tt3561180/?ref_=fn_tt_tt_1,19.0,Icelandic,Iceland,,,,63.0,8.2,16.0,0
2436,Carlos,Color,,108.0,334.0,,30.0,Nora von Waldstätten,897.0,145118.0,Biography|Crime|Drama|Thriller,Edgar Ramírez,10111,1032,Katharina Schüttler,0.0,opec|pubic hair|revolutionary|terrorism|true crime,http://www.imdb.com/title/tt1321865/?ref_=fn_tt_tt_1,36.0,English,France,Not Rated,,,30.0,7.7,2.35,0
3254,The Legend of Suriyothai,Color,Chatrichalerm Yukol,31.0,300.0,6.0,6.0,Chatchai Plengpanich,7.0,454255.0,Action|Adventure|Drama|History|War,Sarunyu Wongkrachang,1666,32,Mai Charoenpura,3.0,16th century|burmese|invasion|queen|thailand,http://www.imdb.com/title/tt0290879/?ref_=fn_tt_tt_1,47.0,Thai,Thailand,R,400000000.0,2001.0,6.0,6.6,1.85,124


In [148]:
# 2. 상영시간이 250분 ~ 300분 인 영화들 조회
movie.query("(duration >= 250) & (duration <= 300)")

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
874,Gods and Generals,Color,Ron Maxwell,84.0,280.0,33.0,67.0,Bruce Boxleitner,789.0,12870569.0,Drama|History|War,Billy Campbell,13215,1671,John Castle,0.0,confederacy|hero|prequel|stonewall jackson|war hero,http://www.imdb.com/title/tt0279111/?ref_=fn_tt_tt_1,497.0,English,USA,PG-13,56000000.0,2003.0,640.0,6.3,2.35,953
1150,Cleopatra,Color,Joseph L. Mankiewicz,72.0,251.0,311.0,595.0,Richard Burton,940.0,57750000.0,Biography|Drama|History|Romance,Martin Landau,21554,2957,Roddy McDowall,2.0,cleopatra|egypt|epic|queen|roman empire,http://www.imdb.com/title/tt0056937/?ref_=fn_tt_tt_1,192.0,English,UK,Approved,31115000.0,1963.0,726.0,7.0,2.2,0
1556,Apocalypse Now,Color,Francis Ford Coppola,261.0,289.0,0.0,3000.0,Marlon Brando,11000.0,78800000.0,Drama|War,Harrison Ford,450676,25313,Robert Duvall,1.0,army|green beret|insanity|jungle|vietnam,http://www.imdb.com/title/tt0078788/?ref_=fn_tt_tt_1,983.0,English,USA,R,31500000.0,1979.0,10000.0,8.5,2.35,19000
1698,Once Upon a Time in America,Color,Sergio Leone,111.0,251.0,0.0,642.0,Burt Young,22000.0,5300000.0,Crime|Drama,Robert De Niro,221000,24719,Treat Williams,8.0,1920s|ambiguity|childhood friend|new york|speakeasy,http://www.imdb.com/title/tt0087843/?ref_=fn_tt_tt_1,495.0,English,Italy,R,30000000.0,1984.0,683.0,8.4,1.85,12000
1963,Gettysburg,Color,Ron Maxwell,22.0,271.0,33.0,251.0,William Morgan Sheppard,854.0,10769960.0,Drama|History|War,Tom Berenger,21940,2107,James Patrick Stuart,0.0,american civil war|battle|blood splatter|civil war|irish american,http://www.imdb.com/title/tt0107007/?ref_=fn_tt_tt_1,256.0,English,USA,PG,25000000.0,1993.0,702.0,7.7,1.85,0
2527,Arn: The Knight Templar,Color,Peter Flinth,34.0,270.0,5.0,292.0,Michael Nyqvist,908.0,,Action|Adventure|Drama|Romance|War,Gustaf Skarsgård,18041,2762,Vincent Perez,1.0,first part|holy land|knight templar|monastery|sword and shield,http://www.imdb.com/title/tt0837106/?ref_=fn_tt_tt_1,54.0,Swedish,Sweden,,25000000.0,2007.0,690.0,6.6,2.35,0
2687,The Company,Color,,9.0,286.0,,527.0,Tom Hollander,857.0,,Drama|History|Thriller,Anna Silk,3828,3809,Alessandro Nivola,3.0,cia|mole|revolution|spy|ussr,http://www.imdb.com/title/tt0488352/?ref_=fn_tt_tt_1,39.0,English,USA,,,,555.0,7.9,1.78,733
2922,Das Boot,Color,Wolfgang Petersen,96.0,293.0,249.0,18.0,Martin Semmelrogge,362.0,11433134.0,Adventure|Drama|Thriller|War,Jürgen Prochnow,168203,469,Herbert Grönemeyer,0.0,duty|submarine|submarine movie|tension|u boat,http://www.imdb.com/title/tt0082096/?ref_=fn_tt_tt_1,426.0,German,West Germany,R,14000000.0,1981.0,21.0,8.4,1.85,11000
3254,The Legend of Suriyothai,Color,Chatrichalerm Yukol,31.0,300.0,6.0,6.0,Chatchai Plengpanich,7.0,454255.0,Action|Adventure|Drama|History|War,Sarunyu Wongkrachang,1666,32,Mai Charoenpura,3.0,16th century|burmese|invasion|queen|thailand,http://www.imdb.com/title/tt0290879/?ref_=fn_tt_tt_1,47.0,Thai,Thailand,R,400000000.0,2001.0,6.0,6.6,1.85,124


In [149]:
movie[movie['duration'].between(250,300)]

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
874,Gods and Generals,Color,Ron Maxwell,84.0,280.0,33.0,67.0,Bruce Boxleitner,789.0,12870569.0,Drama|History|War,Billy Campbell,13215,1671,John Castle,0.0,confederacy|hero|prequel|stonewall jackson|war hero,http://www.imdb.com/title/tt0279111/?ref_=fn_tt_tt_1,497.0,English,USA,PG-13,56000000.0,2003.0,640.0,6.3,2.35,953
1150,Cleopatra,Color,Joseph L. Mankiewicz,72.0,251.0,311.0,595.0,Richard Burton,940.0,57750000.0,Biography|Drama|History|Romance,Martin Landau,21554,2957,Roddy McDowall,2.0,cleopatra|egypt|epic|queen|roman empire,http://www.imdb.com/title/tt0056937/?ref_=fn_tt_tt_1,192.0,English,UK,Approved,31115000.0,1963.0,726.0,7.0,2.2,0
1556,Apocalypse Now,Color,Francis Ford Coppola,261.0,289.0,0.0,3000.0,Marlon Brando,11000.0,78800000.0,Drama|War,Harrison Ford,450676,25313,Robert Duvall,1.0,army|green beret|insanity|jungle|vietnam,http://www.imdb.com/title/tt0078788/?ref_=fn_tt_tt_1,983.0,English,USA,R,31500000.0,1979.0,10000.0,8.5,2.35,19000
1698,Once Upon a Time in America,Color,Sergio Leone,111.0,251.0,0.0,642.0,Burt Young,22000.0,5300000.0,Crime|Drama,Robert De Niro,221000,24719,Treat Williams,8.0,1920s|ambiguity|childhood friend|new york|speakeasy,http://www.imdb.com/title/tt0087843/?ref_=fn_tt_tt_1,495.0,English,Italy,R,30000000.0,1984.0,683.0,8.4,1.85,12000
1963,Gettysburg,Color,Ron Maxwell,22.0,271.0,33.0,251.0,William Morgan Sheppard,854.0,10769960.0,Drama|History|War,Tom Berenger,21940,2107,James Patrick Stuart,0.0,american civil war|battle|blood splatter|civil war|irish american,http://www.imdb.com/title/tt0107007/?ref_=fn_tt_tt_1,256.0,English,USA,PG,25000000.0,1993.0,702.0,7.7,1.85,0
2527,Arn: The Knight Templar,Color,Peter Flinth,34.0,270.0,5.0,292.0,Michael Nyqvist,908.0,,Action|Adventure|Drama|Romance|War,Gustaf Skarsgård,18041,2762,Vincent Perez,1.0,first part|holy land|knight templar|monastery|sword and shield,http://www.imdb.com/title/tt0837106/?ref_=fn_tt_tt_1,54.0,Swedish,Sweden,,25000000.0,2007.0,690.0,6.6,2.35,0
2687,The Company,Color,,9.0,286.0,,527.0,Tom Hollander,857.0,,Drama|History|Thriller,Anna Silk,3828,3809,Alessandro Nivola,3.0,cia|mole|revolution|spy|ussr,http://www.imdb.com/title/tt0488352/?ref_=fn_tt_tt_1,39.0,English,USA,,,,555.0,7.9,1.78,733
2922,Das Boot,Color,Wolfgang Petersen,96.0,293.0,249.0,18.0,Martin Semmelrogge,362.0,11433134.0,Adventure|Drama|Thriller|War,Jürgen Prochnow,168203,469,Herbert Grönemeyer,0.0,duty|submarine|submarine movie|tension|u boat,http://www.imdb.com/title/tt0082096/?ref_=fn_tt_tt_1,426.0,German,West Germany,R,14000000.0,1981.0,21.0,8.4,1.85,11000
3254,The Legend of Suriyothai,Color,Chatrichalerm Yukol,31.0,300.0,6.0,6.0,Chatchai Plengpanich,7.0,454255.0,Action|Adventure|Drama|History|War,Sarunyu Wongkrachang,1666,32,Mai Charoenpura,3.0,16th century|burmese|invasion|queen|thailand,http://www.imdb.com/title/tt0290879/?ref_=fn_tt_tt_1,47.0,Thai,Thailand,R,400000000.0,2001.0,6.0,6.6,1.85,124


In [150]:
# 3. 컬러영화가 아닌 영화 조회
movie.color.value_counts()

color
Color              4693
Black and White     204
Name: count, dtype: int64

In [153]:
movie.query("color == 'Black and White'")

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
111,Pearl Harbor,Black and White,Michael Bay,191.0,184.0,0.0,691.0,Jaime King,3000.0,198539855.0,Action|Drama|History|Romance|War,Jennifer Garner,254111,5401,Mako,0.0,air raid|black smoke|japanese military|japanese navy|sunday,http://www.imdb.com/title/tt0213149/?ref_=fn_tt_tt_1,1999.0,English,USA,PG-13,140000000.0,2001.0,961.0,6.1,2.35,0
148,Die Another Day,Black and White,Lee Tamahori,264.0,133.0,93.0,746.0,Colin Salmon,769.0,160201106.0,Action|Adventure|Thriller,Toby Stephens,169914,2538,Rick Yune,0.0,catfight|clinic|colonel|diamond|patricide,http://www.imdb.com/title/tt0246460/?ref_=fn_tt_tt_1,1185.0,English,UK,PG-13,142000000.0,2002.0,766.0,6.1,2.35,0
254,The Aviator,Black and White,Martin Scorsese,267.0,170.0,17000.0,827.0,Adam Scott,29000.0,102608827.0,Biography|Drama,Leonardo DiCaprio,264318,34582,Frances Conroy,0.0,1920s|aviation|fight|spruce goose|test flight,http://www.imdb.com/title/tt0338751/?ref_=fn_tt_tt_1,799.0,English,USA,PG-13,110000000.0,2004.0,3000.0,7.5,2.35,0
269,Ali,Black and White,Michael Mann,174.0,165.0,0.0,780.0,Jada Pinkett Smith,10000.0,58183966.0,Biography|Drama|Sport,Will Smith,79186,14196,Joe Morton,1.0,african american protagonist|african americans|boxing gym|gym|rumble in the jungle,http://www.imdb.com/title/tt0248667/?ref_=fn_tt_tt_1,386.0,English,USA,R,107000000.0,2001.0,851.0,6.8,2.35,0
283,Casino Royale,Black and White,Martin Campbell,400.0,144.0,258.0,834.0,Tobias Menzies,6000.0,167007184.0,Action|Adventure|Thriller,Eva Green,470483,9125,Ivana Milicevic,1.0,casino|espionage|free running|james bond|terrorist,http://www.imdb.com/title/tt0381061/?ref_=fn_tt_tt_1,2301.0,English,UK,PG-13,150000000.0,2006.0,1000.0,8.0,2.35,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4879,Mutual Appreciation,Black and White,Andrew Bujalski,52.0,109.0,26.0,3.0,Kate Dollenmayer,26.0,,Comedy,Andrew Bujalski,1578,38,Justin Rice,0.0,friendship|guitarist|mumblecore|musician|new york,http://www.imdb.com/title/tt0446747/?ref_=fn_tt_tt_1,23.0,English,USA,R,,2005.0,6.0,6.9,1.66,91
4882,Clerks,Black and White,Kevin Smith,136.0,102.0,0.0,216.0,Brian O'Halloran,898.0,3151130.0,Comedy,Jason Mewes,181749,2103,Jeff Anderson,4.0,clerk|friend|hockey|video|video store,http://www.imdb.com/title/tt0109445/?ref_=fn_tt_tt_1,615.0,English,USA,R,230000.0,1994.0,657.0,7.8,1.37,0
4888,Slacker,Black and White,Richard Linklater,61.0,100.0,0.0,0.0,Richard Linklater,5.0,1227508.0,Comedy|Drama,Tommy Pallotta,15103,5,Jean Caffeine,0.0,austin texas|moon|pap smear|texas|twenty something,http://www.imdb.com/title/tt0102943/?ref_=fn_tt_tt_1,80.0,English,USA,R,23000.0,1991.0,0.0,7.1,1.37,2000
4895,Stories of Our Lives,Black and White,Jim Chuchu,6.0,60.0,0.0,4.0,Olwenya Maina,147.0,,Drama,Paul Ogola,70,170,Mugambi Nthiga,0.0,,http://www.imdb.com/title/tt3973612/?ref_=fn_tt_tt_1,1.0,Swahili,Kenya,,15000.0,2014.0,19.0,7.4,,45


In [154]:
movie.query("color.notnull()").query("color != 'Color'")

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
111,Pearl Harbor,Black and White,Michael Bay,191.0,184.0,0.0,691.0,Jaime King,3000.0,198539855.0,Action|Drama|History|Romance|War,Jennifer Garner,254111,5401,Mako,0.0,air raid|black smoke|japanese military|japanese navy|sunday,http://www.imdb.com/title/tt0213149/?ref_=fn_tt_tt_1,1999.0,English,USA,PG-13,140000000.0,2001.0,961.0,6.1,2.35,0
148,Die Another Day,Black and White,Lee Tamahori,264.0,133.0,93.0,746.0,Colin Salmon,769.0,160201106.0,Action|Adventure|Thriller,Toby Stephens,169914,2538,Rick Yune,0.0,catfight|clinic|colonel|diamond|patricide,http://www.imdb.com/title/tt0246460/?ref_=fn_tt_tt_1,1185.0,English,UK,PG-13,142000000.0,2002.0,766.0,6.1,2.35,0
254,The Aviator,Black and White,Martin Scorsese,267.0,170.0,17000.0,827.0,Adam Scott,29000.0,102608827.0,Biography|Drama,Leonardo DiCaprio,264318,34582,Frances Conroy,0.0,1920s|aviation|fight|spruce goose|test flight,http://www.imdb.com/title/tt0338751/?ref_=fn_tt_tt_1,799.0,English,USA,PG-13,110000000.0,2004.0,3000.0,7.5,2.35,0
269,Ali,Black and White,Michael Mann,174.0,165.0,0.0,780.0,Jada Pinkett Smith,10000.0,58183966.0,Biography|Drama|Sport,Will Smith,79186,14196,Joe Morton,1.0,african american protagonist|african americans|boxing gym|gym|rumble in the jungle,http://www.imdb.com/title/tt0248667/?ref_=fn_tt_tt_1,386.0,English,USA,R,107000000.0,2001.0,851.0,6.8,2.35,0
283,Casino Royale,Black and White,Martin Campbell,400.0,144.0,258.0,834.0,Tobias Menzies,6000.0,167007184.0,Action|Adventure|Thriller,Eva Green,470483,9125,Ivana Milicevic,1.0,casino|espionage|free running|james bond|terrorist,http://www.imdb.com/title/tt0381061/?ref_=fn_tt_tt_1,2301.0,English,UK,PG-13,150000000.0,2006.0,1000.0,8.0,2.35,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4879,Mutual Appreciation,Black and White,Andrew Bujalski,52.0,109.0,26.0,3.0,Kate Dollenmayer,26.0,,Comedy,Andrew Bujalski,1578,38,Justin Rice,0.0,friendship|guitarist|mumblecore|musician|new york,http://www.imdb.com/title/tt0446747/?ref_=fn_tt_tt_1,23.0,English,USA,R,,2005.0,6.0,6.9,1.66,91
4882,Clerks,Black and White,Kevin Smith,136.0,102.0,0.0,216.0,Brian O'Halloran,898.0,3151130.0,Comedy,Jason Mewes,181749,2103,Jeff Anderson,4.0,clerk|friend|hockey|video|video store,http://www.imdb.com/title/tt0109445/?ref_=fn_tt_tt_1,615.0,English,USA,R,230000.0,1994.0,657.0,7.8,1.37,0
4888,Slacker,Black and White,Richard Linklater,61.0,100.0,0.0,0.0,Richard Linklater,5.0,1227508.0,Comedy|Drama,Tommy Pallotta,15103,5,Jean Caffeine,0.0,austin texas|moon|pap smear|texas|twenty something,http://www.imdb.com/title/tt0102943/?ref_=fn_tt_tt_1,80.0,English,USA,R,23000.0,1991.0,0.0,7.1,1.37,2000
4895,Stories of Our Lives,Black and White,Jim Chuchu,6.0,60.0,0.0,4.0,Olwenya Maina,147.0,,Drama,Paul Ogola,70,170,Mugambi Nthiga,0.0,,http://www.imdb.com/title/tt3973612/?ref_=fn_tt_tt_1,1.0,Swahili,Kenya,,15000.0,2014.0,19.0,7.4,,45


In [160]:
# 4. 감독 이름에  James가 들어가는 영화조회
movie.query("director_name.notnull()").query("director_name.str.contains('James')")

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Avatar,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
26,Titanic,Color,James Cameron,315.0,194.0,0.0,794.0,Kate Winslet,29000.0,658672302.0,Drama|Romance,Leonardo DiCaprio,793059,45223,Gloria Stuart,0.0,artist|love|ship|titanic|wet,http://www.imdb.com/title/tt0120338/?ref_=fn_tt_tt_1,2528.0,English,USA,PG-13,200000000.0,1997.0,14000.0,7.7,2.35,26000
45,Furious 7,Color,James Wan,424.0,140.0,0.0,14000.0,Paul Walker,26000.0,350034110.0,Action|Crime|Thriller,Jason Statham,278232,79150,Vin Diesel,0.0,car falling off a cliff|hospital|revenge|star died before release|terrorist,http://www.imdb.com/title/tt2820852/?ref_=fn_tt_tt_1,657.0,English,USA,PG-13,190000000.0,2015.0,23000.0,7.2,2.35,94000
95,Guardians of the Galaxy,Color,James Gunn,653.0,121.0,571.0,3000.0,Vin Diesel,14000.0,333130696.0,Action|Adventure|Sci-Fi,Bradley Cooper,682155,32438,Djimon Hounsou,3.0,bounty hunter|outer space|raccoon|talking animal|tree,http://www.imdb.com/title/tt2015381/?ref_=fn_tt_tt_1,1097.0,English,USA,PG-13,170000000.0,2014.0,14000.0,8.1,2.35,96000
106,Alice Through the Looking Glass,Color,James Bobin,218.0,113.0,33.0,11000.0,Alan Rickman,40000.0,76846624.0,Adventure|Family|Fantasy,Johnny Depp,21352,80806,Anne Hathaway,1.0,clock|dark fantasy|mad hatter|queen|sequel,http://www.imdb.com/title/tt2567026/?ref_=fn_tt_tt_1,131.0,English,USA,PG,170000000.0,2016.0,25000.0,6.4,1.85,30000
230,How Do You Know,Color,James L. Brooks,168.0,121.0,274.0,157.0,Domenick Lombardozzi,273.0,30212620.0,Comedy|Drama|Romance,Shelley Conn,35066,993,Teyonah Parris,4.0,embarrassment|falling in love|love|love triangle|revelation,http://www.imdb.com/title/tt1341188/?ref_=fn_tt_tt_1,196.0,English,USA,PG-13,120000000.0,2010.0,216.0,5.3,1.85,0
231,Knight and Day,Color,James Mangold,283.0,117.0,446.0,877.0,Marc Blucas,10000.0,76418654.0,Action|Comedy|Romance,Tom Cruise,148280,12731,Jordi Mollà,0.0,chase|fbi|garage|spy|surveillance,http://www.imdb.com/title/tt1013743/?ref_=fn_tt_tt_1,348.0,English,USA,PG-13,117000000.0,2010.0,973.0,6.3,2.35,11000
236,The Wolverine,Color,James Mangold,440.0,138.0,446.0,929.0,Tao Okamoto,20000.0,132550960.0,Action|Adventure|Sci-Fi|Thriller,Hugh Jackman,328067,23755,Rila Fukushima,1.0,healing power|marvel comics|mecha|regeneration|self healing,http://www.imdb.com/title/tt1430132/?ref_=fn_tt_tt_1,533.0,English,USA,PG-13,120000000.0,2013.0,992.0,6.7,2.35,68000
285,Terminator 2: Judgment Day,Color,James Cameron,210.0,153.0,0.0,539.0,Jenette Goldstein,780.0,204843350.0,Action|Sci-Fi,Joe Morton,744891,2829,S. Epatha Merkerson,0.0,future|liquid metal|multiple cameos|sexy woman|time travel,http://www.imdb.com/title/tt0103064/?ref_=fn_tt_tt_1,983.0,English,USA,R,102000000.0,1991.0,604.0,8.5,2.35,13000
288,True Lies,Color,James Cameron,94.0,141.0,0.0,618.0,Tia Carrere,2000.0,146282411.0,Action|Comedy|Thriller,Jamie Lee Curtis,190439,4528,Tom Arnold,0.0,espionage|secret agent|secret mission|spy|spy film,http://www.imdb.com/title/tt0111503/?ref_=fn_tt_tt_1,351.0,English,USA,R,115000000.0,1994.0,1000.0,7.2,2.35,0
