## pandasのデータ構造
* Series(シリーズ)
    * 一次元のデータ構造
* DataFrame(データフレーム)
    * 二次元のデータ構造
* 階層インデックス
    * より高次元のデータを扱うとき
    
## データフレームの作成(初期化)

pandasをロードし、pdという名前でアクセスできるようにする
```
import pandas as pd
```

* データフレームとして扱いたいデータを `pd.DataFrame` に渡す
    * 戻り値はデータフレームのオブジェクト

In [26]:
import pandas as pd

# A flat list becomes a single column; `columns` supplies that column's label.
df = pd.DataFrame(
    data=[1, 2, 3],
    columns=["value"],
)
df

Unnamed: 0,value
0,1
1,2
2,3


## データフレームにタプルのリストを渡す

In [4]:
# Each tuple is one row; `columns` names the tuple positions in order.
pd.DataFrame(
    data=[("apple", 160), ("orange", 30), ("kiwi", 80)],
    columns=["name", "price"],
)

Unnamed: 0,name,price
0,apple,160
1,orange,30
2,kiwi,80


## データフレームにリストのディクショナリを渡す

In [28]:
# Dictionary of lists: every key is a column label, every list holds that column's values.
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,fruit,order,price
0,apple,12,160
1,orange,30,30
2,kiwi,8,80


## 後から列を追加する

In [27]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Assigning to a label that does not exist yet appends a new column;
# the product is computed element-wise across the two existing columns.
df["total"] = df["order"].mul(df["price"])
df

Unnamed: 0,fruit,order,price,total
0,apple,12,160,1920
1,orange,30,30,900
2,kiwi,8,80,640


## インデックスを指定する

* デフォルトでは、行のインデックスは0から始まる連番の数値
    * 任意の値をインデックスにセットできる

In [18]:
data = {
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

# `index` replaces the default integer row labels with the given names.
df = pd.DataFrame(data=data, index=["apple", "orange", "kiwi"])
df

Unnamed: 0,order,price
apple,12,160
orange,30,30
kiwi,8,80


In [15]:
data = {
    'price' : [160,30,80],
    'order' : [12,30,8]
}

df = pd.DataFrame(data, index=['apple', 'orange', 'kiwi'])
# Look up one row by its index label. `.ix` was deprecated in pandas 0.20 and
# removed in 1.0, so the label-based accessor `.loc` is used instead.
kiwi_row = df.loc['kiwi']
kiwi_row

order     8
price    80
Name: kiwi, dtype: int64

In [21]:
data = {
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data, index=["apple", "orange", "kiwi"])
# `shape` is a (row count, column count) tuple.
df.shape

(3, 2)

In [29]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Indexing with a list of labels selects those columns, in the order given.
df[["fruit", "order"]]

Unnamed: 0,fruit,order
0,apple,12
1,orange,30
2,kiwi,8


In [31]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# First two rows only.
df.head(n=2)

Unnamed: 0,fruit,order,price
0,apple,12,160
1,orange,30,30


In [33]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Last two rows only.
df.tail(n=2)

Unnamed: 0,fruit,order,price
1,orange,30,30
2,kiwi,8,80


In [45]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# A plain slice selects rows by position; the stop position is excluded.
df[:2]

Unnamed: 0,fruit,order,price
0,apple,12,160
1,orange,30,30


In [47]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# An open-ended slice: every row from position 1 to the end.
df[1:]

Unnamed: 0,fruit,order,price
1,orange,30,30
2,kiwi,8,80


In [49]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# `.loc` with a list of index labels picks exactly those rows.
wanted_labels = [0, 2]
df.loc[wanted_labels]

Unnamed: 0,fruit,order,price
0,apple,12,160
2,kiwi,8,80


In [55]:
data = {
    'fruit' : ['apple', 'orange', 'kiwi'],
    'price' : [160,30,80],
    'order' : [12,30,8]
}

df = pd.DataFrame(data)
# Draw two rows at random without replacement. A fixed random_state makes the
# draw repeatable, so Restart & Run All always shows the same rows.
sampled = df.sample(n=2, random_state=0)
sampled

Unnamed: 0,fruit,order,price
0,apple,12,160
2,kiwi,8,80


In [57]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Boolean mask: keep only the rows whose price exceeds 50.
df[df["price"] > 50]

Unnamed: 0,fruit,order,price
0,apple,12,160
2,kiwi,8,80


In [60]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Unlike boolean indexing, `where` keeps every row and blanks out (NaN)
# the rows for which the condition is false.
df.where(df["price"] > 50)

Unnamed: 0,fruit,order,price
0,apple,12.0,160.0
1,,,
2,kiwi,8.0,80.0


In [63]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# Mask the non-matching rows to NaN, then discard the rows that contain NaN.
df.where(df["price"] > 50).dropna()

Unnamed: 0,fruit,order,price
0,apple,12.0,160.0
2,kiwi,8.0,80.0


In [66]:
data = {
    'fruit' : ['apple', 'orange', 'kiwi'],
    'price' : [160,30,80],
    'order' : [12,30,8]
}

df = pd.DataFrame(data)
# Mask the non-matching rows, then fill the numeric gaps with each column's mean
# over the original frame. numeric_only=True is required because `fruit` holds
# strings: pandas >= 2.0 raises TypeError instead of silently skipping it.
# `fruit` has no mean, so its masked cell stays NaN.
filled = df.where(df.price > 50).fillna(df.mean(numeric_only=True))
filled

Unnamed: 0,fruit,order,price
0,apple,12.0,160.0
1,,16.666667,90.0
2,kiwi,8.0,80.0


In [3]:
data = {
    "fruit": ["apple", "orange", "kiwi"],
    "price": [160, 30, 80],
    "order": [12, 30, 8],
}

df = pd.DataFrame(data=data)
# axis=1 hands each row to the function; the returned values form a new Series.
df.apply(lambda row: row["fruit"].upper(), axis=1)

0     APPLE
1    ORANGE
2      KIWI
dtype: object

In [11]:
# Load the CSV file; the path is resolved relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='./ruby_1.csv')
df

Unnamed: 0,name,ruby,php,python,perl
0,A,100,40,70,80
1,B,60,90,80,10
2,C,90,60,60,60
3,D,80,70,70,80


In [14]:
# Order the rows by the `ruby` column, highest value first
# (pass ascending=True instead for lowest first).
sort_by_ruby = df.sort_values("ruby", ascending=False)
sort_by_ruby[["name", "ruby"]]

Unnamed: 0,name,ruby
0,A,100
2,C,90
3,D,80
1,B,60


In [17]:
# Row-wise average of the numeric columns.
# - numeric_only=True skips the string `name` column; pandas >= 2.0 raises
#   TypeError on it otherwise.
# - Dropping any existing `mean` column first keeps the cell idempotent, so a
#   re-run does not fold the previous result into the new average.
df['mean'] = df.drop(columns=['mean'], errors='ignore').mean(axis=1, numeric_only=True)
df

Unnamed: 0,name,ruby,php,python,perl,mean
0,A,100,40,70,80,72.5
1,B,60,90,80,10,60.0
2,C,90,60,60,60,67.5
3,D,80,70,70,80,75.0


In [22]:
# Sort by `mean` first and use `ruby` to break ties; both keys are descending.
sort_keys = ["mean", "ruby"]
sort_by_mean = df.sort_values(by=sort_keys, ascending=False)
sort_by_mean[["name", "mean", "ruby"]]

Unnamed: 0,name,mean,ruby
3,D,75.0,80
0,A,72.5,100
2,C,67.5,90
1,B,60.0,60
