In [1]:
import pandas as pd
import numpy as np

<div style="font-size:20px; color:pink; margin:20px">
Series (1 次元の値のリスト) を作成する
</div>
<div style="font-size:14px; color:white; margin:20px">
pd.Series() を用いて、1 次元のリスト (Series, シリーズと呼ばれます) を作成します。
</div>

In [6]:
# Build a numeric Series; the list contains np.nan, and the result is float64.
numeric_values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(data=numeric_values)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [7]:
# Create a DatetimeIndex of 6 consecutive days starting at 2013-01-01.
# Note: pd.date_range returns a DatetimeIndex, not a Series.
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

<div style="font-size:20px; color:pink; margin:20px">
データフレームを作成する
</div>
<div style="font-size:14px; color:white; margin:20px">
本例では、A～Fの各列に数値、文字列、日付、Numpy の行列などを格納します。
</div>

In [9]:
# Build a DataFrame from a dict of columns with mixed types.
# Scalar entries ('A', 'B', 'F') are repeated on every one of the 4 rows.
mixed_columns = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
}
df = pd.DataFrame(mixed_columns)
df

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


<div style="font-size:20px; color:pink; margin:20px">
Numpy の 行列からデータフレームを作成する
</div>
<div style="font-size:14px; color:white; margin:20px">
Numpy で作成した行列をデータフレームに変換することもできます。<br>
本例では、 6 x 4 の 2 次元の行列からデータフレームを作成し、各列に A, B, C, D という名前を付与します。<br>
</div>

In [11]:
# Draw a 6 x 4 NumPy array of random samples (values differ on every run).
matrix = np.random.randn(6,4)
matrix

array([[-2.1834165 ,  0.32500576, -0.22889135, -0.83121517],
       [-0.68752829,  0.09778893,  1.3524384 , -2.42973815],
       [ 0.51005315, -0.89387798, -0.58152641, -0.58137677],
       [ 0.41077997,  0.31005963,  0.03339636, -0.99423712],
       [-0.63529951,  0.63468166,  0.82877527, -1.03321644],
       [ 0.20224643, -1.43946221, -0.14375775, -0.78582368]])

In [12]:
# Wrap the NumPy array in a DataFrame and name its four columns 'A'..'D'.
df2 = pd.DataFrame(matrix, columns=list('ABCD'))
df2

Unnamed: 0,A,B,C,D
0,-2.183417,0.325006,-0.228891,-0.831215
1,-0.687528,0.097789,1.352438,-2.429738
2,0.510053,-0.893878,-0.581526,-0.581377
3,0.41078,0.31006,0.033396,-0.994237
4,-0.6353,0.634682,0.828775,-1.033216
5,0.202246,-1.439462,-0.143758,-0.785824


<div style="font-size:20px; color:pink; margin:20px">
ディクショナリからデータフレームを作成する
</div>
<div style="font-size:14px; color:white; margin:20px">
Pythonのディクショナリ(Python以外のプログラミング言語ではハッシュまたは連想配列とも呼ばれます)<br>
からデータフレームを作成するには、from_dict() メソッドを利用します。<br>
<br>
</div>

In [14]:
# Prepare three equally long sequences, one per future column.
a_values = [1, 2, 3, 4, 5]
b_values = np.random.rand(5)  # random floats; values differ on every run
c_values = ['apple', 'banana', 'strawberry', 'peach', 'orange']
# Map column name -> column values.
my_dict = {"A": a_values, "B": b_values, "C": c_values}
my_dict

{'A': [1, 2, 3, 4, 5],
 'B': array([ 0.51192913,  0.55827645,  0.61596844,  0.22933609,  0.11208512]),
 'C': ['apple', 'banana', 'strawberry', 'peach', 'orange']}

In [15]:
# Build a DataFrame from the dict: each key becomes a column name.
my_df = pd.DataFrame.from_dict(my_dict)
my_df

Unnamed: 0,A,B,C
0,1,0.511929,apple
1,2,0.558276,banana
2,3,0.615968,strawberry
3,4,0.229336,peach
4,5,0.112085,orange


<div style="font-size:20px; color:pink; margin:20px">
先頭 N 行を表示する
</div>
<div style="font-size:14px; color:white; margin:20px">
head([表示する行数]) メソッドでデータフレームの先頭 N 行を切り出すことができます。<br>
</div>

In [16]:
# Create and display the DataFrame df (6 x 4 random values, columns 'A'..'D').
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,1.531085,-0.647203,-0.21348,1.724237
1,1.325995,-0.190189,2.195453,-0.53665
2,0.419661,-1.161331,-1.648185,0.218613
3,-0.023383,2.267437,0.028358,-0.266701
4,-0.616398,1.355307,1.005018,1.153478
5,0.046643,0.958971,-0.443348,0.723653


In [17]:
# Show the first 3 rows.
df.head(3)

Unnamed: 0,A,B,C,D
0,1.531085,-0.647203,-0.21348,1.724237
1,1.325995,-0.190189,2.195453,-0.53665
2,0.419661,-1.161331,-1.648185,0.218613


In [19]:
# When the row count is omitted, head() shows the first 5 rows.
df.head()

Unnamed: 0,A,B,C,D
0,1.531085,-0.647203,-0.21348,1.724237
1,1.325995,-0.190189,2.195453,-0.53665
2,0.419661,-1.161331,-1.648185,0.218613
3,-0.023383,2.267437,0.028358,-0.266701
4,-0.616398,1.355307,1.005018,1.153478


<div style="font-size:20px; color:pink; margin:20px">
末尾 N 行を表示する
</div>
<div style="font-size:14px; color:white; margin:20px">
tail([表示する行数]) メソッドでデータフレームの末尾 N 行を切り出すことができます。<br>
</div>

In [20]:
# Show the last 2 rows.
df.tail(2)

Unnamed: 0,A,B,C,D
4,-0.616398,1.355307,1.005018,1.153478
5,0.046643,0.958971,-0.443348,0.723653


In [21]:
# When the row count is omitted, tail() shows the last 5 rows.
df.tail() 

Unnamed: 0,A,B,C,D
1,1.325995,-0.190189,2.195453,-0.53665
2,0.419661,-1.161331,-1.648185,0.218613
3,-0.023383,2.267437,0.028358,-0.266701
4,-0.616398,1.355307,1.005018,1.153478
5,0.046643,0.958971,-0.443348,0.723653


<div style="font-size:20px; color:pink; margin:20px">
基本統計量を算出する
</div>
<div style="font-size:14px; color:white; margin:20px">
describe()メソッドで、<br>
件数 (count)、<br>
平均値 (mean)、<br>
標準偏差 (std)、<br>
最小値(min)、<br>
第一四分位数 (25%)、<br>
中央値 (50%)、<br>
第三四分位数 (75%)、<br>
最大値 (max)<br>
を確認することができます。<br>
</div>

In [22]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.447267,0.430499,0.153969,0.502772
std,0.831983,1.310688,1.315088,0.862255
min,-0.616398,-1.161331,-1.648185,-0.53665
25%,-0.005877,-0.532949,-0.385881,-0.145372
50%,0.233152,0.384391,-0.092561,0.471133
75%,1.099411,1.256223,0.760853,1.046022
max,1.531085,2.267437,2.195453,1.724237


<div style="font-size:20px; color:pink; margin:20px">
各列の型を確認する
</div>
<div style="font-size:14px; color:white; margin:20px">
作成したデータフレームのdtypesアトリビュートにアクセスすることで、各列の型 (dtype) を確認することができます。<br>
</div>

In [24]:
# Create df2 from a dict of columns, one entry per dtype being demonstrated.
# Scalar entries ('A', 'B', 'F') are repeated on every one of the 4 rows.
typed_columns = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
}
df2 = pd.DataFrame(typed_columns)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [25]:
# List the dtype of every column of df2.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

<div style="font-size:20px; color:pink; margin:20px">
列名を表示する
</div>
<div style="font-size:14px; color:white; margin:20px">
データフレームの列名の一覧を取得するには、columns アトリビュートにアクセスします。<br>
</div>

In [27]:
# Use six consecutive dates as row labels for a 6 x 4 frame of random values.
dates = pd.date_range('20130101', periods=6)
df3 = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df3

Unnamed: 0,A,B,C,D
2013-01-01,0.956229,1.075041,0.444721,-0.776505
2013-01-02,0.895689,-1.10637,0.557399,-0.360762
2013-01-03,-0.444959,-0.42426,-0.200484,1.440919
2013-01-04,0.114687,0.758478,-0.232389,-1.33952
2013-01-05,-1.190827,-0.485768,-0.321105,-0.739574
2013-01-06,-0.806677,-0.674954,0.233081,1.016608


In [28]:
# Column labels of df3, returned as an Index object.
df3.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

<div style="font-size:20px; color:pink; margin:20px">
行名 (index) を表示する
</div>
<div style="font-size:14px; color:white; margin:20px">
データフレームの行名 (インデックス) の一覧を取得するには、index アトリビュートにアクセスします。<br>
</div>

In [29]:
# Row labels of df3 (here a DatetimeIndex, because dates were used as the index).
df3.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

<div style="font-size:20px; color:pink; margin:20px">
値のみを 2 次元行列として表示する
</div>
<div style="font-size:14px; color:white; margin:20px">
values アトリビュートにアクセスすることで、列名、行名を除いた値のみの行列を取得できます。<br>
</div>

In [30]:
# Underlying data as a 2-D NumPy array, without row or column labels.
df.values

array([[ 1.53108529, -0.64720292, -0.21347971,  1.72423688],
       [ 1.32599467, -0.19018889,  2.19545318, -0.53665009],
       [ 0.41966088, -1.16133088, -1.64818517,  0.21861308],
       [-0.02338307,  2.26743682,  0.02835842, -0.26670091],
       [-0.61639795,  1.35530738,  1.00501812,  1.15347833],
       [ 0.04664267,  0.95897051, -0.44334841,  0.7236532 ]])

<div style="font-size:20px; color:pink; margin:20px">
行を追加する
</div>
<div style="font-size:14px; color:white; margin:20px">
<br>
</div>

In [31]:
# Base frame with two rows and columns 'A' and 'B'.
base_rows = [[1, 2], [3, 4]]
df = pd.DataFrame(data=base_rows, columns=['A', 'B'])
df

Unnamed: 0,A,B
0,1,2
1,3,4


In [33]:
# Single-row frame with the same columns, to be appended to df.
extra_row = [[5, 6]]
df2 = pd.DataFrame(data=extra_row, columns=['A', 'B'])
df2

Unnamed: 0,A,B
0,5,6


In [34]:
# Append the rows of df2 below df and return a new frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent. Original row labels are kept,
# so the appended row still carries label 0.
pd.concat([df, df2])

Unnamed: 0,A,B
0,1,2
1,3,4
0,5,6


<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
複数行の追加は以下のようにして行います。<br>
</div>

In [35]:
# Two-row frame with the same columns, used to append several rows at once.
extra_rows = [[5, 6], [7, 8]]
df3 = pd.DataFrame(data=extra_rows, columns=['A', 'B'])
df3

Unnamed: 0,A,B
0,5,6
1,7,8


In [36]:
# Append all rows of df3 below df and return a new frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent. Original row labels are kept,
# so labels 0 and 1 appear twice in the result.
pd.concat([df, df3])

Unnamed: 0,A,B
0,1,2
1,3,4
0,5,6
1,7,8


<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
上記の例では、元の行番号のまま追加が行われますが、<br>
ignore_index=True パラメータを指定することで、新たな行番号を割り当てることができます。<br>
</div>

In [38]:
# Concatenate df and df3, discarding the original row labels so the result
# is renumbered from 0. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in 2.0.
pd.concat([df, df3], ignore_index=True)

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6
3,7,8


<div style="font-size:20px; color:pink; margin:20px">
列 (カラム) を追加する
</div>
<div style="font-size:14px; color:white; margin:20px">
作成済みのデータフレームに新しい列名を追加することで、列の追加ができます。<br>
追加するデータは Python のリストや Numpy の行列 (Array) を指定できます。<br>
</div>

In [39]:
# Two-row frame of people; the ids are string literals ("0001", "0002").
people_rows = [["0001", "John"], ["0002", "Lily"]]
df = pd.DataFrame(data=people_rows, columns=['id', 'name'])
df

Unnamed: 0,id,name
0,1,John
1,2,Lily


In [40]:
# Add a "job" column by assigning a Python list (one value per row).
df['job'] = ['Engineer', 'Sales']
df

Unnamed: 0,id,name,job
0,1,John,Engineer
1,2,Lily,Sales


In [41]:
# Add an "age" column, this time from a NumPy array.
df['age'] = np.array([35, 25])
df

Unnamed: 0,id,name,job,age
0,1,John,Engineer,35
1,2,Lily,Sales,25


<div style="font-size:20px; color:pink; margin:20px">
特定の行を削除する
</div>
<div style="font-size:14px; color:white; margin:20px">
DataFrame.drop() メソッドを利用して、インデックスに基づいて特定の行の削除を行うことができます。<br>
リストを指定して、複数の行を一度に削除することもできます。<br>
</div>

In [42]:
# Fresh 6 x 4 frame of random values to demonstrate row/column removal.
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,-1.169756,0.400613,-0.832303,1.061209
1,-0.673447,0.520877,0.487395,0.34407
2,1.519453,-0.522408,0.181654,-0.770757
3,0.790137,0.164818,1.174687,-0.487498
4,1.450296,-0.402057,2.023686,0.886987
5,-0.02542,0.78051,0.420141,-0.232168


In [46]:
# Drop the row labelled 5. A new frame is returned; df itself is not modified.
df.drop(5)

Unnamed: 0,A,B,C,D
0,-1.169756,0.400613,-0.832303,1.061209
1,-0.673447,0.520877,0.487395,0.34407
2,1.519453,-0.522408,0.181654,-0.770757
3,0.790137,0.164818,1.174687,-0.487498
4,1.450296,-0.402057,2.023686,0.886987


In [47]:
# Drop the rows labelled 3 and 4 in one call by passing a list of labels.
df.drop([3,4])

Unnamed: 0,A,B,C,D
0,-1.169756,0.400613,-0.832303,1.061209
1,-0.673447,0.520877,0.487395,0.34407
2,1.519453,-0.522408,0.181654,-0.770757
5,-0.02542,0.78051,0.420141,-0.232168


<div style="font-size:20px; color:pink; margin:20px">
特定の列を削除する
</div>
<div style="font-size:14px; color:white; margin:20px">
列の削除は行と同様に、DataFrame.drop() メソッドを利用しますが、<br>
引数に、axis=1 を指定し、列の削除であることを指定します。<br>
</div>

In [49]:
# Drop column 'A'; axis=1 tells drop() that the label refers to a column.
df.drop('A', axis=1)

Unnamed: 0,B,C,D
0,0.400613,-0.832303,1.061209
1,0.520877,0.487395,0.34407
2,-0.522408,0.181654,-0.770757
3,0.164818,1.174687,-0.487498
4,-0.402057,2.023686,0.886987
5,0.78051,0.420141,-0.232168


<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
より簡単な方法として、Python の del ステートメントを利用する方法もあります<br>
</div>

In [57]:
# Build a fresh 6 x 4 frame, then remove column 'A' in place with `del`.
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))
del df['A']
df

Unnamed: 0,B,C,D
0,-0.725425,-0.077739,0.400627
1,-0.752783,-0.393387,0.080767
2,-0.831585,0.153662,-0.878016
3,0.340381,-0.851347,-0.391232
4,0.010295,-0.085846,-0.352831
5,-1.211167,-0.691728,-2.578156


<div style="font-size:20px; color:pink; margin:20px">
行の長さを確認する
</div>
<div style="font-size:14px; color:white; margin:20px">
index アトリビュートでインデックスの一覧を取得し、len 関数でその長さを求めると行の長さが取得できます。
<br>
</div>

In [58]:
# Fresh 6 x 4 frame of random values used for the size checks below.
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,0.370902,-0.474617,-0.016801,0.591075
1,0.118255,0.436234,-0.000237,0.719359
2,0.668384,-1.417625,0.169167,0.156969
3,-1.382907,0.024202,1.919004,-1.160415
4,-1.747311,2.064187,0.114275,-0.573807
5,-0.853661,-0.397523,-0.219555,0.595659


In [59]:
# Number of rows: the length of the row index.
len(df.index)

6

<div style="font-size:20px; color:pink; margin:20px">
列の長さを確認する
</div>
<div style="font-size:14px; color:white; margin:20px">
columns アトリビュートでカラム名の一覧を取得し、len 関数でその長さを求めると列の長さが取得できます。<br>
</div>

In [60]:
# Number of columns: the length of the column index.
len(df.columns)

4

<div style="font-size:20px; color:pink; margin:20px">
行と列の長さを確認する
</div>
<div style="font-size:14px; color:white; margin:20px">
shape アトリビュートにアクセスすることで、行と列の長さを (行数, 列数) のタプルで取得できます。<br>
</div>

In [62]:
# Row and column counts together, as a (rows, columns) tuple.
df.shape

(6, 4)

<div style="font-size:20px; color:pink; margin:20px">
行⇔列を転置する
</div>
<div style="font-size:14px; color:white; margin:20px">
データフレームの T アトリビュートにアクセスすると、<br>
データフレームの縦、横を入れ替えたデータフレームを取得できます。<br>
なお、T は Transpose の頭文字です。<br>
</div>

In [63]:
# Date-indexed 6 x 4 frame of random values, used to demonstrate transposition.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.343168,1.613013,0.452348,-0.292967
2013-01-02,1.019054,0.012202,0.005376,0.328679
2013-01-03,-1.263932,-0.370876,-0.703605,0.405581
2013-01-04,-0.966605,-1.381711,1.439222,-1.249433
2013-01-05,-0.770852,-0.118034,0.142976,0.385132
2013-01-06,-0.018214,-1.139524,0.776272,0.846979


In [64]:
# Transpose: row labels become column labels and vice versa.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-2.343168,1.019054,-1.263932,-0.966605,-0.770852,-0.018214
B,1.613013,0.012202,-0.370876,-1.381711,-0.118034,-1.139524
C,0.452348,0.005376,-0.703605,1.439222,0.142976,0.776272
D,-0.292967,0.328679,0.405581,-1.249433,0.385132,0.846979


<div style="font-size:20px; color:pink; margin:20px">
インデックス (行名・列名) に基づいてソートする
</div>
<div style="font-size:14px; color:white; margin:20px">
sort_index() メソッドを利用して、インデックス（カラム名、行名）に基づいてソートを行うことができます。<br>
ascending=False は、降順にソートすることを示します。なお、ascending=False を省略すると、昇順でのソートとなります。<br>
axis=1 は列名 (カラム名) に基づくソートを意味し、省略した場合 (axis=0) は行名に基づくソートとなります。<br>
</div>

In [65]:
# Date-indexed 6 x 4 frame of random values, used by the sorting examples.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.329581,0.929931,-0.005782,-0.942937
2013-01-02,-1.305875,-0.416381,-0.313595,-2.352993
2013-01-03,-1.699662,0.044158,-0.358884,-0.904168
2013-01-04,-0.909263,-0.421862,-0.207443,-0.53354
2013-01-05,1.755351,0.236819,2.109468,1.158924
2013-01-06,0.665263,0.809805,-1.354646,-0.040172


In [67]:
# Sort rows by their index labels in descending order.
df.sort_index(ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,0.665263,0.809805,-1.354646,-0.040172
2013-01-05,1.755351,0.236819,2.109468,1.158924
2013-01-04,-0.909263,-0.421862,-0.207443,-0.53354
2013-01-03,-1.699662,0.044158,-0.358884,-0.904168
2013-01-02,-1.305875,-0.416381,-0.313595,-2.352993
2013-01-01,-0.329581,0.929931,-0.005782,-0.942937


In [69]:
# Sort columns by their names in descending order (axis=1 selects column labels).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.942937,-0.005782,0.929931,-0.329581
2013-01-02,-2.352993,-0.313595,-0.416381,-1.305875
2013-01-03,-0.904168,-0.358884,0.044158,-1.699662
2013-01-04,-0.53354,-0.207443,-0.421862,-0.909263
2013-01-05,1.158924,2.109468,0.236819,1.755351
2013-01-06,-0.040172,-1.354646,0.809805,0.665263


<div style="font-size:20px; color:pink; margin:20px">
値に基づいてソートする
</div>
<div style="font-size:14px; color:white; margin:20px">
sort_values() メソッドを利用して、データフレームを値に基づいて並び替えを行うことができます。<br>
</div>

In [70]:
# Sort rows by the values in column 'B', ascending.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-04,-0.909263,-0.421862,-0.207443,-0.53354
2013-01-02,-1.305875,-0.416381,-0.313595,-2.352993
2013-01-03,-1.699662,0.044158,-0.358884,-0.904168
2013-01-05,1.755351,0.236819,2.109468,1.158924
2013-01-06,0.665263,0.809805,-1.354646,-0.040172
2013-01-01,-0.329581,0.929931,-0.005782,-0.942937


In [71]:
# Sort rows by the values in column 'C', descending.
df.sort_values(by='C', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-05,1.755351,0.236819,2.109468,1.158924
2013-01-01,-0.329581,0.929931,-0.005782,-0.942937
2013-01-04,-0.909263,-0.421862,-0.207443,-0.53354
2013-01-02,-1.305875,-0.416381,-0.313595,-2.352993
2013-01-03,-1.699662,0.044158,-0.358884,-0.904168
2013-01-06,0.665263,0.809805,-1.354646,-0.040172


<div style="font-size:20px; color:pink; margin:20px">
2 つのデータフレームを結合する
</div>
<div style="font-size:14px; color:white; margin:20px">
以下の例では、2 つのデータフレームを作成し、merge() メソッドを用いて内部結合 (inner join) を行います。<br>
[Merge メソッドの使い方]<br>
merge(left, right, how='inner', on=None, left_on=None, right_on=None,<br>
      left_index=False, right_index=False, sort=False,<br>
      suffixes=('_x', '_y'), copy=True, indicator=False)<br>
<br>
<p></p>
<p><b>引数</b></p>
<table>
<tr>
<th>left</th>
<td>データフレーム (DataFrame) オブジェクト。</td>
</tr>
<tr>
<th>right</th>
<td>もう一つのデータフレーム (DataFrame) オブジェクト。</td>
</tr>
<tr>
<th>on</th>
<td>結合に用いる列の名前。left と right のデータフレーム両方に存在する必要があります。<br>
値が設定されていなく、かつ left_index と right_index も設定されていない場合は、結合に用いるキーを推測により選択します。</td>
</tr>
<tr>
<th>left_on</th>
<td>left のデータフレームでキーとして用いる列名、または配列。</td>
</tr>
<tr>
<th>right_on</th>
<td>right のデータフレームでキーとして用いる列名、または配列を選択。</td>
</tr>
<tr>
<th>left_index</th>
<td>True を設定すると、left のデータフレームの行ラベルを結合のキーとして用います。<br>
MultiIndex (階層的なインデックス構造) を持つデータフレームの場合、階層数を left と right で合わせる必要があります。</td>
</tr>
<tr>
<th>right_index</th>
<td>left_index と同じです。</td>
</tr>
<tr>
<th>how</th>
<td>&#8216;left&#8217;, &#8216;right&#8217;, &#8216;outer&#8217;, &#8216;inner&#8217; のいずれかを設定。 (デフォルトは &#8220;inner&#8221;)</td>
</tr>
<tr>
<th>sort</th>
<td>True を設定すると、結合後のデータフレームを結合キーでソートします。(デフォルトは False)</td>
</tr>
<tr>
<th>suffixes</th>
<td>同一のカラム名が存在した場合に、後ろに文字列を追加して区別します。 (デフォルトは &#8216;_x&#8217;, &#8216;_y&#8217;)</td>
</tr>
<tr>
<th>copy</th>
<td>常に与えられたデータフレームをコピーします。場合によっては、False に設定すると、<br>
パフォーマンスやメモリの使用量を向上できる場合があります。 (デフォルトは True)</td>
</tr>
<tr>
<th>indicator</th>
<td>_merge という名前のカラムを出力後のデータフレームに追加し、結合前の行に関する情報を格納します。</td>
</tr>
</table>
<p></p>

</div>

In [72]:
# Create the customer frame: one row per customer, keyed by customer_id.
customer_rows = [["0001", "John"], ["0002", "Lily"]]
customer = pd.DataFrame(data=customer_rows, columns=['customer_id', 'name'])
customer

Unnamed: 0,customer_id,name
0,1,John
1,2,Lily


In [75]:
# Create the order frame: one row per ordered product; customer_id repeats
# when a customer ordered more than one product.
order_rows = [
    ["0001", "Smartphone"],
    ["0001", "Wireless Charger"],
    ["0002", "Wearable watch"],
]
order = pd.DataFrame(data=order_rows, columns=['customer_id', 'product_name'])
order

Unnamed: 0,customer_id,product_name
0,1,Smartphone
1,1,Wireless Charger
2,2,Wearable watch


In [76]:
# Inner-join customer and order on the shared 'customer_id' column.
pd.merge(customer, order, how='inner', on='customer_id')

Unnamed: 0,customer_id,name,product_name
0,1,John,Smartphone
1,1,John,Wireless Charger
2,2,Lily,Wearable watch


<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
結合に用いるキーが異なる場合は、left_on, right_on 引数で指定します。<br>
</div>

In [77]:
# Create the employee frame: each employee with the name of their office.
employee_rows = [["Miki", "Tokyo"], ["Ichiro", "Osaka"]]
employee = pd.DataFrame(data=employee_rows, columns=['employee_name', 'office_name'])
employee

Unnamed: 0,employee_name,office_name
0,Miki,Tokyo
1,Ichiro,Osaka


In [80]:
# Create the office frame: each office name with its address.
office_rows = [
    ["Tokyo", "1-2-3 Chiyoda-ku Tokyo"],
    ["Osaka", "3-2-1 Chuo-ku Osaka"],
]
office = pd.DataFrame(data=office_rows, columns=['name', 'address'])
office

Unnamed: 0,name,address
0,Tokyo,1-2-3 Chiyoda-ku Tokyo
1,Osaka,3-2-1 Chuo-ku Osaka


In [81]:
# Inner-join employee and office. The key columns have different names
# ('office_name' vs 'name'), so they are given via left_on / right_on.
pd.merge(employee, office, how='inner', left_on='office_name', right_on='name')

Unnamed: 0,employee_name,office_name,name,address
0,Miki,Tokyo,Tokyo,1-2-3 Chiyoda-ku Tokyo
1,Ichiro,Osaka,Osaka,3-2-1 Chuo-ku Osaka


<div style="font-size:20px; color:pink; margin:20px">
</div>
<div style="font-size:14px; color:white; margin:20px">
外部結合 (left join, right join, outer join) を行う際は、how 引数に 'left', 'right', 'outer' をそれぞれ指定します。
</div>

In [82]:
# Create the products frame, built column by column.
products = pd.DataFrame({
    'product_id': ["P-001", "P-002", "P-003"],
    'name': ["Orange", "Apple", "Blueberry"],
})
products

Unnamed: 0,product_id,name
0,P-001,Orange
1,P-002,Apple
2,P-003,Blueberry


In [83]:
# Create the stock frame, built column by column.
# "P-010" has no matching product and "P-003" has no stock row, which the
# outer-join examples rely on.
stock = pd.DataFrame({
    'product_id': ["P-001", "P-002", "P-010"],
    'amount': [10, 20, 30],
})
stock

Unnamed: 0,product_id,amount
0,P-001,10
1,P-002,20
2,P-010,30


In [87]:
# Left (outer) join: keep every row of products; products without a stock
# row get a missing value in 'amount'.
pd.merge(products, stock, how='left', on='product_id')

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10.0
1,P-002,Apple,20.0
2,P-003,Blueberry,


In [88]:
# Right (outer) join: keep every row of stock; stock rows without a matching
# product get a missing value in 'name'.
pd.merge(products, stock, how='right', on='product_id')

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10
1,P-002,Apple,20
2,P-010,,30


In [89]:
# Full outer join: keep the rows of both frames; unmatched cells are missing.
pd.merge(products, stock, how='outer', on='product_id')

Unnamed: 0,product_id,name,amount
0,P-001,Orange,10.0
1,P-002,Apple,20.0
2,P-003,Blueberry,
3,P-010,,30.0


<div style="font-size:20px; color:pink; margin:20px">
2 つのデータフレームを結合する (concat)
</div>
<div style="font-size:14px; color:white; margin:20px">
2 つのデータフレームを縦方向に結合するには、pd.concat メソッドを用いて行えます。<br>
</div>

In [91]:
# First frame to concatenate, built column by column.
df1 = pd.DataFrame({
    'id': ["0001", "0002"],
    'name': ["John", "Lily"],
})
df1

Unnamed: 0,id,name
0,1,John
1,2,Lily


In [92]:
# Second frame to concatenate, with the same columns as df1.
df2 = pd.DataFrame({
    'id': ["0003", "0004"],
    'name': ["Chris", "Jessica"],
})
df2

Unnamed: 0,id,name
0,3,Chris
1,4,Jessica


In [94]:
# Stack df1 and df2 vertically and renumber the rows from 0.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,id,name
0,1,John
1,2,Lily
2,3,Chris
3,4,Jessica


<div style="font-size:20px; color:pink; margin:20px">
Pandas のファイルの読み込み関数
</div>
<div style="font-size:14px; color:white; margin:20px">
CSV ファイルのロード: read_csv()<br>
Pandas には、CSV ファイルをロードする関数として、read_csv() メソッドが用意されています。<br>
<br>
テキストファイルのロード: read_table()<br>
タブ区切りなど、一般的な区切り文字付きのテキストファイルを読み込む関数として、read_table() メソッドが用意されています。<br>
<br>
</div>

<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">

<p><b>read_csv()</b> メソッドの書式</p>
<p></p><!-- Crayon Syntax Highlighter v_2.7.2_beta -->

		<div id="crayon-5a3e3d30eb5c4847891295" class="crayon-syntax crayon-theme-github crayon-font-monaco crayon-os-mac print-yes notranslate" data-settings=" no-popup minimize scroll-mouseover" style=" margin-top: 12px; margin-bottom: 12px; font-size: 13px !important; line-height: 16px !important;">
		
			<div class="crayon-toolbar" data-settings=" show" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><span class="crayon-title"></span>
			<div class="crayon-tools" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><div class="crayon-button crayon-wrap-button" title="Toggle Line Wrap"><div class="crayon-button-icon"></div></div><span class="crayon-language">Python</span></div></div>
			<div class="crayon-info" style="min-height: 18.2px !important; line-height: 18.2px !important;"></div>
			<div class="crayon-plain-wrap"></div>
			<div class="crayon-main" style="">
				<table class="crayon-table">
					<tr class="crayon-row">
				<td class="crayon-nums " data-settings="show">
					<div class="crayon-nums-content" style="font-size: 13px !important; line-height: 16px !important;"><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-1">1</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-2">2</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-3">3</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-4">4</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-5">5</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-6">6</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-7">7</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-8">8</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-9">9</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-10">10</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-11">11</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-12">12</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-13">13</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-14">14</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-15">15</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5c4847891295-16">16</div></div>
				</td>
						<td class="crayon-code"><div class="crayon-pre" style="font-size: 13px !important; line-height: 16px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;"><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-1"><span class="crayon-v">pandas</span><span class="crayon-sy">.</span><span class="crayon-e">read_csv</span><span class="crayon-sy">(</span><span class="crayon-v">filepath_or_buffer</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">sep</span><span class="crayon-o">=</span><span class="crayon-s">', '</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">delimiter</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">header</span><span class="crayon-o">=</span><span class="crayon-s">'infer'</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-2"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">names</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">index_col</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">usecols</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">squeeze</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-3"> <span 
class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">prefix</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">mangle_dupe_cols</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dtype</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">engine</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-4"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">converters</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">true_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">false_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-5"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> 
</span><span class="crayon-v">skipinitialspace</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skiprows</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skipfooter</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-6"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">nrows</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">na_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">keep_default_na</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">na_filter</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-7"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">verbose</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skip_blank_lines</span><span 
class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">parse_dates</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-8"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">infer_datetime_format</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">keep_date_col</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-9"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">date_parser</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dayfirst</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">iterator</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">chunksize</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-10"> <span class="crayon-h"> </span> <span class="crayon-h"> 
</span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">compression</span><span class="crayon-o">=</span><span class="crayon-s">'infer'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">thousands</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-k ">decimal</span><span class="crayon-o">=</span><span class="crayon-s">'.'</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-11"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">lineterminator</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">quotechar</span><span class="crayon-o">=</span><span class="crayon-s">'"'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">quoting</span><span class="crayon-o">=</span><span class="crayon-cn">0</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">escapechar</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-12"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span 
class="crayon-v">comment</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">encoding</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dialect</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">tupleize_cols</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-13"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">error_bad_lines</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">warn_bad_lines</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skip_footer</span><span class="crayon-o">=</span><span class="crayon-cn">0</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-14"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">doublequote</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">delim_whitespace</span><span 
class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">as_recarray</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-15"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">compact_ints</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">use_unsigned</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">low_memory</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5c4847891295-16"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">buffer_lines</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">memory_map</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">float_precision</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">)</span></div></div></td>
					</tr>
				</table>
			</div>
		</div>
<!-- [Format Time: 0.0109 seconds] -->
<p></p>
<p><b>read_table()</b> メソッドの書式</p>
<p><code>read_csv()</code> と<code>read_table()</code> の違いは、区切り文字がカンマ (,)になっているか、タブ (\t) になっているかです。</p>
<p></p><!-- Crayon Syntax Highlighter v_2.7.2_beta -->

		<div id="crayon-5a3e3d30eb5cf312273984" class="crayon-syntax crayon-theme-github crayon-font-monaco crayon-os-mac print-yes notranslate" data-settings=" no-popup minimize scroll-mouseover" style=" margin-top: 12px; margin-bottom: 12px; font-size: 13px !important; line-height: 16px !important;">
		
			<div class="crayon-toolbar" data-settings=" show" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><span class="crayon-title"></span>
			<div class="crayon-tools" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><div class="crayon-button crayon-wrap-button" title="Toggle Line Wrap"><div class="crayon-button-icon"></div></div><span class="crayon-language">Python</span></div></div>
			<div class="crayon-info" style="min-height: 18.2px !important; line-height: 18.2px !important;"></div>
			<div class="crayon-plain-wrap"></div>
			<div class="crayon-main" style="">
				<table class="crayon-table">
					<tr class="crayon-row">
				<td class="crayon-nums " data-settings="show">
					<div class="crayon-nums-content" style="font-size: 13px !important; line-height: 16px !important;"><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-1">1</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-2">2</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-3">3</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-4">4</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-5">5</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-6">6</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-7">7</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-8">8</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-9">9</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-10">10</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-11">11</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-12">12</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-13">13</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-14">14</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-15">15</div><div class="crayon-num" data-line="crayon-5a3e3d30eb5cf312273984-16">16</div></div>
				</td>
						<td class="crayon-code"><div class="crayon-pre" style="font-size: 13px !important; line-height: 16px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;"><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-1"><span class="crayon-v">pandas</span><span class="crayon-sy">.</span><span class="crayon-e">read_csv</span><span class="crayon-sy">(</span><span class="crayon-v">filepath_or_buffer</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">sep</span><span class="crayon-o">=</span><span class="crayon-s">', '</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">delimiter</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">header</span><span class="crayon-o">=</span><span class="crayon-s">'infer'</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-2"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">names</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">index_col</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-v">usecols</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">squeeze</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-3"> <span class="crayon-h"> </span> <span 
class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">prefix</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">mangle_dupe_cols</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dtype</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">engine</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-4"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">converters</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">true_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">false_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-5"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span 
class="crayon-v">skipinitialspace</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skiprows</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skipfooter</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-6"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">nrows</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">na_values</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">keep_default_na</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">na_filter</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-7"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">verbose</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skip_blank_lines</span><span 
class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-v">parse_dates</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-8"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">infer_datetime_format</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">keep_date_col</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-9"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">date_parser</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dayfirst</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-v">iterator</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">chunksize</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-10"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> 
</span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">compression</span><span class="crayon-o">=</span><span class="crayon-s">'infer'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">thousands</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-k ">decimal</span><span class="crayon-o">=</span><span class="crayon-s">'.'</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-11"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">lineterminator</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-v">quotechar</span><span class="crayon-o">=</span><span class="crayon-s">'"'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">quoting</span><span class="crayon-o">=</span><span class="crayon-cn">0</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">escapechar</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-12"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">comment</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span 
class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">encoding</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">dialect</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">tupleize_cols</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-13"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">error_bad_lines</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">warn_bad_lines</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">skip_footer</span><span class="crayon-o">=</span><span class="crayon-cn">0</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-14"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">doublequote</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">delim_whitespace</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span 
class="crayon-v">as_recarray</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-15"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">compact_ints</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">use_unsigned</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">low_memory</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e3d30eb5cf312273984-16"> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span> <span class="crayon-h"> </span><span class="crayon-v">buffer_lines</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-v">memory_map</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">float_precision</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">)</span></div></div></td>
					</tr>
				</table>
			</div>
		</div>
<!-- [Format Time: 0.0077 seconds] -->
<p></p>
</div>

<div style="font-size:20px; color:pink; margin:20px">
</div>
<div style="font-size:14px; color:white; margin:20px">

<p><b>主要な引数</b></p>
<p><code>read_csv()</code> , <code>read_table()</code> 共通で利用可能な引数のうち、主要なものを紹介します。</p>
<table>
<tr>
<th>filepath_or_buffer</th>
<td>読み込み元のファイルのパスや URL を指定。</td>
</tr>
<tr>
<th>sep</th>
<td>区切り文字。 (デフォルト: &#8216;,&#8217; (read_csv) / &#8216;\t&#8217; (read_table) ) </td>
</tr>
<tr>
<th>delimiter</th>
<td>sep の代わりに delimiter 引数でも区切り文字を指定可能。 (デフォルト: None) </td>
</tr>
<tr>
<th>header</th>
<td>ヘッダ行 (列名) として用いる行の行番号 (0 始まり) を整数で指定。 (デフォルト: &#8216;infer&#8217;) </td>
</tr>
<tr>
<th>names</th>
<td>ヘッダ行をリストで指定。 (デフォルト: None) </td>
</tr>
<tr>
<th>index_col</th>
<td>行のインデックスに用いる列番号。 (デフォルト: None) </td>
</tr>
<tr>
<th>dtype</th>
<td>各列のデータタイプ。例: {‘a’: np.float64, ‘b’: np.int32}  (デフォルト: None) </td>
</tr>
<tr>
<th>skiprows</th>
<td>先頭から読み込みをスキップする行数。 (デフォルト: None) </td>
</tr>
<tr>
<th>skipfooter</th>
<td>末尾から読み込みをスキップする行数。 (デフォルト: None) </td>
</tr>
<tr>
<th>nrows</th>
<td>読み込む行数。 (デフォルト: None) </td>
</tr>
<tr>
<th>quotechar</th>
<td>ダブルクォーテーションなどでクオートされている場合のクオート文字。 (デフォルト: &#39;&quot;&#39;) </td>
</tr>
<tr>
<th>escapechar</th>
<td>エスケープされている場合のエスケープ文字。 (デフォルト: None) </td>
</tr>
<tr>
<th>comment</th>
<td>コメント行の行頭文字を指定。指定した文字で始まる行は無視されます。 (デフォルト: None) </td>
</tr>
<tr>
<th>encoding</th>
<td>文字コード。&#8216;utf-8&#8217;, &#8216;shift_jis&#8217;, &#8216;euc_jp&#8217; などを指定。参考: <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">Python の文字コードの一覧</a> (デフォルト: None) </td>
</tr>
</table>

</div>

<div style="font-size:20px; color:pink; margin:20px">
CSV ファイル / テキストファイル の読み込み例 (ローカルファイル)
</div>
<div style="font-size:14px; color:white; margin:20px">
事前に用意したファイルを読み込むには、Pythonファイルと同じフォルダにファイルを配置し、ファイル名を直接指定します。<br>
</div>

In [96]:
# Load the comma-separated sample file; the relative path is resolved
# against the current working directory.
dataset1 = pd.read_csv(filepath_or_buffer="sample_dataset.csv")
dataset1

Unnamed: 0,ID,Name,Birthdate,Sex,Occupation,Salary
0,ID-0001,Abe,1985/1/1,M,Engineer,8422213
1,ID-0002,Saito,1970/2/11,F,Professor,8222588
2,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
3,ID-0004,Tanaka,1980/4/22,F,Sales,8505218
4,ID-0005,Okamoto,1995/5/25,M,Student,218103


In [100]:
# Load the text version of the sample file; read_table() splits fields on
# tabs instead of commas.
dataset2 = pd.read_table(filepath_or_buffer="sample_dataset.txt")
dataset2

Unnamed: 0,ID,Name,Birthdate,Sex,Occupation,Salary
0,ID-0001,Abe,1985/1/1,M,Engineer,8422213
1,ID-0002,Saito,1970/2/11,F,Professor,8222588
2,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
3,ID-0004,Tanaka,1980/4/22,F,Sales,8505218
4,ID-0005,Okamoto,1995/5/25,M,Student,218103


<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
コードの例 (日本語を含むファイルを読み込む例)<br>
日本語や韓国語、中国語などのマルチバイト文字を含むファイルを読み込む場合は、<br>
引数に encoding="<文字コード>" を指定することで、文字化けせずに正しく読み込むことができます。<br>
Python で使える文字コードの一覧は <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">Standard Encodings</a> にあります。
</div>

In [101]:
# Read a UTF-8 encoded CSV. The file must already exist in the working
# directory; otherwise pandas raises FileNotFoundError.
dataset3 = pd.read_csv(
    filepath_or_buffer="sample_dataset.utf8.csv",
    encoding="utf_8",
)
dataset3

FileNotFoundError: File b'sample_dataset.utf8.csv' does not exist

In [102]:
# Read a Shift-JIS encoded CSV. The file must already exist in the working
# directory; otherwise pandas raises FileNotFoundError.
dataset4 = pd.read_csv(
    filepath_or_buffer="sample_dataset.sjis.csv",
    encoding="shift_jis",
)
dataset4

FileNotFoundError: File b'sample_dataset.sjis.csv' does not exist

In [103]:
# Read an EUC-JP encoded tab-separated text file. The file must already
# exist in the working directory; otherwise pandas raises FileNotFoundError.
dataset5 = pd.read_table(
    filepath_or_buffer="sample_dataset.eucjp.txt",
    encoding="euc_jp",
)
dataset5

FileNotFoundError: File b'sample_dataset.eucjp.txt' does not exist

<div style="font-size:20px; color:pink; margin:20px">
CSV ファイル / テキストファイルの読み込み例 (URL を指定)
</div>
<div style="font-size:14px; color:white; margin:20px">
インターネット上に配置されたファイルを読み込むことも可能です。<br>
本例では、当サイトにアップロード済みのCSV ファイルやテキストファイルを読み込みます。<br>
</div>

In [104]:
# read_csv() also accepts a URL: fetch the sample CSV over HTTP and parse it.
dataset3 = pd.read_csv(
    filepath_or_buffer="http://pythondatascience.plavox.info/wp-content/uploads/2016/05/sample_dataset.csv",
)
dataset3

Unnamed: 0,ID,Name,Birthdate,Sex,Occupation,Salary
0,ID-0001,Abe,1985/1/1,M,Engineer,8422213
1,ID-0002,Saito,1970/2/11,F,Professor,8222588
2,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
3,ID-0004,Tanaka,1980/4/22,F,Sales,8505218
4,ID-0005,Okamoto,1995/5/25,M,Student,218103


In [105]:
# read_table() accepts a URL as well: fetch the tab-separated sample over HTTP.
dataset4 = pd.read_table(
    filepath_or_buffer="http://pythondatascience.plavox.info/wp-content/uploads/2016/05/sample_dataset.txt",
)
dataset4

Unnamed: 0,ID,Name,Birthdate,Sex,Occupation,Salary
0,ID-0001,Abe,1985/1/1,M,Engineer,8422213
1,ID-0002,Saito,1970/2/11,F,Professor,8222588
2,ID-0003,Yamada,1975/3/21,M,Doctor,9845288
3,ID-0004,Tanaka,1980/4/22,F,Sales,8505218
4,ID-0005,Okamoto,1995/5/25,M,Student,218103


<div style="font-size:20px; color:pink; margin:20px">
CSV ファイルとして出力する: DataFrame.to_csv() メソッド
</div>
<div style="font-size:14px; color:white; margin:20px">
Pandas には、CSV ファイルとして出力するメソッドとして、DataFrame.to_csv() メソッドが存在します。<br>
また、この際、区切り文字を CSV ファイルで用いるカンマ (,) から タブ (\t) などへ置き換えることで、<br>
テキストファイルとして出力する事もできます。<br>
</div>

<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
<p><b>DataFrame.to_csv メソッドの書式と引数 (オプション)</b></p>
<p></p><!-- Crayon Syntax Highlighter v_2.7.2_beta -->

		<div id="crayon-5a3e415924ecb494565375" class="crayon-syntax crayon-theme-github crayon-font-monaco crayon-os-mac print-yes notranslate" data-settings=" no-popup minimize scroll-mouseover" style=" margin-top: 12px; margin-bottom: 12px; font-size: 13px !important; line-height: 16px !important;">
		
			<div class="crayon-toolbar" data-settings=" show" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><span class="crayon-title"></span>
			<div class="crayon-tools" style="font-size: 13px !important;height: 19.5px !important; line-height: 19.5px !important;"><div class="crayon-button crayon-wrap-button" title="Toggle Line Wrap"><div class="crayon-button-icon"></div></div><span class="crayon-language">Python</span></div></div>
			<div class="crayon-info" style="min-height: 18.2px !important; line-height: 18.2px !important;"></div>
			<div class="crayon-plain-wrap"></div>
			<div class="crayon-main" style="">
				<table class="crayon-table">
					<tr class="crayon-row">
				<td class="crayon-nums " data-settings="show">
					<div class="crayon-nums-content" style="font-size: 13px !important; line-height: 16px !important;"><div class="crayon-num" data-line="crayon-5a3e415924ecb494565375-1">1</div><div class="crayon-num" data-line="crayon-5a3e415924ecb494565375-2">2</div><div class="crayon-num" data-line="crayon-5a3e415924ecb494565375-3">3</div><div class="crayon-num" data-line="crayon-5a3e415924ecb494565375-4">4</div></div>
				</td>
						<td class="crayon-code"><div class="crayon-pre" style="font-size: 13px !important; line-height: 16px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;"><div class="crayon-line" id="crayon-5a3e415924ecb494565375-1"><span class="crayon-v">DataFrame</span><span class="crayon-sy">.</span><span class="crayon-e">to_csv</span><span class="crayon-sy">(</span><span class="crayon-v">path_or_buf</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">sep</span><span class="crayon-o">=</span><span class="crayon-s">', '</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">na_rep</span><span class="crayon-o">=</span><span class="crayon-s">''</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">float_format</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">columns</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">header</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e415924ecb494565375-2"><span class="crayon-h">&nbsp;&nbsp;</span><span class="crayon-v">index</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">index_label</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">mode</span><span class="crayon-o">=</span><span class="crayon-s">'w'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">encoding</span><span 
class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">compression</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">quoting</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e415924ecb494565375-3"><span class="crayon-h">&nbsp;&nbsp;</span><span class="crayon-v">quotechar</span><span class="crayon-o">=</span><span class="crayon-s">'"'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">line_terminator</span><span class="crayon-o">=</span><span class="crayon-s">'\n'</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">chunksize</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">tupleize_cols</span><span class="crayon-o">=</span><span class="crayon-t">False</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">date_format</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span></div><div class="crayon-line" id="crayon-5a3e415924ecb494565375-4"><span class="crayon-h">&nbsp;&nbsp;</span><span class="crayon-v">doublequote</span><span class="crayon-o">=</span><span class="crayon-t">True</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-v">escapechar</span><span class="crayon-o">=</span><span class="crayon-t">None</span><span class="crayon-sy">,</span><span class="crayon-h"> </span><span class="crayon-k ">decimal</span><span class="crayon-o">=</span><span class="crayon-s">'.'</span><span class="crayon-sy">)</span></div></div></td>
					</tr>
				</table>
			</div>
		</div>
<!-- [Format Time: 0.0053 seconds] -->
<p></p>
<p>引数を指定することで、出力するファイル名のほか、区切り文字などのフォーマット、列名、行名の出力有無を指定可能です。以下に主な引数を紹介します。</p>
<table>
<tr>
<th>path_or_buf</th>
<td>出力するファイル名。省略した場合は、コンソール上に文字列として出力されます。</td>
</tr>
<tr>
<th>sep</th>
<td>区切り文字 (デフォルト: , (カンマ) )</td>
</tr>
<tr>
<th>index</th>
<td>行名を出力するかどうか。Falseを指定した場合、行名は出力されません。(デフォルト: True)</td>
</tr>
<tr>
<th>encoding</th>
<td>出力する際の文字コード。&#8216;utf-8&#8217;, &#8216;shift_jis&#8217;, &#8216;euc_jp&#8217; などを指定。参考: <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">Python の文字コードの一覧</a> (デフォルト: Python 3 の場合 &#8220;utf-8&#8221;, Python 2 の場合 &#8220;ascii&#8221;)</td>
</tr>
</table>
<hr></div>

<div style="font-size:20px; color:pink; margin:20px">
コード例
</div>
<div style="font-size:14px; color:white; margin:20px">
以下に実際に作成した Pandas のデータフレームをファイルとして出力するコードの例を紹介します。<br>
</div>

In [None]:
# Build a two-row employee table. The ids are string literals, so their
# leading zeros are kept as-is.
df = pd.DataFrame(
    data=[
        ["0001", "John", "Engineer"],
        ["0002", "Lily", "Sales"],
    ],
    columns=["id", "name", "job"],
)

# Write the table as a CSV file (employee.csv); the row index is written
# too because `index` defaults to True.
df.to_csv(path_or_buf="employee.csv")

In [107]:
# Write the frame as a tab-separated text file (employee.txt).
# sep="\t" replaces the default comma delimiter; with sep="," the output
# would be byte-identical to the CSV export and not a tab-delimited file.
df.to_csv("employee.txt", sep="\t")

<div style="font-size:20px; color:pink; margin:20px">

</div>
<div style="font-size:14px; color:white; margin:20px">
コード例 (日本語を含む場合)<br>
日本語の文字列を含んだデータセットを出力する場合は以下のように、encoding="<文字コード名>" を引数に指定します。<br>
<br>
Windows 版のExcel で読み込みできる形式で出力する場合は、以下のように、シフト JIS 形式で出力する必要があります。<br>
Python で使える文字コードの一覧は <a href="https://docs.python.org/3/library/codecs.html#standard-encodings" target="_blank">Standard Encodings</a> にあります。<br>
</div>

In [108]:
# Build an employee table whose name and job columns contain Japanese text.
df = pd.DataFrame(
    data=[
        ["1001", "山田 裕司", "エンジニア"],
        ["1002", "佐々木 美紀", "営業"],
    ],
    columns=["id", "name", "job"],
)

# Write the table as a Shift-JIS encoded CSV file (employee.sjis.csv).
df.to_csv(path_or_buf="employee.sjis.csv", encoding="shift_jis")