In [9]:
# 番号による参照
"""
インデックス、カラムの番号で、DataFrame型のデータを参照する場合はilocを使う
df.iloc['行番号のリスト', '列番号のリスト']
スライス機能を使うこともできる
"""
import pandas as pd

data = {
    'fruits': ['apple', 'orange', 'banana', 'strawberry', 'kiwifruit'],
    'time': [1, 2, 3, 4, 5],
    'year': [2000, 2001, 2002, 2003, 2004]
}
df = pd.DataFrame(data)
print(df)

       fruits  time  year
0       apple     1  2000
1      orange     2  2001
2      banana     3  2002
3  strawberry     4  2003
4   kiwifruit     5  2004


In [11]:
df = df.iloc[[1, 3], [0, 2]]
print(df)

       fruits  year
1      orange  2001
3  strawberry  2003


In [12]:
import pandas as pd
import numpy as np

np.random.seed(0)
columns = ['apple', 'orange', 'banana', 'kiwifruit', 'strawberry']

# DataFrameを生成し、列を追加
df = pd.DataFrame()
for column in columns:
    df[column] = np.random.choice(range(1, 11), 10)
df.index = range(1, 11)
print(df)

    apple  orange  banana  kiwifruit  strawberry
1       6       8       6          3          10
2       1       7      10          4          10
3       4       9       9          9           1
4       4       9      10          2           5
5       8       2       5          4           8
6      10       7       4          4           4
7       4       8       1          4           3
8       6       8       4          8           8
9       3       9       6          1           3
10      5       2       1          2           1


In [14]:
df = df.iloc[1:5, [0, 3]]
print(df)

   apple  kiwifruit
2      1          4
3      4          9
4      4          2
5      8          4


In [16]:
# 行または列の削除
"""
df.drop()
インデックスまたはカラムを指定することで、
該当する行または列を削除したDataFrameを生成する
  - 行と列は同時に削除できない
  - 列を削除する場合は第二引数にaxis=1を指定する
"""
import pandas as pd
data = {"fruits": ["apple", "orange", "banana", "strawberry", "kiwifruit"],
        "time": [1, 4, 5, 6, 3],
        "year": [2001, 2002, 2001, 2008, 2006]}
df = pd.DataFrame(data)
print(df)


       fruits  time  year
0       apple     1  2001
1      orange     4  2002
2      banana     5  2001
3  strawberry     6  2008
4   kiwifruit     3  2006


In [17]:
df_1 = df.drop(range(2, 4))
print(df_1)

      fruits  time  year
0      apple     1  2001
1     orange     4  2002
4  kiwifruit     3  2006


In [18]:
df_2 = df.drop('year', axis=1)
print(df_2)

       fruits  time
0       apple     1
1      orange     4
2      banana     5
3  strawberry     6
4   kiwifruit     3


In [21]:
import pandas as pd
import numpy as np

columns = {'apple', 'banana', 'grape', 'mango', 'peach'}
df = pd.DataFrame()
for column in columns:
    df[column] = np.random.choice(range(1, 11), 10)
df.index = range(1, 11)
print(df)

    mango  banana  apple  peach  grape
1       1       5      6      1      3
2       4       5     10      4     10
3       6       9      4      3      3
4      10       5      1      1      4
5       5       4      6      8      4
6       5       8      1      6      3
7       7       6      2     10      4
8       5       6      3      1      5
9       5       1      5      3      2
10      4       2      3      8      3


In [23]:
df = df.drop(np.arange(2, 11, 2))
print(df)

   mango  banana  apple  peach  grape
1      1       5      6      1      3
3      6       9      4      3      3
5      5       4      6      8      4
7      7       6      2     10      4
9      5       1      5      3      2


In [25]:
df = df.drop('mango', axis=1)
print(df)

   banana  apple  peach  grape
1       5      6      1      3
3       9      4      3      3
5       4      6      8      4
7       6      2     10      4
9       1      5      3      2


In [27]:
# ソート
"""
df.sort_values(by='カラムのリスト', ascending=True)
  - 列の値が昇順(小さい順)
ascending=False
  - 降順(大きい順)
"""
import pandas as pd

data = {
    'fruits': ['apple', 'grape', 'orange', 'peach', 'mango'],
    'time': [1, 5, 2, 7, 4],
    'year': [2000, 2001, 2002, 2003, 2004]
}
df = pd.DataFrame(data)
print(df)

   fruits  time  year
0   apple     1  2000
1   grape     5  2001
2  orange     2  2002
3   peach     7  2003
4   mango     4  2004


In [30]:
df = df.sort_values(by='time', ascending=True)
print(df)

   fruits  time  year
0   apple     1  2000
2  orange     2  2002
4   mango     4  2004
1   grape     5  2001
3   peach     7  2003


In [34]:
import pandas as pd
import numpy as np

columns = ['apple', 'mango', 'orange', 'grape', 'peach']
df = pd.DataFrame()
for column in columns:
    df[column] = np.random.choice(range(1, 11), 10)
df.index = range(1, 11)
print(df)

    apple  mango  orange  grape  peach
1       7      8       8     10      3
2       9      1       6      8      3
3       9      9       4      4      9
4       4      5       5      3      2
5       3      7       6      4      6
6       4      6       4     10      9
7       7      9       4      8      5
8       4      3       8      8      1
9       7      4      10      6      3
10      6     10      10      2      6


In [35]:
df = df.sort_values(columns)
print(df)

    apple  mango  orange  grape  peach
5       3      7       6      4      6
8       4      3       8      8      1
4       4      5       5      3      2
6       4      6       4     10      9
10      6     10      10      2      6
9       7      4      10      6      3
1       7      8       8     10      3
7       7      9       4      8      5
2       9      1       6      8      3
3       9      9       4      4      9


In [36]:
df = df.sort_values(by='mango', ascending=True)
print(df)

    apple  mango  orange  grape  peach
2       9      1       6      8      3
8       4      3       8      8      1
9       7      4      10      6      3
4       4      5       5      3      2
6       4      6       4     10      9
5       3      7       6      4      6
1       7      8       8     10      3
7       7      9       4      8      5
3       9      9       4      4      9
10      6     10      10      2      6


In [39]:
# フィルタリング
import pandas as pd

data = {"fruits": ["apple", "orange", "banana", "strawberry", "kiwifruit"],
        "year": [2001, 2002, 2001, 2008, 2006],
        "time": [1, 4, 5, 6, 3]}

df = pd.DataFrame(data)
print(df)

       fruits  time  year
0       apple     1  2001
1      orange     4  2002
2      banana     5  2001
3  strawberry     6  2008
4   kiwifruit     3  2006


In [40]:
print(df.index % 2 == 0)

[ True False  True False  True]


In [42]:
print(df[df.index % 2 == 0])

      fruits  time  year
0      apple     1  2001
2     banana     5  2001
4  kiwifruit     3  2006


In [45]:
import pandas as pd
import numpy as np

np.random.seed(0)
columns = ['apple', 'mango', 'peach', 'orange', 'grape']
df = pd.DataFrame()
for column in columns:
    df[column] = np.random.choice(range(1, 11), 10)
df.index = range(1, 11)
print(df)

    apple  mango  peach  orange  grape
1       6      8      6       3     10
2       1      7     10       4     10
3       4      9      9       9      1
4       4      9     10       2      5
5       8      2      5       4      8
6      10      7      4       4      4
7       4      8      1       4      3
8       6      8      4       8      8
9       3      9      6       1      3
10      5      2      1       2      1


In [47]:
df = df.loc[df['apple'] >= 2][df['orange'] >= 3]
print(df)

   apple  mango  peach  orange  grape
8      6      8      4       8      8
