## 2.4 行や列の追加と削除

In [1]:
import pandas as pd
import numpy as np

# Demo frame used throughout this section: three integer columns (A, B, C)
# and four rows labelled 'a' through 'd'.
df = pd.DataFrame(
    data={'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8], 'C': [9, 10, 11, 12]},
    index=list('abcd'),
)

df

Unnamed: 0,A,B,C
a,1,5,9
b,2,6,10
c,3,7,11
d,4,8,12


In [2]:
# Assigning to a column label that does not exist yet adds the column in
# place, as the last column; one value is supplied per row (13..16).
df['D'] = list(range(13, 17))

df

Unnamed: 0,A,B,C,D
a,1,5,9,13
b,2,6,10,14
c,3,7,11,15
d,4,8,12,16


In [3]:
# assign() does not modify the frame it is called on: it returns a new frame
# with column E added, so the result is bound back to df.
df = df.assign(
    E=[17, 18, 19, 20],
)

df

Unnamed: 0,A,B,C,D,E
a,1,5,9,13,17
b,2,6,10,14,18
c,3,7,11,15,19
d,4,8,12,16,20


In [4]:
# insert() modifies df in place and puts the new column at an explicit
# position: loc=2 places X as the third column, between B and C.
df.insert(loc=2, column='X', value=[21, 22, 23, 24])

df

Unnamed: 0,A,B,X,C,D,E
a,1,5,21,9,13,17
b,2,6,22,10,14,18
c,3,7,23,11,15,19
d,4,8,24,12,16,20


In [5]:
# Assigning through .loc with a row label that is not in the index yet ('e')
# appends a new row in place; the list supplies one value per column, in the
# current column order (A, B, X, C, D, E).
df.loc['e'] = [25, 26, 27, 28, 29, 30]

df

Unnamed: 0,A,B,X,C,D,E
a,1,5,21,9,13,17
b,2,6,22,10,14,18
c,3,7,23,11,15,19
d,4,8,24,12,16,20
e,25,26,27,28,29,30


In [6]:
# Remove the row labelled 'c'. drop() returns a new frame, so the result is
# bound back to df.
df = df.drop(index='c')

df

Unnamed: 0,A,B,X,C,D,E
a,1,5,21,9,13,17
b,2,6,22,10,14,18
d,4,8,24,12,16,20
e,25,26,27,28,29,30


In [7]:
# drop() works on labels, so positions are first translated to labels via
# df.index: positions 1 and 3 are the rows currently labelled 'b' and 'e'.
df = df.drop(index=df.index[[1, 3]])

df

Unnamed: 0,A,B,X,C,D,E
a,1,5,21,9,13,17
d,4,8,24,12,16,20


In [8]:
# Remove columns B and C by label; the columns= keyword is the explicit
# spelling of "drop along the column axis".
df = df.drop(columns=['B', 'C'])

df

Unnamed: 0,A,X,D,E
a,1,21,13,17
d,4,24,16,20


In [9]:
# Remove a column by position: df.columns[2] looks up the label of the third
# column (currently 'D'), which is then dropped by label.
df = df.drop(columns=df.columns[2])

df

Unnamed: 0,A,X,E
a,1,21,17
d,4,24,20


In [10]:
def make_dummy_value(size=None):
    """Draw three independent batches of uniform random numbers from [0, 1).

    Parameters
    ----------
    size : int or tuple of ints, optional
        Shape of each batch. With the default ``None`` each batch is a single
        Python float; otherwise each batch is a NumPy array of that shape.

    Returns
    -------
    tuple
        ``(val1, val2, val3)`` - three separate draws taken, in that order,
        from NumPy's global random state.
    """
    # random_sample() treats size=None as "one scalar", so a single call
    # covers both the scalar and the array case without branching.
    return tuple(np.random.random_sample(size) for _ in range(3))

make_dummy_value()

(0.4676436268312316, 0.13280676804690272, 0.1074747813334812)

In [11]:
make_dummy_value(size=4)

(array([0.87709289, 0.1192485 , 0.23785832, 0.54096733]),
 array([6.18569187e-01, 2.77485377e-01, 6.24345444e-04, 7.35191487e-01]),
 array([0.51256914, 0.91514491, 0.08918638, 0.2600772 ]))

In [12]:
%%timeit

# Baseline for the timing comparison: grow the frame one row at a time.
# Start from a single all-zero row with columns A, B, C.
df = pd.DataFrame({
    'A': [0.],
    'B': [0.],
    'C': [0.]
})

for i in range(10000):
    val1, val2, val3  = make_dummy_value()
    # Wrap the three scalars in a one-row frame and concatenate it onto the
    # accumulated frame. Every pd.concat call builds a brand-new frame from
    # all rows collected so far, which is what makes this variant slow.
    # ignore_index is not passed, so each appended row keeps index label 0.
    record = pd.DataFrame({'A': [val1], 'B': [val2], 'C': [val3]})
    df = pd.concat([df, record], axis=0)

3.37 s ± 48.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit

# Second variant of the timing comparison: collect the values in plain
# Python lists first and touch pandas only once at the end.
# Start from a single all-zero row with columns A, B, C.
df = pd.DataFrame({
    'A': [0.],
    'B': [0.],
    'C': [0.]
})

# One list per column; list.append is cheap compared with building a frame.
val1_list = []
val2_list = []
val3_list = []
for i in range(10000):
    val1, val2, val3 = make_dummy_value()
    val1_list.append(val1)
    val2_list.append(val2)
    val3_list.append(val3)

# Build all 10000 new rows as one frame ...
df_new = pd.DataFrame({
    'A': val1_list,
    'B': val2_list,
    'C': val3_list
})

# ... and append them with a single concat. ignore_index is not passed, so
# index label 0 appears twice in the result (once from df, once from df_new).
df = pd.concat([df, df_new], axis=0)

13.1 ms ± 315 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
%%timeit

# Third variant of the timing comparison: no Python-level loop at all.
# Start from a single all-zero row with columns A, B, C.
df = pd.DataFrame({
    'A': [0.],
    'B': [0.],
    'C': [0.]
})


# Passing a size makes make_dummy_value return three NumPy arrays of 10000
# values each, so every column is generated in one call.
val1, val2, val3 = make_dummy_value(10000)
df_new = pd.DataFrame({
    'A': val1,
    'B': val2,
    'C': val3,
})

# Append all new rows with a single concat. ignore_index is not passed, so
# index label 0 appears twice in the result (once from df, once from df_new).
df = pd.concat([df, df_new], axis=0)

617 µs ± 7.48 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
