# Column selection, addition, deletion

You can treat a DataFrame semantically like a dict
of like-indexed Series objects.
Getting, setting, and deleting columns works with the same syntax
as the analogous dict operations.

When inserting a scalar value,
it will naturally be propagated to fill the column.

When inserting a Series
that does not have the same index as the DataFrame,
it will be conformed to the DataFrame’s index.

Columns can be deleted or popped like with a dict.

You can insert raw ndarrays
but their length must match the length of the DataFrame’s index.

By default, columns get inserted at the end.
The `DataFrame.insert` method is available to insert a column
at a particular location in the columns.

In [65]:
import random
import numpy as np
import pandas as pd

### Creation

In [96]:
# Maps each future DataFrame column label to a string
# whose characters become the index labels of that column.
# The strings differ in length and in their first/last letter,
# so the resulting Series are not identically indexed.
d = {'one': 'abcdefghij',
     'two': 'abcdefghi',
     'three': 'bcdefghij',
     'four': 'bcdefghi'}

# Bounds (both inclusive) passed to random.randint.
ir = (1, 9)

# Builds a dict of Series, one per column label:
# each Series holds one random integer per character of the string
# and is indexed by those characters.
ds = {}
for column, labels in d.items():
    values = [random.randint(*ir) for _ in labels]
    ds[column] = pd.Series(values, index=list(labels))

# Creates a DataFrame from the dict of Series.
# The Series are aligned on the union of their indexes,
# so labels missing from a Series show up as NaN in its column.
df = pd.DataFrame(ds)

# Prints the DataFrame together with some of its attributes.

# Recap template, i.e. the string to be formatted.
s = '{}:\n{}\n\n{}: {}\n{}: {}\n{}: {}\n{}: {}'
# Recap values, i.e. the objects substituted into the template.
r = ('dataframe', df,
     'index', list(df.index),
     'columns', list(df.columns),
     'len', len(df),
     'shape', df.shape)
print(s.format(*r))

dataframe:
   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

index: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
columns: ['one', 'two', 'three', 'four']
len: 10
shape: (10, 4)


### Selection

In [97]:
# Shows the whole DataFrame first.
print(df)
print()

# Collects the column labels into a list.
# The same labels can be obtained with list(df) or list(df.keys()).
df_keys = df.columns.tolist()

# Draws one label at random and fetches that column
# with the dict-like bracket syntax; the result is a Series.
df_key = random.choice(df_keys)
df_column = df[df_key]
print(df_column)
print()

# Repeats the selection through the get method with a different label:
# the label already used is dropped from the list before the next draw.
df_keys.remove(df_key)
df_key = random.choice(df_keys)
df_column = df.get(df_key)
print(df_column)

   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

a    NaN
b    3.0
c    9.0
d    9.0
e    3.0
f    1.0
g    1.0
h    3.0
i    9.0
j    NaN
Name: four, dtype: float64

a    7
b    8
c    7
d    3
e    1
f    2
g    9
h    5
i    8
j    2
Name: one, dtype: int64


In [98]:
# Shows the whole DataFrame first.
print(df)
print()

# Puts all column labels into a random order.
df_keys = random.sample(list(df), len(df.columns))

# Selecting with a list of labels gives a DataFrame
# whose columns follow the order of that list.
df_columns = df[df_keys]
print(df_columns)
print()

# Repeats the selection through the get method, which hands back
# the default value instead of raising KeyError
# when the list contains a label that does not exist.

# Corrupts the list with a label that is not a column.
df_non_key = 'five'
df_keys += [df_non_key]

# Because of the unknown label the result is the default, not a DataFrame.
df_columns = df.get(df_keys, default=None)
print(df_columns)
print()

   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

   two  four  three  one
a  2.0   NaN    NaN    7
b  1.0   3.0    2.0    8
c  6.0   9.0    9.0    7
d  6.0   9.0    4.0    3
e  9.0   3.0    3.0    1
f  7.0   1.0    5.0    2
g  9.0   1.0    8.0    9
h  9.0   3.0    5.0    5
i  9.0   9.0    3.0    8
j  NaN   NaN    1.0    2

None



### Single-column operations

In [101]:
# Derives new columns from single, randomly chosen columns
# and stores every result in the DataFrame under a descriptive label.

# Works on a copy, so df itself stays unchanged.
x = df.copy()

# Labels of the original columns; source columns are drawn only from these.
labels = list(x)

# Bounds for the random integer operand.
low, high = 1, 9

# Constant: a scalar is propagated to fill the whole column.
operand = random.randint(low, high)
x['={}'.format(operand)] = operand

# Addition (subtraction works the same way with '-').
operand = random.randint(low, high)
source = random.choice(labels)
x['{}+{}'.format(source, operand)] = x[source] + operand

# Multiplication (division works the same way with '/').
operand = random.randint(low, high)
source = random.choice(labels)
x['{}*{}'.format(source, operand)] = x[source] * operand

# Comparison ('>', '<=' and '>=' work the same way);
# the new column holds booleans.
operand = random.randint(low, high)
source = random.choice(labels)
x['{}<{}'.format(source, operand)] = x[source] < operand

# Parity test (for odd values compare the remainder with 1
# and use the suffix 'odd' instead).
source = random.choice(labels)
x['{} even'.format(source)] = x[source] % 2 == 0

# Outputs the resulting DataFrame.
x

Unnamed: 0,one,two,three,four,=3,one+8,three*1,two<1,two even
a,7,2.0,,,3,15,,False,True
b,8,1.0,2.0,3.0,3,16,2.0,False,False
c,7,6.0,9.0,9.0,3,15,9.0,False,True
d,3,6.0,4.0,9.0,3,11,4.0,False,True
e,1,9.0,3.0,3.0,3,9,3.0,False,False
f,2,7.0,5.0,1.0,3,10,5.0,False,False
g,9,9.0,8.0,1.0,3,17,8.0,False,False
h,5,9.0,5.0,3.0,3,13,5.0,False,False
i,8,9.0,3.0,9.0,3,16,3.0,False,False
j,2,,1.0,,3,10,1.0,False,False


### Two-column operations

In [103]:
# Derives new columns from pairs of randomly chosen columns
# and stores every result in the DataFrame under a descriptive label.

# Works on a copy, so df itself stays unchanged.
x = df.copy()

# Labels of the original columns; source columns are drawn only from these.
labels = list(x)

# Plain assignment: duplicates one column under a new label.
source = random.choice(labels)
x['=' + source] = x[source]

# Addition of two distinct columns
# (subtraction works the same way with '-').
left, right = random.sample(labels, 2)
x['{}+{}'.format(left, right)] = x[left] + x[right]

# Multiplication of two distinct columns
# (division works the same way with '/').
left, right = random.sample(labels, 2)
x['{}*{}'.format(left, right)] = x[left] * x[right]

# Element-wise comparison of two distinct columns
# ('>', '<=' and '>=' work the same way).
left, right = random.sample(labels, 2)
x['{}<{}'.format(left, right)] = x[left] < x[right]

# Outputs the resulting DataFrame.
x

Unnamed: 0,one,two,three,four,=four,one+two,three*four,four<three
a,7,2.0,,,,9.0,,False
b,8,1.0,2.0,3.0,3.0,9.0,6.0,False
c,7,6.0,9.0,9.0,9.0,13.0,81.0,False
d,3,6.0,4.0,9.0,9.0,9.0,36.0,False
e,1,9.0,3.0,3.0,3.0,10.0,9.0,False
f,2,7.0,5.0,1.0,1.0,9.0,5.0,True
g,9,9.0,8.0,1.0,1.0,18.0,8.0,True
h,5,9.0,5.0,3.0,3.0,14.0,15.0,True
i,8,9.0,3.0,9.0,9.0,17.0,27.0,False
j,2,,1.0,,,,,False


### Removing

In [107]:
# Works on a copy, so df itself stays unchanged.
x = df.copy()
print(x)
print()

# Pops a random column from the DataFrame:
# pop returns the column and removes it in one step.
labels = list(x)
label = random.choice(labels)
popped = x.pop(label)

print(popped, end='\n\n')
print(x, end='\n\n')

# Does the same in two steps with a different label:
# first receives the column by its label,
# then removes it with the del operator.
labels.remove(label)
label = random.choice(labels)
removed = x[label]
del x[label]

print(removed, end='\n\n')
print(x)

   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

a    7
b    8
c    7
d    3
e    1
f    2
g    9
h    5
i    8
j    2
Name: one, dtype: int64

   two  three  four
a  2.0    NaN   NaN
b  1.0    2.0   3.0
c  6.0    9.0   9.0
d  6.0    4.0   9.0
e  9.0    3.0   3.0
f  7.0    5.0   1.0
g  9.0    8.0   1.0
h  9.0    5.0   3.0
i  9.0    3.0   9.0
j  NaN    1.0   NaN

a    2.0
b    1.0
c    6.0
d    6.0
e    9.0
f    7.0
g    9.0
h    9.0
i    9.0
j    NaN
Name: two, dtype: float64

   three  four
a    NaN   NaN
b    2.0   3.0
c    9.0   9.0
d    4.0   9.0
e    3.0   3.0
f    5.0   1.0
g    8.0   1.0
h    5.0   3.0
i    3.0   9.0
j    1.0   NaN


### Slicing

In [105]:
# Inserts slices of randomly chosen columns as new columns.
# A slice covers only part of the index, so after alignment
# the rows outside the slice are NaN in the new column.

# Template for the new column label: '<column>[<start>:<stop>]'.
ks = '{}[{}:{}]'

# Works on a copy, so df itself stays unchanged.
x = df.copy()
print(x, end='\n\n')

# Labels of the original columns
# (the same as list(x.columns) or list(x.keys())).
kl = list(x)

# --- Slice by index labels ---
column = random.choice(kl)

# Two distinct index labels in ascending order.
bounds = sorted(random.sample(list(x.index), 2))
new_column = ks.format(column, *bounds)

print(column, bounds, end='\n\n')

# .loc slices by label and includes both endpoints.
y = x[column].loc[bounds[0]:bounds[1]]
x[new_column] = y

print(y, x, sep='\n\n', end='\n\n')


# --- Slice by integer positions, using a different column ---
kl.remove(column)
column = random.choice(kl)

# Two distinct row positions in ascending order.
bounds = sorted(random.sample(range(len(x)), 2))
new_column = ks.format(column, *bounds)

print(column, bounds, end='\n\n')

# .iloc slices by position and excludes the stop position.
y = x[column].iloc[bounds[0]:bounds[1]]
x[new_column] = y

print(y, x, sep='\n\n')

   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

four ['g', 'j']

g    1.0
h    3.0
i    9.0
j    NaN
Name: four, dtype: float64

   one  two  three  four  four[g:j]
a    7  2.0    NaN   NaN        NaN
b    8  1.0    2.0   3.0        NaN
c    7  6.0    9.0   9.0        NaN
d    3  6.0    4.0   9.0        NaN
e    1  9.0    3.0   3.0        NaN
f    2  7.0    5.0   1.0        NaN
g    9  9.0    8.0   1.0        1.0
h    5  9.0    5.0   3.0        3.0
i    8  9.0    3.0   9.0        9.0
j    2  NaN    1.0   NaN        NaN

two [0, 4]

a    2.0
b    1.0
c    6.0
d    6.0
Name: two, dtype: float64

   one  two  three  four  four[g:j]  two[0:4]
a    7  2.0    NaN   NaN        NaN       2.0
b    8  1.0    2.0   3.0        NaN       1.0
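c    7  6.0    9.0   9.0        NaN       6.0
d    3  6.0    4.0   9.0        NaN       6.0
e    1  9.0    3.0   3.0        NaN       NaN
f    2  7.0    5.0   1.0        NaN       NaN
g    9  9.0    8.0   1.0        1.0       NaN
h    5  9.0    5.0   3.0        3.0       NaN
i    8  9.0    3.0   9.0        9.0       NaN
j    2  NaN    1.0   NaN        NaN       NaN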

### ndarray insertion

In [106]:
# Inserts a raw ndarray as a new column;
# its length has to match the length of the DataFrame's index.

# Works on a copy, so df itself stays unchanged.
x = df.copy()
print('{}:\n{}\n'.format(type(x), x))

# Label of the new column: its ordinal number among the columns, as a string.
label = str(len(x.columns) + 1)

# One random integer per row. numpy's randint excludes the upper bound,
# hence 9 + 1 to draw from 1..9.
values = np.random.randint(1, 9 + 1, size=len(x))

x[label] = values

# Recap: the new label, the inserted array and the resulting DataFrame.
recap = '{}: {}\n\n{}: {}\n\n{}:\n{}'
print(recap.format('key', label,
                   type(values), values,
                   type(x), x))

<class 'pandas.core.frame.DataFrame'>:
   one  two  three  four
a    7  2.0    NaN   NaN
b    8  1.0    2.0   3.0
c    7  6.0    9.0   9.0
d    3  6.0    4.0   9.0
e    1  9.0    3.0   3.0
f    2  7.0    5.0   1.0
g    9  9.0    8.0   1.0
h    5  9.0    5.0   3.0
i    8  9.0    3.0   9.0
j    2  NaN    1.0   NaN

key: 5

<class 'numpy.ndarray'>: [4 2 9 7 4 7 8 2 5 6]

<class 'pandas.core.frame.DataFrame'>:
   one  two  three  four  5
a    7  2.0    NaN   NaN  4
b    8  1.0    2.0   3.0  2
c    7  6.0    9.0   9.0  9
d    3  6.0    4.0   9.0  7
e    1  9.0    3.0   3.0  4
f    2  7.0    5.0   1.0  7
g    9  9.0    8.0   1.0  8
h    5  9.0    5.0   3.0  2
i    8  9.0    3.0   9.0  5
j    2  NaN    1.0   NaN  6
