In [311]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

It is possible to concatenate dataframes by calling the pd.concat() function.
When you transform your data, you convert it into the format that is required to facilitate the analysis.
- Dropping data - dropping variables or observations
- Adding data - adding variables or observations
- Sorting data

## Concatenating data

In [312]:
# 6x6 frame holding the integers 0..35, filled row by row.
DF_obj = pd.DataFrame(np.arange(6 * 6).reshape(6, 6))
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [313]:
# Smaller 5x3 frame (integers 0..14) used to show how concat aligns mismatched shapes.
DF_obj_2 = pd.DataFrame(np.arange(5 * 3).reshape(5, 3))
DF_obj_2

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


Concatenate along the columns

In [314]:
pd.concat([DF_obj, DF_obj_2], axis=1) # DF_obj_2 has fewer rows (5) than DF_obj (6), so the unmatched row (index 5) is filled with NaN in DF_obj_2's columns.

Unnamed: 0,0,1,2,3,4,5,0.1,1.1,2.1
0,0,1,2,3,4,5,0.0,1.0,2.0
1,6,7,8,9,10,11,3.0,4.0,5.0
2,12,13,14,15,16,17,6.0,7.0,8.0
3,18,19,20,21,22,23,9.0,10.0,11.0
4,24,25,26,27,28,29,12.0,13.0,14.0
5,30,31,32,33,34,35,,,


Concatenate along the rows

In [315]:
pd.concat([DF_obj, DF_obj_2], axis=0) # axis=0 is the default and can be omitted. DF_obj_2 has no columns 3-5, so those are NaN in its rows.

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3.0,4.0,5.0
1,6,7,8,9.0,10.0,11.0
2,12,13,14,15.0,16.0,17.0
3,18,19,20,21.0,22.0,23.0
4,24,25,26,27.0,28.0,29.0
5,30,31,32,33.0,34.0,35.0
0,0,1,2,,,
1,3,4,5,,,
2,6,7,8,,,
3,9,10,11,,,


## Dropping data

In [316]:
# Show the original frame as the starting point for the drop examples.
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [317]:
# Remove the rows labelled 0 and 2; drop() returns a new frame and leaves DF_obj untouched.
DF_obj.drop(index=[0, 2])

Unnamed: 0,0,1,2,3,4,5
1,6,7,8,9,10,11
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


Drop columns

In [318]:
# Remove the columns labelled 0 and 2; drop() returns a new frame and leaves DF_obj untouched.
DF_obj.drop(columns=[0, 2])

Unnamed: 0,1,3,4,5
0,1,3,4,5
1,7,9,10,11
2,13,15,16,17
3,19,21,22,23
4,25,27,28,29
5,31,33,34,35


## Adding data

In [319]:
# Wrap six consecutive integers in a Series and show the raw array next to it for comparison.
series_obj = pd.Series(np.arange(6))
(np.arange(6), series_obj)

(array([0, 1, 2, 3, 4, 5]),
 0    0
 1    1
 2    2
 3    3
 4    4
 5    5
 dtype: int64)

In [320]:
# Name the Series; when it is joined to the DataFrame below, this name becomes the new column's label.
series_obj.name = "added_variable"
series_obj

0    0
1    1
2    2
3    3
4    4
5    5
Name: added_variable, dtype: int64

In [321]:
# Append the named Series as a new column, matching rows on the shared index.
variable_added = DF_obj.join(series_obj)
variable_added


Unnamed: 0,0,1,2,3,4,5,added_variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [322]:
pd.concat([DF_obj, series_obj], axis=1) # gives the same result as the join above, because both objects share the same row index 0..5

Unnamed: 0,0,1,2,3,4,5,added_variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [323]:
# Stack the frame on top of a second copy of itself (row-wise concat is the default).
added_datatable = pd.concat([variable_added] * 2)
added_datatable

Unnamed: 0,0,1,2,3,4,5,added_variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3


In [324]:
# With ignore_index=False (the default) each input keeps its own row labels,
# so the labels 0..5 appear twice in the result.
added_datatable = pd.concat([variable_added, variable_added], ignore_index=False)
added_datatable

Unnamed: 0,0,1,2,3,4,5,added_variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3


In [325]:
# With ignore_index=True the original row labels are discarded and the result
# is renumbered 0..n-1, so no row label is duplicated.
added_datatable = pd.concat([variable_added, variable_added], ignore_index=True)
added_datatable

Unnamed: 0,0,1,2,3,4,5,added_variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5
6,0,1,2,3,4,5,0
7,6,7,8,9,10,11,1
8,12,13,14,15,16,17,2
9,18,19,20,21,22,23,3


In [326]:
# Concatenate along the columns with ignore_index=False: the column labels of
# both inputs are kept, so every label appears twice in the result.
added_datatable = pd.concat([variable_added, variable_added], axis=1, ignore_index=False)
added_datatable

Unnamed: 0,0,1,2,3,4,5,added_variable,0.1,1.1,2.1,3.1,4.1,5.1,added_variable.1
0,0,1,2,3,4,5,0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5,30,31,32,33,34,35,5


In [327]:
# Concatenate along the columns with ignore_index=True: the original column
# labels (including "added_variable") are replaced by 0..13.
added_datatable = pd.concat([variable_added, variable_added], axis=1, ignore_index=True)
added_datatable

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0,1,2,3,4,5,0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5,30,31,32,33,34,35,5


## Sorting data

In [328]:
# Show the original frame as the starting point for the sorting examples.
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [329]:
DF_sorted = DF_obj.sort_values(by=5, ascending=False) # sort the rows by the column labelled 5 (a label, not a position), in descending order
DF_sorted

Unnamed: 0,0,1,2,3,4,5
5,30,31,32,33,34,35
4,24,25,26,27,28,29
3,18,19,20,21,22,23
2,12,13,14,15,16,17
1,6,7,8,9,10,11
0,0,1,2,3,4,5


In [330]:
# Same sort key (the column labelled 5), now in ascending order.
DF_sorted = DF_obj.sort_values(by=5, ascending=True)
DF_sorted

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [331]:
# Sort the rows by the column labelled 1, largest values first.
DF_sorted = DF_obj.sort_values(by=1, ascending=False)
DF_sorted

Unnamed: 0,0,1,2,3,4,5
5,30,31,32,33,34,35
4,24,25,26,27,28,29
3,18,19,20,21,22,23
2,12,13,14,15,16,17
1,6,7,8,9,10,11
0,0,1,2,3,4,5


In [332]:
# Create a 4x4 matrix of random integers in [0, 100).
# A locally seeded RandomState makes the draw reproducible under
# Restart & Run All without touching NumPy's global random state.
# The values differ from an unseeded run, so re-run the cells that follow
# to refresh their displayed outputs.
random_matrix = np.random.RandomState(42).randint(0, 100, size=(4, 4))
random_matrix

array([[18, 15, 39,  2],
       [57, 84, 41, 99],
       [95, 90, 30, 17],
       [27, 26, 17, 15]])

In [333]:
# Wrap the random matrix in a DataFrame with default integer row and column labels.
DF_random = pd.DataFrame(random_matrix)
DF_random

Unnamed: 0,0,1,2,3
0,18,15,39,2
1,57,84,41,99
2,95,90,30,17
3,27,26,17,15


In [334]:
# Sort the rows by the column labelled 1, smallest values first.
# sort_values returns a new frame; DF_random keeps its original order.
DF_random.sort_values(by=1, ascending=True)

Unnamed: 0,0,1,2,3
0,18,15,39,2
3,27,26,17,15
1,57,84,41,99
2,95,90,30,17


In [337]:
# Sort dataframe by column 0 in ascending order
DF_random.sort_values(by=0, ascending=True)

Unnamed: 0,0,1,2,3
0,18,15,39,2
3,27,26,17,15
1,57,84,41,99
2,95,90,30,17


In [335]:
# Sort the rows by the column labelled 1, largest values first.
DF_random.sort_values(by=1, ascending=False)

Unnamed: 0,0,1,2,3
2,95,90,30,17
1,57,84,41,99
3,27,26,17,15
0,18,15,39,2


In [336]:
# Sort the rows by the column labelled 2, smallest values first.
DF_random.sort_values(by=2, ascending=True)

Unnamed: 0,0,1,2,3
3,27,26,17,15
2,95,90,30,17
0,18,15,39,2
1,57,84,41,99
