# Chapter 2 - Data Preparation Basics
## Segment 4 - Concatenating and transforming data

In [6]:
# Set IPython's IPCompleter.greedy option to True (greedy tab completion).
%config IPCompleter.greedy=True

In [1]:
import numpy as np
import pandas as pd

# from pandas import Series, DataFrame

In [4]:
# Base frame for the whole segment: the integers 0..35 laid out row by row
# in a 6x6 grid, with default integer row and column labels.
DF_obj = pd.DataFrame(data=np.arange(0, 36).reshape((6, 6)))
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [15]:
# Six-element Series (0..5) named 'added_col'; the name becomes the column
# (or row) label when the Series is attached to DF_obj in later cells.
series_obj = pd.Series(np.arange(6), name='added_col')
series_obj

0    0
1    1
2    2
3    3
4    4
5    5
Name: added_col, dtype: int64

### Concatenating data

In [16]:
# Side-by-side concatenation: the Series is aligned on the row index and
# attached as one extra column labelled with its name.
pd.concat(objs=[DF_obj, series_obj], axis="columns")

Unnamed: 0,0,1,2,3,4,5,added_col
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [17]:
# Row-wise concatenation: the Series entries are stacked underneath the
# frame's rows; cells with no matching column are filled with NaN.
pd.concat(objs=[DF_obj, series_obj], axis="index")

Unnamed: 0,0,1,2,3,4,5
0,0,1.0,2.0,3.0,4.0,5.0
1,6,7.0,8.0,9.0,10.0,11.0
2,12,13.0,14.0,15.0,16.0,17.0
3,18,19.0,20.0,21.0,22.0,23.0
4,24,25.0,26.0,27.0,28.0,29.0
5,30,31.0,32.0,33.0,34.0,35.0
0,0,,,,,
1,1,,,,,
2,2,,,,,
3,3,,,,,


### Transforming data
#### Dropping data

In [22]:
# Return a copy of DF_obj without the columns labelled 1 and 2
# (DF_obj itself is left unchanged).
DF_obj.drop(columns=[1, 2])

Unnamed: 0,0,3,4,5
0,0,3,4,5
1,6,9,10,11
2,12,15,16,17
3,18,21,22,23
4,24,27,28,29
5,30,33,34,35


In [24]:
# Return a copy of DF_obj without the rows labelled 1 and 2
# (drop works on the row index when no axis is given).
DF_obj.drop(index=[1, 2])

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


### Adding data

In [30]:
# Join the named Series onto the frame by row index; the Series name
# ('added_col') is used as the new column's label.
DF_obj.join(series_obj)

Unnamed: 0,0,1,2,3,4,5,added_col
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [31]:
# Add the Series as one extra row labelled with its name ('added_col').
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# Series is turned into a one-row frame (to_frame().T) and concatenated,
# which yields the same result on old and new pandas alike.
pd.concat([DF_obj, series_obj.to_frame().T])

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35
added_col,0,1,2,3,4,5


In [33]:
# Add the Series as one extra row and renumber the row labels 0..n-1, so the
# new row gets the next integer label instead of the Series name.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# on a one-row frame built from the Series is the supported equivalent.
pd.concat([DF_obj, series_obj.to_frame().T], ignore_index=True)

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35
6,0,1,2,3,4,5


In [34]:
# Add the Series as one extra row while keeping the existing row labels, so
# the new row is labelled with the Series name ('added_col').
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# on a one-row frame built from the Series is the supported equivalent.
pd.concat([DF_obj, series_obj.to_frame().T], ignore_index=False)

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35
added_col,0,1,2,3,4,5


### Sorting data

In [45]:
# 6x6 frame of random whole-number floats in [0, 100] used by the sorting cells.
# The values are drawn from a privately seeded generator so that
# Restart & Run All reproduces the same frame every time, without altering
# NumPy's global random state. (Outputs saved before the seed was added
# will differ until the notebook is re-run.)
DF_obj_2 = pd.DataFrame(
    np.round(np.random.RandomState(25).rand(36) * 100, decimals=0).reshape(6, 6)
)
DF_obj_2

Unnamed: 0,0,1,2,3,4,5
0,94.0,97.0,85.0,28.0,16.0,24.0
1,16.0,19.0,28.0,8.0,61.0,17.0
2,10.0,95.0,75.0,15.0,7.0,36.0
3,37.0,23.0,35.0,21.0,95.0,50.0
4,63.0,64.0,48.0,72.0,99.0,50.0
5,97.0,62.0,14.0,74.0,1.0,58.0


In [55]:
# Order the rows by the values in column 5, largest first.
DF_obj_2.sort_values(by=5, ascending=False)

Unnamed: 0,0,1,2,3,4,5
5,97.0,62.0,14.0,74.0,1.0,58.0
3,37.0,23.0,35.0,21.0,95.0,50.0
4,63.0,64.0,48.0,72.0,99.0,50.0
2,10.0,95.0,75.0,15.0,7.0,36.0
0,94.0,97.0,85.0,28.0,16.0,24.0
1,16.0,19.0,28.0,8.0,61.0,17.0


In [53]:
# Reorder the columns by their labels in descending order (5, 4, ..., 0);
# the rows keep their original order.
DF_obj_2.sort_index(axis="columns", ascending=False)

Unnamed: 0,5,4,3,2,1,0
0,24.0,16.0,28.0,85.0,97.0,94.0
1,17.0,61.0,8.0,28.0,19.0,16.0
2,36.0,7.0,15.0,75.0,95.0,10.0
3,50.0,95.0,21.0,35.0,23.0,37.0
4,50.0,99.0,72.0,48.0,64.0,63.0
5,58.0,1.0,74.0,14.0,62.0,97.0
