In [1]:
import numpy as np
import pandas as pd

# Working with Multiple Dataframes and Transforming

In [2]:
# Sample Dataframe 1.
# Note: when no column names are given, pandas labels the columns with the
# integers 0 and 1 (i.e., it uses the column position as the name).
data1 = [
    [500, 24],
    [1000, 54],
    [1500, 56],
    [2000, 45],
]
df_1_sample = pd.DataFrame(data1)
df_1_sample

Unnamed: 0,0,1
0,500,24
1,1000,54
2,1500,56
3,2000,45


In [3]:
# Sample Dataframe 2 (same layout as the first sample, different second column).
data2 = [
    [500, 23],
    [1000, 34],
    [1500, 54],
    [2000, 32],
]
df_2_sample = pd.DataFrame(data=data2)
df_2_sample

Unnamed: 0,0,1
0,500,23
1,1000,34
2,1500,54
3,2000,32


In [4]:
# Dataframes can be collected in a regular Python list.
# The list stores references to the same dataframe objects, not copies.
df_list = [df_1_sample, df_2_sample]
df_list

[      0   1
 0   500  24
 1  1000  54
 2  1500  56
 3  2000  45,
       0   1
 0   500  23
 1  1000  34
 2  1500  54
 3  2000  32]

In [5]:
df_list[0] # Indexing the list returns the dataframe stored at that position (here, the first one).

Unnamed: 0,0,1
0,500,24
1,1000,54
2,1500,56
3,2000,45


In [6]:
# Derive a new string feature: the literal 'test' followed by the first two
# characters of column 0 (converted to text), e.g. 500 -> 'test50'.
# .assign() returns a new dataframe, so the original object that is also
# stored in df_list is not modified behind the scenes.
df_1_sample = df_1_sample.assign(
    col='test' + df_1_sample[0].astype(str).str[:2]
)
df_1_sample

Unnamed: 0,0,1,col
0,500,24,test50
1,1000,54,test10
2,1500,56,test15
3,2000,45,test20


In [7]:
# Selecting an explicit list of columns both drops the ones left out and sets
# the column order; the result is assigned back to the same variable.
# Alternative for dropping only: df.drop(columns=['col_to_drop']).
df_1_sample = df_1_sample.loc[:, ['col', 1]]
df_1_sample

Unnamed: 0,col,1
0,test50,24
1,test10,54
2,test15,56
3,test20,45


In [8]:
# The .values attribute returns the column's contents as a NumPy array.
df_1_sample['col'].values

array(['test50', 'test10', 'test15', 'test20'], dtype=object)

In [9]:
# Two ways to pull a column's values out of the dataframe:
col_one_list = df_1_sample['col'].tolist()    # as a plain Python list
col_one_arr = df_1_sample['col'].to_numpy()   # as a NumPy array
# Printed in the same order: the list first, then the array.
print(col_one_list)
print(col_one_arr)

['test50', 'test10', 'test15', 'test20']
['test50' 'test10' 'test15' 'test20']


In [10]:
# New rows are added by concatenating two dataframes that share column names.
# ignore_index=True renumbers the rows from 0 in the combined result.
# Caution: this cell reassigns df_1_sample, so re-running it prepends the
# two new rows again.
new_data_frame = pd.DataFrame(
    [['Hello', 1], ['World', 3]],
    columns=['col', 1],
)
df_1_sample = pd.concat([new_data_frame, df_1_sample], ignore_index=True)
df_1_sample

Unnamed: 0,col,1
0,Hello,1
1,World,3
2,test50,24
3,test10,54
4,test15,56
5,test20,45


In [11]:
df_1_sample.T # Transposing the dataframe: rows become columns and columns become rows.

Unnamed: 0,0,1,2,3,4,5
col,Hello,World,test50,test10,test15,test20
1,1,3,24,54,56,45


In [12]:
# Transpose, then reset the index: drop=True discards the old row labels
# ('col' and 1) and replaces them with the default 0-based numbering.
# Caution: this cell reassigns df_1_sample, so re-running it transposes again.
df_1_sample = df_1_sample.transpose().reset_index(drop=True)
df_1_sample

Unnamed: 0,0,1,2,3,4,5
0,Hello,World,test50,test10,test15,test20
1,1,3,24,54,56,45


In [13]:
# Column labels can be taken from a row: the first row (iloc[0]) is a Series
# that maps each current column label to the value used as its new name.
# rename() returns a new dataframe, which is assigned back to the variable
# instead of modifying the existing object with inplace=True.
df_1_sample = df_1_sample.rename(columns=df_1_sample.iloc[0])
df_1_sample

Unnamed: 0,Hello,World,test50,test10,test15,test20
0,Hello,World,test50,test10,test15,test20
1,1,3,24,54,56,45


In [14]:
# A row is dropped by keeping only the others: iloc[1:, :] skips the first
# row and keeps every column, then the index is renumbered from 0.
df_1_sample = df_1_sample.iloc[1:, :].reset_index(drop=True)
df_1_sample

Unnamed: 0,Hello,World,test50,test10,test15,test20
0,1,3,24,54,56,45


# Working with Column Names in Dataframes

In [15]:
# Empty dataframe with specific columns: two fixed names followed by the
# generated names VHS5, VHS6 and VHS7.
data_frame_cols = pd.DataFrame(
    columns=['Val1', 'Val2'] + ['VHS{}'.format(i) for i in range(5, 8)]
)
data_frame_cols

Unnamed: 0,Val1,Val2,VHS5,VHS6,VHS7


In [16]:
# Extracting the column labels of a dataframe as a NumPy array.
data_frame_cols.columns.values

array(['Val1', 'Val2', 'VHS5', 'VHS6', 'VHS7'], dtype=object)

In [17]:
# Storing the column labels in a variable so they can be reused as feature names.
feature_names = data_frame_cols.columns.values

In [18]:
# Defining one row of data values: two strings followed by three integers.
data_values = ['Hello', 'World', 1, 3, 4]

In [19]:
# Creating a one-row dataframe from a list of values: wrapping data_values in
# an outer list makes it a single row, labelled with the stored column names.
data_frame = pd.DataFrame([data_values], columns=feature_names)
data_frame

Unnamed: 0,Val1,Val2,VHS5,VHS6,VHS7
0,Hello,World,1,3,4


# Working with Strings and Lists

In [20]:
# Defining a list of strings (each name uses '_' as a separator).
df_names = ['df_1_sample33', 'df_2_sample33']
df_names

['df_1_sample33', 'df_2_sample33']

In [21]:
df_names[1] # Retrieving the element at index 1 (the second item, since indexing starts at 0).

'df_2_sample33'

In [22]:
# Splitting the first element of the list into a new list of substrings, using "_" as the delimiter.
df_names_sections = df_names[0].split("_")
df_names_sections

['df', '1', 'sample33']

In [23]:
df_names_sections[1] # Retrieving the element at index 1 of the list of substrings.

'1'

In [24]:
df_names_sections[2][2:4] # Slicing the string at index 2: characters at positions 2 and 3 (the slice end is exclusive).

'mp'

In [25]:
df_names_sections[2][-2:] # Negative slice start: selects the last two characters of the string at index 2.

'33'

# Notebook End