# 1. DATAFRAME CONCATENATION

In [1]:
# Import Pandas

import pandas as pd

In [2]:
# Creating a dataframe from a dictionary
# Let's define a dataframe with a list of bank clients with IDs = 1, 2, 3, 4, 5
# Check this out: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'],
            'First Name': ['Nancy', 'Alex', 'Shep', 'Max', 'Allen'],
            'Last Name': ['Rob', 'Ali', 'George', 'Mitch', 'Steve'],
            }

raw_data

bank1_df = pd.DataFrame(raw_data, columns=['Bank Client ID', 'First Name', 'Last Name'])
bank1_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve


In [3]:
# Let's define another dataframe for a separate list of clients (IDs = 6, 7, 8, 9, 10)

raw_data = {'Bank Client ID': ['6', '7', '8', '9', '10'],
            'First Name': ['BIll', 'Dina', 'Sarah', 'Heather', 'Holly'],
            'Last Name': ['Christian', 'Mo', 'SXteve', 'Bob', 'Michelle'],
            }

bank2_df = pd.DataFrame(raw_data, columns=['Bank Client ID', 'First Name', 'Last Name'])
bank2_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,6,BIll,Christian
1,7,Dina,Mo
2,8,Sarah,SXteve
3,9,Heather,Bob
4,10,Holly,Michelle


In [4]:
# Let's concatenate both dataframes #1 and #2
# Note that we now have client IDs from 1 to 10
# Note that by default ignore_index has been set to False meaning indexes from both dataframes are kept unchanged

bank_all_df = pd.concat([bank1_df, bank2_df])

bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
0,6,BIll,Christian
1,7,Dina,Mo
2,8,Sarah,SXteve
3,9,Heather,Bob
4,10,Holly,Michelle


In [5]:
# Let's concatenate both dataframes #1 and #2
# Note that by setting ignore_index = True, the index has been automatically set to numeric and now ranges from 1 to 9

bank_all_df = pd.concat([bank1_df, bank2_df], ignore_index=True)

bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,BIll,Christian
6,7,Dina,Mo
7,8,Sarah,SXteve
8,9,Heather,Bob
9,10,Holly,Michelle


In [6]:
len(bank_all_df)

10

In [7]:
# You can also use the append method to perform similar task
# Note that order matters!

# bank_all_df = bank2_df.append(bank1_df, ignore_index=True)

In [8]:
# You can also use the append method to perform similar task

# bank_all_df = bank1_df.append(bank2_df, ignore_index=True)

**MINI CHALLENGE #1:**
- **Assume that you and your significant other become a new client at the bank and would like to add your first names, last names and unique client IDs. Define a new DataFrame and add it to the master list "bank_all_df"**

In [9]:
raw_data = {'Bank Client ID': ['11', '12', '13', '14', '15'],
            'First Name': ['Cho', 'Kim', 'Jung', 'Hwang', 'Cheon'],
            'Last Name': ['Changseo', 'Minhyung', 'SXteve', 'Bob', 'Michelle'],
            }

bank3_df = pd.DataFrame(raw_data, columns=['Bank Client ID', 'First Name', 'Last Name'])

bank_all_df = pd.concat([bank1_df, bank2_df, bank3_df], ignore_index=True)

bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,BIll,Christian
6,7,Dina,Mo
7,8,Sarah,SXteve
8,9,Heather,Bob
9,10,Holly,Michelle


# 2. DATAFRAME CONCATENATION WITH MULTI-INDEXING

In [10]:
bank1_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve


In [11]:
bank2_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,6,BIll,Christian
1,7,Dina,Mo
2,8,Sarah,SXteve
3,9,Heather,Bob
4,10,Holly,Michelle


In [12]:
# We can perform concatenation and also use multi-indexing dataframe as follows:
bank_all_df = pd.concat([bank1_df, bank2_df], keys=['Customers Group 1', 'Customers Group 2'])
bank_all_df

Unnamed: 0,Unnamed: 1,Bank Client ID,First Name,Last Name
Customers Group 1,0,1,Nancy,Rob
Customers Group 1,1,2,Alex,Ali
Customers Group 1,2,3,Shep,George
Customers Group 1,3,4,Max,Mitch
Customers Group 1,4,5,Allen,Steve
Customers Group 2,0,6,BIll,Christian
Customers Group 2,1,7,Dina,Mo
Customers Group 2,2,8,Sarah,SXteve
Customers Group 2,3,9,Heather,Bob
Customers Group 2,4,10,Holly,Michelle


In [13]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group 1'), 0]

Unnamed: 0_level_0,Customers Group 1
Unnamed: 0_level_1,0
Bank Client ID,1
First Name,Nancy
Last Name,Rob


In [14]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group 1'), :]

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve


In [15]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[('Customers Group 2'), 'First Name']

Unnamed: 0,First Name
0,BIll
1,Dina
2,Sarah
3,Heather
4,Holly


**MINI CHALLENGE #2:**
- **Assume that you and your significant other belong to Customers Group #3. Use multindexing to add both names to the master list. Write a line of code to access Group #3 only.**

In [16]:
raw_data = {'Bank Client ID': ['11', '12', '13', '14', '15'],
            'First Name': ['Cho', 'Kim', 'Jung', 'Hwang', 'Cheon'],
            'Last Name': ['Changseo', 'Minhyung', 'SXteve', 'Bob', 'Michelle'],
            }

bank3_df = pd.DataFrame(raw_data, columns=['Bank Client ID', 'First Name', 'Last Name'])

bank_all_df = pd.concat([bank1_df, bank2_df, bank3_df], keys=['Customors Group 1', 'Customers Group 2', 'Customers Group 3'])

bank_all_df

Unnamed: 0,Unnamed: 1,Bank Client ID,First Name,Last Name
Customors Group 1,0,1,Nancy,Rob
Customors Group 1,1,2,Alex,Ali
Customors Group 1,2,3,Shep,George
Customors Group 1,3,4,Max,Mitch
Customors Group 1,4,5,Allen,Steve
Customers Group 2,0,6,BIll,Christian
Customers Group 2,1,7,Dina,Mo
Customers Group 2,2,8,Sarah,SXteve
Customers Group 2,3,9,Heather,Bob
Customers Group 2,4,10,Holly,Michelle


In [17]:
bank_all_df.loc[('Customers Group 3'), :]

Unnamed: 0,Bank Client ID,First Name,Last Name
0,11,Cho,Changseo
1,12,Kim,Minhyung
2,13,Jung,SXteve
3,14,Hwang,Bob
4,15,Cheon,Michelle


# 3. DATA MERGING

In [18]:
# Let's concatenate both dataframes #1 and #2
# Note that we now have client IDs from 1 to 10
# Note that by default ignore_index has been set to False meaning indexes from both dataframes are kept unchanged

bank_all_df = pd.concat([bank1_df, bank2_df], ignore_index=True)
bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name
0,1,Nancy,Rob
1,2,Alex,Ali
2,3,Shep,George
3,4,Max,Mitch
4,5,Allen,Steve
5,6,BIll,Christian
6,7,Dina,Mo
7,8,Sarah,SXteve
8,9,Heather,Bob
9,10,Holly,Michelle


In [22]:
# Let's assume we obtained additional information (Annual Salary) about our bank customers
# Note that data obtained is for all clients with IDs 1 to 10

raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            'Annual Salary [$/year]': [25000, 35000, 45000, 48000, 49000, 32000, 33000, 34000, 23000, 22000]
            }


bank_salary_df = pd.DataFrame(raw_data, columns=['Bank Client ID', 'Annual Salary [$/year]'])

bank_salary_df

Unnamed: 0,Bank Client ID,Annual Salary [$/year]
0,1,25000
1,2,35000
2,3,45000
3,4,48000
4,5,49000
5,6,32000
6,7,33000
7,8,34000
8,9,23000
9,10,22000


In [23]:
# Let's merge all data on 'Bank Client ID'

bank_all_df = pd.merge(bank_all_df, bank_salary_df, on='Bank Client ID')
bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name,Annual Salary [$/year]
0,1,Nancy,Rob,25000
1,2,Alex,Ali,35000
2,3,Shep,George,45000
3,4,Max,Mitch,48000
4,5,Allen,Steve,49000
5,6,BIll,Christian,32000
6,7,Dina,Mo,33000
7,8,Sarah,SXteve,34000
8,9,Heather,Bob,23000
9,10,Holly,Michelle,22000


**MINI CHALLENGE #3:**
- **Let's assume that you were able to obtain two new pieces of information about the bank clients such as: (1) credit card debt, (2) age**
- **Define a new DataFrame that contains this new information**
- **Merge this new information to the DataFrame "bank_all_df".**

In [26]:
raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            'Credit card debt': [100, 200, 150, 400, 1000, 250, 700, 900, 530, 1100],
            'Age': [26, 25, 22, 21, 30, 33, 31, 33, 28, 29]}

new_df = pd.DataFrame(data=raw_data, columns=['Bank Client ID', 'Credit card debt', 'Age'])

new_df

Unnamed: 0,Bank Client ID,Credit card debt,Age
0,1,100,26
1,2,200,25
2,3,150,22
3,4,400,21
4,5,1000,30
5,6,250,33
6,7,700,31
7,8,900,33
8,9,530,28
9,10,1100,29


In [28]:
bank_all_df = pd.merge(bank_all_df, new_df, on='Bank Client ID')

bank_all_df

Unnamed: 0,Bank Client ID,First Name,Last Name,Annual Salary [$/year],Credit card debt,Age
0,1,Nancy,Rob,25000,100,26
1,2,Alex,Ali,35000,200,25
2,3,Shep,George,45000,150,22
3,4,Max,Mitch,48000,400,21
4,5,Allen,Steve,49000,1000,30
5,6,BIll,Christian,32000,250,33
6,7,Dina,Mo,33000,700,31
7,8,Sarah,SXteve,34000,900,33
8,9,Heather,Bob,23000,530,28
9,10,Holly,Michelle,22000,1100,29


# MINI CHALLENGES SOLUTIONS

**MINI CHALLENGE #1 SOLUTION:**
- **Assume that you and your significant other become a new client at the bank and would like to add your first names, last names and unique client IDs. Define a new DataFrame and add it to the master list "bank_all_df"**


In [None]:
new_data = {'Bank Client ID': ['11', '12'],
            'First Name': ['Justin', 'Sophie'],
            'Last Name': ['Trudeau', 'Trudeau']}

bank3_df = pd.DataFrame(new_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])
bank3_df

In [None]:
# Let's concatenate both dataframes #1 and #2
# Note that by setting ignore_index = True, the index has been automatically set to numeric and now ranges from 1 to 9
bank_all_df = pd.concat([bank_all_df, bank3_df], ignore_index = True)
bank_all_df

**MINI CHALLENGE #2 SOLUTION:**
- **Assume that you and your significant other belong to Customers Group #3. Use multindexing to add both names to the master list. Write a line of code to access Group #3 only.**

In [None]:
bank1_df

In [None]:
bank2_df

In [None]:
new_data = {'Bank Client ID': ['11', '12'],
            'First Name': ['Justin', 'Sophie'],
            'Last Name': ['Trudeau', 'Trudeau']}

bank3_df = pd.DataFrame(new_data, columns = ['Bank Client ID', 'First Name', 'Last Name'])
bank3_df

In [None]:
# We can perform concatenation and also use multi-indexing dataframe as follows:
bank_all_df = pd.concat([bank1_df, bank2_df, bank3_df], keys = ["Customers Group 1", "Customers Group 2",  "Customers Group 3"])
bank_all_df

In [None]:
# You can access elements using multi-indexing as follows
bank_all_df.loc[("Customers Group 3"), :]

**MINI CHALLENGE #3 SOLUTION:**
- **Let's assume that you were able to obtain two new pieces of information about the bank clients such as: (1) credit card debt, (2) age**
- **Define a new DataFrame that contains this new information**
- **Merge this new information to the DataFrame "bank_all_df".**

In [None]:
# Let's assume we obtained additional information (Annual Salary) about our bank customers
# Note that data obtained is for all clients with IDs 1 to 10

raw_data = {
        'Bank Client ID': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
        'Credit Card Debt': [1000, 100, 500, 600, 0, 20, 360, 127, 3000, 2200],
        'Age': [44, 35, 67, 19, 22, 45, 48, 33, 34, 36]}
bank_credit_age_df = pd.DataFrame(raw_data, columns = ['Bank Client ID','Credit Card Debt', 'Age'])
bank_credit_age_df

In [None]:
# Let's merge all data on 'Bank Client ID'
bank_all_df = pd.merge(bank_all_df, bank_credit_age_df, on = 'Bank Client ID')
bank_all_df
