# Pandas II
## Handling Missing Data and Data Transformation

### 1. Create a new column called **professor_initials** that stores the initials of each professor's first and last names. Use the following data:


In [1]:
import pandas as pd

# Source records for the exercise, stored column by column: one entry per professor.
data = {
    'professor': [
        'Ludmila Kuncheva',
        'Antonio Torralba',
        'Manuel Gonzalez',
        'Bastian Leibe',
    ],
    'department': [
        'Computer Science',
        'Computer Vision',
        'AI & Robotics',
        'Autonomous Systems',
    ],
    'age': [45, 50, 47, 38],
}

# Each dictionary key becomes a column; rows get the default integer index.
df = pd.DataFrame.from_dict(data)

In [2]:
def _initials(full_name):
    """Return the first character of every whitespace-separated word in ``full_name``, concatenated."""
    return ''.join(word[0] for word in full_name.split())


# Derive the 'professor_initials' column from 'professor', one name at a time
df['professor_initials'] = df['professor'].apply(_initials)

# Display
print("DataFrame with 'professor_initials' Column:")
df

DataFrame with 'professor_initials' Column:


Unnamed: 0,professor,department,age,professor_initials
0,Ludmila Kuncheva,Computer Science,45,LK
1,Antonio Torralba,Computer Vision,50,AT
2,Manuel Gonzalez,AI & Robotics,47,MG
3,Bastian Leibe,Autonomous Systems,38,BL


### 2. Given the DataFrame below, use **join** to combine this new DataFrame with the original one based on the professor column.

In [3]:
# Course taught by each professor, stored column by column.
courses_data = {
    'professor': [
        'Ludmila Kuncheva',
        'Antonio Torralba',
        'Manuel Gonzalez',
        'Bastian Leibe',
    ],
    'courses': [
        'Machine Learning',
        'Computer Vision',
        'AI Programming',
        'Self-Driving Cars',
    ],
}

# Each dictionary key becomes a column; rows get the default integer index.
df_courses = pd.DataFrame.from_dict(courses_data)

In [4]:
# Index both frames by 'professor', join them on that shared index, and then
# turn the index back into an ordinary 'professor' column
df_combined = (
    df.set_index('professor')
    .join(df_courses.set_index('professor'))
    .reset_index()
)

# Display
print("Combined DataFrame:")
df_combined

Combined DataFrame:


Unnamed: 0,professor,department,age,professor_initials,courses
0,Ludmila Kuncheva,Computer Science,45,LK,Machine Learning
1,Antonio Torralba,Computer Vision,50,AT,Computer Vision
2,Manuel Gonzalez,AI & Robotics,47,MG,AI Programming
3,Bastian Leibe,Autonomous Systems,38,BL,Self-Driving Cars


### 3. Combine the original df and df_courses DataFrames using **merge** on the professor column.

In [5]:
# Rebuild both frames from their raw dictionaries, so that df holds only the
# three original columns (without the 'professor_initials' column added earlier).
data = {
    'professor': [
        'Ludmila Kuncheva',
        'Antonio Torralba',
        'Manuel Gonzalez',
        'Bastian Leibe',
    ],
    'department': [
        'Computer Science',
        'Computer Vision',
        'AI & Robotics',
        'Autonomous Systems',
    ],
    'age': [45, 50, 47, 38],
}
df = pd.DataFrame.from_dict(data)

courses_data = {
    'professor': [
        'Ludmila Kuncheva',
        'Antonio Torralba',
        'Manuel Gonzalez',
        'Bastian Leibe',
    ],
    'courses': [
        'Machine Learning',
        'Computer Vision',
        'AI Programming',
        'Self-Driving Cars',
    ],
}
df_courses = pd.DataFrame.from_dict(courses_data)

In [6]:
# Match rows of df and df_courses on their shared 'professor' column
# (an inner merge, which is also the pandas default)
df_combined = df.merge(df_courses, on='professor', how='inner')

# Display
print("Combined DataFrame (Merged on 'professor'):")
df_combined

Combined DataFrame (Merged on 'professor'):


Unnamed: 0,professor,department,age,courses
0,Ludmila Kuncheva,Computer Science,45,Machine Learning
1,Antonio Torralba,Computer Vision,50,Computer Vision
2,Manuel Gonzalez,AI & Robotics,47,AI Programming
3,Bastian Leibe,Autonomous Systems,38,Self-Driving Cars


### 4. Using the professor column, create a new column professor_last_name by extracting the last name of each professor with string operations.

In [7]:
# Split each full name on whitespace and keep the final piece as the last name
df_combined['professor_last_name'] = (
    df_combined['professor']
    .str.split()
    .str.get(-1)
)

# Display
df_combined

Unnamed: 0,professor,department,age,courses,professor_last_name
0,Ludmila Kuncheva,Computer Science,45,Machine Learning,Kuncheva
1,Antonio Torralba,Computer Vision,50,Computer Vision,Torralba
2,Manuel Gonzalez,AI & Robotics,47,AI Programming,Gonzalez
3,Bastian Leibe,Autonomous Systems,38,Self-Driving Cars,Leibe
