In [1]:
import pandas as pd
import numpy as np

try:
    employee_rating = pd.read_csv('employee_rating.csv', sep='|')
    employee_general = pd.read_csv('employee_general.csv', sep='|')
except FileNotFoundError as e:
    print(f"File not found: {e}")
    exit()

print("Employee Rating Data Types:")
print(employee_rating.dtypes)

print("\nEmployee General Data Types:")
print(employee_general.dtypes)

print("\nEmployee Rating Unique Values:")
for col in employee_rating.select_dtypes(include=['object']).columns:
    print(f"{col}: {employee_rating[col].unique()}")

print("\nEmployee General Unique Values:")
for col in employee_general.select_dtypes(include=['object']).columns:
    print(f"{col}: {employee_general[col].unique()}")

if 'MonthlyIncome' in employee_rating.columns:
    employee_rating['log_income'] = np.log(employee_rating['MonthlyIncome'] + 1)
else:
    print("Column 'MonthlyIncome' not found in employee_rating DataFrame.")

def map_categories(df, column, mapping):
    if column in df.columns:
        if df[column].dtype == 'object':
            df[column] = df[column].map(mapping).astype(pd.Int64Dtype())
        else:
            print(f"Column '{column}' is not of type 'object'.")
    else:
        print(f"Column '{column}' not found in DataFrame.")

mapping_job_role = {'Manager': 1, 'Engineer': 2, 'Analyst': 3}
map_categories(employee_rating, 'JobRole', mapping_job_role)

mapping_department = {'Sales': 1, 'HR': 2, 'IT': 3}
map_categories(employee_general, 'Department', mapping_department)

print("\nEmployee Rating Columns:")
print(employee_rating.columns)
print("\nEmployee General Columns:")
print(employee_general.columns)

if 'Employee ID' in employee_rating.columns and 'EmployeeNumber' in employee_general.columns:
    print("\nSample data for 'Employee ID':")
    print(employee_rating['Employee ID'].head())

    print("\nSample data for 'EmployeeNumber':")
    print(employee_general['EmployeeNumber'].head())

    merged_df = pd.merge(employee_rating, employee_general, left_on='Employee ID', right_on='EmployeeNumber', how='inner')

    print("\nMerged DataFrame:")
    print(merged_df.head())
else:
    print("Columns 'Employee ID' or 'EmployeeNumber' not found. Please check the column names.")

Employee Rating Data Types:
EmployeeNumber,Department,EnvironmentSatisfaction,JobRole,PerformanceRating,YearsAtCompany,YearsInCurrentRole,MonthlyIncome    object
dtype: object

Employee General Data Types:
EmployeeNumber     int64
Education         object
EducationField    object
MaritalStatus     object
dtype: object

Employee Rating Unique Values:
EmployeeNumber,Department,EnvironmentSatisfaction,JobRole,PerformanceRating,YearsAtCompany,YearsInCurrentRole,MonthlyIncome: ['1,Sales,Medium,Sales Executive,Excellent,6,4,5993'
 '2,Research & Development,High,Research Scientist,Outstanding,10,7,5130'
 '4,Research & Development,Very High,Laboratory Technician,Excellent,0,0,2090'
 ...
 '2064,Research & Development,Medium,Manufacturing Director,Outstanding,6,2,6142'
 '2065,Sales,Very High,Sales Executive,Excellent,9,6,5390'
 '2068,Research & Development,Medium,Laboratory Technician,Excellent,4,3,4404']

Employee General Unique Values:
Education: ['College' 'Below College' 'Master' 'Bachelor' 