<a href="https://colab.research.google.com/github/anjalii-s/HR-Analytics-and-Attrition-Prediction-System/blob/main/HR_Analytics_%26_Attrition_Prediction_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## HR ANALYTICS AND ATTRITION PREDICTION



Install the required libraries

In [23]:
!pip install pandas numpy scikit-learn matplotlib seaborn ipywidgets -q

Data Exploration Tab

In [19]:
def create_data_exploration_tab(data_path='HR_IBM_dataset.csv'):
    """
    Create the data exploration tab.

    Builds an "Explore Data" button plus an output area. Every click reloads
    the CSV at ``data_path`` and renders, inside the output area: the dataset
    shape, the first five rows, ``describe()`` statistics, the ``Attrition``
    value counts, and a two-panel figure (attrition bar chart and ``Age``
    histogram).

    Args:
        data_path: Path of the CSV file to explore. Defaults to
            'HR_IBM_dataset.csv', so existing zero-argument calls behave
            as before.

    Returns:
        A ``widgets.VBox`` holding the heading, the instructions, the button
        and the output area.
    """
    explore_button = widgets.Button(description="Explore Data", button_style='info')
    output = widgets.Output()

    def on_explore_click(b):
        """Button callback: reload the dataset and render the overview."""
        with output:
            clear_output()

            # Loading is handled on its own so the "file missing" hint is
            # only printed when reading the file is what actually failed.
            try:
                df = pd.read_csv(data_path)
            except Exception as e:
                print(f"❌ Error loading data: {e}")
                print(f"Please make sure '{data_path}' exists in your current directory.")
                return

            try:
                print("=== Dataset Overview ===")
                print(f"Dataset shape: {df.shape}")
                print("\nFirst 5 rows:")
                display(df.head())

                print("\n=== Basic Statistics ===")
                display(df.describe())

                print("\n=== Attrition Distribution ===")
                attrition_counts = df['Attrition'].value_counts()
                print(attrition_counts)

                # Simple visualization: explicit axes instead of the pyplot
                # state machine, so each panel is addressed by name.
                fig, (ax_attrition, ax_age) = plt.subplots(1, 2, figsize=(12, 5))

                # Reuse the counts computed above rather than recounting.
                attrition_counts.plot(
                    kind='bar', color=['skyblue', 'salmon'], rot=0, ax=ax_attrition
                )
                ax_attrition.set_title('Attrition Distribution')
                ax_attrition.set_xlabel('Attrition')
                ax_attrition.set_ylabel('Count')

                df['Age'].hist(bins=20, color='lightgreen', edgecolor='black', ax=ax_age)
                ax_age.set_title('Age Distribution')
                ax_age.set_xlabel('Age')
                ax_age.set_ylabel('Frequency')

                fig.tight_layout()
                plt.show()

                print("\n✅ Data exploration completed successfully!")

            except Exception as e:
                # Reached for problems after a successful load, e.g. a
                # missing 'Attrition' or 'Age' column, or a plotting failure.
                print(f"❌ Error exploring data: {e}")

    explore_button.on_click(on_explore_click)

    return widgets.VBox([
        widgets.HTML("<h3>📊 Data Exploration</h3>"),
        widgets.HTML("<p>Click below to explore the HR dataset:</p>"),
        explore_button,
        output
    ])

Model Training Tab

In [20]:
def create_model_training_tab(data_path='HR_IBM_dataset.csv'):
    """
    Create the model training tab.

    Builds a "Train Model" button plus an output area. Every click reloads
    the CSV at ``data_path``, verifies that all required columns exist,
    encodes ``Attrition`` as 1 for 'Yes' and 0 otherwise, passes the selected
    columns to ``train_model`` and prints whatever it returns.

    Args:
        data_path: Path of the CSV file to train on. Defaults to
            'HR_IBM_dataset.csv', so existing zero-argument calls behave
            as before.

    Returns:
        A ``widgets.VBox`` holding the heading, the instructions, the button
        and the output area.

    Note:
        ``train_model`` and ``current_model`` are not defined in this cell.
        NOTE(review): ``current_model`` is presumably a notebook-level global
        that ``train_model`` updates on success -- confirm.
    """
    train_button = widgets.Button(description="Train Model", button_style='success')
    output = widgets.Output()

    # Predictors followed by the target. 'Attrition' stays last so that
    # ``features[:-1]`` lists the predictors only.
    features = ['Age', 'MonthlyIncome', 'Department', 'JobRole',
                'OverTime', 'JobSatisfaction', 'YearsAtCompany', 'Attrition']

    def on_train_click(b):
        """Button callback: validate the dataset and train the model."""
        with output:
            clear_output()
            try:
                df = pd.read_csv(data_path)

                # Check which features actually exist in the dataset BEFORE
                # touching any column, so a missing 'Attrition' is reported
                # here instead of surfacing as a bare KeyError below.
                available_features = [col for col in features if col in df.columns]
                missing_features = [col for col in features if col not in df.columns]

                print(f"✅ Available features: {available_features}")
                if missing_features:
                    print(f"⚠️ Missing features: {missing_features}")
                    print("❌ Some required columns not found in dataset.")
                    print(f"📋 All available columns: {list(df.columns)}")
                    return

                df_subset = df[features].copy()

                # Convert Attrition to binary (Yes=1, No=0) on the copy.
                df_subset['Attrition'] = (df_subset['Attrition'] == 'Yes').astype(int)

                print("🔄 Training model... Please wait.")
                result = train_model(df_subset)
                print(result)

                if current_model is not None:
                    print("\n✅ Model trained successfully! You can now use the Prediction tab.")
                    print(f"📝 Features used: {', '.join(features[:-1])}")

            except Exception as e:
                print(f"❌ Error: {e}")
                print("Please make sure the dataset file exists and is accessible.")

    train_button.on_click(on_train_click)

    return widgets.VBox([
        widgets.HTML("<h3>🤖 Model Training</h3>"),
        widgets.HTML("<p>Click below to train the attrition prediction model:</p>"),
        train_button,
        output
    ])

Check Your Dataset Structure

In [21]:

# Sanity-check the dataset: load it, preview three rows, confirm the key
# columns are present, then list every column name.
print("🔍 Checking your dataset structure...")

try:
    df = pd.read_csv('HR_IBM_dataset.csv')
    print(f"✅ Dataset loaded successfully: {df.shape}")
    print("\n📊 First 3 rows:")
    display(df.head(3))

    print("\n🎯 Key columns check:")
    important_cols = ['Attrition', 'Age', 'MonthlyIncome', 'Department', 'JobRole', 'OverTime']
    for col in important_cols:
        if col in df.columns:
            print(f"✅ '{col}' - Found")
        else:
            print(f"❌ '{col}' - Missing")

    print(f"\n📋 All columns ({len(df.columns)} total):")
    for col in df.columns:
        print(f"  - {col}")

except Exception as e:
    # Any failure (unreadable file, parse error, ...) is reported, not raised,
    # so the cell never aborts a full notebook run.
    print(f"❌ Error: {e}")

🔍 Checking your dataset structure...
✅ Dataset loaded successfully: (1470, 35)

📊 First 3 rows:


Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0



🎯 Key columns check:
✅ 'Attrition' - Found
✅ 'Age' - Found
✅ 'MonthlyIncome' - Found
✅ 'Department' - Found
✅ 'JobRole' - Found
✅ 'OverTime' - Found

📋 All columns (35 total):
  - Age
  - Attrition
  - BusinessTravel
  - DailyRate
  - Department
  - DistanceFromHome
  - Education
  - EducationField
  - EmployeeCount
  - EmployeeNumber
  - EnvironmentSatisfaction
  - Gender
  - HourlyRate
  - JobInvolvement
  - JobLevel
  - JobRole
  - JobSatisfaction
  - MaritalStatus
  - MonthlyIncome
  - MonthlyRate
  - NumCompaniesWorked
  - Over18
  - OverTime
  - PercentSalaryHike
  - PerformanceRating
  - RelationshipSatisfaction
  - StandardHours
  - StockOptionLevel
  - TotalWorkingYears
  - TrainingTimesLastYear
  - WorkLifeBalance
  - YearsAtCompany
  - YearsInCurrentRole
  - YearsSinceLastPromotion
  - YearsWithCurrManager


Application launch

In [22]:
# Launch the HR Analytics UI against the fixed dataset file name.

# Clear previous UI first. Clearing after the status lines (the previous
# order) erased them before they could be read.
clear_output()

print("🔄 Launching fixed HR Analytics System...")
print("📁 Using dataset: HR_IBM_dataset.csv")

# Create new UI with fixed functions.
# create_hr_analytics_ui is defined outside this cell.
hr_ui_fixed = create_hr_analytics_ui()

print("=" * 70)
print("=== HR Analytics & Attrition Prediction System ===")
print(" Using HR_IBM_dataset.csv ")
print("=" * 70)

display(hr_ui_fixed)

=== HR Analytics & Attrition Prediction System ===
 Using HR_IBM_dataset.csv 


Tab(children=(VBox(children=(HTML(value='<h3>📊 Data Exploration</h3>'), HTML(value='<p>Click below to explore …