# **HR Analytics Analysis**

In [363]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

In [364]:
df = pd.read_csv('HR_Analytics.csv')
df.head()

Unnamed: 0,EmpID,Age,AgeGroup,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,RM297,18,18-25,Yes,Travel_Rarely,230,Research & Development,3,3,Life Sciences,...,3,80,0,0,2,3,0,0,0,0.0
1,RM302,18,18-25,No,Travel_Rarely,812,Sales,10,3,Medical,...,1,80,0,0,2,3,0,0,0,0.0
2,RM458,18,18-25,Yes,Travel_Frequently,1306,Sales,5,3,Marketing,...,4,80,0,0,3,3,0,0,0,0.0
3,RM728,18,18-25,No,Non-Travel,287,Research & Development,5,2,Life Sciences,...,4,80,0,0,2,3,0,0,0,0.0
4,RM829,18,18-25,Yes,Non-Travel,247,Research & Development,8,1,Medical,...,4,80,0,0,0,3,0,0,0,0.0


In [365]:
df.columns

Index(['EmpID', 'Age', 'AgeGroup', 'Attrition', 'BusinessTravel', 'DailyRate',
       'Department', 'DistanceFromHome', 'Education', 'EducationField',
       'EmployeeCount', 'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender',
       'HourlyRate', 'JobInvolvement', 'JobLevel', 'JobRole',
       'JobSatisfaction', 'MaritalStatus', 'MonthlyIncome', 'SalarySlab',
       'MonthlyRate', 'NumCompaniesWorked', 'Over18', 'OverTime',
       'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction',
       'StandardHours', 'StockOptionLevel', 'TotalWorkingYears',
       'TrainingTimesLastYear', 'WorkLifeBalance', 'YearsAtCompany',
       'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')

In [366]:
null_counts = df.isnull().sum()

columns_with_null = null_counts[null_counts > 0]        

print("Columns with null values and their counts:")
print(columns_with_null)

Columns with null values and their counts:
YearsWithCurrManager    57
dtype: int64


# **Attrition rate among departments of different ages**

In [367]:
fig = go.Figure(data=[go.Scatter3d(
    x=df['Age'],
    y=df['Department'],
    z=df['Attrition'],
    mode='markers',
    hovertemplate='<b>Age</b>: %{x} <br> <b>Department</b>: %{y} <br> <b>Attrition</b>: %{z}',
    marker=dict(
        size=12,
        color=df['Age'],
        colorscale='Viridis',
        opacity=0.8
    )
)])

fig.update_layout(
    title={'text': 'Attrition Rate among departments of different ages', 'font_size': 24, 'font_family': 'Comic Sans MS'},
    scene=dict(
        xaxis=dict(title={'text': 'Age', 'font_size': 18, 'font_family': 'Courier New'}),
        yaxis=dict(title={'text': 'Department', 'font_size': 18, 'font_family': 'Courier New'}),
        zaxis=dict(title={'text': 'Attrition', 'font_size': 18, 'font_family': 'Courier New'}),
        xaxis_tickfont=dict(color='#663300'),
        yaxis_tickfont=dict(color='#663300'),
        zaxis_tickfont=dict(color='#663300'),
        xaxis_showgrid=False,
        yaxis_showgrid=False,
        zaxis_showgrid=False,
    ),
    plot_bgcolor='#F2F2F2',
    paper_bgcolor='#F2F2F2'
)

fig.show()

# **Travel Frequency with Attrition**

In [368]:
fig = go.Figure(
    go.Bar(
        x=df['BusinessTravel'],
        y=df['Attrition'].map({'Yes': 1, 'No': 0}),
        marker_color=['#00698F' if x == 'Travel_Rarely' 
                    else '#FF8C00' if x == 'Travel_Frequently' 
                    else '#8B0A1A' for x in df['BusinessTravel']],
        hovertemplate='<b>Business Travel</b>: %{x} <br><b>Attrition</b>: %{y}'
    )
)

fig.update_layout(
    title={'text': 'Business Travel Frequency with Attrition', 'font_size': 24, 'font_family': 'Comic Sans MS', 'font_color': '#454545'},
    xaxis_title={'text': 'Business Travel', 'font_size': 18, 'font_family': 'Courier New', 'font_color': '#454545'},
    yaxis_title={'text': 'Attrition Rate', 'font_size': 18, 'font_family': 'Lucida Console', 'font_color': '#454545'},
    xaxis_tickfont=dict(color='#663300'),
    yaxis_tickfont=dict(color='#663300'),
    plot_bgcolor='#F2F2F2',
    paper_bgcolor='#F2F2F2'
)
fig.update_layout(
    plot_bgcolor='#F2F2F2',
    paper_bgcolor='#F2F2F2'
)

fig.show()

## **Revenue Loss and Attrition Rate of each Department**

In [369]:
df["RevenueLoss"] = df["MonthlyIncome"] * 12 * df['YearsAtCompany'].mean()

revenue_loss_by_dept = df.groupby("Department")["RevenueLoss"].sum()

attrition_rate_by_dept = df.groupby("Department")["Attrition"].apply(lambda x: (x == "Yes").mean())

fig = go.Figure(
    go.Bar(
        x=revenue_loss_by_dept.index,
        y=revenue_loss_by_dept.values,
        hovertext=attrition_rate_by_dept.values,
        hoverinfo="text+y",
        hovertemplate="<b>Department: %{x}</b><br><b>Attrition Rate:</b> %{hovertext:.2f}<br><b>Revenue Loss:</b> %{y:.2f}",
        marker_color=revenue_loss_by_dept.index.map(lambda x: "#8B9467" if x == "Sales" else "#473C64" if x == "Research & Development" else "#6A5ACD")
))

fig.update_layout(
    yaxis2=dict(
        overlaying="y",
        side="right",
        range=[0, 1]
    )
)

fig.update_layout(
    title={"text": "Revenue Loss and Attrition Rate of each Department", "font_size": 24, "font_family": "TimesNewRoman"},
    xaxis_title={"text": "Department", "font_size": 18, "font_family": "TimesNewRoman"},
    yaxis_title={"text": "Revenue Loss", "font_size": 18, "font_family": "TimesNewRoman"},
    yaxis2_title={"text": "Attrition Rate", "font_size": 18, "font_family": "TimesNewRoman"},
    font_family="TimesNewRoman",
    font_size=16
)
fig.show()

## **Predicting employee attrition based on job satisfaction, job involvement, and performance ratings**

In [370]:
le = LabelEncoder()
df['Attrition'] = le.fit_transform(df['Attrition'])

In [371]:
X = df[['JobSatisfaction', 'JobInvolvement', 'PerformanceRating']]
y = df['Attrition']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [372]:
model = LinearRegression()
model.fit(X_train, y_train)

In [373]:
fig = px.scatter(df, x='JobSatisfaction', y='Attrition', trendline='ols')

fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=list([
                dict(label='Job Involvement',
                    method='update',
                    args=[{'x': 'JobInvolvement', 'y': 'Attrition'}]),
                dict(label='Performance Rating',
                    method='update',
                    args=[{'x': 'PerformanceRating', 'y': 'Attrition'}]),
                dict(label='Job Satisfaction',
                    method='update',
                    args=[{'x': 'JobSatisfaction', 'y': 'Attrition'}])
            ])
        )
    ]
)
fig.update_layout(
    title={
        'text': 'Employee Attrition Prediction',
        'font_size': 24,
        'font_family': 'Arial Black',
    },
    xaxis_title={
        'text': 'Job Satisfaction',
        'font_size': 18,
        'font_family': 'Calibri',
    },
    yaxis_title={
        'text': 'Attrition',
        'font_size': 18,
        'font_family': 'Calibri',
    },
    plot_bgcolor='#F7F7F7',
    paper_bgcolor='#F7F7F7'
)

iplot(fig)

## **Optimal Employee Retention Strategy**

In [374]:
pivot_df = df.pivot_table(index='JobRole', columns='Department', values='YearsAtCompany', aggfunc='mean')

fig = go.Figure(data=go.Heatmap(
    z=pivot_df.values.tolist(),
    x=pivot_df.columns.tolist(),
    y=pivot_df.index.tolist(),
    colorscale='Viridis',
    zmin=0,
    zmax=pivot_df.values.max(),
    hovertemplate="<b>Job Role: %{y}</b><br><b>Department: %{x}</b><br><b>Average Years at Company: %{z:.2f}</b>"
))

fig.update_layout(
    title={
        'text': 'Optimal Employee Retention Strategy',
        'font_size': 24,
        'font_family': 'Playfair Display',
    },
    xaxis_title={
        'text': 'Department',
        'font_size': 18,
        'font_family': 'Merriweather',
    },
    yaxis_title={
        'text': 'Job Role',
        'font_size': 18,
        'font_family': 'Merriweather',
    },
    plot_bgcolor='#F2F2F2',
    paper_bgcolor='#F2F2F2',
    font_family='Open Sans',
    font_color='#333333',
    font_size=14,
    xaxis=dict(
        tickfont=dict(size=12)
    ),
    yaxis=dict(
        tickfont=dict(size=12)
    )
)

fig.show()

## **Employee Productivity by Department and Job Role**

In [375]:
fig = px.treemap(
    df,
    path=['Department', 'JobRole'],
    values='JobInvolvement',
    color='JobSatisfaction',
    color_continuous_scale='RdYlGn',
    hover_data=['JobInvolvement', 'JobSatisfaction'],
    hover_name='JobRole'
)


fig.update_layout(
    title={
        'text': 'Employee Productivity by Department and Job Role',
        'font_size': 20,
        'font_family': 'Arial Black',
    },
    font_family='Open Sans',
    font_size=14,
    plot_bgcolor='#F7F7F7',
    paper_bgcolor='#F7F7F7',
    margin=dict(l=20, r=20, t=50, b=20)
)
fig.update_traces(
    hovertemplate='<b>Department</b>: %{label}<br>'
                '<b>Job Role</b>: %{hovertext}<br>'
                '<b>Job Involvement</b>: %{customdata[0]}')

fig.show()

## **Clustering Analysis of Employee Productivity**

In [376]:
clustering_cols = ['JobInvolvement', 'JobSatisfaction']
kmeans = KMeans(n_clusters=5, n_init='auto')
kmeans.fit(df[clustering_cols])

df['Cluster'] = kmeans.labels_

fig = make_subplots(rows=2, cols=2, subplot_titles=['Scatter Plot', 'Bar Chart', 'Bar Chart', 'Bar Chart'])

fig.add_trace(go.Scatter(x=df['JobInvolvement'], 
                        y=df['JobSatisfaction'], 
                        mode='markers', 
                        marker=dict(color=kmeans.labels_), 
                        name='Cluster', 
                        hovertemplate="<b>Job Involvement: %{x}</b><br><b>Job Satisfaction: %{y}</b><br><b>Cluster: %{marker.color}</b>"), 
                        row=1, 
                        col=1)

fig.add_trace(go.Bar(x=df['Cluster'].unique(), 
                    y=df['Cluster'].value_counts(), 
                    name='Cluster Size', 
                    hovertemplate="<b>Cluster: %{x}</b><br><b>Count: %{y}</b>"), 
                    row=1, 
                    col=2)

fig.add_trace(go.Bar(x=df['Cluster'].unique(), 
                    y=df.groupby('Cluster')['JobInvolvement'].mean(), 
                    name='Average Job Involvement', 
                    hovertemplate="<b>Cluster: %{x}</b><br><b>Average Job Involvement: %{y:.2f}</b>"), 
                    row=2, 
                    col=1)

fig.add_trace(go.Bar(x=df['Cluster'].unique(), 
                    y=df.groupby('Cluster')['JobSatisfaction'].mean(), 
                    name='Average Job Satisfaction', 
                    hovertemplate="<b>Cluster: %{x}</b><br><b>Average Job Satisfaction: %{y:.2f}</b>"), 
                    row=2, 
                    col=2)

fig.update_layout(title='Clustering Analysis', font_size=12)

fig.show()

## **Employee Carrier Progression**

In [377]:
X = df[['EducationField', 'JobRole', 'PerformanceRating']]
y = df['JobLevel']

In [378]:
df[['EducationField', 'JobRole', 'PerformanceRating']]

Unnamed: 0,EducationField,JobRole,PerformanceRating
0,Life Sciences,Laboratory Technician,3
1,Medical,Sales Representative,3
2,Marketing,Sales Representative,3
3,Life Sciences,Research Scientist,3
4,Medical,Laboratory Technician,3
...,...,...,...
1475,Life Sciences,Manager,3
1476,Marketing,Sales Executive,3
1477,Marketing,Sales Executive,3
1478,Marketing,Sales Executive,3


In [379]:
le = LabelEncoder()
df['EducationField'] = le.fit_transform(df['EducationField'])

In [380]:
lee = LabelEncoder()
df['JobRole'] = lee.fit_transform(df['JobRole'])

In [381]:
df[['EducationField', 'JobRole', 'PerformanceRating']]

Unnamed: 0,EducationField,JobRole,PerformanceRating
0,1,2,3
1,3,8,3
2,2,8,3
3,1,6,3
4,3,2,3
...,...,...,...
1475,1,3,3
1476,2,7,3
1477,2,7,3
1478,2,7,3


In [382]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [383]:
model = LinearRegression()
model.fit(X_train, y_train)

ValueError: could not convert string to float: 'Marketing'

In [None]:
fig = make_subplots(rows=2, cols=2, subplot_titles=['Scatter Plot', 'Regression Plot', 'Residual Plot', 'Prediction Plot'])