<a href="https://colab.research.google.com/github/cbonnin88/starfield_industries/blob/main/performance_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import polars as pl
import plotly.express as px
import gdown as gd

In [None]:
# Download the Starfield HR export from Google Drive and load it with Polars.
url = 'https://drive.google.com/uc?id=1pAE3Knjo4JSSY4vVi8DwBFkIEU6vlRDT'
csv_path = 'starfield_HR.csv'

# Some gdown releases report a failed download by returning None instead of
# raising; stop here so a stale or missing local file is never read by mistake.
downloaded_path = gd.download(url, csv_path, quiet=True)
if downloaded_path is None:
    raise RuntimeError(f"Download failed for {url}; '{csv_path}' was not refreshed.")

df_starfield = pl.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the load.
df_starfield.head(5)

ID,Name,Surname,Age,Tenure,Hire Date,Gender,Region,Job Title,Department,Manager,Hours,Salary Band,Salary,Performance,Satisfaction
i64,str,str,i64,i64,str,str,str,str,str,str,i64,str,i64,str,f64
4045,"""Gwendolyn""","""Turner""",18,0,"""2025-05-07""","""Female""","""ile-de-France""","""Automation Engineer""","""Engineering""","""no""",8,"""T5""",13600,"""Average""",3.0
5780,"""Jason""","""Peterson""",20,1,"""2024-09-01""","""Male""","""ile-de-France""","""Release Engineer""","""Engineering""","""no""",8,"""T5""",14688,"""Average""",5.0
9231,"""Max""","""Hopkins""",19,1,"""2024-02-03""","""Male""","""ile-de-France""","""Support Engineer""","""Engineering""","""no""",8,"""T5""",14786,"""Average""",3.0
11163,"""Vanesa""","""Saldaña""",18,0,"""2025-06-17""","""Male""","""ile-de-France""","""Quantum Engineer""","""Engineering""","""no""",8,"""T5""",15439,"""Average""",0.0
7851,"""Amador""","""Roybal""",54,2,"""2023-02-11""","""Male""","""ile-de-France""","""Financial Planning & Analysis …","""Finance""","""no""",8,"""T5""",22427,"""Average""",5.0


In [None]:
# Normalise column headers: lower-case everything, turn spaces and slashes
# into underscores, and drop periods.
original_column = df_starfield.columns
header_translation = str.maketrans({' ': '_', '/': '_', '.': None})
new_columns = [header.lower().translate(header_translation) for header in original_column]
df_starfield = df_starfield.rename(dict(zip(original_column, new_columns)))

df_starfield.columns

['id',
 'name',
 'surname',
 'age',
 'tenure',
 'hire_date',
 'gender',
 'region',
 'job_title',
 'department',
 'manager',
 'hours',
 'salary_band',
 'salary',
 'performance',
 'satisfaction']

In [None]:
# Convert 'hire_date' from text to a proper Date column.
# The dtype guard makes the cell safe to re-run: once the column is already a
# Date the conversion is skipped.
if 'hire_date' in df_starfield.columns and df_starfield['hire_date'].dtype == pl.Utf8:
    nulls_before = df_starfield['hire_date'].null_count()
    try:
        df_converted = df_starfield.with_columns(
            pl.col('hire_date').str.to_date('%Y-%m-%d', strict=False)
        )
    except Exception as e:
        print(f"Could not convert 'hire_date' to Date: {e}")
    else:
        df_starfield = df_converted
        print("\nConverted 'hire_date' to Date type.")
        # strict=False turns values that do not match the format into nulls
        # instead of raising, so surface how many values were lost that way.
        unparsed = df_starfield['hire_date'].null_count() - nulls_before
        if unparsed > 0:
            print(
                f"Warning: {unparsed} 'hire_date' value(s) did not match "
                "'%Y-%m-%d' and were set to null."
            )


df_starfield.dtypes


Converted 'hire_date' to Date type.


[Int64,
 String,
 String,
 Int64,
 Int64,
 Date,
 String,
 String,
 String,
 String,
 String,
 Int64,
 String,
 Int64,
 String,
 Float64]

In [None]:
# Head-count per performance rating, ordered alphabetically by rating label.
performance_counts = (
    df_starfield
    .group_by('performance')
    .agg(employee_count=pl.len())
    .sort('performance')
)
performance_counts

performance,employee_count
str,u32
"""Average""",338
"""Bottom""",216
"""Top""",243


In [None]:
# Bar chart of head-count per performance rating, one colour per rating.
rating_axis_labels = {
    'performance': 'Performance Rating',
    'employee_count': 'Number of Employees',
}
fig = px.bar(
    data_frame=performance_counts,
    x='performance',
    y='employee_count',
    color='performance',
    text='employee_count',
    labels=rating_axis_labels,
    title='Distribution of Performance Ratings',
)

fig.show()

# **Performance Ratings by Department**

In [None]:
# Head-count for every (performance rating, department) pair.
# group_by does not guarantee any row order, so sort on both keys: sorting on
# 'performance' alone leaves the department order within each rating free to
# change between runs, which makes the displayed table non-reproducible.
performance_counts_by_department = (
    df_starfield
    .group_by(['performance', 'department'])
    .agg(pl.len().alias('employee_count'))
    .sort(['performance', 'department'])
)
performance_counts_by_department

performance,department,employee_count
str,str,u32
"""Average""","""Sales""",62
"""Average""","""Product & Tech""",68
"""Average""","""Finance""",33
"""Average""","""Engineering""",90
"""Average""","""Human Resources""",20
…,…,…
"""Top""","""Leadership""",20
"""Top""","""Finance""",10
"""Top""","""Marketing""",11
"""Top""","""Human Resources""",4


In [None]:
# Stacked bar chart: one bar per department, split by performance rating.
fig = px.bar(
    performance_counts_by_department,
    x='department',
    y='employee_count',
    title='Distribution of Performance Ratings by Department',
    text='employee_count',
    # Label every plotted column; without the 'department' entry the x-axis
    # falls back to the raw column name.
    labels={
        'performance': 'Performance Rating',
        'department': 'Department',
        'employee_count': 'Number of Employees',
    },
    color='performance',
    barmode='stack'
)

fig.show()