# Mapping Departments and Seniority
-------------------

> <i>Description: 
In this notebook, we categorize Departments and Seniority levels according to the classifications outlined in the slides. Department categorization was done using specific keywords. For the Kununu dataset, categorizing Seniority was straightforward due to well-defined labels. However, for the Glassdoor dataset, we utilized the GPT-4O mini model API to determine the appropriate seniority group for each position title.</i>


Input Files: 
1) Glassdoor_reviews_gathered.csv
2) Kununu_reviews_gathered.csv



Output:
1) departments_seniority.csv

In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from openai import OpenAI
import pandas as pd

### kununu


In [3]:
df1 = pd.read_csv("../Initial_data/Kununu_reviews_gathered.csv")

In [4]:
print(df1['department'].unique())

['procurement' 'logistic' 'operations' 'sales' 'controlling' 'legal' nan
 'recruiting' 'it' 'administration' 'other' 'design' 'product'
 'communication' 'research' 'management']


In [7]:
def count_job_category(df1):
    return df1['department'].value_counts()

counts = count_job_category(df1)

print(counts)

sales             89
it                77
other             58
recruiting        44
design            40
procurement       35
administration    35
logistic          33
product           32
operations        18
controlling       17
communication     17
research          13
legal             11
management         3
Name: department, dtype: int64


In [8]:
# Mapping departments
def map_department_to_function(department):
    corporate_departments = [
        'procurement', 'controlling', 'legal', 'recruiting', 'it', 'administration', 
        'design', 'product', 'communication', 'research', 'management'
    ]
    retail_departments = ['sales']
    logistics_departments = ['logistic', 'operations']
    
    if department in corporate_departments:
        return 'Corporate'
    elif department in retail_departments:
        return 'Retail'
    elif department in logistics_departments:
        return 'Logistics'
    else:
        return 'Other'  

df1['business_function_department'] = df1['department'].apply(map_department_to_function)

print(df1[['department', 'business_function_department']])

      department business_function_department
0    procurement                    Corporate
1       logistic                    Logistics
2     operations                    Logistics
3          sales                       Retail
4    controlling                    Corporate
..           ...                          ...
602        sales                       Retail
603           it                    Corporate
604        other                        Other
605          NaN                        Other
606          NaN                        Other

[607 rows x 2 columns]


In [10]:
def count_job_category(df):
    return df1['business_function_department'].value_counts()

business_function_department_counts = count_job_category(df1)

print(business_function_department_counts)

Corporate    324
Other        143
Retail        89
Logistics     51
Name: business_function_department, dtype: int64


In [11]:
print(df1['position'].unique())

['employee' 'apprentice' 'contractor' 'manager' 'student' nan 'intern'
 'freelancer']


In [40]:
# mapping positions
def position_indexer(pos):
     if pos == 'intern' or pos == 'student' or pos == 'apprentice':
          return 'Junior'
     elif pos == 'employee':
          return 'employee'
     elif pos == 'manager':
          return 'manager'
     else:
          return 'other'
     
df1['cleaned_position'] = df1['position'].apply(position_indexer)
df1['cleaned_position'].value_counts(dropna=False)

employee    352
manager     123
Junior       82
other        50
Name: cleaned_position, dtype: int64

### glassdoor

In [13]:
df2 = pd.read_csv("../Initial_data/Glassdoor_reviews_gathered.csv")

In [14]:
# Define expanded keyword lists for each category
corporate_keywords = [
    # English Keywords (Corporate)
    "Manager", "Director", "Vice President", "President", "CEO", "CFO", "COO", "Chief", 
    "Executive", "Global", "Corporate", "Business Analyst", "Financial Analyst", 
    "Business Partner", "Business Development", "Strategy", "Strategic", "Marketing", 
    "Communications", "Human Resources", "HR", "Legal", "Compliance", "Procurement", 
    "Sustainability", "Product Manager", "Product Owner", "Project Manager", "Consultant", 
    "Business Operations", "Content Production", "Graphic Design", "UX/UI", "SEO", 
    "Performance Marketing", "Creative", "Digital", "Brand", "Social Media", "Event Manager", 
    "Franchise Manager", "Talent Acquisition", "Learning & Development", "eCommerce", 
    "Customer Experience", "CRM", "Training", "Planning", "Buyer", "Data Analyst", 
    "Risk Management", "Corporate Affairs", "Merchandising", "Finance", "Tax", "Audit", 
    "Real Estate", "Architecture", "Supply Chain", "Corporate", "Marketing", 
    "Administrative Assistant", "Senior Software Developer", "Product Developer", "Designer", 
    "Systems Administrator", "Technical Developer", "Software Engineer", "IT Analytics", 
    "Wholesale Administrative", "Engineer", "Quality Assurance Engineer", "Merchandise Allocator",
    "Business Controller", "Customer Care Specialist", "Senior Visual Merchandiser", 
    "Fashion Designer", "Assistant Fashion Designer", "SAP Specialist", "Junior Designer", 
    "Designer", "PR Assistant", "Data Scientist", "Financial/Business Controller", 
    "Wholesale Sales Intern", "General Manger", "Controlling", "CTPD", "IT", "Project Manager",
    
    # German Keywords (Corporate)
    "Geschäftsführer", "Bereichsleiter", "Abteilungsleiter", "Manager", "Direktor", 
    "Berater", "Analyst", "Unternehmensstrategie", "Marketing", "Personalwesen", 
    "Rechtsabteilung", "Nachhaltigkeit", "Produktmanager", "Projektleiter", "Kundenerfahrung", 
    "Markenstrategie", "Personalentwicklung", "Finanz", "Rechnungswesen", "Compliance", 
    "Einkauf", "Content-Produktion", "Digital",
    
    # French Keywords (Corporate)
    "Directeur", "Manager", "Responsable", "Consultant", "Chef", "Stratégie", "Analyste", 
    "Marketing", "Ressources Humaines", "Finance", "Audit", "Comptabilité", 
    "Développement des affaires", "Communications", "Juridique", "Conformité", "Achats", 
    "Gestion des risques", "Apprentissage", "Acquisition de talents", "Formation", 
    "Chef de produit", "Gestionnaire de projet", "Production de contenu", "eCommerce", 
    "Expérience client",
    
    # Spanish Keywords (Corporate)
    "Gerente", "Director", "Responsable", "Consultor", "Estrategia", "Analista", 
    "Recursos Humanos", "Cumplimiento", "Marketing", "Finanzas", "Compras", 
    "Desarrollo de Negocios", "Comunicaciones", "Legal", "Producción de Contenidos", 
    "Cadenas de Suministro", "Arquitectura", "Planificación", "Comercio electrónico", 
    "Experiencia del Cliente", "Estrategia de marca"
]

retail_keywords = [
    # English Keywords (Retail)
    "Store Manager", "Retail Manager", "Sales Associate", "Cashier", "Customer Service", 
    "Retail Operations", "Assistant Store Manager", "Department Manager", "Area Manager", 
    "Retail", "Retail Sales", "Salesperson", "Sales Advisor", "Tailor", "Shop Supervisor", 
    "Supervisor", "Floor Manager", "Team Leader", "Retail Sales Assistant", "Sale", 
    "Suit Specialist", "Conseiller Vente", "Conseillere", "Sales Specialist", "Vendeuse", 
    "Sales Representative", "Key Holder", "Visual Merchandiser", "Conseill re De Vente", 
    "Store Assistant", "Salesman", "Sale Assistant", "Conseiller Client", "Wholesale Assistant",
    "Host", "Team Member", "Fashion", "Customer Advisor", "Seller", "Personal Stylist", 
    "Sales Lead", "Top Seller", "Sales Leader", "Retail Sales Assistant", "Cashier", 
    
    # German Keywords (Retail)
    "Filialleiter", "Verkäufer", "Einzelhandel", "Kassierer", "Ladenleiter", 
    "Kundendienst", "Abteilungsleiter", "Verkaufsberater", "Teamleiter", "Ladenaufsicht", 
    "Einzelhandelsmanagement",
    
    # French Keywords (Retail)
    "Responsable de Magasin", "Vendeur", "Caisse", "Assistant Commercial", 
    "Conseiller de Vente", "Superviseur", "Gestion de Magasin", "Service Clients", 
    "Magasinier", "Manager de Magasin", "Responsable de Rayon", "Chef de Magasin", 
    "Commis de Vente",
    
    # Spanish Keywords (Retail)
    "Gerente de Tienda", "Cajero", "Asistente de Ventas", "Vendedor", 
    "Asociado de Ventas", "Supervisor de Tienda", "Jefe de Departamento", 
    "Servicio al Cliente", "Gestión de Ventas al por Menor", "Encargado de tienda"
]

logistics_keywords = [
    # English Keywords (Logistics)
    "Warehouse Manager", "Warehouse Worker", "Logistics", "Logistics Manager", 
    "Supply Chain", "Operations", "Transportation", "Customs", "Logistics Coordinator", 
    "Inventory", "Planner", "Warehouse Supervisor", "Distribution", "Shipping", 
    "Technician", "Transport", "Freight", "Fleet", "Driver", "Logistics Planner", 
    "Operations Manager", "Logistics Specialist", "Logistics Operations", 
    "Warehouse Associate", "Dispatch", "Stockroom Assistant", "Order Picker", 
    "Stock Controller", "Picker", "Production Engineer", "Stock Assistant", 
    "Stock Associate", "Stockiste", "Inventory", "Logistics Planner", 
    
    # German Keywords (Logistics)
    "Lagerleiter", "Lagerarbeiter", "Logistik", "Logistikleiter", "Supply Chain", 
    "Transport", "Zoll", "Logistikkoordinator", "Fracht", "Versand", "Techniker", 
    "Disponent", "Flottenmanager", "Transportplaner", "Lager", "Manutentionnaire", 
    
    # French Keywords (Logistics)
    "Responsable Logistique", "Magasinier", "Transport", "Douanes", 
    "Planificateur Logistique", "Chauffeur", "Technicien Logistique", 
    "Gestionnaire de Stock", "Expédition", "Flotte", "Distribution", 
    "Coordination Logistique", "Stockiste", 
    
    # Spanish Keywords (Logistics)
    "Gerente de Logística", "Almacén", "Técnico en Logística", "Transporte", 
    "Aduanas", "Coordinador de Logística", "Planificador de Logística", 
    "Conductor", "Supervisor de Almacén", "Despacho", "Cadena de Suministro", 
    "Envíos", "Inventario", "Distribución"
]

# Function to map job categories
def map_job_category(job_title):
    if pd.isna(job_title):
        return np.nan  
    job_title_lower = job_title.lower() 
    
    if any(keyword.lower() in job_title_lower for keyword in corporate_keywords):
        return "Corporate"
    
    elif any(keyword.lower() in job_title_lower for keyword in retail_keywords):
        return "Retail"
    
    elif any(keyword.lower() in job_title_lower for keyword in logistics_keywords):
        return "Logistics"
    
    return "Other"

# Apply the function to the 'jobTitle.text' column to create 'business_function_department' column
df2['business_function_department'] = df2['jobTitle.text'].apply(map_job_category)

In [15]:
def count_job_category(df):
    return df2['business_function_department'].value_counts()

business_function_department_counts = count_job_category(df2)

print(business_function_department_counts)

Retail       664
Corporate    513
Other        257
Logistics     38
Name: business_function_department, dtype: int64


In [17]:
# using GPT 4O-Mini to get the position names (managers, Employees, ..)

API_key = "YOUR_API_KEY"
for i in range(len(df2)):
     # Set up your OpenAI API key
     client = OpenAI(api_key=API_key)


     completion = client.chat.completions.create(
     model="gpt-4o-mini",
     messages=[
          {"role": "system", "content": "You are a helpful assistant.you only out put one of the categories based on the job title I give you, nothing else.Categories: manager , employee , intern , student , aprentice , other"},
          {
               "role": "user",
               "content": str(df2['jobTitle.text'][i])
          }
     ]
     )
     df2.loc[i, 'position'] = completion.choices[0].message.content





In [19]:
df2['position'].value_counts(dropna=False)

employee      913
other         413
manager       375
intern        139
student         8
apprentice      4
Name: position, dtype: int64

In [41]:
# cleaning the positions
df2['cleaned_position'] = df2['position'].apply(position_indexer)
df2['cleaned_position'].value_counts(dropna=False)

employee    913
other       413
manager     375
Junior      151
Name: cleaned_position, dtype: int64

In [49]:
# Merging
df1_renamed = df1.rename(columns={
    'uuid': 'reviewId',
    'createdAt': 'reviewDateTime',
})

df1_needed = df1_renamed[['reviewId', 'reviewDateTime', 'cleaned_position', 'business_function_department']]
df2_needed = df2[['reviewId', 'reviewDateTime', 'cleaned_position', 'business_function_department']]

df_merged = pd.concat([df1_needed, df2_needed], axis=0)
df_merged['business_function_department'] = df_merged['business_function_department'].fillna('other')
print(len(df_merged))
print(df_merged.info())
df_merged.head()

2459
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2459 entries, 0 to 1851
Data columns (total 4 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   reviewId                      2459 non-null   object
 1   reviewDateTime                2459 non-null   object
 2   cleaned_position              2459 non-null   object
 3   business_function_department  2459 non-null   object
dtypes: object(4)
memory usage: 96.1+ KB
None


Unnamed: 0,reviewId,reviewDateTime,cleaned_position,business_function_department
0,ca6e64a6-c45e-4b04-9d85-8ff633cbe289,2024-09-21T00:00:00+00:00,employee,Corporate
1,b11b7978-d151-4249-a747-3ba7501e1bad,2024-09-05T00:00:00+00:00,employee,Logistics
2,fe76c408-b3a7-4e8d-be08-4bb67d0868da,2024-08-30T00:00:00+00:00,Junior,Logistics
3,fdd82a74-7524-4567-9844-1e3eed861f8c,2024-08-17T00:00:00+00:00,other,Retail
4,977c452c-f91c-4f2f-ae7d-b26e045eaddb,2024-08-01T00:00:00+00:00,employee,Corporate


In [50]:
df_merged.to_csv('departments_seniority.csv')

### End of Notebook