In [11]:
import json

# Load technician profiles
with open('technicians_large.json', 'r') as file:
    technicians = json.load(file)

# Load task descriptions
with open('tasks_large.json', 'r') as file:
    tasks = json.load(file)

# Load historical assignments
with open('historical_assignments.json', 'r') as file:
    historical_assignments = json.load(file)


In [12]:
# Check for and handle missing values
for technician in technicians:
    if not all(k in technician for k in ["id", "name", "capabilities", "experience", "availability", "location", "performance"]):
        print(f"Missing data in technician: {technician}")

for task in tasks:
    if not all(k in task for k in ["id", "description", "location", "priority", "required_skills", "assigned_technician"]):
        print(f"Missing data in task: {task}")


Missing data in task: {'id': 'TS001', 'description': 'Fix the broken sprinkler system in the garden.', 'location': 'Los Angeles', 'priority': 'Medium', 'assigned_technician': 'T063'}
Missing data in task: {'id': 'TS002', 'description': 'Install new hand dryers in the public restrooms.', 'location': 'San Jose', 'priority': 'High', 'assigned_technician': 'T001'}
Missing data in task: {'id': 'TS003', 'description': 'Install new telephone lines in the customer service center.', 'location': 'Phoenix', 'priority': 'High', 'assigned_technician': 'T095'}
Missing data in task: {'id': 'TS004', 'description': 'Install new shelving units in the warehouse.', 'location': 'San Diego', 'priority': 'Low', 'assigned_technician': 'T008'}
Missing data in task: {'id': 'TS005', 'description': 'Install new hand dryers in the public restrooms.', 'location': 'Los Angeles', 'priority': 'Low', 'assigned_technician': 'T046'}
Missing data in task: {'id': 'TS006', 'description': 'Service the air conditioning system

In [13]:
import spacy

nlp = spacy.load("en_core_web_sm")

def preprocess(text):
    doc = nlp(text.lower())
    return " ".join(token.lemma_ for token in doc if not token.is_stop and not token.is_punct)

# Preprocess task descriptions and technician capabilities
for task in tasks:
    task['processed_description'] = preprocess(task['description'])

for technician in technicians:
    technician['processed_capabilities'] = preprocess(technician['capabilities'])


In [19]:
import pandas as pd

# Create DataFrames from JSON data
tasks_df = pd.DataFrame(tasks)
technicians_df = pd.DataFrame(technicians)

# Merge tasks with technicians on 'assigned_technician' and 'id'
combined_df = pd.merge(tasks_df, technicians_df, left_on='assigned_technician', right_on='id', suffixes=('_task', '_tech'), how='left')

# Select relevant columns for the combined dataset
combined_df = combined_df[['id_task', 'processed_description', 'location_task', 'priority', 'name', 'processed_capabilities', 'experience', 'availability', 'location_tech', 'performance','assigned_technician']]

# Rename columns for clarity
combined_df = combined_df.rename(columns={
    'id_task': 'task_id',
    'processed_description': 'task_description',
    'location_task': 'task_location',
    'name': 'technician_name',
    'processed_capabilities': 'technician_capabilities',
    'experience': 'technician_experience',
    'availability': 'technician_availability',
    'location_tech': 'technician_location',
    'assigned_technician': 'technician_id',
})

# Preview combined data
print(combined_df.head())

# Save to a new JSON file if needed
combined_df.to_json('combined_tasks_technicians_preprocessed.json', orient='records', indent=2)


  task_id                                   task_description task_location  \
0   TS001                 fix broken sprinkler system garden   Los Angeles   
1   TS002             install new hand dryer public restroom      San Jose   
2   TS003  install new telephone line customer service ce...       Phoenix   
3   TS004                  install new shelve unit warehouse     San Diego   
4   TS005             install new hand dryer public restroom   Los Angeles   

  priority technician_name                            technician_capabilities  \
0   Medium  Charles Garcia  specialist setup maintenance home brewing equi...   
1     High      Kelly Yang  expert diagnose repair hvac system advanced kn...   
2     High    Matthew Hill  skilled setup maintenance video conferencing s...   
3      Low   Lucas Collins  adept software installation configuration incl...   
4      Low      Eric Patel  proficient setup maintenance home fitness equi...   

   technician_experience technician_availabi

In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Vectorize text data
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(combined_df['task_description'] + " " + combined_df['technician_capabilities'])
# Ensure technician_id is numeric for model training
label_encoder = LabelEncoder()
combined_df['technician_id_encoded'] = label_encoder.fit_transform(combined_df['technician_id'])

y = combined_df['technician_id_encoded']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Evaluate model
accuracy = model.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")


Model accuracy: 0.70


In [35]:
# Predict technicians for new tasks

# Define new task descriptions
new_tasks = [
    "Install a new HVAC system in the warehouse.",
    "Repair the broken air conditioning unit in the lobby.",
    "Fix the leaking water pipe in the basement.",
    "Upgrade the electrical system in the office building.",
    "Install new carpeting in the conference room.",
    "Repair the faulty fire alarm system in the warehouse.",
    "Service the backup generator in the utility room.",
    "Install new LED lighting in the parking lot.",
    "Fix the broken door lock in the storage area.",
    "Inspect and service the ventilation system in the factory."
]

# Predict technicians for new tasks and show suitability percentage
predictions = []
for new_task in new_tasks:
    new_task_preprocessed = preprocess(new_task)
    new_task_vector = vectorizer.transform([new_task_preprocessed])
    predicted_probabilities = model.predict_proba(new_task_vector)
    predicted_technician_encoded = model.predict(new_task_vector)[0]
    predicted_technician_id = label_encoder.inverse_transform([predicted_technician_encoded])[0]
    predicted_technician = technicians_df[technicians_df["id"] == predicted_technician_id].iloc[0]
    suitability_percentage = predicted_probabilities[0][predicted_technician_encoded] * 100
    predictions.append({
        "task_description": new_task,
        "assigned_technician": predicted_technician['name'],
        "suitability_percentage": suitability_percentage,
        "technician capabilities": predicted_technician['capabilities']
    })

# Display predictions
for prediction in predictions:
    print(f"Task: {prediction['task_description']}")
    print(f"Assigned Technician: {prediction['assigned_technician']}")
    print(f"Suitability: {prediction['suitability_percentage']:.2f}%")
    print(f"Technician Capabilities: {prediction['technician capabilities']}")
    print()



Task: Install a new HVAC system in the warehouse.
Assigned Technician: Henry Mcdaniel
Suitability: 0.00%
Technician Capabilities: Adept at installing and maintaining building automation systems, including HVAC, lighting, and security controls.

Task: Repair the broken air conditioning unit in the lobby.
Assigned Technician: Scott Ellis
Suitability: 7.00%
Technician Capabilities: Experienced in flooring installation and repair, specializing in hardwood, laminate, and tile floors.

Task: Fix the leaking water pipe in the basement.
Assigned Technician: Christine Morgan
Suitability: 2.00%
Technician Capabilities: Specialist in water filtration systems, capable of installing and maintaining residential and commercial water purification solutions.

Task: Upgrade the electrical system in the office building.
Assigned Technician: Ebony Graham
Suitability: 2.00%
Technician Capabilities: Experienced in the setup of remote work environments, including network configurations and ergonomic office s