In [1]:
from classes import *
from helper_functions import *
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime, time
from datetime import date
import random
random.seed(2022)

# for clustering
from sklearn import metrics
from sklearn.cluster import DBSCAN, OPTICS, KMeans
from scipy.spatial.distance import pdist, squareform
import scipy

## for plotting
import matplotlib.pyplot as plt
import seaborn as sns
import folium

# for configurations
from dotenv import load_dotenv
import os, re
load_dotenv()

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the factory location from the environment (load_dotenv() in the imports
# cell makes .env entries visible here), then load the job list from CSV.
factory_coord = os.environ.get("FACTORY_GEO_COORD")
df = pd.read_csv('data/test02.csv')

In [3]:
# Parse the deadline column as day-first dates, then order the jobs so the
# earliest deadline is processed first. Only the deadline is used as sort key;
# distance to the customer is not part of the ordering.
deadline_col = 'est_installation_date'
df[deadline_col] = pd.to_datetime(df[deadline_col], dayfirst=True)
df = df.sort_values(by=deadline_col, ascending=True, ignore_index=True)

In [4]:
# Keep only the jobs that have a coordinate. The explicit .copy() makes df1 an
# independent frame, so the column assignments below do not write into a
# filtered view of df (which triggers SettingWithCopyWarning).
df1 = df[df["job_geo_coordinate"].notnull()].copy()

# "lat,long" text -> two separate columns; the values stay strings here
df1[['lat','long']] = df1["job_geo_coordinate"].str.split(',',expand=True)

# Move the two new columns (currently last) to sit right after the first two columns
cols=list(df1.columns)
cols=cols[:2]+cols[-2:]+cols[2:-2]
df1=df1[cols]

In [5]:
# Use the snake_case column name on the clustering frame (df itself is untouched)
df1 = df1.rename(columns={'INSTALLER_IDs': 'installer_ids'})

In [6]:
# CLUSTERING JOBS INTO 20 AREAS
k = 20
# random_state pins the k-means++ initialisation so cluster labels are the same
# on every Restart & Run All (random.seed() does not seed scikit-learn).
model = KMeans(n_clusters=k, init='k-means++', random_state=2022)
# lat/long are stored as strings in df1; cluster on their numeric values
X = df1[["lat","long"]].astype(float)
## clustering
df1_X = X.copy()
df1_X["cluster"] = model.fit_predict(X)
## find real centroids: for every fitted cluster centre, the position (in X) of
## the job that lies closest to it
closest, distances = scipy.cluster.vq.vq(model.cluster_centers_, X.values)
df1_X["centroid"] = 0
# `closest` holds row positions, so flag them with a single positional write.
# (The previous df1_X["centroid"].iloc[i] = 1 was chained assignment, which
# does not update df1_X when pandas copy-on-write is active.)
df1_X.iloc[closest, df1_X.columns.get_loc("centroid")] = 1
## add clustering info to the original dataset (aligned on the row index)
df1[["cluster","centroid"]] = df1_X[["cluster","centroid"]]

In [7]:
# Visualization: a folium map centred on the mean job coordinate
m = folium.Map(location=[df1.lat.astype(float).mean(), df1.long.astype(float).mean()], zoom_start=9,
               tiles='OpenStreetMap')

# One random hex colour per cluster label. The palette is sized from the labels
# actually present, so changing the number of clusters cannot index past it.
no_of_colors = int(df1["cluster"].max()) + 1
color = ["#" + ''.join(random.choice('0123456789ABCDEF') for _ in range(6))
         for _ in range(no_of_colors)]


def _add_cluster_marker(job, radius):
    """Add one job to the map `m` as a filled circle coloured by its cluster.

    job    -- a row of df1 providing `lat`, `long` and `cluster`
    radius -- circle radius passed to folium.CircleMarker
    """
    cluster_colour = color[job.cluster]
    folium.CircleMarker(
        location=[job.lat, job.long],
        radius=radius,
        popup=job.cluster,
        color=cluster_colour,
        fill=True,
        # keyword was misspelled `fill_colour`, which folium does not recognise
        fill_color=cluster_colour
    ).add_to(m)


# Adding markers for all jobs
for _, job in df1.iterrows():
    _add_cluster_marker(job, radius=2)

# Adding larger markers for centroids, drawn afterwards so they sit on top
for _, job in df1[df1.centroid == 1].iterrows():
    _add_cluster_marker(job, radius=7)


In [8]:
# Table of the jobs flagged as centroids (one row each), ordered by cluster
# label and re-indexed from 0.
is_centroid = df1['centroid'] == 1
df_centroids = (
    df1.loc[is_centroid, ['cluster', 'lat', 'long']]
       .sort_values(by='cluster', ascending=True, ignore_index=True)
)

In [9]:
# Replace the 0/1 centroid flag with the "lat,long" text of the job that
# represents each row's cluster.
# The lookup is keyed by cluster label rather than by row position in
# df_centroids, so a cluster without a centroid row yields NaN instead of
# silently picking up another cluster's coordinates.
centroid_lookup = df_centroids.drop_duplicates(subset='cluster').set_index('cluster')
centroid_coords = centroid_lookup['lat'].astype(str) + ',' + centroid_lookup['long'].astype(str)
# Assigning the whole column at once avoids writing strings cell-by-cell into
# an integer column.
df1['centroid'] = df1['cluster'].map(centroid_coords)

In [12]:
# Display the clustered jobs (cluster label and centroid coordinate per row)
df1

Unnamed: 0,id,job_geo_coordinate,lat,long,installers_required,expected_job_time,est_installation_date,installation_date,arrival_start_time,arrival_end_time,installer_ids,cluster,centroid
0,70903,"-34.9186748,138.5397291",-34.9186748,138.5397291,2,240.0,2023-01-14,,,,,9,"-34.8941852,138.5358478"
1,71899,"-35.5736137,138.5885685",-35.5736137,138.5885685,2,60.0,2023-01-14,,,,,12,"-35.5390725,138.6306281"
2,70822,"-34.8922422,138.613558",-34.8922422,138.613558,2,240.0,2023-01-14,,,,,14,"-34.8922422,138.613558"
3,69750,"-34.9320234,138.6351679",-34.9320234,138.6351679,2,180.0,2023-01-14,,,,,19,"-34.9378933,138.6531826"
4,70307,"-34.7996109,138.7078888",-34.7996109,138.7078888,1,120.0,2023-01-14,,,,,16,"-34.7996109,138.7078888"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,71710,"-34.9833229,138.5839169",-34.9833229,138.5839169,1,60.0,2023-02-05,,,,,2,"-34.9833229,138.5839169"
454,71730,"-34.7967566,138.7060069",-34.7967566,138.7060069,1,60.0,2023-02-05,,,,,16,"-34.7996109,138.7078888"
455,71742,"-34.8447459,138.6959711",-34.8447459,138.6959711,1,,2023-02-05,,,,,16,"-34.7996109,138.7078888"
457,71842,"-34.9274342,138.6020127",-34.9274342,138.6020127,2,240.0,2023-02-05,,,,,14,"-34.8922422,138.613558"


In [11]:
# Render the folium cluster map
m

In [12]:
# df1.to_csv('data/clust.csv',index=False)

In [10]:
# Scheduling starts from the working date that next_working_date() returns for today
today = date.today()
first_date = next_working_date(today)

In [11]:
# Preview the first ten clustered jobs
df1.head(10)

Unnamed: 0,id,job_geo_coordinate,lat,long,installers_required,expected_job_time,est_installation_date,installation_date,arrival_start_time,arrival_end_time,installer_ids,cluster,centroid
0,70903,"-34.9186748,138.5397291",-34.9186748,138.5397291,2,240.0,2023-01-14,,,,,5,"-34.87802,138.5587667"
1,71899,"-35.5736137,138.5885685",-35.5736137,138.5885685,2,60.0,2023-01-14,,,,,3,"-35.5390725,138.6306281"
2,70822,"-34.8922422,138.613558",-34.8922422,138.613558,2,240.0,2023-01-14,,,,,17,"-34.8671483,138.6286293"
3,69750,"-34.9320234,138.6351679",-34.9320234,138.6351679,2,180.0,2023-01-14,,,,,4,"-34.9320234,138.6351679"
4,70307,"-34.7996109,138.7078888",-34.7996109,138.7078888,1,120.0,2023-01-14,,,,,0,"-34.7996109,138.7078888"
5,70199,"-34.9442133,138.6698709",-34.9442133,138.6698709,1,90.0,2023-01-14,,,,,4,"-34.9320234,138.6351679"
6,71451,"-34.7705843,138.7344186",-34.7705843,138.7344186,1,60.0,2023-01-14,,,,,0,"-34.7996109,138.7078888"
7,71516,"-34.9225441,138.6440962",-34.9225441,138.6440962,1,240.0,2023-01-14,,,,,4,"-34.9320234,138.6351679"
8,70089,"-34.9592692,138.6526538",-34.9592692,138.6526538,1,60.0,2023-01-14,,,,,4,"-34.9320234,138.6351679"
9,69844,"-34.887275,138.6506171",-34.887275,138.6506171,1,90.0,2023-01-14,,,,,7,"-34.8838335,138.6829783"


In [12]:
# Build the pool of test installers. Each one starts with end_time set to
# 08:00 on first_date, i.e. the time at which they leave the factory.
num_installers = 5
shift_start = datetime.datetime.combine(first_date, datetime.time()) + datetime.timedelta(hours=8)

installers = []
for installer_id in range(num_installers):
    worker = Installer(installer_id)
    worker.end_time = shift_start
    installers.append(worker)

In [None]:
# Greedy scheduling pass over the jobs in df (already ordered by deadline).
# For every job the installers that can arrive earliest are picked, the job
# starts once the last of them has arrived, and the result is written back
# into df.

WORKDAY_MINUTES = 480             # per-installer daily budget of travel + job minutes
WORKDAY_START = datetime.time(8)  # installers leave the factory at 08:00

# The result columns receive date/time/str objects. Cast the ones that already
# exist to object dtype so the per-row writes below never need a dtype upcast.
for result_col in ('installation_date', 'arrival_start_time', 'arrival_end_time', 'INSTALLER_IDs'):
    if result_col in df.columns:
        df[result_col] = df[result_col].astype(object)

for index, row in tqdm(df.iterrows(), total=len(df)):
    try:
        job_coord = row["job_geo_coordinate"]
        job_minutes = row["expected_job_time"]
        crew_size = row["installers_required"]

        # Check first_date < deadline. This is only a warning: the job is still
        # scheduled. (The message used to be built with str + date, which raised
        # TypeError and made the job vanish inside the bare except.)
        # NOTE(review): confirm that late jobs should be scheduled rather than skipped.
        if first_date > row["est_installation_date"].date():
            print(index, f'Unable to process dates before {first_date}')

        # Check if enough installers available; skip before touching any installer state
        if crew_size > num_installers:
            print(index,'Not enough installers available for this job')
            continue

        # A missing duration cannot be scheduled; skip before touching any installer state
        if pd.isna(job_minutes):
            print(index,'Missing expected_job_time, job not scheduled')
            continue

        # Calculate when each installer could reach the job location
        candidates = []
        for installer in installers:
            travel_minutes = get_travel_time(installer.geo_coord, job_coord)
            if installer.time_spent + travel_minutes + job_minutes > WORKDAY_MINUTES:   # Can add drive back time here
                # Shift to next working day
                next_date = next_working_date(installer.end_time.date())
                installer.start_time = None
                installer.end_time = datetime.datetime.combine(next_date, WORKDAY_START)
                installer.reset_location()
                installer.reset_time_spent()
                # reset_location() presumably changes installer.geo_coord, so the
                # travel time is recomputed from the installer's current location
                travel_minutes = get_travel_time(installer.geo_coord, job_coord)

            arrival_time = installer.end_time + datetime.timedelta(minutes=travel_minutes)
            candidates.append((arrival_time, installer, travel_minutes))

        # Installers to send: the crew_size earliest arrivals (stable sort keeps
        # the installer order on ties)
        candidates.sort(key=lambda candidate: candidate[0])
        crew = candidates[:crew_size]

        # The job starts when the last crew member arrives. Midnight today is
        # the fallback start when the crew is empty.
        job_start_time = max(
            (arrival for arrival, _, _ in crew),
            default=datetime.datetime.combine(date.today(), datetime.time()),
        )
        # Computed before any installer is updated, so a bad duration cannot
        # leave the crew half-updated
        job_end_time = job_start_time + datetime.timedelta(minutes=job_minutes)

        # Update installers' variables
        for _, installer, travel_minutes in crew:
            installer.avail = False
            installer.start_time = job_start_time
            installer.end_time = job_end_time
            installer.time_spent += job_minutes + travel_minutes
            installer.geo_coord = job_coord

        df.loc[index,'installation_date'] = job_start_time.date()
        df.loc[index,'arrival_start_time'] = job_start_time.time()
        df.loc[index,'arrival_end_time'] = job_end_time.time()
        df.loc[index,'INSTALLER_IDs'] = ','.join(str(installer.id) for _, installer, _ in crew)

        # # print(job_start_time,'\t',job_end_time,'\t',installer_ids)
        # if df.loc[index,'installation_date'] > row["est_installation_date"]:
        #     print(index,'Cant accomodate for', row["est_installation_date"])
    except Exception as exc:
        # Best effort: one bad row must not stop the run, but say which row
        # failed and why instead of dropping it silently.
        print(index, f'Job skipped: {exc!r}')
        continue

In [13]:
# How many jobs need each crew size
df1.installers_required.value_counts()

1    107
2     11
Name: installers_required, dtype: int64

In [None]:
# Write df, including the scheduling columns filled in by the loop above, to CSV without the index
df.to_csv('ram_ram.csv',index=False)