In [28]:
import pandas as pd

# Read the DataCo supply-chain CSV (decoded as Latin-1) into the working frame.
DATASET_PATH = "DataCoSupplyChainDataset.csv"
df = pd.read_csv(DATASET_PATH, encoding="latin-1")
# Print column names, non-null counts and dtypes as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180519 entries, 0 to 180518
Data columns (total 53 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   Type                           180519 non-null  object 
 1   Days for shipping (real)       180519 non-null  int64  
 2   Days for shipment (scheduled)  180519 non-null  int64  
 3   Benefit per order              180519 non-null  float64
 4   Sales per customer             180519 non-null  float64
 5   Delivery Status                180519 non-null  object 
 6   Late_delivery_risk             180519 non-null  int64  
 7   Category Id                    180519 non-null  int64  
 8   Category Name                  180519 non-null  object 
 9   Customer City                  180519 non-null  object 
 10  Customer Country               180519 non-null  object 
 11  Customer Email                 180519 non-null  object 
 12  Customer Fname                

In [29]:
# Label both endpoints of every order as "<city>_<country>".
for node_col, city_col, country_col in (
    ("start_node", "Customer City", "Customer Country"),
    ("end_node", "Order City", "Order Country"),
):
    df[node_col] = df[city_col] + "_" + df[country_col]

# Every distinct label seen on either side of an order is a graph node.
nodes = {*df["start_node"].unique(), *df["end_node"].unique()}
nodes

{'Walsall_Reino Unido',
 'Hacienda Heights_EE. UU.',
 'Cumbernauld_Reino Unido',
 'Saint-Louis_Senegal',
 'Gilroy_EE. UU.',
 'Onitsha_Nigeria',
 'Mechanicsburg_EE. UU.',
 'Calamba_Filipinas',
 'Santa Cruz de la Sierra_Bolivia',
 'Chesterfield_Reino Unido',
 'Smyrna_EE. UU.',
 'Chatham_Reino Unido',
 'Ichalkaranji_India',
 'Paulista_Brasil',
 'Bilbao_España',
 'Carson_EE. UU.',
 'Palma Soriano_Cuba',
 'Bangor_Estados Unidos',
 'Balneário Camboriú_Brasil',
 "Donets'k_Ucrania",
 'Reno_Estados Unidos',
 'Carol Stream_EE. UU.',
 'Paris_Francia',
 'Yangzhou_China',
 'Vijayawada_India',
 'Kigoma_Tanzania',
 'Massapequa_EE. UU.',
 'Palermo_Italia',
 'Soledad Díez Gutiérrez_México',
 'Huaihua_China',
 'El Monte_EE. UU.',
 'East London_SudAfrica',
 'Newark_EE. UU.',
 'Warri_Nigeria',
 'Ensenada_México',
 'Langfang_China',
 'Kalyan_India',
 'Lakewood_Estados Unidos',
 'Nice_Francia',
 'Lille_Francia',
 'Alanya_Turquía',
 'Mauá_Brasil',
 'Tanggu_China',
 'Centurion_SudAfrica',
 'Tourcoing_Francia'

In [30]:
# Map every node label to a dense integer id (ids follow the iteration order of `nodes`).
node_to_id = {node: i for i, node in enumerate(nodes)}

from collections import defaultdict

# Undirected adjacency list: node id -> set of neighboring node ids.
graph = defaultdict(set)

# Only distinct (start, end) pairs can contribute new edges, so de-duplicate
# first and iterate plain tuples instead of paying for a Series per row with
# DataFrame.iterrows(). drop_duplicates keeps first occurrences, so ids are
# inserted into `graph` in the same order as a row-by-row pass would.
unique_pairs = df[["start_node", "end_node"]].drop_duplicates()
for start_label, end_label in unique_pairs.itertuples(index=False, name=None):
    start_id = node_to_id[start_label]
    end_id = node_to_id[end_label]
    # Record the edge in both directions.
    graph[start_id].add(end_id)
    graph[end_id].add(start_id)

In [31]:
# Reverse lookup (integer id -> node label), numbered by enumerating `nodes`.
id_to_node = dict(enumerate(nodes))

In [13]:
# Show the ids of all nodes directly connected to "Arbil_Irak".
arbil_id = node_to_id["Arbil_Irak"]
graph[arbil_id]

{519,
 798,
 1127,
 1330,
 1355,
 1507,
 1545,
 1572,
 1625,
 1879,
 2148,
 2406,
 2526,
 2726,
 2937,
 3367,
 3403,
 3869}

In [21]:
from collections import deque


def bfs(graph, start_node, end_node, num_paths=3):
    """Collect up to ``num_paths`` shortest simple paths between two nodes.

    Partial paths are expanded breadth-first, so complete paths are found in
    order of non-decreasing length (every edge counts as length 1).

    Args:
        graph: Adjacency structure, normally a mapping from a node to an
            iterable of its neighbors. It is never modified.
        start_node: Node every path starts from.
        end_node: Node every path must end at.
        num_paths: Maximum number of paths to return.

    Returns:
        A list of ``(path, length)`` tuples, where ``path`` is the list of
        nodes from ``start_node`` to ``end_node`` and ``length`` is its number
        of edges. The list is empty when no path exists or when ``num_paths``
        is not positive.
    """
    shortest_paths = []
    # With the `==` stop condition below, a non-positive limit would never be
    # reached and the search would enumerate every simple path.
    if num_paths <= 0:
        return shortest_paths

    # Prefer .get(): indexing a defaultdict inserts an empty entry for every
    # dead-end node it is asked about, and a plain dict raises KeyError.
    # Objects without .get (e.g. a list of adjacency lists) are indexed as before.
    lookup = getattr(graph, "get", None)

    queue = deque([(start_node, [start_node], 0)])
    while queue:
        current_node, current_path, current_length = queue.popleft()

        if current_node == end_node:
            shortest_paths.append((current_path, current_length))
            if len(shortest_paths) == num_paths:
                break
            continue

        if lookup is not None:
            neighbors = lookup(current_node, ())
        else:
            neighbors = graph[current_node]

        for neighbor in neighbors:
            # Keep paths simple: never revisit a node already on this path.
            if neighbor in current_path:
                continue
            queue.append((neighbor, current_path + [neighbor], current_length + 1))

    return shortest_paths


# Find routes from Venice (Italy) to Arbil (Iraq) and print each one,
# one node label per line, preceded by its length in edges.
start_node = node_to_id["Venice_Italia"]
end_node = node_to_id["Arbil_Irak"]
routes = bfs(graph, start_node, end_node)
for route, path_len in routes:
    print(f"path length: {path_len}")
    print("\n".join(id_to_node[node_id] for node_id in route))
    print("---------")

path length: 2
Venice_Italia
Caguas_Puerto Rico
Arbil_Irak
---------
path length: 2
Venice_Italia
Chicago_EE. UU.
Arbil_Irak
---------
path length: 2
Venice_Italia
Los Angeles_EE. UU.
Arbil_Irak
---------


In [32]:
import joblib
# Persist the adjacency list and both id lookup tables so a later cell can
# restore them with joblib.load.
joblib.dump(graph, "graph.pkl")
joblib.dump(node_to_id, "node2id.pkl")
# Last expression of the cell: its return value (the list of written file
# names) is what the notebook displays.
joblib.dump(id_to_node, "id2node.pkl")

['id2node.pkl']

In [20]:
import joblib
import torch
from SimpleNeuralNet import NeuralNetwork

# NOTE: joblib.load unpickles its input; only load files you trust.


def _load_vocabulary(filename):
    """Load a pickled collection of labels and map each label to its position."""
    labels = joblib.load(filename)
    return dict(zip(labels, range(len(labels))))


# Routing graph plus the two id <-> label lookup tables.
graph, node_to_id, id_to_node = (
    joblib.load(filename)
    for filename in ("graph.pkl", "node2id.pkl", "id2node.pkl")
)

# Fitted scalers: prediction target, (quantity, distance) inputs, and cost.
days_scaler, other_scaler, cost_scaler = (
    joblib.load(filename)
    for filename in (
        "days_pred_scaler.pkl",
        "quantity_distance_scaler.pkl",
        "cost_scaler.pkl",
    )
)

# Label -> one-hot position for each categorical location feature.
origin_cities = _load_vocabulary("Customer City.pkl")
origin_countries = _load_vocabulary("Customer Country.pkl")
destination_cities = _load_vocabulary("Order City.pkl")
destination_countries = _load_vocabulary("Order Country.pkl")
shipping_modes = {
    "Standard Class": 0,
    "First Class": 1,
    "Second Class": 2,
    "Same Day": 3,
}

# Rebuild the network, restore its trained weights and switch to eval mode.
days_model = NeuralNetwork(902, 64)
state_dict = torch.load("days_model.pth")
days_model.load_state_dict(state_dict)
days_model.eval()

NeuralNetwork(
  (fc1): Linear(in_features=902, out_features=64, bias=True)
  (dropout1): Dropout(p=0.15, inplace=False)
  (fc2): Linear(in_features=64, out_features=16, bias=True)
  (dropout2): Dropout(p=0.1, inplace=False)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)

In [25]:
# Example request. Field order:
# quantity, category, origin city/country, destination city/country, mode
# Alternative example, currently disabled:
# raw_input = [1, 12, "Venice", "Italia", "Arbil", "Irak", "Standard Class"]
raw_input = [1, 12, "Venice", "Italia", "San Jose", "EE. UU.", "Standard Class"]

# calculate distance
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# Nominatim geocoder client; geocode_location below reads this module-level name.
geolocator = Nominatim(user_agent="city_distance_calculator")


def geocode_location(city, country):
    """Look up a city/country pair with the module-level ``geolocator``.

    Args:
        city: City name to search for.
        country: Country name to search for.

    Returns:
        The single match returned by ``geolocator.geocode``, or ``None`` when
        nothing matches or the lookup raises an error.
    """
    query = {"city": city, "country": country}
    try:
        # The geocoder's own result is passed through unchanged, including
        # None for "no match".
        return geolocator.geocode(query, exactly_one=True)
    except Exception:
        # Best-effort lookup: service or network errors count as "not found".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate, so a hung lookup can
        # still be interrupted.
        return None


geo_origin = geocode_location(raw_input[2], raw_input[3])
geo_dest = geocode_location(raw_input[4], raw_input[5])

# geocode_location returns None for unknown places and failed lookups; stop
# with a clear message instead of an AttributeError on `.latitude` below.
if geo_origin is None or geo_dest is None:
    raise ValueError(
        f"could not geocode origin {raw_input[2:4]} or destination {raw_input[4:6]}"
    )

# Geodesic distance between the two geocoded points, in kilometers.
start_coords = (geo_origin.latitude, geo_origin.longitude)
end_coords = (geo_dest.latitude, geo_dest.longitude)
distance = geodesic(start_coords, end_coords).kilometers

# normalize quantity and distance
import pandas as pd

processed_input = (
    other_scaler.transform(
        pd.DataFrame({"Order Item Quantity": [raw_input[0]], "Distance": [distance]})
    )
    .flatten()
    .tolist()
)

# one hot: category ids are 1-based and occupy 32 slots
categories = [0] * 32
categories[raw_input[1] - 1] = 1
processed_input += categories

# If the origin is missing from the origin vocabularies, treat the request as
# running in the opposite direction (swaps the fields of raw_input in place).
# NOTE(review): presumably because only some places occur as origins in the
# training data - confirm.
if raw_input[2] not in origin_cities or raw_input[3] not in origin_countries:
    raw_input[2:4], raw_input[4:6] = raw_input[4:6], raw_input[2:4]

# Append one one-hot block per categorical field, in this fixed order:
# origin city, origin country, destination city, destination country, mode.
for vocabulary, value in (
    (origin_cities, raw_input[2]),
    (origin_countries, raw_input[3]),
    (destination_cities, raw_input[4]),
    (destination_countries, raw_input[5]),
    (shipping_modes, raw_input[-1]),
):
    temp = [0] * len(vocabulary)
    temp[vocabulary[value]] = 1
    processed_input += temp

# Run the model and map its output back through days_scaler; as the cell's
# last expression, the result is displayed.
days_scaler.inverse_transform(
    days_model(torch.Tensor(processed_input)).detach().numpy().reshape(-1, 1)
)

array([[2.0976655]], dtype=float32)

In [26]:
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# Nominatim geocoder client; geocode_location reads this module-level name.
geolocator = Nominatim(user_agent="city_distance_calculator")


def geocode_location(city, country):
    """Look up a city/country pair with the module-level ``geolocator``.

    Args:
        city: City name to search for.
        country: Country name to search for.

    Returns:
        The single match returned by ``geolocator.geocode``, or ``None`` when
        nothing matches or the lookup raises an error.
    """
    query = {"city": city, "country": country}
    try:
        # The geocoder's own result is passed through unchanged, including
        # None for "no match".
        return geolocator.geocode(query, exactly_one=True)
    except Exception:
        # Best-effort lookup: service or network errors count as "not found".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate, so a hung lookup can
        # still be interrupted.
        return None

In [52]:
import pandas as pd

def _one_hot(size, position):
    """Return a list of ``size`` zeros with a single 1 at index ``position``."""
    encoded = [0] * size
    encoded[position] = 1
    return encoded


def preprocess_input(raw_input):
    """Turn one raw shipment request into the model's flat feature list.

    Args:
        raw_input: Sequence laid out as ``[quantity, category, origin city,
            origin country, destination city, destination country, ...,
            shipping mode]`` (the mode is read from the last position).
            The sequence is not modified.

    Returns:
        A list holding the scaled quantity and distance followed by one-hot
        blocks for category (32 slots), origin city, origin country,
        destination city, destination country and shipping mode; or ``None``
        when either endpoint cannot be geocoded.

    Raises:
        IndexError: If the category is outside ``1..32``.
        KeyError: If a city, country or shipping mode is missing from its
            vocabulary.
    """
    num_categories = 32
    quantity, category = raw_input[0], raw_input[1]
    # Work on copies so the direction swap below never leaks into the caller's list.
    origin = list(raw_input[2:4])
    destination = list(raw_input[4:6])
    shipping_mode = raw_input[-1]

    geo_origin = geocode_location(origin[0], origin[1])
    geo_dest = geocode_location(destination[0], destination[1])
    if geo_origin is None or geo_dest is None:
        return None

    start_coords = (geo_origin.latitude, geo_origin.longitude)
    end_coords = (geo_dest.latitude, geo_dest.longitude)
    distance = geodesic(start_coords, end_coords).kilometers

    # normalize quantity and distance
    processed_input = (
        other_scaler.transform(
            pd.DataFrame({"Order Item Quantity": [quantity], "Distance": [distance]})
        )
        .flatten()
        .tolist()
    )

    # Category ids are 1-based. Reject 0 and negatives explicitly: negative
    # list indexing would otherwise silently set a different category's slot.
    if not 1 <= category <= num_categories:
        raise IndexError(f"category {category!r} is outside 1..{num_categories}")
    processed_input += _one_hot(num_categories, category - 1)

    # If the origin is missing from the origin vocabularies, encode the trip
    # in the opposite direction.
    # NOTE(review): presumably because only some places occur as origins in
    # the training data - confirm.
    if origin[0] not in origin_cities or origin[1] not in origin_countries:
        origin, destination = destination, origin

    # One one-hot block per categorical field, in this fixed order.
    for vocabulary, value in (
        (origin_cities, origin[0]),
        (origin_countries, origin[1]),
        (destination_cities, destination[0]),
        (destination_countries, destination[1]),
        (shipping_modes, shipping_mode),
    ):
        processed_input += _one_hot(len(vocabulary), vocabulary[value])

    return processed_input

In [37]:
from collections import deque

def bfs(graph, start_node, end_node, num_paths=3):
    """Collect up to ``num_paths`` shortest simple paths between two nodes.

    Partial paths are expanded breadth-first, so complete paths are found in
    order of non-decreasing length (every edge counts as length 1).

    Args:
        graph: Adjacency structure, normally a mapping from a node to an
            iterable of its neighbors. It is never modified.
        start_node: Node every path starts from.
        end_node: Node every path must end at.
        num_paths: Maximum number of paths to return.

    Returns:
        A list of paths, each a list of nodes from ``start_node`` to
        ``end_node``. The list is empty when no path exists or when
        ``num_paths`` is not positive.
    """
    shortest_paths = []
    # With the `==` stop condition below, a non-positive limit would never be
    # reached and the search would enumerate every simple path.
    if num_paths <= 0:
        return shortest_paths

    # Prefer .get(): indexing a defaultdict inserts an empty entry for every
    # dead-end node it is asked about, and a plain dict raises KeyError.
    # Objects without .get (e.g. a list of adjacency lists) are indexed as before.
    lookup = getattr(graph, "get", None)

    queue = deque([[start_node]])
    while queue:
        current_path = queue.popleft()
        current_node = current_path[-1]

        if current_node == end_node:
            shortest_paths.append(current_path)
            if len(shortest_paths) == num_paths:
                break
            continue

        if lookup is not None:
            neighbors = lookup(current_node, ())
        else:
            neighbors = graph[current_node]

        for neighbor in neighbors:
            # Keep paths simple: never revisit a node already on this path.
            if neighbor in current_path:
                continue
            queue.append(current_path + [neighbor])

    return shortest_paths

In [55]:
# Request fields: quantity, category, origin city/country,
# destination city/country, shipping mode.
raw_input = [1, 12, "Venice", "Italia", "Florence", "Italia", "Standard Class"]

# Node labels are "<city>_<country>", so the endpoints are looked up by
# joining the corresponding request fields.
start_node = node_to_id["_".join(raw_input[2:4])]
end_node = node_to_id["_".join(raw_input[4:6])]
routes = bfs(graph, start_node, end_node)
for route in routes:
    total_days = 0
    # Walk the route leg by leg (consecutive node pairs).
    for leg_start, leg_end in zip(route, route[1:]):
        print(f"{id_to_node[leg_start]} to {id_to_node[leg_end]}")
        # Reuse quantity/category/mode from the request, swapping in this
        # leg's endpoints.
        current_input = raw_input[0:2]
        current_input += id_to_node[leg_start].split("_")
        current_input += id_to_node[leg_end].split("_")
        current_input += [raw_input[6]]
        processed_input = preprocess_input(current_input)
        if processed_input is None:
            print("location not found, skipping")
            break
        days = days_scaler.inverse_transform(
            days_model(torch.Tensor(processed_input)).detach().numpy().reshape(-1, 1)
        ).squeeze()
        total_days += days
        print(f"{days} days")
    else:
        # Only report a total when every leg was estimated; a skipped route
        # would otherwise print a misleading zero or partial sum.
        print(f"total: {total_days} days")
    print("---------")

Venice_Italia to Dallas_EE. UU.
1.641310691833496 days
Dallas_EE. UU. to Florence_Italia
1.2860562801361084 days
total: 2.9273669719696045 days
---------
Venice_Italia to Houston_EE. UU.
1.325096845626831 days
Houston_EE. UU. to Florence_Italia
1.1243057250976562 days
total: 2.4494025707244873 days
---------
Venice_Italia to Caguas_Puerto Rico
location not found, skipping
total: 0 days
---------
