In [4]:
import pandas as pd

df = pd.read_csv("DataCoSupplyChainDataset.csv", encoding="latin-1")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180519 entries, 0 to 180518
Data columns (total 53 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   Type                           180519 non-null  object 
 1   Days for shipping (real)       180519 non-null  int64  
 2   Days for shipment (scheduled)  180519 non-null  int64  
 3   Benefit per order              180519 non-null  float64
 4   Sales per customer             180519 non-null  float64
 5   Delivery Status                180519 non-null  object 
 6   Late_delivery_risk             180519 non-null  int64  
 7   Category Id                    180519 non-null  int64  
 8   Category Name                  180519 non-null  object 
 9   Customer City                  180519 non-null  object 
 10  Customer Country               180519 non-null  object 
 11  Customer Email                 180519 non-null  object 
 12  Customer Fname                

In [9]:
df["start_node"] = df["Customer City"] + "_" + df["Customer Country"]
df["end_node"] = df["Order City"] + "_" + df["Order Country"]
nodes = set(df["start_node"].unique()).union(df["end_node"].unique())
nodes

{'Gujranwala_Pakistán',
 'Cambridge_Reino Unido',
 'Vineland_Estados Unidos',
 'Solapur_India',
 'San Cristóbal_Venezuela',
 'Mantes-la-Ville_Francia',
 'Chililabombwe_Zambia',
 'Santa Fe_Estados Unidos',
 'Oxnard_EE. UU.',
 'Venice_Italia',
 'Brentwood_EE. UU.',
 'Arbil_Irak',
 'Palma Soriano_Cuba',
 'Blenheim_Nueva Zelanda',
 'Kolhapur_India',
 'Hillsboro_Estados Unidos',
 'Rangún_Myanmar (Birmania)',
 'Waterlooville_Reino Unido',
 'Ponteareas_España',
 'Bayamon_Puerto Rico',
 'Warsaw_Polonia',
 'Canton_EE. UU.',
 'Tsukuba_Japón',
 'São Gonçalo_Brasil',
 'Vanves_Francia',
 'Caracas_Venezuela',
 'Elx_España',
 'Kabwe_Zambia',
 'Utica_Estados Unidos',
 'Itapecerica da Serra_Brasil',
 'Martinsburg_EE. UU.',
 'La Crosse_Estados Unidos',
 'Delft_Países Bajos',
 'Botosani_Rumania',
 'Taubaté_Brasil',
 'Bondy_Francia',
 'Jember_Indonesia',
 'Bafra_Turquía',
 'Forster_Australia',
 'Yushu_China',
 'Lianyuan_China',
 'Frankfort_EE. UU.',
 'Henrico_EE. UU.',
 'Granada_Nicaragua',
 'Saint-Gratie

In [12]:
# Create a dictionary to map node identifiers to node IDs
node_to_id = {node: i for i, node in enumerate(nodes)}

from collections import defaultdict
# Initialize an adjacency list for the graph
graph = defaultdict(set)

# Create edges between nodes
for index, row in df.iterrows():
    start_node = row["start_node"]
    end_node = row["end_node"]
    graph[node_to_id[start_node]].add(node_to_id[end_node])
    graph[node_to_id[end_node]].add(node_to_id[start_node])

In [14]:
id_to_node = {i: node for i, node in enumerate(nodes)}

In [13]:
graph[node_to_id["Arbil_Irak"]]

{519,
 798,
 1127,
 1330,
 1355,
 1507,
 1545,
 1572,
 1625,
 1879,
 2148,
 2406,
 2526,
 2726,
 2937,
 3367,
 3403,
 3869}

In [21]:
from collections import deque


def bfs(graph, start_node, end_node, num_paths=3):
    queue = deque([(start_node, [start_node], 0)])
    shortest_paths = []

    while queue:
        current_node, current_path, current_length = queue.popleft()

        if current_node == end_node:
            shortest_paths.append((current_path, current_length))
            if len(shortest_paths) == num_paths:
                break
            continue
        
        for neighbor in graph[current_node]:
            if neighbor in current_path:
                continue
            new_path = current_path + [neighbor]
            queue.append((neighbor, new_path, current_length + 1))

    return shortest_paths


# Perform BFS starting from a specific node
start_node = node_to_id["Venice_Italia"]
end_node = node_to_id["Arbil_Irak"]
routes = bfs(graph, start_node, end_node)
for route, path_len in routes:
    print(f"path length: {path_len}")
    for node in route:
        print(id_to_node[node])
    print("---------")

path length: 2
Venice_Italia
Caguas_Puerto Rico
Arbil_Irak
---------
path length: 2
Venice_Italia
Chicago_EE. UU.
Arbil_Irak
---------
path length: 2
Venice_Italia
Los Angeles_EE. UU.
Arbil_Irak
---------


In [22]:
import joblib
joblib.dump(graph, "graph.pkl")

['graph.pkl']