In [56]:
#Importing Libraries
import ast
import calendar
import os
from datetime import datetime
from io import StringIO

import geopandas as gpd
import pandas as pd
import paramiko
import plotly.express as px
import psycopg2 as psy
import scipy
import scipy.cluster
from dash import Dash, dcc, html, Input, Output
from geopy.distance import great_circle
from IPython.display import FileLink
from scipy.optimize import minimize
from shapely.geometry import MultiPoint, MultiPolygon
from shapely.ops import unary_union
from sshtunnel import SSHTunnelForwarder

In [57]:
#Establishing connection with datawarehouse
def get_conn_postgres(SSH_required,key_path):
    """Open a psycopg2 connection to the 'postgres' database on the warehouse host.

    Parameters
    ----------
    SSH_required : str
        'Yes' connects through an SSH tunnel via the EC2 host; any other
        value connects directly to the database host on port 5432.
    key_path : str
        Path to the private key used for the SSH tunnel. Only read when
        SSH_required == 'Yes'.

    Returns
    -------
    An open psycopg2 connection. The caller is responsible for closing it.

    Notes
    -----
    In the tunnel case the SSHTunnelForwarder object is not returned, so the
    caller has no handle to stop it.
    NOTE(review): consider returning the tunnel as well, or turning this into
    a context manager, so the tunnel can be shut down deterministically.
    """
    DB_HOST='datawarehouse.cdgpvetprks3.ap-south-1.rds.amazonaws.com'
    # SECURITY: the password used to be hard-coded only. It can now be supplied
    # through the DATAWAREHOUSE_PASSWORD environment variable; the literal is
    # kept as a fallback so existing runs keep working, but it should be
    # rotated and removed from the notebook.
    db_password = os.environ.get('DATAWAREHOUSE_PASSWORD', "Simply1234")

    if SSH_required != 'Yes':
        conn = psy.connect(
            host = DB_HOST,
            port = 5432,
            user = 'postgres',
            password= db_password,
            database='postgres')
        print('Connection Made')
        return conn

    SSH_HOST='ec2-15-206-161-154.ap-south-1.compute.amazonaws.com'
    ssh_tunnel= SSHTunnelForwarder(
            (SSH_HOST, 22),
            ssh_username="ec2-user",
            ssh_private_key= key_path,
            ssh_private_key_password= "",
            remote_bind_address=(DB_HOST, 5432),
            # Port 0 lets the OS pick a free local port.
            local_bind_address=('127.0.0.1', 0)
    )
    ssh_tunnel.start()
    # Report the tunnel only once start() has actually succeeded.
    print('Tunnel Started')
    try:
        conn = psy.connect(
            host=ssh_tunnel.local_bind_host,
            port=ssh_tunnel.local_bind_port,
            user='postgres',
            password= db_password,
            database='postgres')
    except Exception:
        # Do not leave a running tunnel behind when the database is unreachable.
        ssh_tunnel.stop()
        raise
    print('Connection Made')
    return conn

In [58]:
#Getting dataframe from datawarehouse
def get_df_from_sql_postgres(SSH_required, query,key_path):   #for getting a dataframe as a result
    """Run `query` against the 'postgres' database and return the result as a DataFrame.

    Parameters
    ----------
    SSH_required : str
        'Yes' routes the connection through an SSH tunnel via the EC2 host;
        any other value connects directly to the database host on port 5432.
    query : str
        SQL text handed to pandas.read_sql.
    key_path : str
        Path to the private key used for the SSH tunnel. Only read when
        SSH_required == 'Yes'.

    Returns
    -------
    pandas.DataFrame with the query result.

    The connection (and the tunnel, when one was started) is always released,
    including when connecting or running the query raises.
    """
    DB_HOST='datawarehouse.cdgpvetprks3.ap-south-1.rds.amazonaws.com'
    # SECURITY: the password used to be hard-coded only. It can now be supplied
    # through the DATAWAREHOUSE_PASSWORD environment variable; the literal is
    # kept as a fallback so existing runs keep working, but it should be
    # rotated and removed from the notebook.
    db_password = os.environ.get('DATAWAREHOUSE_PASSWORD', "Simply1234")

    ssh_tunnel = None
    tunnel_started = False
    conn = None
    try:
        if SSH_required == 'Yes':
            SSH_HOST='ec2-15-206-161-154.ap-south-1.compute.amazonaws.com'
            ssh_tunnel= SSHTunnelForwarder(
                    (SSH_HOST, 22),
                    ssh_username="ec2-user",
                    ssh_private_key= key_path,
                    ssh_private_key_password= "",
                    remote_bind_address=(DB_HOST, 5432),
                    # Port 0 lets the OS pick a free local port.
                    local_bind_address=('127.0.0.1', 0)
            )
            ssh_tunnel.start()
            tunnel_started = True
            db_host = ssh_tunnel.local_bind_host
            db_port = ssh_tunnel.local_bind_port
        else:
            db_host = DB_HOST
            db_port = 5432

        conn = psy.connect(
            host=db_host,
            port=db_port,
            user='postgres',
            password= db_password,
            database='postgres')
        df_results = pd.read_sql(query, conn)
        return df_results
    finally:
        # Release resources in reverse order of acquisition, even on failure.
        if conn is not None:
            conn.close()
        if tunnel_started:
            ssh_tunnel.stop()

In [59]:
# Ops Main order counts per (warehouse, last-mile hub) for Hyperlocal shipments
# picked up and delivered within Bangalore. The date filter keeps orders created
# on or after the first day of the previous calendar month.
SSH_required = 'Yes'
key_path = '/home/rajat/Downloads/tunnel-ssh .cer'
query = (
    "select warehouse_name, last_mile_hub, count(*) as orders "
    "from public.ops_main "
    "where date_trunc('month', created_date) >= date_trunc('month', now() - interval'1 month') "
    "and shipping_partner = 'Hyperlocal' "
    "and shipping_city = 'Bangalore' "
    "and warehouse_city = 'Bangalore' "
    "group by warehouse_name, last_mile_hub;"
)

# Open a standalone connection.
# NOTE(review): `conn` is not passed to the query call below, which opens and
# closes its own connection; confirm whether this one is still needed.
conn = get_conn_postgres(SSH_required, key_path)

# Fetch the aggregated counts into a DataFrame.
df_ops_main = get_df_from_sql_postgres(SSH_required, query, key_path)

# Preview the first rows.
print(df_ops_main.head())

Tunnel Started
Connection Made
                                      warehouse_name last_mile_hub  orders
0  15 Ground Floor, SY No  131 3, Hoskote, Anjane...          BLDR      97
1  15 Ground Floor, SY No  131 3, Hoskote, Anjane...          CMRJ      11
2  15 Ground Floor, SY No  131 3, Hoskote, Anjane...          ECTY      44
3  15 Ground Floor, SY No  131 3, Hoskote, Anjane...          HBBL      45
4  15 Ground Floor, SY No  131 3, Hoskote, Anjane...          JPNR     100


In [60]:
# Client warehouse coordinates for Bangalore.
df_warehouse = pd.read_csv('warehouses_w_lat_lng_blr.csv')
# The lat_long column arrives as text and is parsed into Python objects
# (presumably "[lat, lng]" lists; verify against the CSV).
# ast.literal_eval only accepts Python literals, so unlike eval() a crafted
# CSV cell cannot execute arbitrary code.
df_warehouse['lat_long'] = df_warehouse['lat_long'].apply(ast.literal_eval)

In [61]:
# Hub coordinates for Bangalore.
df_hubs = pd.read_csv('hubs_w_lat_lng_blr.csv')
# The hub code is the second '/'-separated part of sort_codes, stripped of
# surrounding whitespace.
df_hubs['last_mile_hub'] = df_hubs['sort_codes'].str.split('/').str[1].str.strip()
# The lat_long column arrives as text and is parsed into Python objects
# (presumably "[lat, lng]" lists; verify against the CSV).
# ast.literal_eval only accepts Python literals, so unlike eval() a crafted
# CSV cell cannot execute arbitrary code.
df_hubs['lat_long'] = df_hubs['lat_long'].apply(ast.literal_eval)

In [62]:
# Attach coordinates to the Ops Main counts; the result is the 'orders' frame.

# Left-join the warehouse coordinates on warehouse_name. Rows without a match
# keep NaN in warehouse_lat_long.
df_merged = df_ops_main.merge(
    df_warehouse[['warehouse_name', 'lat_long']],
    how='left',
    on='warehouse_name',
).rename(columns={'lat_long': 'warehouse_lat_long'})

# Left-join the hub coordinates on last_mile_hub. Rows without a match keep
# NaN in hub_lat_long.
orders = df_merged.merge(
    df_hubs[['last_mile_hub', 'lat_long']],
    how='left',
    on='last_mile_hub',
).rename(columns={'lat_long': 'hub_lat_long'})

print(orders)

                                        warehouse_name last_mile_hub  orders  \
0    15 Ground Floor, SY No  131 3, Hoskote, Anjane...          BLDR      97   
1    15 Ground Floor, SY No  131 3, Hoskote, Anjane...          CMRJ      11   
2    15 Ground Floor, SY No  131 3, Hoskote, Anjane...          ECTY      44   
3    15 Ground Floor, SY No  131 3, Hoskote, Anjane...          HBBL      45   
4    15 Ground Floor, SY No  131 3, Hoskote, Anjane...          JPNR     100   
..                                                 ...           ...     ...   
475                                   Vaaree Warehouse          JPNR      51   
476                                   Vaaree Warehouse          MRTH      51   
477                                   Vaaree Warehouse          STNG      10   
478                                   Vaaree Warehouse          UTTR      27   
479                                   Vaaree Warehouse          YLHK       8   

                 warehouse_lat_long    

In [63]:
# Inspect the warehouse coordinates attached to the orders.
warehouse_coords = orders['warehouse_lat_long']

# Every value type in the column other than list.
other_datatypes = {type(coords) for coords in warehouse_coords if type(coords) != list}
print(other_datatypes)

# Rows whose coordinate value is a float rather than a list.
float_values = orders[warehouse_coords.apply(lambda value: isinstance(value, float))]
print(float_values['warehouse_lat_long'])

# Rows whose coordinate value is NaN, i.e. the warehouse had no match in the
# warehouse coordinates file.
nan_values = orders[warehouse_coords.apply(lambda value: isinstance(value, float) and pd.isna(value))]

# Distinct names of the warehouses without coordinates.
unique_warehouse_names = nan_values['warehouse_name'].unique()
print(unique_warehouse_names)

# Export those names to Excel.
unique_warehouse_df = pd.DataFrame({'warehouse_name': unique_warehouse_names})
unique_warehouse_df.to_excel('not_in_warehouses_warehouse.xlsx', index=False)



{<class 'float'>}
9      NaN
10     NaN
11     NaN
12     NaN
13     NaN
      ... 
466    NaN
467    NaN
468    NaN
469    NaN
470    NaN
Name: warehouse_lat_long, Length: 279, dtype: object
['16   1, Deganhalli Village Road, Kasaba Hobli,'
 '24 2, Chikkahullur Village, Kasba Hobli,' 'Asitis BLR2'
 'BLR Adret Retail' 'BNG - Bengaluru' 'BOAT Bangalore' 'BVO Bangalore'
 'Decathlon BLR' 'DS_blr_mtl_HK' 'EASYECOM-RDCBLRFC4'
 'Emiza supply chain services Pvt Ltd survey no 83 2 Kachanahalli Beside Kirloskar Electric company Budhihall post Nelmangala'
 'GIVA DEL' 'HK BLR'
 'Honasa Consumer Limited Emiza Bangalore Aqua, C O Emiza Supply chain service Pvt Ltd,Sy no 83 1,Kachanahalli village, Buddihal post, Kasaba Hobli, Nelamagala ta'
 'Katha no 461 100 7, Comprised of Converted Survey No 100 4, Reserve Survey No'
 'Manash Bangalore Warehouse'
 'Mathru Shree Warehouse,Survey No 83 2 ,Kachanahalli' 'Minimalist'
 'Mokobara Lifestyle Pvt Ltd' 'MW_Bangalore' 'Neemans Private Limited'
 'NO  33 34,1

In [64]:
# Keep only the rows that have warehouse coordinates (non-NaN warehouse_lat_long).
filtered_orders = orders.loc[orders['warehouse_lat_long'].notna()]
# Rebind "orders" to an independent copy of the filtered rows.
orders = filtered_orders.copy()

In [65]:
# Inspect the hub coordinates attached to the orders.
hub_coords = orders['hub_lat_long']

# Every value type in the column other than list.
other_datatypes = {type(coords) for coords in hub_coords if type(coords) != list}
print(other_datatypes)

# Rows whose coordinate value is a float rather than a list.
float_values = orders[hub_coords.apply(lambda value: isinstance(value, float))]
print(float_values['hub_lat_long'])

# Rows whose coordinate value is NaN, i.e. the hub code had no match in the
# hub coordinates file.
nan_values = orders[hub_coords.apply(lambda value: isinstance(value, float) and pd.isna(value))]

# Distinct hub codes without coordinates. The variable names still say
# "warehouse" but hold last-mile hub codes here.
unique_warehouse_names = nan_values['last_mile_hub'].unique()
print(unique_warehouse_names)

# Frame of the unmatched hub codes (not written to disk in this cell).
unique_warehouse_df = pd.DataFrame({'last_mile_hub': unique_warehouse_names})



{<class 'float'>}
29     NaN
47     NaN
114    NaN
212    NaN
222    NaN
232    NaN
252    NaN
405    NaN
Name: hub_lat_long, dtype: object
['MTH']


In [66]:
# Keep only the rows that have hub coordinates (non-NaN hub_lat_long).
filtered_orders = orders.loc[orders['hub_lat_long'].notna()]

# Rebind "orders" to an independent copy of the filtered rows.
orders = filtered_orders.copy()

# print(orders)

In [67]:
# Fixed motherhub: the coordinates of the first hub whose node_type is 'central_hub'.
central_hub_coords = df_hubs.loc[df_hubs['node_type'] == 'central_hub', 'lat_long'].iloc[0]

# The first element is used as latitude, the second as longitude.
fixed_motherhub_latitude = central_hub_coords[0]
fixed_motherhub_longitude = central_hub_coords[1]

# Pack them as a (latitude, longitude) tuple and show it.
fixed_motherhub = (fixed_motherhub_latitude, fixed_motherhub_longitude)
print(fixed_motherhub)

(12.9497375, 77.6982656)


In [68]:
#cost should not go above a limit
#cost can be defined with rwith centre as centre

In [69]:
#Defining function for total distance
def total_distance(new_motherhub, orders, fixed_motherhubs):
    """Order-weighted route length (km) when each lane uses its best motherhub.

    For every row of `orders` the route warehouse -> motherhub -> last-mile hub
    is measured with great-circle distances for each candidate motherhub (all
    fixed ones plus the new one). The shortest route is multiplied by the
    row's order count and summed.

    Parameters
    ----------
    new_motherhub : sequence of two floats
        Candidate (latitude, longitude); scipy passes this as an array.
    orders : pandas.DataFrame
        Needs columns 'warehouse_lat_long' and 'hub_lat_long' (indexable as
        [0] = latitude, [1] = longitude) and 'orders' (the weight).
    fixed_motherhubs : iterable of (latitude, longitude)
        Existing motherhubs that orders may also be routed through.

    Returns
    -------
    Total weighted distance in kilometres; 0 when `orders` has no rows.

    Notes
    -----
    Each row is counted exactly once, via the best hub overall. Previously a
    row was added once per fixed motherhub, which inflated the total when more
    than one fixed hub was passed and ignored the new hub when none was. The
    result is unchanged for a single fixed motherhub.
    """
    # Built once per call rather than once per row.
    candidate_hubs = list(fixed_motherhubs) + [tuple(new_motherhub)]

    total_dist = 0
    # Iterating the columns directly avoids the per-row Series that iterrows()
    # builds; this function is evaluated many times by the optimiser.
    for warehouse_coords, hub_coords, order_count in zip(
        orders['warehouse_lat_long'], orders['hub_lat_long'], orders['orders']
    ):
        client_location = (warehouse_coords[0], warehouse_coords[1])
        last_mile_location = (hub_coords[0], hub_coords[1])

        shortest_route = min(
            great_circle(client_location, hub).km + great_circle(hub, last_mile_location).km
            for hub in candidate_hubs
        )
        total_dist += order_count * shortest_route

    return total_dist

# Start the search at the mean latitude / mean longitude of the warehouse
# coordinates (one entry per order row).
client_coords = orders['warehouse_lat_long']
mean_latitude = client_coords.apply(lambda point: point[0]).mean()
mean_longitude = client_coords.apply(lambda point: point[1]).mean()

initial_guess = (mean_latitude, mean_longitude)
print("Initial Guess:", initial_guess)

# Cost at the starting point, for comparison with the optimised cost.
initial_total_dist = total_distance(initial_guess, orders, [fixed_motherhub])
print("Initial Total Distance:", initial_total_dist)

# Minimise the total distance over the new motherhub's (lat, lng) with Nelder-Mead.
result = minimize(
    total_distance,
    initial_guess,
    args=(orders, [fixed_motherhub]),
    method='Nelder-Mead',
)

# Report the best location found and its cost.
optimal_new_motherhub_location = result.x
print("Optimal New Motherhub Location:", optimal_new_motherhub_location)
print("Total Distance:", result.fun)

Initial Guess: (12.989052356476684, 77.6143475865285)
Initial Total Distance: 960301.6539781998
Optimal New Motherhub Location: [12.93524881 77.58228153]
Total Distance: 844947.7365317116


In [75]:
# Label every order row with the motherhub that gives it the shorter route.

# Unpack the optimised location into a (latitude, longitude) tuple.
optimal_new_motherhub_latitude, optimal_new_motherhub_longitude = (
    optimal_new_motherhub_location[0],
    optimal_new_motherhub_location[1],
)
new_motherhub = (optimal_new_motherhub_latitude, optimal_new_motherhub_longitude)


def _route_km(origin, hub, destination):
    """Great-circle length in km of the route origin -> hub -> destination."""
    return great_circle(origin, hub).km + great_circle(hub, destination).km


motherhub_labels = []
for warehouse_coords, hub_coords in zip(orders['warehouse_lat_long'], orders['hub_lat_long']):
    client_location = (warehouse_coords[0], warehouse_coords[1])
    last_mile_location = (hub_coords[0], hub_coords[1])

    total_dist_fixed = _route_km(client_location, fixed_motherhub, last_mile_location)
    total_dist_new = _route_km(client_location, tuple(new_motherhub), last_mile_location)

    # 'Fixed' only when strictly shorter; ties go to the new motherhub.
    motherhub_labels.append('Fixed' if total_dist_fixed < total_dist_new else 'New')

# Store the labels as a new column, in row order.
orders['motherhub_label'] = motherhub_labels


In [76]:
# Destination workbook for the labelled orders.
file_path = "orders.xlsx"

# Export the orders frame to Excel without the index column.
orders.to_excel(excel_writer=file_path, index=False)

In [70]:
def _motherhub_trace(latitude, longitude, color, name):
    """Build a single-marker scattermapbox trace for one motherhub."""
    return {
        'type': 'scattermapbox',
        'lat': [latitude],
        'lon': [longitude],
        'mode': 'markers',
        'marker': {
            'size': 12,
            'color': color
        },
        'name': name
    }


# Map figure: the fixed motherhub in green, the optimised new one in blue.
motherhub_figure = {
    'data': [
        _motherhub_trace(fixed_motherhub[0], fixed_motherhub[1], 'green', 'Fixed Motherhub'),
        _motherhub_trace(
            optimal_new_motherhub_location[0],
            optimal_new_motherhub_location[1],
            'blue',
            'Optimal New Motherhub Location',
        ),
    ],
    'layout': {
        'mapbox': {
            'style': "open-street-map",
            # Centre coordinates of Bangalore.
            'center': {'lat': 12.97, 'lon': 77.59},
            'zoom': 10
        }
    }
}

# Create the app and give it a heading plus the map.
app = Dash(__name__)
app.layout = html.Div([
    html.H1("Motherhub Locations", style={'text-align': 'center'}),
    dcc.Graph(id='map', figure=motherhub_figure)
])

# Start the development server.
if __name__ == '__main__':
    app.run_server(debug=True, port=8051)
