In [1282]:
#Importing libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, time
from sshtunnel import SSHTunnelForwarder
import psycopg2 as psy
import math
from IPython.display import display
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, DateTime, Float

In [1283]:
#Defining function to get a dataframe from sql

def get_df_from_sql(SSH_required, query, key_path):
    """Run ``query`` against the warehouse Postgres database and return a DataFrame.

    Parameters
    ----------
    SSH_required : str
        ``'Yes'`` to reach the database through an SSH tunnel on the bastion
        host; any other value connects to the database host directly.
    query : str
        SQL text handed to ``pandas.read_sql``.
    key_path : str
        Path of the private key used for the SSH tunnel (only read when
        ``SSH_required == 'Yes'``).

    Returns
    -------
    pandas.DataFrame
        The result set of ``query``.

    Notes
    -----
    The database connection and the tunnel (if any) are always closed, even
    when connecting or running the query raises.
    """
    import os  # local import so this cell does not depend on the import cell changing

    DB_HOST = 'datawarehouse.cdgpvetprks3.ap-south-1.rds.amazonaws.com'
    DB_PORT = 5432
    SSH_HOST = 'ec2-15-206-161-154.ap-south-1.compute.amazonaws.com'

    # SECURITY: the literal fallback keeps existing runs working, but a password
    # must not live in a notebook. Set DATAWAREHOUSE_DB_PASSWORD in the
    # environment and remove the fallback once every user has migrated.
    db_password = os.environ.get('DATAWAREHOUSE_DB_PASSWORD', "Simply1234")

    ssh_tunnel = None
    conn = None
    try:
        if SSH_required == 'Yes':
            ssh_tunnel = SSHTunnelForwarder(
                (SSH_HOST, 22),
                ssh_username="ec2-user",
                ssh_private_key=key_path,
                ssh_private_key_password="",
                remote_bind_address=(DB_HOST, DB_PORT),
                # Port 0 asks for any free local port; read it back after start().
                local_bind_address=('127.0.0.1', 0)
            )
            ssh_tunnel.start()
            host = ssh_tunnel.local_bind_host
            port = ssh_tunnel.local_bind_port
        else:
            host = DB_HOST
            port = DB_PORT

        conn = psy.connect(
            host=host,
            port=port,
            user='postgres',
            password=db_password,
            database='postgres')
        return pd.read_sql(query, conn)
    finally:
        # Tear down in reverse order of creation: connection first, then tunnel.
        if conn is not None:
            conn.close()
        if ssh_tunnel is not None:
            ssh_tunnel.stop()

In [1284]:
#Getting the dataframe from sql query on ops main

# Connection settings for this run: use the SSH tunnel with the local key file.
SSH_required = 'Yes'
key_path = '/Users/rajatsansaniwal/Documents/tunnel-ssh .cer'

# Read the pickup query text from disk and run it against the warehouse.
query_path = 'pickup_query.sql'
with open(query_path) as sql_file:
    query = sql_file.read()

df = get_df_from_sql(SSH_required=SSH_required, query=query, key_path=key_path)
# df = df[df['warehouse_id'] == 1514]  # optional: restrict to one warehouse while debugging

print(df)

                  key  warehouse_id pickup_vehicle          pickuptime  \
0      100-1514-Delhi          1514      Devraj As 2024-07-14 18:38:55   
1      100-1514-Delhi          1514      Devraj As 2024-07-14 18:39:01   
2      100-1514-Delhi          1514      Devraj As 2024-07-14 18:39:24   
3      100-1514-Delhi          1514      Devraj As 2024-07-14 18:39:32   
4      100-1514-Delhi          1514      Devraj As 2024-07-14 18:39:36   
...               ...           ...            ...                 ...   
32496   87-327-Mumbai           327     basid khan 2024-07-16 17:51:00   
32497   87-327-Mumbai           327     basid khan 2024-07-16 17:51:06   
32498   87-327-Mumbai           327     basid khan 2024-07-16 17:51:08   
32499   87-327-Mumbai           327     basid khan 2024-07-16 17:51:11   
32500   87-327-Mumbai           327     basid khan 2024-07-16 17:51:17   

                    created_date           awb  
0     2024-07-14 13:57:17.422820  GS1440374959  
1     2024-07

In [1285]:
#Making pickup slots on warehouses and pickup vehicle

# Two consecutive scans of the same (key, warehouse, vehicle) that are further
# apart than this many seconds belong to different pickup slots.
MAX_GAP_WITHIN_SLOT_SECONDS = 7200  # two hours

df['pickuptime'] = pd.to_datetime(df['pickuptime'])

results = []

for (key, warehouse, pickup_vehicle), group in df.groupby(['key', 'warehouse_id', 'pickup_vehicle']):
    # The gap test below compares each scan with the one before it, so the
    # group must be in chronological order. groupby keeps the incoming row
    # order, which is only chronological if the SQL query happened to sort it.
    group = group.sort_values('pickuptime', kind='mergesort').reset_index(drop=True)

    pickup_start_time = group.loc[0, 'pickuptime']

    # NOTE: names assigned in this loop stay in the notebook namespace after
    # the cell has run; they describe the last iteration only.
    for i in range(1, len(group)):
        current_time = group.loc[i, 'pickuptime']
        previous_time = group.loc[i - 1, 'pickuptime']

        if (current_time - previous_time).total_seconds() > MAX_GAP_WITHIN_SLOT_SECONDS:
            # Close the running slot at the previous scan and open a new one here.
            results.append({
                'key': key,
                'warehouse_id': warehouse,
                'pickup_vehicle': pickup_vehicle,
                'pickup_start_time': pickup_start_time,
                'pickup_end_time': previous_time
            })
            pickup_start_time = current_time

    # The slot still open when the group ends finishes at the group's last scan.
    results.append({
        'key': key,
        'warehouse_id': warehouse,
        'pickup_vehicle': pickup_vehicle,
        'pickup_start_time': pickup_start_time,
        'pickup_end_time': group.loc[len(group) - 1, 'pickuptime']
    })

# One row per pickup slot
results_df = pd.DataFrame(results)

# Slot length in whole minutes, rounded up
results_df['slot_duration'] = ((results_df['pickup_end_time'] - results_df['pickup_start_time']).dt.total_seconds() / 60).apply(math.ceil)

# Order slots chronologically so shift(-1) below really yields the *next* slot
results_df = results_df.sort_values(by=['key', 'warehouse_id', 'pickup_start_time'])

# Minutes from the end of a slot to the start of the next slot of the same
# (key, warehouse, vehicle): truncated minutes + 1, and NaN for the last slot.
# Computed without fillna(..., inplace=True) on a column selection, which is
# chained assignment and does not update the frame under Copy-on-Write.
next_slot_start = results_df.groupby(['key', 'warehouse_id', 'pickup_vehicle'])['pickup_start_time'].shift(-1)
gap_minutes = (next_slot_start - results_df['pickup_end_time']).dt.total_seconds() / 60
results_df['gap_to_next'] = np.floor(gap_minutes) + 1

results_df = results_df.sort_values(by=['key', 'warehouse_id', 'pickup_vehicle'])

# print(results_df)

In [1286]:
# Adding actual load on slots

def _count_awbs_in_slot(slot):
    """Return how many rows of ``df`` belong to ``slot``.

    A row belongs to the slot when its key, warehouse and vehicle match and its
    pickuptime lies between the slot's start and end (both ends inclusive).
    """
    belongs_to_slot = (
        (df['key'] == slot['key'])
        & (df['warehouse_id'] == slot['warehouse_id'])
        & (df['pickup_vehicle'] == slot['pickup_vehicle'])
        & df['pickuptime'].between(slot['pickup_start_time'], slot['pickup_end_time'])
    )
    return int(belongs_to_slot.sum())


# One count per slot, in the row order of results_df
loads = [_count_awbs_in_slot(slot) for _, slot in results_df.iterrows()]

# Attach the counts positionally and expose the frame under its final name
results_df['actual_load'] = loads
pickup_slots_df = results_df

# # Print or display the updated DataFrame with the 'actual_load' column
# print(pickup_slots_df)

# # Save DataFrame to Excel
# excel_file = 'pickup_slots.xlsx'
# pickup_slots_df.to_excel(excel_file, index=False)

# print(f"DataFrame saved to {excel_file}")


In [1287]:
#Loading pickup cutoff time data

# Read the cutoff-time query from disk and run it with the same connection settings.
query_path = 'pickup_cutoff_time.sql'
with open(query_path) as sql_file:
    query = sql_file.read()

pickup_cutoff_df = get_df_from_sql(SSH_required=SSH_required, query=query, key_path=key_path)

# Parse 'HH:MM:SS' text; values that do not match become NaT (errors='coerce').
# Only the time-of-day part is kept.
parsed_cutoffs = pd.to_datetime(pickup_cutoff_df['pickup_cutoff'], format='%H:%M:%S', errors='coerce')
pickup_cutoff_df['pickup_cutoff'] = parsed_cutoffs.dt.time
# pickup_cutoff_df = pickup_cutoff_df[pickup_cutoff_df['warehouse_id'] == 1514]
display(pickup_cutoff_df)

Unnamed: 0,key,warehouse_id,warehouse_name,user_name,user_id,warehouse_city,pickup_cutoff
0,100-1514-Bangalore,1514,GIVA DEL,GIVA,100,Bangalore,18:30:00
1,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,17:30:00
2,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,11:00:00
3,100-329-Bangalore,329,Indiejewel Fashions Private Limited,GIVA,100,Bangalore,18:30:00
4,101-1400-Hyderabad,1400,BW HYD New,Bebodywise,101,Hyderabad,17:00:00
...,...,...,...,...,...,...,...
227,81-250-Hyderabad,250,GS_HYD_KMP,HealthKart,81,Hyderabad,19:00:00
228,81-370-Bangalore,370,DS_blr_mtl_HK,HealthKart,81,Bangalore,15:00:00
229,81-833-Delhi,833,DS_DEL_HK,HealthKart,81,Delhi,15:00:00
230,87-1025-Delhi,1025,TTL_GGN,The Label Life,87,Delhi,10:30:00


In [1288]:
#Expanding cut off time data to a cut off datetime data with range of [d-3, d] days i.e. 4 days

# print(datetime.now())

# Date window: from three days ago up to now (the original author notes that
# datetime.now() is already IST on the machine running this notebook).
start_date = datetime.now() - timedelta(days=3)
end_date = datetime.now()

# Calendar days covered by the window, oldest first
cutoff_days = []
day_cursor = start_date
while day_cursor <= end_date:
    cutoff_days.append(day_cursor.date())
    day_cursor += timedelta(days=1)

# Columns copied unchanged from every cutoff row; they are also the join keys below
cutoff_columns = ['key', 'warehouse_id', 'warehouse_name', 'user_name', 'user_id', 'warehouse_city', 'pickup_cutoff']

# One output row per (cutoff row, day): the original fields plus the cutoff
# time-of-day combined with that day as a full datetime.
expanded_rows = [
    {
        **{col: cutoff_row[col] for col in cutoff_columns},
        'pickup_cutoff_time1': datetime.combine(day, cutoff_row['pickup_cutoff']),
    }
    for _, cutoff_row in pickup_cutoff_df.iterrows()
    for day in cutoff_days
]
expanded_df = pd.DataFrame(expanded_rows)

# Attach the dated cutoffs back onto the original cutoff rows
merged_df = pickup_cutoff_df.merge(expanded_df, on=cutoff_columns, how='left')

# Replace the time-of-day column with the full datetime under the same name
merged_df = merged_df.drop(columns=['pickup_cutoff']).rename(columns={'pickup_cutoff_time1': 'pickup_cutoff'})
pickup_cutoff_df = merged_df

# Show the expanded table
print(pickup_cutoff_df)

                    key  warehouse_id warehouse_name       user_name  user_id  \
0    100-1514-Bangalore          1514       GIVA DEL            GIVA      100   
1    100-1514-Bangalore          1514       GIVA DEL            GIVA      100   
2    100-1514-Bangalore          1514       GIVA DEL            GIVA      100   
3    100-1514-Bangalore          1514       GIVA DEL            GIVA      100   
4        100-1514-Delhi          1514       GIVA DEL            GIVA      100   
..                  ...           ...            ...             ...      ...   
923       87-1025-Delhi          1025        TTL_GGN  The Label Life       87   
924       87-327-Mumbai           327           GOAT  The Label Life       87   
925       87-327-Mumbai           327           GOAT  The Label Life       87   
926       87-327-Mumbai           327           GOAT  The Label Life       87   
927       87-327-Mumbai           327           GOAT  The Label Life       87   

    warehouse_city       pi

In [1289]:
#Matching pickups that happened with every possible pickup cutoff time

# Pair every pickup slot with every dated cutoff of the same key/warehouse;
# slots without a configured cutoff are kept with empty cutoff columns.
final_df = pickup_slots_df.merge(
    pickup_cutoff_df,
    on=['key', 'warehouse_id'],
    how='left',
)
display(final_df)

Unnamed: 0,key,warehouse_id,pickup_vehicle,pickup_start_time,pickup_end_time,slot_duration,gap_to_next,actual_load,warehouse_name,user_name,user_id,warehouse_city,pickup_cutoff
0,100-1514-Delhi,1514,Devraj As,2024-07-14 18:38:55,2024-07-14 19:07:32,29,1368.0,101,GIVA DEL,GIVA,100.0,Delhi,2024-07-13 17:30:00
1,100-1514-Delhi,1514,Devraj As,2024-07-14 18:38:55,2024-07-14 19:07:32,29,1368.0,101,GIVA DEL,GIVA,100.0,Delhi,2024-07-14 17:30:00
2,100-1514-Delhi,1514,Devraj As,2024-07-14 18:38:55,2024-07-14 19:07:32,29,1368.0,101,GIVA DEL,GIVA,100.0,Delhi,2024-07-15 17:30:00
3,100-1514-Delhi,1514,Devraj As,2024-07-14 18:38:55,2024-07-14 19:07:32,29,1368.0,101,GIVA DEL,GIVA,100.0,Delhi,2024-07-16 17:30:00
4,100-1514-Delhi,1514,Devraj As,2024-07-14 18:38:55,2024-07-14 19:07:32,29,1368.0,101,GIVA DEL,GIVA,100.0,Delhi,2024-07-13 11:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,87-327-Mumbai,327,basid khan,2024-07-15 17:40:38,2024-07-15 17:40:57,1,1450.0,8,GOAT,The Label Life,87.0,Mumbai,2024-07-16 16:00:00
1466,87-327-Mumbai,327,basid khan,2024-07-16 17:50:43,2024-07-16 17:51:17,1,,8,GOAT,The Label Life,87.0,Mumbai,2024-07-13 16:00:00
1467,87-327-Mumbai,327,basid khan,2024-07-16 17:50:43,2024-07-16 17:51:17,1,,8,GOAT,The Label Life,87.0,Mumbai,2024-07-14 16:00:00
1468,87-327-Mumbai,327,basid khan,2024-07-16 17:50:43,2024-07-16 17:51:17,1,,8,GOAT,The Label Life,87.0,Mumbai,2024-07-15 16:00:00


In [1290]:
# Some active warehouse ids have no entry in pickup_cutoff_time. Their slots are
# dropped here; they still need to be added to excel.pickup_cutoff_time.
final_df = final_df.dropna(subset=['pickup_cutoff'])

In [1291]:
# Choosing rows with right pickup cutoff matching

# slot_id = "<warehouse_id>-<pickup_vehicle>-<pickup_start_time>", identifying one pickup slot
slot_id_parts = [
    final_df[part].astype(str)
    for part in ('warehouse_id', 'pickup_vehicle', 'pickup_start_time')
]
final_df['slot_id'] = slot_id_parts[0].str.cat(slot_id_parts[1:], sep='-')

# Absolute distance between the candidate cutoff and the slot start / slot end
cutoff_ts = final_df['pickup_cutoff']
final_df['start_to_pickup_cutoff'] = final_df['pickup_start_time'].sub(cutoff_ts).abs()
final_df['end_to_pickup_cutoff'] = final_df['pickup_end_time'].sub(cutoff_ts).abs()
# display(final_df)

# Function to apply the rules
def filter_rows(group):
    """Pick the single candidate-cutoff row that best matches one pickup slot.

    ``group`` holds every (slot, candidate cutoff) pairing of one slot and must
    contain the columns ``pickup_start_time``, ``pickup_end_time``,
    ``pickup_cutoff``, ``start_to_pickup_cutoff`` and ``end_to_pickup_cutoff``.
    The rules are tried in order and the first one that matches wins:

    1. first row whose cutoff is less than one hour from the slot start;
    2. first row whose cutoff is less than one hour from the slot end;
    3. first row whose cutoff lies inside [slot start, slot end];
    4. among the rows whose cutoff is before the slot start, the closest one.

    Returns a one-row DataFrame, or an empty DataFrame when no rule matches.
    A Rule 4 result carries an extra ``diff`` column (slot start minus cutoff)
    which the caller drops afterwards. ``group`` itself is never modified:
    pandas does not support functions that mutate the object handed to them by
    ``groupby.apply``.
    """
    one_hour = pd.Timedelta(hours=1)

    # Rule 1: cutoff within one hour of the slot start
    near_start = group[group['start_to_pickup_cutoff'] < one_hour]
    if not near_start.empty:
        return near_start.iloc[[0]]

    # Rule 2: cutoff within one hour of the slot end
    near_end = group[group['end_to_pickup_cutoff'] < one_hour]
    if not near_end.empty:
        return near_end.iloc[[0]]

    # Rule 3: cutoff falls inside the slot
    inside_slot = group[(group['pickup_cutoff'] >= group['pickup_start_time']) & (group['pickup_cutoff'] <= group['pickup_end_time'])]
    if not inside_slot.empty:
        return inside_slot.iloc[[0]]

    # Rule 4: closest cutoff strictly before the slot start
    before_start = group[group['pickup_cutoff'] < group['pickup_start_time']]
    if not before_start.empty:
        # assign() builds a new frame, so the helper column never touches `group`
        before_start = before_start.assign(
            diff=(before_start['pickup_start_time'] - before_start['pickup_cutoff']).abs()
        )
        return before_start.loc[[before_start['diff'].idxmin()]]

    return pd.DataFrame()  # no rule matched

# Keep one best-matching cutoff row per slot
final_df = final_df.groupby('slot_id').apply(filter_rows).reset_index(drop=True)

# 'diff' only exists when at least one slot was resolved by Rule 4, so its
# absence must not raise.
final_df = final_df.drop(columns=['diff'], errors='ignore')

# display(final_df)



In [1292]:
# Cleaning data

# Keep only the reporting columns, ordered by key and cutoff
report_columns = ['key', 'user_name', 'warehouse_id', 'pickup_vehicle', 'pickup_cutoff', 'pickup_start_time', 'pickup_end_time', 'actual_load']
final_df = (
    final_df.loc[:, report_columns]
    .sort_values(by=['key', 'pickup_cutoff'])
    .reset_index(drop=True)
)

# Collapse all slots matched to the same cutoff into one row:
# earliest start, latest end, total load.
summary_df = (
    final_df
    .groupby(['key', 'warehouse_id', 'pickup_cutoff'])
    .agg(
        pickup_start_time=('pickup_start_time', 'min'),
        pickup_end_time=('pickup_end_time', 'max'),
        actual_load=('actual_load', 'sum'),
    )
    .reset_index()
)

# Display the summary DataFrame
# display(summary_df)

In [1293]:
# Ensures Missed Pickups are there

# Replace the time-of-day 'pickup_cutoff' with the full datetime column.
# Guarded so that re-running this cell is harmless: without the guard a second
# run would drop the already-renamed datetime column and then fail on the sort.
if 'pickup_cutoff_time1' in expanded_df.columns:
    expanded_df = (
        expanded_df
        .drop(columns=['pickup_cutoff'])
        .rename(columns={'pickup_cutoff_time1': 'pickup_cutoff'})
    )

expanded_df = expanded_df.sort_values(by=['key', 'pickup_cutoff'])


# print(expanded_df)


In [1294]:
# Creating final table

# Every dated cutoff gets a row; cutoffs with no matching pickup keep empty
# start/end/load columns.
final = expanded_df.merge(
    summary_df,
    on=['key', 'warehouse_id', 'pickup_cutoff'],
    how='left',
)
display(final)

Unnamed: 0,key,warehouse_id,warehouse_name,user_name,user_id,warehouse_city,pickup_cutoff,pickup_start_time,pickup_end_time,actual_load
0,100-1514-Bangalore,1514,GIVA DEL,GIVA,100,Bangalore,2024-07-13 18:30:00,NaT,NaT,
1,100-1514-Bangalore,1514,GIVA DEL,GIVA,100,Bangalore,2024-07-14 18:30:00,NaT,NaT,
2,100-1514-Bangalore,1514,GIVA DEL,GIVA,100,Bangalore,2024-07-15 18:30:00,NaT,NaT,
3,100-1514-Bangalore,1514,GIVA DEL,GIVA,100,Bangalore,2024-07-16 18:30:00,NaT,NaT,
4,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-13 11:00:00,NaT,NaT,
...,...,...,...,...,...,...,...,...,...,...
923,87-1025-Delhi,1025,TTL_GGN,The Label Life,87,Delhi,2024-07-16 10:30:00,2024-07-16 09:50:31,2024-07-16 09:50:31,1.0
924,87-327-Mumbai,327,GOAT,The Label Life,87,Mumbai,2024-07-13 16:00:00,NaT,NaT,
925,87-327-Mumbai,327,GOAT,The Label Life,87,Mumbai,2024-07-14 16:00:00,NaT,NaT,
926,87-327-Mumbai,327,GOAT,The Label Life,87,Mumbai,2024-07-15 16:00:00,2024-07-15 17:40:38,2024-07-15 17:40:57,8.0


In [1295]:
# Creating expected load

# Both timestamp columns must be real datetimes for the comparisons further down
final['pickup_cutoff'] = pd.to_datetime(final['pickup_cutoff'])
df['created_date'] = pd.to_datetime(df['created_date'])

# Newest cutoff first within each key, so the row *below* holds the earlier cutoff
final_sorted = final.sort_values(by=['key', 'pickup_cutoff'], ascending=[True, False])

# previous_cutoff = the cutoff immediately before this one for the same key
# (NaT for the oldest cutoff of each key)
final_sorted = final_sorted.assign(
    previous_cutoff=final_sorted.groupby('key')['pickup_cutoff'].shift(-1)
)

# Define a function to count rows in df that fall between pickup_cutoff and previous_cutoff
def count_rows_between_cutoffs(row, df):
    """Count the rows of ``df`` created between two consecutive cutoffs of one key.

    ``row`` supplies ``key``, ``previous_cutoff`` and ``pickup_cutoff``. A row of
    ``df`` is counted when its ``key`` matches and its ``created_date`` falls in
    the half-open window ``[previous_cutoff, pickup_cutoff)``. When
    ``previous_cutoff`` is missing the result is 0.
    """
    window_start = row['previous_cutoff']
    if pd.isna(window_start):
        return 0

    window_end = row['pickup_cutoff']
    same_key = df['key'] == row['key']
    in_window = (df['created_date'] >= window_start) & (df['created_date'] < window_end)
    return int((same_key & in_window).sum())

# expected_load = number of AWBs created between the previous cutoff and this one
final_sorted['expected_load'] = final_sorted.apply(
    lambda cutoff_row: count_rows_between_cutoffs(cutoff_row, df),
    axis=1,
)

# The helper column has served its purpose
final_sorted = final_sorted.drop(columns=['previous_cutoff'])

# Keep only the cutoffs that had something to pick up
has_expected_load = final_sorted['expected_load'].ne(0)
final_filtered = final_sorted.loc[has_expected_load]

# Continue under the name `final`
final = final_filtered

# Display the resulting DataFrame
# display(final)

In [1296]:
# Removing warehouses with no activity in all last 4 days

def _never_picked_up(key_rows):
    """True when no row of this key has a pickup_start_time."""
    return key_rows['pickup_start_time'].isna().all()


# Keys for which pickup_start_time is missing on every date in the window
inactive_rows = final.groupby('key').filter(_never_picked_up)
key_with_all_NaT = inactive_rows['key'].unique()

# As a plain list, for printing and filtering
key_with_all_NaT_list = key_with_all_NaT.tolist()

print(key_with_all_NaT_list)

# Remove those keys from the final table
is_inactive_key = final['key'].isin(key_with_all_NaT_list)
final = final[~is_inactive_key]
# display(final)

['334-1425-Mumbai', '377-1529-Hyderabad', '377-1603-Mumbai', '434-1896-Hyderabad']


In [1297]:
# Creating status column

# `final` is a filtered view of earlier frames; work on an independent copy so
# the column assignments below are unambiguous (no SettingWithCopyWarning).
final = final.copy()

# Reference instant for "has the cutoff passed yet?". It must be defined here:
# previously this name was only a leftover loop variable from the slot-building
# cell (the last scan time of the last group), not the current time.
# NOTE(review): naive local time, assumed to be in the same timezone as
# pickup_cutoff (the notebook treats datetime.now() as IST) - confirm.
current_time = pd.Timestamp.now()

# A pickup is on time up to this long after the cutoff
ON_TIME_GRACE = pd.Timedelta(minutes=30)
# With no pickup at all, the cutoff counts as missed after this long
MISSED_AFTER = pd.Timedelta(hours=6, minutes=30)

# Step 1: make sure the status column exists
if 'pickup_status' not in final.columns:
    final['pickup_status'] = np.nan

# Step 2: remember rows already marked 'Missed' so that status is kept
missed_mask = final['pickup_status'] == 'Missed'

# Step 3: conditions, evaluated in order (the first match wins in np.select)
picked_up = final['pickup_start_time'].notna()
not_picked_up = final['pickup_start_time'].isna()
on_time_deadline = final['pickup_cutoff'] + ON_TIME_GRACE
missed_deadline = final['pickup_cutoff'] + MISSED_AFTER

conditions = [
    picked_up & (final['pickup_start_time'] <= on_time_deadline),
    picked_up & (final['pickup_start_time'] > on_time_deadline),
    not_picked_up & (missed_deadline < current_time),
    not_picked_up & (on_time_deadline <= current_time),
    not_picked_up & (on_time_deadline > current_time)
]

# Status for each condition, in the same order
choices = [
    'On-time pickup',
    'Late pickup',
    'Missed',
    'Delayed',
    'Pending'
]

# Step 4: derive the status column
final['pickup_status'] = np.select(conditions, choices, default='Unknown')

# Step 5: restore the preserved 'Missed' statuses
final.loc[missed_mask, 'pickup_status'] = 'Missed'

# Display the final DataFrame
display(final)


Unnamed: 0,key,warehouse_id,warehouse_name,user_name,user_id,warehouse_city,pickup_cutoff,pickup_start_time,pickup_end_time,actual_load,expected_load,pickup_status
11,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-16 17:30:00,2024-07-16 17:47:25,2024-07-16 18:10:13,66.0,53,On-time pickup
10,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-16 11:00:00,2024-07-16 10:31:38,2024-07-16 10:35:41,26.0,39,On-time pickup
9,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-15 17:30:00,2024-07-15 17:55:30,2024-07-15 18:34:16,92.0,78,On-time pickup
8,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-15 11:00:00,2024-07-15 11:01:45,2024-07-15 11:10:04,40.0,66,On-time pickup
7,100-1514-Delhi,1514,GIVA DEL,GIVA,100,Delhi,2024-07-14 17:30:00,2024-07-14 18:38:55,2024-07-14 19:07:32,101.0,89,Late pickup
...,...,...,...,...,...,...,...,...,...,...,...,...
902,449-2043-Bangalore,2043,Intellihealth solutions pvt ltd,Truemeds,449,Bangalore,2024-07-15 12:00:00,2024-07-15 12:28:55,2024-07-15 20:21:35,171.0,147,On-time pickup
923,87-1025-Delhi,1025,TTL_GGN,The Label Life,87,Delhi,2024-07-16 10:30:00,2024-07-16 09:50:31,2024-07-16 09:50:31,1.0,1,On-time pickup
922,87-1025-Delhi,1025,TTL_GGN,The Label Life,87,Delhi,2024-07-15 10:30:00,2024-07-15 09:42:19,2024-07-15 09:42:19,1.0,1,On-time pickup
927,87-327-Mumbai,327,GOAT,The Label Life,87,Mumbai,2024-07-16 16:00:00,2024-07-16 17:50:43,2024-07-16 17:51:17,8.0,9,Late pickup


In [1298]:
# Make the key unique per cutoff by appending "-dd/mm/yy/HH" of pickup_cutoff.
# Vectorised: a row-wise apply(axis=1) returns a DataFrame (not a Series) when
# `final` is empty, which breaks the assignment, and is slow on large frames.
# NOTE: running this cell twice appends the suffix twice.
final['key'] = final['key'].astype(str) + '-' + final['pickup_cutoff'].dt.strftime('%d/%m/%y/%H')


In [1299]:
# Show the table again now that `key` carries the cutoff date/hour suffix
display(final)

Unnamed: 0,key,warehouse_id,warehouse_name,user_name,user_id,warehouse_city,pickup_cutoff,pickup_start_time,pickup_end_time,actual_load,expected_load,pickup_status
11,100-1514-Delhi-16/07/24/17,1514,GIVA DEL,GIVA,100,Delhi,2024-07-16 17:30:00,2024-07-16 17:47:25,2024-07-16 18:10:13,66.0,53,On-time pickup
10,100-1514-Delhi-16/07/24/11,1514,GIVA DEL,GIVA,100,Delhi,2024-07-16 11:00:00,2024-07-16 10:31:38,2024-07-16 10:35:41,26.0,39,On-time pickup
9,100-1514-Delhi-15/07/24/17,1514,GIVA DEL,GIVA,100,Delhi,2024-07-15 17:30:00,2024-07-15 17:55:30,2024-07-15 18:34:16,92.0,78,On-time pickup
8,100-1514-Delhi-15/07/24/11,1514,GIVA DEL,GIVA,100,Delhi,2024-07-15 11:00:00,2024-07-15 11:01:45,2024-07-15 11:10:04,40.0,66,On-time pickup
7,100-1514-Delhi-14/07/24/17,1514,GIVA DEL,GIVA,100,Delhi,2024-07-14 17:30:00,2024-07-14 18:38:55,2024-07-14 19:07:32,101.0,89,Late pickup
...,...,...,...,...,...,...,...,...,...,...,...,...
902,449-2043-Bangalore-15/07/24/12,2043,Intellihealth solutions pvt ltd,Truemeds,449,Bangalore,2024-07-15 12:00:00,2024-07-15 12:28:55,2024-07-15 20:21:35,171.0,147,On-time pickup
923,87-1025-Delhi-16/07/24/10,1025,TTL_GGN,The Label Life,87,Delhi,2024-07-16 10:30:00,2024-07-16 09:50:31,2024-07-16 09:50:31,1.0,1,On-time pickup
922,87-1025-Delhi-15/07/24/10,1025,TTL_GGN,The Label Life,87,Delhi,2024-07-15 10:30:00,2024-07-15 09:42:19,2024-07-15 09:42:19,1.0,1,On-time pickup
927,87-327-Mumbai-16/07/24/16,327,GOAT,The Label Life,87,Mumbai,2024-07-16 16:00:00,2024-07-16 17:50:43,2024-07-16 17:51:17,8.0,9,Late pickup


In [1300]:
# Write the final table to an Excel workbook in the current working directory.
# index=False leaves the DataFrame index out of the sheet.
filepath = 'final_fm.xlsx'
final.to_excel(filepath, index=False)

In [1301]:
# def get_conn_string(SSH_required, key_path):
#     db = 'datawarehouse'
#     DB_HOST = 'datawarehouse.cdgpvetprks3.ap-south-1.rds.amazonaws.com'
#     conn_string = None

#     if SSH_required == 'Yes':
#         SSH_HOST = 'ec2-15-206-161-154.ap-south-1.compute.amazonaws.com'
#         ssh_tunnel = SSHTunnelForwarder(
#             (SSH_HOST, 22),
#             ssh_username="ec2-user",
#             ssh_private_key=key_path,
#             ssh_private_key_password="",
#             remote_bind_address=(DB_HOST, 5432),
#             local_bind_address=('127.0.0.1', 0)
#         )
#         print('Tunnel Started')
#         ssh_tunnel.start()
#         conn_string = f"postgresql://postgres:Simply1234@{ssh_tunnel.local_bind_host}:{ssh_tunnel.local_bind_port}/postgres"
#     else:
#         conn_string = f"postgresql://postgres:Simply1234@{DB_HOST}:5432/postgres"
    
#     print('Connection String Created')
#     return conn_string

# # Set your parameters for SSH and key path
# SSH_required = 'Yes'
# key_path = '/Users/rajatsansaniwal/Documents/tunnel-ssh .cer'

# # Get the connection string
# connection_string = get_conn_string(SSH_required, key_path)

# # Create the SQLAlchemy engine
# engine = create_engine(connection_string)

Tunnel Started
Connection String Created


In [1302]:
# Taking the dataframe to sql table in the database using sqlalchemy and pandas:

# Used this commented code to create the table in public schema:
# metadata = MetaData(schema='public')

# fm_status = Table(
#     'fm_status', metadata,
#     Column('key', String, primary_key=True),
#     Column('warehouse_id', String),
#     Column('warehouse_name', String),
#     Column('user_name', String),
#     Column('user_id', Integer),
#     Column('warehouse_city', String),
#     Column('pickup_cutoff', DateTime),
#     Column('pickup_start_time', DateTime),
#     Column('pickup_end_time', DateTime),
#     Column('actual_load', Float),
#     Column('pickup_status', String)
# )

# # Create the table in the database
# metadata.create_all(engine)

# Insert the DataFrame into the SQL table, specifying the schema
# final.to_sql('fm_status', con=engine, index=False, if_exists='replace', schema='public')

# Close the connection
# engine.dispose()