# Dataset Overview: Pakistani E-Commerce Delivery Data

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
import os   
warnings.filterwarnings('ignore')

# Load the dataset
file_path = 'data/input/export_cleaned_masked.csv'
start_time = datetime.now()
file_size_bytes = os.path.getsize(file_path)
print(f"File size (on disk): {file_size_bytes / 1024**2:.2f} MB ({file_size_bytes:,} bytes)")
df = pd.read_csv(file_path)
end_time = datetime.now()
load_time = (end_time - start_time).total_seconds()

sprint(f"\n✅ Dataset loaded successfully in {load_time:.2f} seconds")
print(f"\nDataset shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

File size (on disk): 1991.12 MB (2,087,840,798 bytes)


NameError: name 'sprint' is not defined

## 1. Load Dataset

Loading the full 9.4M row dataset. This may take a minute...

In [None]:
# Load the dataset 
file_path = 'data/input/export_cleaned_masked.csv'

print("Loading dataset...")
start_time = datetime.now()

# File size on disk
file_size_bytes = os.path.getsize(file_path)
print(f"File size (on disk): {file_size_bytes / 1024**2:.2f} MB ({file_size_bytes:,} bytes)")

df = pd.read_csv(file_path)

end_time = datetime.now()
load_time = (end_time - start_time).total_seconds()

print(f"\n✅ Dataset loaded successfully in {load_time:.2f} seconds")
print(f"\nDataset shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

Loading dataset...
File size (on disk): 1991.12 MB (2,087,840,798 bytes)

✅ Dataset loaded successfully in 20.21 seconds

Dataset shape: 9,476,006 rows × 14 columns
Memory usage: 6927.05 MB


## 2. Basic Information

In [None]:
# Display column names and data types
print("Column Information:")
print("=" * 80)
df.info()

Column Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9476006 entries, 0 to 9476005
Data columns (total 14 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   order_id               object 
 1   tracking_number        object 
 2   grand_total            object 
 3   weight                 float64
 4   pickup_address         object 
 5   destination_address    object 
 6   cod_payment            int64  
 7   cod                    float64
 8   seller_id              int64  
 9   company_name           object 
 10  courier_name           object 
 11  courier_service_value  object 
 12  status                 object 
 13  status_at              object 
dtypes: float64(2), int64(2), object(10)
memory usage: 1012.1+ MB


In [None]:
# Display first 10 rows
print("First 10 rows:")
df.head(10)

First 10 rows:


Unnamed: 0,order_id,tracking_number,grand_total,weight,pickup_address,destination_address,cod_payment,cod,seller_id,company_name,courier_name,courier_service_value,status,status_at
0,226299,10238180119257,8715.75,1.0,Plaza#9 Block Z Phase DHA Phase 3 Lahore,Cb 509 St No 5muslim Abad Dhoke Syedan Baraf K...,1,8715.75,3988,Brand_26,Courier_2,7,Booked,2025-01-01 00:02:01
1,226299,10238180119257,8715.75,1.0,Plaza#9 Block Z Phase DHA Phase 3 Lahore,Cb 509 St No 5muslim Abad Dhoke Syedan Baraf K...,1,8715.75,3988,Brand_26,Courier_2,7,Ready for Dispatch,2025-01-01 17:47:56
2,226299,10238180119257,8715.75,1.0,Plaza#9 Block Z Phase DHA Phase 3 Lahore,Cb 509 St No 5muslim Abad Dhoke Syedan Baraf K...,1,8715.75,3988,Brand_26,Courier_2,7,Dispatched,2025-01-01 17:48:01
3,226299,10238180119257,8715.75,1.0,Plaza#9 Block Z Phase DHA Phase 3 Lahore,Cb 509 St No 5muslim Abad Dhoke Syedan Baraf K...,1,8715.75,3988,Brand_26,Courier_2,7,Pending Delivery,2025-01-02 01:50:49
4,226299,10238180119257,8715.75,1.0,Plaza#9 Block Z Phase DHA Phase 3 Lahore,Cb 509 St No 5muslim Abad Dhoke Syedan Baraf K...,1,8715.75,3988,Brand_26,Courier_2,7,Delivered,2025-01-05 18:57:23
5,Z-5843152024,10114790934299,5113.0,1.0,DC Chowk Adjacent commissioner house Bahawal...,Waris Manzil Near Mehmood Gym Street # 4 New S...,1,5113.0,4368,Brand_154,Courier_2,1,Booked,2025-01-01 00:07:41
6,Z-5843152024,10114790934299,5113.0,1.0,DC Chowk Adjacent commissioner house Bahawal...,Waris Manzil Near Mehmood Gym Street # 4 New S...,1,5113.0,4368,Brand_154,Courier_2,1,Ready for Dispatch,2025-01-01 18:52:44
7,Z-5843152024,10114790934299,5113.0,1.0,DC Chowk Adjacent commissioner house Bahawal...,Waris Manzil Near Mehmood Gym Street # 4 New S...,1,5113.0,4368,Brand_154,Courier_2,1,Dispatched,2025-01-01 18:52:47
8,Z-5843152024,10114790934299,5113.0,1.0,DC Chowk Adjacent commissioner house Bahawal...,Waris Manzil Near Mehmood Gym Street # 4 New S...,1,5113.0,4368,Brand_154,Courier_2,1,Pending Delivery,2025-01-02 08:04:59
9,Z-5843152024,10114790934299,5113.0,1.0,DC Chowk Adjacent commissioner house Bahawal...,Waris Manzil Near Mehmood Gym Street # 4 New S...,1,5113.0,4368,Brand_154,Courier_2,1,Delivered,2025-01-03 13:00:15
