In [None]:
# Load data from JSON files

In [72]:
import json
import pandas as pd
from datetime import datetime, timedelta
import pytz

In [73]:
# Location of the raw tracking export.
# NOTE(review): this is an absolute, machine-specific path; adjust it to where
# your copy of Dataset.json lives before running the notebook.
file_path = 'C:\\Users\\user\\Downloads\\Dataset.json'

# Pass the encoding explicitly: without it, open() falls back to the platform's
# locale encoding, which is not guaranteed to be UTF-8 and can corrupt or
# reject non-ASCII text in the JSON file.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [74]:
# Build one flat summary record per shipment in `data`.

# Target timezone for every reported timestamp. Constructed once here rather
# than twice per shipment inside the loop.
IST_TZ = pytz.timezone('Asia/Kolkata')


def get_postal_code(address_data):
    """Return the postal code stored in an address mapping.

    Checks the keys 'postalCode' and then 'postalCodeExtension' and returns
    the value of the first one that is present.

    Args:
        address_data: Mapping holding the address fields of a shipment.

    Returns:
        The value found under the first matching key, or '' when neither key
        is present.
    """
    for key in ('postalCode', 'postalCodeExtension'):
        if key in address_data:
            return address_data[key]
    return ''


flattened_data = []

for item in data:
    # Only the first entry of 'trackDetails' is used for each item.
    track = item['trackDetails'][0]

    tracking_number = track['trackingNumber']
    payments = track.get('payments', [])
    # A shipment counts as 'COD' if any payment entry has type 'COD'.
    payment_type = 'COD' if any(payment.get('type') == 'COD' for payment in payments) else 'Prepaid'

    # NOTE(review): pickup and delivery are read from fixed positions (2 and 0)
    # of 'datesOrTimes'; this assumes every shipment lists its dates in the
    # same order -- confirm against the data.
    dates_or_times = track['datesOrTimes']
    pickup_raw = dates_or_times[2]['dateOrTimestamp']
    delivery_raw = dates_or_times[0]['dateOrTimestamp']

    # Parse the ISO-8601 strings and express them in IST.
    # NOTE(review): a timestamp without a UTC offset parses as naive, and
    # astimezone() then interprets it as system local time -- confirm the
    # data always carries an offset.
    pickup_datetime = datetime.fromisoformat(pickup_raw).astimezone(IST_TZ)
    delivery_datetime = datetime.fromisoformat(delivery_raw).astimezone(IST_TZ)

    # Whole days between pickup and delivery (timedelta.days drops the
    # sub-day remainder).
    days_taken = (delivery_datetime - pickup_datetime).days

    # Delivery attempts = number of 'OD' events plus number of 'DL' events.
    # NOTE(review): presumably 'OD' is out-for-delivery and 'DL' is delivered;
    # if so a successful delivery contributes to both counts -- confirm this
    # is the intended definition of an attempt.
    events = track['events']
    out_for_delivery_count = sum(1 for event in events if event['eventType'] == 'OD')
    delivered_count = sum(1 for event in events if event['eventType'] == 'DL')
    delivery_attempts = out_for_delivery_count + delivered_count

    shipment_weight = track['shipmentWeight']['value']

    # Pickup (shipper) address; missing fields fall back to ''.
    pickup_address = track.get('shipperAddress', {})
    pickup_pincode = get_postal_code(pickup_address)
    pickup_city = pickup_address.get('city', '')
    pickup_state = pickup_address.get('stateOrProvinceCode', '')

    # Drop (destination) address; missing fields fall back to ''.
    drop_address = track.get('destinationAddress', {})
    drop_pincode = get_postal_code(drop_address)
    drop_city = drop_address.get('city', '')
    drop_state = drop_address.get('stateOrProvinceCode', '')

    # One flat record per shipment; the keys become the CSV column headers.
    flattened_data.append({
        'Tracking number': tracking_number,
        'Payment type': payment_type,
        'Pickup Date Time (IST)': pickup_datetime,
        'Delivery Date Time (IST)': delivery_datetime,
        'Days taken for delivery': days_taken,
        'Shipment weight': shipment_weight,
        'Pickup Pincode': pickup_pincode,
        'Pickup City': pickup_city,
        'Pickup State': pickup_state,
        'Drop Pincode': drop_pincode,
        'Drop City': drop_city,
        'Drop State': drop_state,
        'Number of delivery attempts': delivery_attempts
    })

In [75]:
# Turn the per-shipment records into a table and save it as CSV
# (the DataFrame index is not written to the file).
df = pd.DataFrame(data=flattened_data)
output_csv_path = 'tracking_summary.csv'
df.to_csv(path_or_buf=output_csv_path, index=False)

In [77]:
# Write the mean, median and mode of the two numeric columns to a CSV file.
# Each pair is (DataFrame column, wording used in the metric names).
summary_columns = [
    ('Days taken for delivery', 'days taken for delivery'),
    ('Number of delivery attempts', 'number of delivery attempts'),
]

summary_statistics = {}
for column_name, metric_label in summary_columns:
    column_values = df[column_name]
    summary_statistics['Mean ' + metric_label] = column_values.mean()
    summary_statistics['Median ' + metric_label] = column_values.median()
    # mode() may return several values; only the first one is reported.
    summary_statistics['Mode ' + metric_label] = column_values.mode().iloc[0]

# Two-column layout: one row per metric.
summary_statistics_df = pd.DataFrame(
    list(summary_statistics.items()),
    columns=['Metric', 'Value'],
)
summary_statistics_df.to_csv('summary_statistics.csv', index=False)