In [1]:
from datetime import datetime, time, timedelta

import numpy as np
import pandas as pd

Question 1: Distance Matrix Calculation

In [3]:
def calculate_distance_matrix(dataset):
    """Build a symmetric all-pairs distance matrix from an edge-list CSV.

    The CSV must provide the columns ``source``, ``destination`` and
    ``distance``. Every id that appears in either ``source`` or
    ``destination`` becomes both a row and a column of the result, so the
    matrix is always square.

    Distances are treated as undirected. Pairs that are not directly listed
    are filled with the length of the shortest route through the listed
    pairs (Floyd-Warshall).

    Args:
        dataset: Anything accepted by ``pandas.read_csv`` (path, URL or
            file-like object).

    Returns:
        pandas.DataFrame: float matrix indexed and labelled by the sorted
        ids. The diagonal is 0. Pairs with no connecting route are also 0,
        which keeps the "0 means unknown" convention of the previous
        implementation.
    """
    edges = pd.read_csv(dataset)
    # Rows with a missing endpoint or distance carry no usable information.
    edges = edges.dropna(subset=['source', 'destination', 'distance'])
    if edges.empty:
        return pd.DataFrame(dtype=float)

    # Use the union of both endpoint columns so the matrix is square even
    # when some ids only ever appear as a source or only as a destination.
    node_ids = pd.Index(
        pd.concat([edges['source'], edges['destination']], ignore_index=True).unique()
    ).sort_values()

    # Repeated (source, destination) rows are averaged (pivot_table default);
    # pairs that are not listed stay NaN instead of being confused with 0.
    direct = pd.pivot_table(edges, values='distance', index='source', columns='destination')
    direct = direct.reindex(index=node_ids, columns=node_ids)

    dist = direct.to_numpy(dtype=float)
    # Symmetrise: fmin ignores NaN, so a pair listed in only one direction is
    # mirrored, and a pair listed in both directions is not added up twice.
    dist = np.fmin(dist, dist.T)
    dist[np.isnan(dist)] = np.inf
    np.fill_diagonal(dist, 0.0)

    # Floyd-Warshall: allow each node in turn as an intermediate stop.
    for k in range(len(node_ids)):
        dist = np.minimum(dist, dist[:, [k]] + dist[[k], :])

    # No route between the two ids: report 0, as the old implementation did.
    dist[np.isinf(dist)] = 0.0
    return pd.DataFrame(dist, index=node_ids, columns=node_ids)

Question 2: Unroll Distance Matrix

In [4]:
def unroll_distance_matrix(distance_matrix):
    """Flatten a distance matrix into a long (id_start, id_end, distance) table.

    Every column label is paired with every index label. Pairs whose two
    labels are equal are skipped. For each remaining pair the value is read
    from ``distance_matrix.loc[id_end, id_start]``.

    Args:
        distance_matrix: pandas.DataFrame of pairwise distances.

    Returns:
        pandas.DataFrame with the columns ``id_start``, ``id_end`` and
        ``distance``, ordered by column label first and index label second.
    """
    rows = [
        [start_label, end_label, distance_matrix.loc[end_label, start_label]]
        for start_label in distance_matrix.columns
        for end_label in distance_matrix.index
        if start_label != end_label
    ]
    return pd.DataFrame(rows, columns=['id_start', 'id_end', 'distance'])

Question 3: Finding IDs within Percentage Threshold

In [5]:
def find_ids_within_ten_percentage_threshold(df, reference_id):
    """Return ids whose distances fall within 10% of a reference id's average.

    The average ``distance`` over the rows whose ``id_start`` equals
    ``reference_id`` defines an inclusive band of +/-10% around it. The
    result lists every other ``id_start`` that has at least one row whose
    ``distance`` lies inside that band.

    Args:
        df: pandas.DataFrame with ``id_start`` and ``distance`` columns.
        reference_id: The ``id_start`` value whose average sets the band.

    Returns:
        list: sorted unique ``id_start`` values (excluding ``reference_id``).
        Empty when ``reference_id`` has no rows, because the band is NaN.
    """
    # NOTE(review): the filter tests each row's own distance, not each id's
    # average distance -- confirm that this is the intended comparison.
    is_reference = df['id_start'] == reference_id
    average_distance = df.loc[is_reference, 'distance'].mean()

    margin = 0.1 * average_distance
    lower_bound = average_distance - margin
    upper_bound = average_distance + margin

    # Series.between is inclusive on both ends by default.
    in_band = df['distance'].between(lower_bound, upper_bound)
    matching_ids = df.loc[~is_reference & in_band, 'id_start'].unique()
    return sorted(matching_ids)

Question 4: Calculate Toll Rate

In [6]:
def calculate_toll_rate(df):
    """Add one toll column per vehicle type, proportional to ``distance``.

    Each new column is ``distance`` multiplied by that vehicle's
    coefficient. The columns are written onto ``df`` itself, so the caller's
    frame is modified, and the same object is returned.

    Args:
        df: pandas.DataFrame with a ``distance`` column.

    Returns:
        pandas.DataFrame: ``df`` with the added columns ``moto``, ``car``,
        ``rv``, ``bus`` and ``truck``, in that order.
    """
    # Toll coefficient per unit of distance for each vehicle type.
    rate_by_vehicle = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6,
    }
    for vehicle, rate in rate_by_vehicle.items():
        df[vehicle] = df['distance'] * rate
    return df

Question 5: Calculate Time-Based Toll Rates

In [2]:
def calculate_time_based_toll_rates(df):
    """Scale the vehicle toll columns by a factor chosen from the start time.

    The start and end timestamps are parsed from ``startDay``/``startTime``
    and ``endDay``/``endTime``. Each row then receives exactly one factor:

    * rows starting on Saturday or Sunday: 0.7, regardless of time of day;
    * all other rows: the factor of the weekday band containing the start
      time (00:00-10:00 -> 0.8, 10:00-18:00 -> 1.2, 18:00-end of day -> 1.2).

    The input frame is left untouched; a modified copy is returned.

    Args:
        df: pandas.DataFrame with the string columns ``startDay``,
            ``startTime``, ``endDay`` and ``endTime``, plus the numeric
            columns ``moto``, ``car``, ``rv``, ``bus`` and ``truck``.

    Returns:
        pandas.DataFrame: copy of ``df`` with scaled vehicle columns and the
        added columns ``start_day``, ``end_day`` (day names) and
        ``start_time``, ``end_time`` (``datetime.time`` values).
    """
    vehicle_columns = ['moto', 'car', 'rv', 'bus', 'truck']
    # (inclusive start, exclusive end, factor). An end of None means
    # "through the end of the day", so 23:59:59 and later are not skipped.
    weekday_bands = [
        (time(0, 0, 0), time(10, 0, 0), 0.8),
        (time(10, 0, 0), time(18, 0, 0), 1.2),
        # NOTE(review): the evening band keeps the 1.2 factor used by the
        # previous implementation -- confirm against the tariff definition.
        (time(18, 0, 0), None, 1.2),
    ]
    weekend_factor = 0.7

    # Work on a copy so the caller's frame is not half-modified and calling
    # the function twice on the same input gives the same answer.
    result = df.copy()

    start_datetime = pd.to_datetime(result['startDay'] + ' ' + result['startTime'])
    end_datetime = pd.to_datetime(result['endDay'] + ' ' + result['endTime'])

    result['start_day'] = start_datetime.dt.day_name()
    result['end_day'] = end_datetime.dt.day_name()
    result['start_time'] = start_datetime.dt.time
    result['end_time'] = end_datetime.dt.time

    start_clock = start_datetime.dt.time
    is_weekend = start_datetime.dt.weekday >= 5

    # Build a single per-row factor so weekday and weekend rules never stack.
    factor = pd.Series(1.0, index=result.index)
    for band_start, band_end, multiplier in weekday_bands:
        in_band = start_clock >= band_start
        if band_end is not None:
            in_band = in_band & (start_clock < band_end)
        factor[in_band & ~is_weekend] = multiplier
    factor[is_weekend] = weekend_factor

    # Column replacement (rather than in-place *=) lets integer toll columns
    # become float without dtype-incompatibility warnings.
    result[vehicle_columns] = result[vehicle_columns].mul(factor, axis=0)
    return result