In [3]:
#Question 9: Distance Matrix Calculation

import pandas as pd

def calculate_distance_matrix(file_path):

    data = pd.read_csv(file_path)
    
    
    toll_ids = pd.concat([data['id_start'], data['id_end']]).unique()
    toll_ids.sort()
    
   
    distance_matrix = pd.DataFrame(0, index=toll_ids, columns=toll_ids)
    
    
    for _, row in data.iterrows():
        id_start, id_end, distance = row['id_start'], row['id_end'], row['distance']
        distance_matrix.at[id_start, id_end] = distance
        distance_matrix.at[id_end, id_start] = distance
    

    for k in toll_ids:
        for i in toll_ids:
            for j in toll_ids:
                if distance_matrix.at[i, j] == 0 or (distance_matrix.at[i, k] + distance_matrix.at[k, j] < distance_matrix.at[i, j]):
                    distance_matrix.at[i, j] = distance_matrix.at[i, k] + distance_matrix.at[k, j]
    
   
    for id in toll_ids:
        distance_matrix.at[id, id] = 0
    
    return distance_matrix

file_path = 'C:\\Users\\91782\\Downloads\\dataset-2.csv'  
distance_matrix = calculate_distance_matrix(file_path)
print(distance_matrix.head())

         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400      0.0      9.7     19.4     19.4     19.4     19.4     19.4   
1001402      9.7      0.0      9.7      9.7      9.7      9.7      9.7   
1001404     19.4      9.7      0.0     19.4     19.4     19.4     19.4   
1001406     19.4      9.7     19.4      0.0     19.4     19.4     19.4   
1001408     19.4      9.7     19.4     19.4      0.0     19.4     19.4   

         1001414  1001416  1001418  ...  1001462  1001464  1001466  1001468  \
1001400     19.4     19.4     19.4  ...     19.4     19.4     19.4     19.4   
1001402      9.7      9.7      9.7  ...      9.7      9.7      9.7      9.7   
1001404     19.4     19.4     19.4  ...     19.4     19.4     19.4     19.4   
1001406     19.4     19.4     19.4  ...     19.4     19.4     19.4     19.4   
1001408     19.4     19.4     19.4  ...     19.4     19.4     19.4     19.4   

         1001470  1001472  1001488  1004354  1004355  1004356  
1001400     19.4

In [4]:
#Question 10: Unroll Distance Matrix

import pandas as pd

def unroll_distance_matrix(distance_matrix):
   
    unrolled_data = []

    
    for id_start in distance_matrix.index:
        for id_end in distance_matrix.columns:
            if id_start != id_end:  # Exclude diagonal elements
                distance = distance_matrix.at[id_start, id_end]
                unrolled_data.append([id_start, id_end, distance])
    
    
    unrolled_df = pd.DataFrame(unrolled_data, columns=['id_start', 'id_end', 'distance'])
    
    return unrolled_df

unrolled_df = unroll_distance_matrix(distance_matrix)
print(unrolled_df.head())

   id_start   id_end  distance
0   1001400  1001402       9.7
1   1001400  1001404      19.4
2   1001400  1001406      19.4
3   1001400  1001408      19.4
4   1001400  1001410      19.4


In [8]:
#Question 11: Finding IDs within Percentage Threshold

import pandas as pd

def find_ids_within_ten_percentage_threshold(unrolled_df, reference_value):
   
    reference_rows = unrolled_df[unrolled_df['id_start'] == reference_value]
    
  
    average_distance = reference_rows['distance'].mean()
    
   
    lower_bound = average_distance * 0.9
    upper_bound = average_distance * 1.1
    
    
    filtered_rows = unrolled_df[
        (unrolled_df['distance'] >= lower_bound) &
        (unrolled_df['distance'] <= upper_bound)
    ]
    
   
    ids_within_threshold = filtered_rows['id_start'].unique()
    

    return sorted(ids_within_threshold)


reference_value = 1001400  # Replace with your reference value
ids_within_threshold = find_ids_within_ten_percentage_threshold(unrolled_df, reference_value)
print(ids_within_threshold)

[1001400, 1001404, 1001406, 1001408, 1001410, 1001412, 1001414, 1001416, 1001418, 1001420, 1001422, 1001424, 1001426, 1001428, 1001430, 1001432, 1001434, 1001436, 1001437, 1001438, 1001440, 1001442, 1001444, 1001446, 1001448, 1001450, 1001452, 1001454, 1001456, 1001458, 1001460, 1001461, 1001462, 1001464, 1001466, 1001468, 1001470, 1001472, 1001488, 1004354, 1004355, 1004356]


In [7]:
#Question 12: Calculate Toll Rate

import pandas as pd

def calculate_toll_rate(unrolled_df):
 
    rates = {
        'moto': 0.8,
        'car': 1.2,
        'rv': 1.5,
        'bus': 2.2,
        'truck': 3.6
    }
 
    for vehicle_type, rate in rates.items():
        unrolled_df[vehicle_type] = unrolled_df['distance'] * rate
    
    return unrolled_df

toll_rate_df = calculate_toll_rate(unrolled_df)
print(toll_rate_df.head())

   id_start   id_end  distance   moto    car     rv    bus  truck
0   1001400  1001402       9.7   7.76  11.64  14.55  21.34  34.92
1   1001400  1001404      19.4  15.52  23.28  29.10  42.68  69.84
2   1001400  1001406      19.4  15.52  23.28  29.10  42.68  69.84
3   1001400  1001408      19.4  15.52  23.28  29.10  42.68  69.84
4   1001400  1001410      19.4  15.52  23.28  29.10  42.68  69.84


In [10]:
#Question 13: Calculate Time-Based Toll Rates

import pandas as pd
from datetime import time

def calculate_time_based_toll_rates(unrolled_df):
   
    weekday_discounts = [
        (time(0, 0), time(10, 0), 0.8),   # 00:00:00 to 10:00:00
        (time(10, 0), time(18, 0), 1.2),  # 10:00:00 to 18:00:00
        (time(18, 0), time(23, 59), 0.8)  # 18:00:00 to 23:59:59
    ]
    weekend_discount = 0.7
    
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    
    time_based_rows = []
    
   
    for _, row in unrolled_df.iterrows():
        id_start = row['id_start']
        id_end = row['id_end']
        distance = row['distance'] 
        distances = {
            'moto': row['moto'],
            'car': row['car'],
            'rv': row['rv'],
            'bus': row['bus'],
            'truck': row['truck']
        }
        
        
        for day in days_of_week:
            is_weekend = day in ['Saturday', 'Sunday']
            
            if is_weekend:
               
                time_based_rows.append({
                    'id_start': id_start,
                    'id_end': id_end,
                    'distance': distance,  # Add distance
                    'start_day': day,
                    'start_time': time(0, 0),
                    'end_day': day,
                    'end_time': time(23, 59),
                    'moto': distances['moto'] * weekend_discount,
                    'car': distances['car'] * weekend_discount,
                    'rv': distances['rv'] * weekend_discount,
                    'bus': distances['bus'] * weekend_discount,
                    'truck': distances['truck'] * weekend_discount
                })
            else:
               
                for start_time, end_time, factor in weekday_discounts:
                    time_based_rows.append({
                        'id_start': id_start,
                        'id_end': id_end,
                        'distance': distance, 
                        'start_day': day,
                        'start_time': start_time,
                        'end_day': day,
                        'end_time': end_time,
                        'moto': distances['moto'] * factor,
                        'car': distances['car'] * factor,
                        'rv': distances['rv'] * factor,
                        'bus': distances['bus'] * factor,
                        'truck': distances['truck'] * factor
                    })
    
   
    time_based_df = pd.DataFrame(time_based_rows)
    
    return time_based_df


time_based_toll_df = calculate_time_based_toll_rates(toll_rate_df)
print(time_based_toll_df.head())

    id_start     id_end  distance start_day start_time  end_day  end_time  \
0  1001400.0  1001402.0       9.7    Monday   00:00:00   Monday  10:00:00   
1  1001400.0  1001402.0       9.7    Monday   10:00:00   Monday  18:00:00   
2  1001400.0  1001402.0       9.7    Monday   18:00:00   Monday  23:59:00   
3  1001400.0  1001402.0       9.7   Tuesday   00:00:00  Tuesday  10:00:00   
4  1001400.0  1001402.0       9.7   Tuesday   10:00:00  Tuesday  18:00:00   

    moto     car     rv     bus   truck  
0  6.208   9.312  11.64  17.072  27.936  
1  9.312  13.968  17.46  25.608  41.904  
2  6.208   9.312  11.64  17.072  27.936  
3  6.208   9.312  11.64  17.072  27.936  
4  9.312  13.968  17.46  25.608  41.904  
