# Question 1: Distance Matrix Calculation 

# In [None]:
import pandas as pd

def calculate_distance_matrix(input_file):
    """Build a symmetric matrix of cumulative distances between all ids.

    The CSV at ``input_file`` must provide the columns ``id_start``,
    ``id_end`` and ``distance``. Every row is treated as an undirected leg,
    so the distance applies in both directions. Distances for pairs that are
    not listed directly are derived by summing the legs along the shortest
    connecting route (Floyd-Warshall), e.g. A-B = 5 and B-C = 7.5 yields
    A-C = 12.5.

    Args:
        input_file: Path (or any object accepted by ``pandas.read_csv``)
            of the CSV file to load.

    Returns:
        A float ``pandas.DataFrame`` indexed and labelled by the sorted
        unique ids. The diagonal is 0.0, and pairs with no connecting route
        are also reported as 0.0.
    """
    df = pd.read_csv(input_file)

    ids = sorted(set(df['id_start']).union(df['id_end']))
    position = {node: offset for offset, node in enumerate(ids)}
    size = len(ids)
    inf = float('inf')

    # Unknown distances start out as "infinitely far"; a node is 0 from itself.
    dist = [[inf] * size for _ in range(size)]
    for offset in range(size):
        dist[offset][offset] = 0.0

    # Seed the matrix with the recorded legs. When the same pair appears
    # more than once, the later row overwrites the earlier one.
    for start, end, distance in zip(df['id_start'], df['id_end'], df['distance']):
        i, j = position[start], position[end]
        if i == j:
            # A self-loop must not disturb the zero diagonal.
            continue
        dist[i][j] = dist[j][i] = float(distance)

    # Floyd-Warshall relaxation: routing through `via` replaces the current
    # value whenever the sum of the two legs is shorter.
    for via in range(size):
        via_row = dist[via]
        for i in range(size):
            to_via = dist[i][via]
            if to_via == inf:
                continue  # `via` cannot be reached from i, nothing to relax
            row = dist[i]
            for j in range(size):
                candidate = to_via + via_row[j]
                if candidate < row[j]:
                    row[j] = candidate

    # Pairs that are still unreachable are reported as 0.0.
    cleaned = [[0.0 if value == inf else value for value in row] for row in dist]

    return pd.DataFrame(cleaned, index=ids, columns=ids, dtype=float)

# Question 1 driver: build the distance matrix from the CSV and print it.
# Runs at module level; the relative path is handed to
# calculate_distance_matrix as-is.
input_file = 'dataset-3.csv'
result_df = calculate_distance_matrix(input_file)
print(result_df)


# Question 2: Unroll Distance Matrix

# In [None]:
import pandas as pd

def unroll_distance_matrix(distance_matrix):
    """Flatten a square distance matrix into one row per ordered id pair.

    The ids are taken from the matrix index and are used for both the row
    and the column lookup, so the matrix is expected to carry the same
    labels on both axes. Pairs where start and end are the same id are
    omitted.

    Args:
        distance_matrix: ``pandas.DataFrame`` whose index and columns hold
            the same ids.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id_start``, ``id_end`` and
        ``distance``, ordered by start id and then end id in index order.
        The three columns are present even when there are no pairs to
        report (zero or one id).
    """
    ids = distance_matrix.index.tolist()

    records = [
        {'id_start': start, 'id_end': end, 'distance': distance_matrix.at[start, end]}
        for start in ids
        for end in ids
        if start != end
    ]

    # Declaring the columns keeps the schema stable for an empty result, so
    # callers can still select 'id_start' / 'distance' without a KeyError.
    return pd.DataFrame(records, columns=['id_start', 'id_end', 'distance'])

# Question 2 driver: rebuild the distance matrix from the CSV, flatten it
# into (id_start, id_end, distance) rows and print the result.
input_file = 'dataset-3.csv'
distance_matrix = calculate_distance_matrix(input_file)
unrolled_df = unroll_distance_matrix(distance_matrix)
print(unrolled_df)


# Question 3: Finding IDs within Percentage Threshold


# In [None]:
import pandas as pd

def find_ids_within_ten_percentage_threshold(df, reference_value):
    """Return the start ids having a distance within 10% of a reference mean.

    The mean of ``distance`` is taken over the rows whose ``id_start`` equals
    ``reference_value``. Every row of ``df`` whose own ``distance`` lies in
    the closed interval [mean - 10%, mean + 10%] then contributes its
    ``id_start`` to the result. Note that the comparison is made per row,
    not per-id average.

    Args:
        df: DataFrame with ``id_start`` and ``distance`` columns.
        reference_value: The ``id_start`` whose mean distance defines the band.

    Returns:
        The unique matching ``id_start`` values in ascending order, as
        produced by ``Series.unique()``. The result is empty when no row
        matches, including when ``reference_value`` is absent from ``df``
        (the mean is then NaN).
    """
    is_reference = df['id_start'] == reference_value
    mean_distance = df.loc[is_reference, 'distance'].mean()

    # Ten percent of the mean on either side; both bounds are inclusive.
    margin = mean_distance * 0.1
    floor = mean_distance - margin
    ceiling = mean_distance + margin

    in_band = df['distance'].between(floor, ceiling)

    matching_ids = df.loc[in_band, 'id_start'].unique()
    matching_ids.sort()  # in-place ascending sort of the unique values
    return matching_ids

# Question 3 driver: rebuild the unrolled distance table from the CSV, then
# list the start ids whose distances fall within 10% of the reference id's
# mean distance.
input_file = 'dataset-3.csv'
distance_matrix = calculate_distance_matrix(input_file)
unrolled_df = unroll_distance_matrix(distance_matrix)

# The id_start value whose mean distance defines the threshold band.
reference_value = 1 
result_ids = find_ids_within_ten_percentage_threshold(unrolled_df, reference_value)
print(result_ids)


# Question 4: Calculate Toll Rate


# In [None]:
import pandas as pd

def calculate_toll_rate(df):
    """Append one toll column per vehicle type to a copy of ``df``.

    Each toll is the row's ``distance`` multiplied by a fixed per-vehicle
    coefficient. The input DataFrame is left untouched.

    Args:
        df: DataFrame containing a ``distance`` column.

    Returns:
        A copy of ``df`` with the extra columns ``moto``, ``car``, ``rv``,
        ``bus`` and ``truck``, added in that order.
    """
    # (vehicle type, coefficient applied to the distance)
    vehicle_rates = (
        ('moto', 0.8),
        ('car', 1.2),
        ('rv', 1.5),
        ('bus', 2.2),
        ('truck', 3.6),
    )

    # Work on a copy so the caller's DataFrame is not modified.
    tolled = df.copy()
    distance = tolled['distance']

    for vehicle, coefficient in vehicle_rates:
        tolled[vehicle] = distance * coefficient

    return tolled

# Example usage (Question 4 driver): rebuild the unrolled distance table
# from the CSV so toll rates can be derived from its 'distance' column.
input_file = 'dataset-3.csv'
distance_matrix = calculate_distance_matrix(input_file)
unrolled_df = unroll_distance_matrix(distance_matrix)

# Add the per-vehicle toll rate columns to a copy of the table and print it.
result_df_with_rates = calculate_toll_rate(unrolled_df)
print(result_df_with_rates)
