In [6]:
import pandas as pd

In [7]:
# Build the car matrix from 'dataset-1.csv'.
# NOTE(review): generate_car_matrix is defined in a later cell (Question 1);
# on a fresh kernel that cell must be run before this one, otherwise this call
# raises NameError.
dataset_path = 'dataset-1.csv'
result_matrix = generate_car_matrix(dataset_path)

Question 1: Car Matrix Generation

In [8]:
def generate_car_matrix(dataset):
    """Pivot the 'car' column into an id_1 x id_2 matrix with a zero diagonal.

    Parameters
    ----------
    dataset : str, path object or file-like
        Anything accepted by ``pd.read_csv``; the data must contain the
        columns 'id_1', 'id_2' and 'car'.

    Returns
    -------
    pandas.DataFrame
        Rows are 'id_1' values, columns are 'id_2' values and cells hold the
        'car' value (missing pairs are filled with 0). Every cell whose row
        label equals its column label is set to 0.
    """
    df = pd.read_csv(dataset)
    # pivot_table aggregates duplicate (id_1, id_2) pairs with its default
    # aggregation function.
    car_matrix = pd.pivot_table(df, values='car', index='id_1', columns='id_2', fill_value=0)

    # Zero the diagonal by label rather than by position: the row and column
    # label sets are not guaranteed to be identical, so position (i, i) may
    # not correspond to the same id on both axes.
    shared_ids = car_matrix.index.intersection(car_matrix.columns)
    for label in shared_ids:
        car_matrix.loc[label, label] = 0
    return car_matrix

Question 2: Car Type Count Calculation

In [9]:
def get_type_count(dataset):
    """Count rows per car-type bucket derived from the 'car' column.

    Buckets:
      * 'low'    -- car <= 15
      * 'medium' -- 15 < car <= 25
      * 'high'   -- car > 25
      * 'unknown' -- rows matching none of the above (e.g. missing values,
        for which every comparison is False)

    Parameters
    ----------
    dataset : str, path object or file-like
        Anything accepted by ``pd.read_csv``; must contain a 'car' column.

    Returns
    -------
    dict
        Mapping of bucket label to row count, with keys in alphabetical
        order. Buckets with no rows are omitted.
    """
    df = pd.read_csv(dataset)
    car = df['car']

    # Pure-pandas labelling: start from the fallback label and overwrite per
    # bucket. This avoids depending on numpy, which the notebook's import
    # cell does not bring into scope.
    car_type = pd.Series('unknown', index=df.index, dtype=object)
    car_type.loc[car <= 15] = 'low'
    car_type.loc[(car > 15) & (car <= 25)] = 'medium'
    car_type.loc[car > 25] = 'high'

    type_counts = car_type.value_counts().to_dict()
    return dict(sorted(type_counts.items()))

Question 3: Bus Count Index Retrieval

In [10]:
def get_bus_indexes(dataset):
    """Return the row indexes whose 'bus' value exceeds twice the column mean.

    Parameters
    ----------
    dataset : str, path object or file-like
        Anything accepted by ``pd.read_csv``; must contain a 'bus' column.

    Returns
    -------
    list
        Index labels of the qualifying rows, in ascending order.
    """
    frame = pd.read_csv(dataset)
    average_bus = frame['bus'].mean()
    # Strict comparison: rows exactly at twice the mean are not included.
    is_outlier = frame['bus'] > 2 * average_bus
    outlier_rows = frame[is_outlier]
    return sorted(outlier_rows.index.tolist())

Question 4: Route Filtering

In [11]:
def filter_routes(dataset):
    """Return the routes whose average 'truck' value is greater than 7.

    Parameters
    ----------
    dataset : str, path object or file-like
        Anything accepted by ``pd.read_csv``; must contain 'route' and
        'truck' columns.

    Returns
    -------
    list
        Sorted 'route' values whose mean 'truck' value is strictly above 7.
    """
    trips = pd.read_csv(dataset)
    mean_truck_by_route = trips.groupby('route')['truck'].mean()
    # Strict comparison: a route averaging exactly 7 is excluded.
    above_cutoff = mean_truck_by_route > 7
    selected = mean_truck_by_route[above_cutoff]
    return sorted(selected.index.tolist())

Question 5: Matrix Value Modification

In [12]:
def multiply_matrix(input_matrix):
    """Scale every value of a numeric matrix and round to one decimal place.

    Values greater than 20 are multiplied by 0.75; all other values are
    multiplied by 1.25. The input DataFrame is left unmodified.

    Parameters
    ----------
    input_matrix : pandas.DataFrame
        Numeric matrix to transform.

    Returns
    -------
    pandas.DataFrame
        New DataFrame with the same labels, holding the scaled values rounded
        to 1 decimal place.
    """
    # Vectorized replacement for the element-wise DataFrame.applymap call,
    # which is deprecated in recent pandas releases. Start from the "small
    # value" scaling and overwrite the cells that exceed the threshold.
    above_threshold = input_matrix > 20
    scaled = (input_matrix * 1.25).mask(above_threshold, input_matrix * 0.75)
    return scaled.round(1)

Question 6: Time Check

In [13]:
def time_check(df):
    """Flag (id, id_2) pairs whose timestamps fail the completeness check.

    For each (id, id_2) group the check passes only when all of these hold:
      * the time-of-day of the earliest start timestamp is 00:00:00,
      * the time-of-day of the latest end timestamp is 23:59:59,
      * the summed row durations equal 24 * 60 * 60 seconds.

    The input DataFrame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'id', 'id_2', 'startDay', 'startTime',
        'endDay' and 'endTime'. Each day/time pair is joined with a space and
        parsed by ``pd.to_datetime``.

    Returns
    -------
    pandas.Series
        Boolean Series indexed by (id, id_2); True means the group FAILED
        the check (its timestamps are considered incorrect).
    """
    start_datetime = pd.to_datetime(df['startDay'] + ' ' + df['startTime'])
    end_datetime = pd.to_datetime(df['endDay'] + ' ' + df['endTime'])

    # Work on a separate frame so the caller's DataFrame does not silently
    # gain helper columns as a side effect of running the check.
    timed = pd.DataFrame({
        'id': df['id'],
        'id_2': df['id_2'],
        'start_datetime': start_datetime,
        'end_datetime': end_datetime,
        'duration': (end_datetime - start_datetime).dt.total_seconds(),
    })

    day_start = pd.Timestamp('00:00:00').time()
    day_end = pd.Timestamp('23:59:59').time()
    # NOTE(review): a single row spanning 00:00:00-23:59:59 lasts 86399 s, so
    # it cannot satisfy this exact 86400 s total -- confirm the intended rule.
    expected_seconds = 24 * 60 * 60

    def _is_incorrect(group):
        # True when any of the three completeness conditions is violated.
        return not (
            group['start_datetime'].min().time() == day_start
            and group['end_datetime'].max().time() == day_end
            and group['duration'].sum() == expected_seconds
        )

    # Select the value columns explicitly so the applied function never
    # depends on whether the grouping columns are passed to it.
    incorrect_timestamps = (
        timed.groupby(['id', 'id_2'])[['start_datetime', 'end_datetime', 'duration']]
        .apply(_is_incorrect)
    )
    return incorrect_timestamps