## Question 1: Car Matrix Generation

In [None]:
import pandas as pd

def generate_car_matrix(file_path):
    """Build an ``id_1`` x ``id_2`` matrix of ``car`` values from a CSV file.

    Args:
        file_path: Path to a CSV file with ``id_1``, ``id_2`` and ``car``
            columns.

    Returns:
        A DataFrame indexed by ``id_1`` with one column per ``id_2`` value.
        Missing (id_1, id_2) combinations are filled with 0, and every cell
        whose row label equals its column label is set to 0.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If an (id_1, id_2) pair occurs more than once
            (raised by ``DataFrame.pivot``).
    """
    df = pd.read_csv(file_path)
    pivot_df = df.pivot(index='id_1', columns='id_2', values='car').fillna(0)

    # Zero the diagonal by label rather than by position. The previous
    # ``pivot_df.values[[range(n)] * 2] = 0`` relied on NumPy treating a list
    # of sequences as a tuple index (no longer the case), wrote through
    # ``.values`` (which may be a copy), and assumed a square matrix with
    # identically ordered rows and columns.
    for label in pivot_df.index.intersection(pivot_df.columns):
        pivot_df.at[label, label] = 0

    return pivot_df

# Script driver for Question 1: build the car matrix from the dataset CSV
# (path is relative to the current working directory).
file_path = 'dataset-1 (1).csv'

# Keep the matrix in a module-level name; multiply_matrix() is later called
# on `result_df` further down in this file.
result_df = generate_car_matrix(file_path)

# Display the resulting DataFrame
print(result_df)

## Question 2: Car Type Count Calculation

In [None]:
import pandas as pd

def get_type_count(df):
    """Label each row by its ``car`` value and count the labels.

    A categorical ``car_type`` column is written onto ``df`` itself (the
    caller's DataFrame is modified). Intervals are closed on the left:
    ``car < 15`` is 'low', ``15 <= car < 25`` is 'medium', ``car >= 25``
    is 'high'.

    Args:
        df: DataFrame with a numeric ``car`` column.

    Returns:
        A dict mapping each label to its number of rows, with keys in
        alphabetical order. Labels that never occur are included with 0.
    """
    edges = [float('-inf'), 15, 25, float('inf')]
    names = ['low', 'medium', 'high']

    # right=False makes every bin include its lower edge, not its upper one.
    df['car_type'] = pd.cut(df['car'], bins=edges, labels=names, right=False)

    tally = df['car_type'].value_counts().to_dict()

    # Rebuild the dict so insertion order follows the sorted keys.
    return {label: tally[label] for label in sorted(tally)}

# Example usage for Question 2: load the dataset and print the per-category
# counts.
file_path = 'dataset-1 (1).csv'
df = pd.read_csv(file_path)

# Note: get_type_count() also adds a 'car_type' column to `df`.
result = get_type_count(df)
print(result)


## Question 3: Bus Count Index Retrieval


In [None]:
import pandas as pd

def get_bus_indexes(df):
    """Return the index labels of rows with an unusually high ``bus`` value.

    A row qualifies when its ``bus`` value is strictly greater than twice
    the mean of the whole ``bus`` column.

    Args:
        df: DataFrame with a numeric ``bus`` column.

    Returns:
        The qualifying index labels as a list in ascending order.
    """
    bus = df['bus']
    threshold = 2 * bus.mean()

    # Positional boolean mask over the rows that exceed the threshold.
    exceeds = (bus > threshold).to_numpy()

    labels = df.index[exceeds].tolist()
    labels.sort()
    return labels

# Script driver for Question 3: reload the dataset and print the index labels
# returned by get_bus_indexes().
file_path = 'dataset-1 (1).csv'
df = pd.read_csv(file_path)

result = get_bus_indexes(df)
print(result)


## Question 4: Route Filtering


In [None]:
import pandas as pd

def filter_routes(df):
    """List the routes whose average ``truck`` value is above 7.

    Args:
        df: DataFrame with ``route`` and ``truck`` columns.

    Returns:
        A sorted list of the ``route`` values whose mean ``truck`` value is
        strictly greater than 7.
    """
    avg_truck = df.groupby('route')['truck'].mean()

    # Keep only the routes whose average clears the threshold.
    busy = avg_truck.loc[avg_truck.gt(7)]

    routes = busy.index.tolist()
    routes.sort()
    return routes

# Script driver for Question 4: reload the dataset and print the routes
# selected by filter_routes().
file_path = 'dataset-1 (1).csv'
df = pd.read_csv(file_path)

result = filter_routes(df)
print(result)


## Question 5: Matrix Value Modification

In [None]:
import pandas as pd

def multiply_matrix(result_df):
    """Scale every value of a numeric matrix according to its size.

    Values greater than 20 are multiplied by 0.75; all other values are
    multiplied by 1.25. Results are rounded to one decimal place. The input
    DataFrame is left unchanged.

    Args:
        result_df: Numeric DataFrame (e.g. the matrix produced by
            ``generate_car_matrix``).

    Returns:
        A new DataFrame of the same shape holding the scaled, rounded values.
    """
    # Decide each cell's factor from its ORIGINAL value. Applying the two
    # masks one after the other (as before) re-scaled values such as 24:
    # 24 * 0.75 = 18 fell under the "<= 20" mask and was then multiplied by
    # 1.25 as well, giving 22.5 instead of 18.
    above_threshold = result_df > 20

    modified_df = (result_df * 0.75).where(above_threshold, result_df * 1.25)

    return modified_df.round(1)


# Script driver for Question 5: `result_df` is the module-level matrix that
# was assigned from generate_car_matrix() earlier in this file.
result_df_modified = multiply_matrix(result_df)
print(result_df_modified)


## Question 6: Time Check

In [None]:
import pandas as pd

def check_timestamp_completeness(df):
    """Check, per (id, id_2) pair, whether every row spans a full week.

    A row passes when its start timestamp is 00:00:00 on a Monday and its end
    timestamp is 23:59:59 on a Sunday. A pair is reported as True only if all
    of its rows pass.

    As a side effect, ``start_timestamp`` and ``end_timestamp`` columns are
    added to ``df``.

    Args:
        df: DataFrame with ``id``, ``id_2`` and string columns ``startDay``,
            ``startTime``, ``endDay`` and ``endTime``.

    Returns:
        A boolean Series indexed by a MultiIndex of (``id``, ``id_2``).

    Raises:
        KeyError: If a required column is missing.
    """
    # Convert timestamp columns to datetime objects
    df['start_timestamp'] = pd.to_datetime(df['startDay'] + ' ' + df['startTime'])
    df['end_timestamp'] = pd.to_datetime(df['endDay'] + ' ' + df['endTime'])

    midnight = pd.Timestamp('00:00:00').time()
    last_second = pd.Timestamp('23:59:59').time()

    # NOTE(review): this is a per-row test, so a pair whose rows only cover
    # the week collectively is reported as False -- confirm that this is the
    # intended meaning of "complete".
    completeness_check = (
        (df['start_timestamp'].dt.time == midnight) &
        (df['end_timestamp'].dt.time == last_second) &
        (df['start_timestamp'].dt.dayofweek == 0) &  # Monday
        (df['end_timestamp'].dt.dayofweek == 6)      # Sunday
    )

    # Group by the DataFrame's own key columns. The boolean Series has no
    # 'id' / 'id_2' keys, so grouping it by those names fails; passing the
    # aligned columns themselves yields the intended (id, id_2) MultiIndex.
    completeness_series = completeness_check.groupby([df['id'], df['id_2']]).all()

    return completeness_series

# Example usage for Question 6: load the second dataset and print the
# per-(id, id_2) completeness flags.
csv_file_path = 'dataset-2.csv'  # Update with the actual path to your CSV file
df = pd.read_csv(csv_file_path)

# Note: check_timestamp_completeness() also adds 'start_timestamp' and
# 'end_timestamp' columns to `df`.
result = check_timestamp_completeness(df)
print(result)
