In [None]:
# source: https://leetcode.com/problems/trips-and-users/solutions/6689144/simple-best-solution-by-iqbaldiit-53nt/
'''
    Table: Trips

    +-------------+----------+
    | Column Name | Type     |
    +-------------+----------+
    | id          | int      |
    | client_id   | int      |
    | driver_id   | int      |
    | city_id     | int      |
    | status      | enum     |
    | request_at  | varchar  |     
    +-------------+----------+
    id is the primary key (column with unique values) for this table.
    The table holds all taxi trips. Each trip has a unique id, while client_id and driver_id are foreign keys to the users_id at the Users table.
    Status is an ENUM (category) type of ('completed', 'cancelled_by_driver', 'cancelled_by_client').
    

    Table: Users

    +-------------+----------+
    | Column Name | Type     |
    +-------------+----------+
    | users_id    | int      |
    | banned      | enum     |
    | role        | enum     |
    +-------------+----------+
    users_id is the primary key (column with unique values) for this table.
    The table holds all users. Each user has a unique users_id, and role is an ENUM type of ('client', 'driver', 'partner').
    banned is an ENUM (category) type of ('Yes', 'No').
    

    The cancellation rate is computed by dividing the number of canceled (by client or driver) requests with unbanned users by the total number of requests with unbanned users on that day.

    Write a solution to find the cancellation rate of requests with unbanned users (both client and driver must not be banned) each day between "2013-10-01" and "2013-10-03" with at least one trip. Round Cancellation Rate to two decimal points.

    Return the result table in any order.

    The result format is in the following example.

    

    Example 1:

    Input: 
    Trips table:
    +----+-----------+-----------+---------+---------------------+------------+
    | id | client_id | driver_id | city_id | status              | request_at |
    +----+-----------+-----------+---------+---------------------+------------+
    | 1  | 1         | 10        | 1       | completed           | 2013-10-01 |
    | 2  | 2         | 11        | 1       | cancelled_by_driver | 2013-10-01 |
    | 3  | 3         | 12        | 6       | completed           | 2013-10-01 |
    | 4  | 4         | 13        | 6       | cancelled_by_client | 2013-10-01 |
    | 5  | 1         | 10        | 1       | completed           | 2013-10-02 |
    | 6  | 2         | 11        | 6       | completed           | 2013-10-02 |
    | 7  | 3         | 12        | 6       | completed           | 2013-10-02 |
    | 8  | 2         | 12        | 12      | completed           | 2013-10-03 |
    | 9  | 3         | 10        | 12      | completed           | 2013-10-03 |
    | 10 | 4         | 13        | 12      | cancelled_by_driver | 2013-10-03 |
    +----+-----------+-----------+---------+---------------------+------------+
    Users table:
    +----------+--------+--------+
    | users_id | banned | role   |
    +----------+--------+--------+
    | 1        | No     | client |
    | 2        | Yes    | client |
    | 3        | No     | client |
    | 4        | No     | client |
    | 10       | No     | driver |
    | 11       | No     | driver |
    | 12       | No     | driver |
    | 13       | No     | driver |
    +----------+--------+--------+
    Output: 
    +------------+-------------------+
    | Day        | Cancellation Rate |
    +------------+-------------------+
    | 2013-10-01 | 0.33              |
    | 2013-10-02 | 0.00              |
    | 2013-10-03 | 0.50              |
    +------------+-------------------+
    Explanation: 
    On 2013-10-01:
    - There were 4 requests in total, 2 of which were canceled.
    - However, the request with Id=2 was made by a banned client (User_Id=2), so it is ignored in the calculation.
    - Hence there are 3 unbanned requests in total, 1 of which was canceled.
    - The Cancellation Rate is (1 / 3) = 0.33
    On 2013-10-02:
    - There were 3 requests in total, 0 of which were canceled.
    - The request with Id=6 was made by a banned client, so it is ignored.
    - Hence there are 2 unbanned requests in total, 0 of which were canceled.
    - The Cancellation Rate is (0 / 2) = 0.00
    On 2013-10-03:
    - There were 3 requests in total, 1 of which was canceled.
    - The request with Id=8 was made by a banned client, so it is ignored.
    - Hence there are 2 unbanned requests in total, 1 of which was canceled.
    - The Cancellation Rate is (1 / 2) = 0.50
'''

In [24]:
import pandas as pd

def trips_and_users(
    trips: pd.DataFrame,
    users: pd.DataFrame,
    start_date: str = "2013-10-01",
    end_date: str = "2013-10-03",
) -> pd.DataFrame:
    """Compute the daily cancellation rate of trips between unbanned users.

    A trip is counted only when its client is an unbanned user with role
    ``"client"`` and its driver is an unbanned user with role ``"driver"``.
    Every status other than ``"completed"`` is treated as a cancellation.

    Args:
        trips: Frame with at least the columns ``client_id``, ``driver_id``,
            ``status`` and ``request_at``.
        users: Frame with the columns ``users_id``, ``banned`` and ``role``.
        start_date: First day (inclusive) of the reporting window, compared
            against ``request_at`` with ``Series.between``.
        end_date: Last day (inclusive) of the reporting window.

    Returns:
        A frame with the columns ``Day`` and ``Cancellation Rate`` (rounded to
        two decimals), one row per day in the window that has at least one
        counted trip, ordered by day.
    """
    # Ids that may take part in a counted trip, split by role.
    unbanned = users[users["banned"] == "No"]
    client_ids = unbanned.loc[unbanned["role"] == "client", "users_id"]
    driver_ids = unbanned.loc[unbanned["role"] == "driver", "users_id"]

    # Membership tests instead of inner merges: a trip row is kept at most
    # once even if `users` contains a repeated id, and no helper columns from
    # `users` leak into the intermediate frame.
    eligible = trips[
        trips["request_at"].between(start_date, end_date)
        & trips["client_id"].isin(client_ids)
        & trips["driver_id"].isin(driver_ids)
    ]

    # The mean of a boolean "is cancelled" flag per day equals
    # cancelled / total for that day.
    daily_rate = (
        eligible["status"]
        .ne("completed")
        .groupby(eligible["request_at"])
        .mean()
        .round(2)
    )
    return daily_rate.rename_axis("Day").reset_index(name="Cancellation Rate")


In [23]:
# Sample data: the example tables from the problem statement, one tuple per row.
trips_columns = ["id", "client_id", "driver_id", "city_id", "status", "request_at"]
trips_rows = [
    (1, 1, 10, 1, "completed", "2013-10-01"),
    (2, 2, 11, 1, "cancelled_by_driver", "2013-10-01"),
    (3, 3, 12, 6, "completed", "2013-10-01"),
    (4, 4, 13, 6, "cancelled_by_client", "2013-10-01"),
    (5, 1, 10, 1, "completed", "2013-10-02"),
    (6, 2, 11, 6, "completed", "2013-10-02"),
    (7, 3, 12, 6, "completed", "2013-10-02"),
    (8, 2, 12, 12, "completed", "2013-10-03"),
    (9, 3, 10, 12, "completed", "2013-10-03"),
    (10, 4, 13, 12, "cancelled_by_driver", "2013-10-03"),
]

users_columns = ["users_id", "banned", "role"]
users_rows = [
    (1, "No", "client"),
    (2, "Yes", "client"),
    (3, "No", "client"),
    (4, "No", "client"),
    (10, "No", "driver"),
    (11, "No", "driver"),
    (12, "No", "driver"),
    (13, "No", "driver"),
]

# Transpose the row tuples into the column -> list-of-values mappings.
trips_data = {
    column: list(values)
    for column, values in zip(trips_columns, zip(*trips_rows))
}
users_data = {
    column: list(values)
    for column, values in zip(users_columns, zip(*users_rows))
}

df_trips = pd.DataFrame(trips_data)
df_users = pd.DataFrame(users_data)

# Run the solution on the example; as the cell's last expression the
# resulting frame is displayed.
trips_and_users(df_trips, df_users)

Unnamed: 0,Day,Cancellation Rate
0,2013-10-01,0.33
1,2013-10-02,0.0
2,2013-10-03,0.5
