## Importing Libraries

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

**1364. Number of Trusted Contacts of a Customer (Medium)**

**Table: Customers**

| Column Name   | Type    |
|---------------|---------|
| customer_id   | int     |
| customer_name | varchar |
| email         | varchar |

customer_id is the column of unique values for this table.
Each row of this table contains the name and the email of a customer of an online shop.
 
**Table: Contacts**

| Column Name   | Type    |
|---------------|---------|
| user_id       | int     |
| contact_name  | varchar |
| contact_email | varchar |

(user_id, contact_email) is the primary key (combination of columns with unique values) for this table.
Each row of this table contains the name and email of one contact of customer with user_id.
This table contains information about the people each customer trusts. The contact may or may not exist in the Customers table.
 
**Table: Invoices**

| Column Name  | Type    |
|--------------|---------|
| invoice_id   | int     |
| price        | int     |
| user_id      | int     |

invoice_id is the column of unique values for this table.
Each row of this table indicates that user_id has an invoice with invoice_id and a price.
 
**Write a solution to find the following for each invoice_id:**
- customer_name: The name of the customer the invoice is related to.
- price: The price of the invoice.
- contacts_cnt: The number of contacts related to the customer.
- trusted_contacts_cnt: The number of contacts related to the customer that are also customers of the shop (i.e., their email exists in the Customers table).

Return the result table ordered by invoice_id.

The result format is in the following example.

**Example 1:**

**Input:** 

**Customers table:**
| customer_id | customer_name | email              |
|-------------|---------------|--------------------|
| 1           | Alice         | alice@leetcode.com |
| 2           | Bob           | bob@leetcode.com   |
| 13          | John          | john@leetcode.com  |
| 6           | Alex          | alex@leetcode.com  |

**Contacts table:**
| user_id     | contact_name | contact_email      |
|-------------|--------------|--------------------|
| 1           | Bob          | bob@leetcode.com   |
| 1           | John         | john@leetcode.com  |
| 1           | Jal          | jal@leetcode.com   |
| 2           | Omar         | omar@leetcode.com  |
| 2           | Meir         | meir@leetcode.com  |
| 6           | Alice        | alice@leetcode.com |

**Invoices table:**
| invoice_id | price | user_id |
|------------|-------|---------|
| 77         | 100   | 1       |
| 88         | 200   | 1       |
| 99         | 300   | 2       |
| 66         | 400   | 2       |
| 55         | 500   | 13      |
| 44         | 60    | 6       |

**Output:** 
| invoice_id | customer_name | price | contacts_cnt | trusted_contacts_cnt |
|------------|---------------|-------|--------------|----------------------|
| 44         | Alex          | 60    | 1            | 1                    |
| 55         | John          | 500   | 0            | 0                    |
| 66         | Bob           | 400   | 2            | 0                    |
| 77         | Alice         | 100   | 3            | 2                    |
| 88         | Alice         | 200   | 3            | 2                    |
| 99         | Bob           | 300   | 2            | 0                    |

**Explanation:** 
- Alice has three contacts, two of them are trusted contacts (Bob and John).
- Bob has two contacts, none of them is a trusted contact.
- Alex has one contact and it is a trusted contact (Alice).
- John doesn't have any contacts.

In [0]:
# Sample rows for the Customers table: (customer_id, customer_name, email).
customers_columns_1364 = ["customer_id", "customer_name", "email"]
customers_data_1364 = [
    (1, "Alice", "alice@leetcode.com"),
    (2, "Bob", "bob@leetcode.com"),
    (13, "John", "john@leetcode.com"),
    (6, "Alex", "alex@leetcode.com"),
]

# Column types are inferred by Spark from the Python tuples.
customers_df_1364 = spark.createDataFrame(
    data=customers_data_1364,
    schema=customers_columns_1364,
)
customers_df_1364.show()

# Sample rows for the Contacts table: (user_id, contact_name, contact_email).
contacts_columns_1364 = ["user_id", "contact_name", "contact_email"]
contacts_data_1364 = [
    (1, "Bob", "bob@leetcode.com"),
    (1, "John", "john@leetcode.com"),
    (1, "Jal", "jal@leetcode.com"),
    (2, "Omar", "omar@leetcode.com"),
    (2, "Meir", "meir@leetcode.com"),
    (6, "Alice", "alice@leetcode.com"),
]

# Column types are inferred by Spark from the Python tuples.
contacts_df_1364 = spark.createDataFrame(
    data=contacts_data_1364,
    schema=contacts_columns_1364,
)
contacts_df_1364.show()

# Sample rows for the Invoices table: (invoice_id, price, user_id).
invoices_columns_1364 = ["invoice_id", "price", "user_id"]
invoices_data_1364 = [
    (77, 100, 1),
    (88, 200, 1),
    (99, 300, 2),
    (66, 400, 2),
    (55, 500, 13),
    (44, 60, 6),
]

# Column types are inferred by Spark from the Python tuples.
invoices_df_1364 = spark.createDataFrame(
    data=invoices_data_1364,
    schema=invoices_columns_1364,
)
invoices_df_1364.show()

+-----------+-------------+------------------+
|customer_id|customer_name|             email|
+-----------+-------------+------------------+
|          1|        Alice|alice@leetcode.com|
|          2|          Bob|  bob@leetcode.com|
|         13|         John| john@leetcode.com|
|          6|         Alex| alex@leetcode.com|
+-----------+-------------+------------------+

+-------+------------+------------------+
|user_id|contact_name|     contact_email|
+-------+------------+------------------+
|      1|         Bob|  bob@leetcode.com|
|      1|        John| john@leetcode.com|
|      1|         Jal|  jal@leetcode.com|
|      2|        Omar| omar@leetcode.com|
|      2|        Meir| meir@leetcode.com|
|      6|       Alice|alice@leetcode.com|
+-------+------------+------------------+

+----------+-----+-------+
|invoice_id|price|user_id|
+----------+-----+-------+
|        77|  100|      1|
|        88|  200|      1|
|        99|  300|      2|
|        66|  400|      2|
|        55|  500|     13|
|        44|   60|      6|
+----------+-----+-------+

In [0]:
# Number of contacts per user: one output row per user_id that has at least
# one row in Contacts. Users with no contacts are absent from this frame.
contacts_cnt_df_1364 = (
    contacts_df_1364
    .groupBy("user_id")
    .count()
    .withColumnRenamed("count", "contacts_cnt")
)

In [0]:
# Number of trusted contacts per user: contacts whose email also appears in
# Customers. Users with no trusted contacts are absent from this frame.
#
# A left-semi join keeps each Contacts row at most once when a matching
# customer email exists. An inner join would emit one row per matching
# customer, so two customers sharing an email (only customer_id is declared
# unique) would inflate the count.
trusted_contacts_df_1364 = (
    contacts_df_1364
    .join(
        customers_df_1364,
        on=contacts_df_1364.contact_email == customers_df_1364.email,
        how="left_semi",
    )
    .groupBy("user_id")
    .agg(count("*").alias("trusted_contacts_cnt"))
)

In [0]:
# Attach the customer's name to every invoice. The left join keeps invoices
# whose user_id has no row in Customers (customer_name is null for those).
# user_id is kept so the per-user contact counts can be joined on it.
invoice_customer_df_1364 = (
    invoices_df_1364
    .join(
        customers_df_1364,
        on=invoices_df_1364["user_id"] == customers_df_1364["customer_id"],
        how="left",
    )
    .select("invoice_id", "customer_name", "price", "user_id")
)

In [0]:
# Final report: one row per invoice with the customer's name, the price and
# both contact counts, ordered by invoice_id. The count frames only contain
# users that have at least one (trusted) contact, so they are left-joined and
# the resulting nulls are replaced with 0.
(
    invoice_customer_df_1364
    .join(contacts_cnt_df_1364, on="user_id", how="left")
    .join(trusted_contacts_df_1364, on="user_id", how="left")
    .select(
        col("invoice_id"),
        col("customer_name"),
        col("price"),
        *[
            coalesce(col(cnt_name), lit(0)).alias(cnt_name)
            for cnt_name in ("contacts_cnt", "trusted_contacts_cnt")
        ],
    )
    .orderBy("invoice_id")
    .show()
)

+----------+-------------+-----+------------+--------------------+
|invoice_id|customer_name|price|contacts_cnt|trusted_contacts_cnt|
+----------+-------------+-----+------------+--------------------+
|        44|         Alex|   60|           1|                   1|
|        55|         John|  500|           0|                   0|
|        66|          Bob|  400|           2|                   0|
|        77|        Alice|  100|           3|                   2|
|        88|        Alice|  200|           3|                   2|
|        99|          Bob|  300|           2|                   0|
+----------+-------------+-----+------------+--------------------+

