In [34]:
import os
import requests
from pyspark.sql import SparkSession
from tmlt.analytics.keyset import KeySet
from tmlt.analytics.privacy_budget import PureDPBudget
from tmlt.analytics.query_builder import QueryBuilder
from tmlt.analytics.session import Session

# r = requests.get(
#     'https://tumult-public.s3.amazonaws.com/library-members.csv',
# )
# with open("members.csv", "w") as f:
#     f.write(r.text)
spark = SparkSession.builder.getOrCreate()
members_df = spark.read.csv("rie_survey2.csv", header=True, inferSchema=True)

session = Session.from_dataframe(
    privacy_budget=PureDPBudget(epsilon=float('inf')),
    source_id="members",
    dataframe=members_df,
)

In [35]:
itms_list = members_df.select('ITMs').collect()

In [36]:
itms_array = [str(row.ITMs) for row in itms_list]

In [37]:
itms_array

['Precision Engineering',
 'Marine and Offshore',
 'Aerospace',
 'Energy and Chemicals',
 'Food Manufacturing',
 'Wholesale Trade',
 'Logistics',
 'Sea Transport',
 'Construction',
 'Professional Services',
 'Information and Communications',
 'Financial Services',
 'Retail',
 'Non-ITM (Biomed Mfg)']

In [38]:
members_df.head(10)

[Row(ITMs='Precision Engineering', BERD=528.4),
 Row(ITMs='Marine and Offshore', BERD=34.2),
 Row(ITMs='Aerospace', BERD=81.8),
 Row(ITMs='Energy and Chemicals', BERD=300.8),
 Row(ITMs='Food Manufacturing', BERD=86.6),
 Row(ITMs='Wholesale Trade', BERD=940.2),
 Row(ITMs='Logistics', BERD=179.9),
 Row(ITMs='Sea Transport', BERD=180.9),
 Row(ITMs='Construction', BERD=14.7),
 Row(ITMs='Professional Services', BERD=295.0)]

In [39]:
itm_sectors = KeySet.from_dict({
    "ITMs": itms_array
})

In [61]:
edu_average_age_query = (
    QueryBuilder("members")
    .groupby(itm_sectors)
    .max("BERD", low=0, high=200)
)
edu_average_ages = session.evaluate(
    edu_average_age_query,
    privacy_budget=PureDPBudget(5),
)
edu_average_ages.sort("BERD_max").show(truncate=False)

+------------------------------+------------------+
|ITMs                          |BERD_max          |
+------------------------------+------------------+
|Wholesale Trade               |3.1531054650086787|
|Precision Engineering         |14.53246636296845 |
|Construction                  |24.662971662814215|
|Retail                        |26.613957238718854|
|Energy and Chemicals          |54.7658214115919  |
|Aerospace                     |70.0660596342069  |
|Professional Services         |86.22596197366721 |
|Non-ITM (Biomed Mfg)          |132.12276391375605|
|Food Manufacturing            |142.83544710221034|
|Marine and Offshore           |147.28233624664597|
|Financial Services            |151.04018772801254|
|Information and Communications|159.88855300389028|
|Logistics                     |188.3631281035168 |
|Sea Transport                 |192.17272231496432|
+------------------------------+------------------+

