In [None]:
import os
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType, DoubleType, BooleanType, DateType

In [None]:
# Identifiers for the data lake and the service principal come from environment
# variables. Fail fast with an explicit message when any of them is unset or
# empty: otherwise the literal text "None" would be interpolated into the Spark
# configuration keys and the OAuth endpoint below, producing confusing errors
# only when the first read is attempted.
_required_env_vars = ("DLS_NAME", "DLS_FILESYSTEM_STAGE", "SP_TENANT_ID", "SP_CLIENT_ID")
_missing_env_vars = [name for name in _required_env_vars if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

storage_name = os.getenv("DLS_NAME")
filesystem_stage = os.getenv("DLS_FILESYSTEM_STAGE")
tenant_id = os.getenv("SP_TENANT_ID")
application_id = os.getenv("SP_CLIENT_ID")

# The client secret is fetched from a Databricks secret scope rather than being
# hardcoded in the notebook.
service_credential = dbutils.secrets.get(scope="keyvault-managed", key="dlsserviceprincipalsecret")

# Session-level ABFS settings for OAuth client-credentials authentication.
# Every key is suffixed with the storage account host, so the settings apply
# only to that account.
_account_host = f"{storage_name}.dfs.core.windows.net"
_oauth_settings = {
    f"fs.azure.account.auth.type.{_account_host}": "OAuth",
    f"fs.azure.account.oauth.provider.type.{_account_host}": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    f"fs.azure.account.oauth2.client.id.{_account_host}": application_id,
    f"fs.azure.account.oauth2.client.secret.{_account_host}": service_credential,
    f"fs.azure.account.oauth2.client.endpoint.{_account_host}": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
}
for _conf_key, _conf_value in _oauth_settings.items():
    spark.conf.set(_conf_key, _conf_value)

In [None]:
# Root URL of the staging filesystem inside the storage account.
lake_url = f"abfss://{filesystem_stage}@{storage_name}.dfs.core.windows.net"


def _read_stage_csv(file_name):
    """Load one CSV file from the staging filesystem as a Spark DataFrame.

    The first row is used as the header and column types are inferred by Spark.

    Args:
        file_name: Name of the file, relative to ``lake_url``.

    Returns:
        The DataFrame produced by the Spark CSV reader.
    """
    reader = spark.read.option("header", "true").option("inferSchema", "true")
    return reader.load(f"{lake_url}/{file_name}", format="CSV")


athletes = _read_stage_csv("Athletes.xlsx.csv")
coaches = _read_stage_csv("Coaches.xlsx.csv")
entries_gender = _read_stage_csv("EntriesGender.xlsx.csv")
medals = _read_stage_csv("Medals.xlsx.csv")
teams = _read_stage_csv("Teams.xlsx.csv")

In [None]:
# Print the inferred schemas of the athletes and entries-by-gender frames so
# the column types can be checked before any casting is applied.
for _frame in (athletes, entries_gender):
    _frame.printSchema()

In [None]:
# Cast the three count columns to integers, one column at a time, replacing
# each column in place under its original name.
for _count_column in ("Female", "Male", "Total"):
    entries_gender = entries_gender.withColumn(
        _count_column, col(_count_column).cast(IntegerType())
    )