# Cafe Rewards Offer Dataset
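This notebook downloads the Cafe Rewards Offer dataset from Kaggle, previews it with pandas, and loads its CSV files into Delta tables in the `bronze` schema using PySpark.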

In [None]:
!pip install kaggle --quiet

Set up Kaggle API credentials. Upload your `kaggle.json` (the file containing your Kaggle username and API key) to the notebook's working directory before running the next cell.

In [None]:
import os, json
from pathlib import Path

# Export the Kaggle username/key from an uploaded kaggle.json as the
# KAGGLE_USERNAME / KAGGLE_KEY environment variables for the Kaggle client.
creds_path = Path('kaggle.json')
if creds_path.is_file():
    # The file is expected to be a JSON object with "username" and "key" entries;
    # a missing entry surfaces as a KeyError here.
    creds = json.loads(creds_path.read_text(encoding='utf-8'))
    os.environ['KAGGLE_USERNAME'] = creds['username']
    os.environ['KAGGLE_KEY'] = creds['key']
elif os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY'):
    # Both variables are already exported in this environment, so there is
    # nothing to upload and the "not found" hint would be misleading.
    pass
else:
    print('kaggle.json not found. Please upload it.')

Download the dataset from Kaggle and unzip it into the `dados` directory.

In [None]:
!kaggle datasets download arshmankhalid/caf-rewards-offer-dataset -p dados --unzip --force

Load the `customers.csv` file with pandas and preview its first rows.

In [None]:
import pandas as pd

# Quick pandas look at the raw customers file.
# It gets its own name because the Spark cell further down binds `customers`
# to a Spark DataFrame; sharing the name would make the variable's type depend
# on which cell happened to run last.
customers_preview = pd.read_csv('dados/customers.csv')
customers_preview.head()

Create Delta tables in the `bronze` schema using PySpark.

In [None]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.sql("CREATE DATABASE IF NOT EXISTS bronze")


def _csv_to_bronze(csv_path, table_name):
    """Read a CSV file with Spark and save it as a Delta table.

    Args:
        csv_path: Path of the CSV file to read; the reader is told the file
            has a header row.
        table_name: Qualified name of the target table, e.g.
            ``bronze.customers``. The table is written in overwrite mode.

    Returns:
        The Spark DataFrame that was read from ``csv_path``.
    """
    # No schema or inferSchema option is supplied, so column types are whatever
    # the CSV reader's defaults produce (per the Spark docs: all strings).
    frame = spark.read.option('header', 'true').csv(csv_path)
    frame.write.format('delta').mode('overwrite').saveAsTable(table_name)
    return frame


# One call per source file; the resulting DataFrames keep their original names
# so later cells can continue to use them.
customers = _csv_to_bronze('dados/customers.csv', 'bronze.customers')
offers = _csv_to_bronze('dados/offers.csv', 'bronze.offers')
transactions = _csv_to_bronze('dados/transactions.csv', 'bronze.transactions')
