# Gold layer

### Import libraries

In [None]:
import pandas as pd
import boto3
from io import BytesIO

### Retrieving data from S3

In [None]:
s3 = boto3.client('s3')
bucket_name = "bucket-case-data-engineer-bees"
s3_file_name = "silver/"

# A single list_objects_v2 call returns at most 1,000 keys, so walk every
# page of the listing instead of trusting the first response. The merged
# result keeps the response-like {'Contents': [...]} shape so code that reads
# silver.get('Contents', []) keeps working unchanged.
silver_paginator = s3.get_paginator('list_objects_v2')
silver = {
    'Contents': [
        obj
        for page in silver_paginator.paginate(Bucket=bucket_name, Prefix=s3_file_name)
        for obj in page.get('Contents', [])  # 'Contents' is absent on empty pages
    ]
}

In [None]:
# Pull the object keys out of the listing response and keep only the Parquet
# files; `silver` is rebound from the response to this list of keys.
listed_keys = (entry['Key'] for entry in silver.get('Contents', []))
silver = [key for key in listed_keys if key.endswith('.parquet')]

In [None]:
# Download every silver Parquet object and combine them into one DataFrame.
# Frames are collected first and concatenated once: calling pd.concat inside
# the loop re-copies all previously accumulated rows on every iteration.
silver_frames = []
for file in silver:
    s3_object = s3.get_object(Bucket=bucket_name, Key=file)
    # Wrap the downloaded bytes in BytesIO so pandas can read them as a file.
    silver_frames.append(pd.read_parquet(BytesIO(s3_object['Body'].read())))

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when no Parquet keys were found.
silver_dataframe = pd.concat(silver_frames) if silver_frames else pd.DataFrame()

In [None]:
# Preview the first five rows of the combined silver data.
silver_dataframe.head(n=5)

### Aggregating data by brewery_type, country, and state_province

In [None]:
# Count the rows in each (brewery_type, country, state_province) group and
# flatten the result into a table with a 'count' column.
group_sizes = silver_dataframe.groupby(['brewery_type', 'country', 'state_province']).size()
gold = group_sizes.reset_index(name='count')
gold.head(n=5)

### Sending data back to S3

In [None]:
s3_file_name = "gold/gold.parquet"

# Serialize the aggregated table to Parquet in memory and upload it to S3.
# The context manager releases the buffer even if serialization or the
# upload raises, instead of relying on a trailing close() call.
with BytesIO() as parquet_buffer:
    gold.to_parquet(parquet_buffer, index=False)
    s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=parquet_buffer.getvalue())

print(f"File sent to s3://{bucket_name}/{s3_file_name}")