# Silver layer

### Importing libraries

In [None]:
import pandas as pd
import boto3
from io import StringIO, BytesIO    

### Retrieving data from S3

In [None]:
# Location of the raw bronze-layer CSV inside the S3 bucket.
bucket_name = "bucket-case-data-engineer-bees"
s3_file_name = "bronze/raw_data.csv"

# Create the S3 client and request the object; the response's 'Body'
# entry is the stream that holds the file contents.
s3 = boto3.client("s3")
bronze = s3.get_object(Key=s3_file_name, Bucket=bucket_name)

### Transforming data into a pandas DataFrame

In [None]:
# Read the whole response body into memory, then decode the bytes as UTF-8 text.
raw_bytes = bronze['Body'].read()
bronze = raw_bytes.decode('utf-8')

In [None]:
# Wrap the decoded CSV text in a file-like object so pandas can parse it,
# then preview the first rows of the resulting DataFrame.
csv_text = StringIO(bronze)
bronze = pd.read_csv(csv_text)
bronze.head()

### Partitioning data by country and state_province, and sending to S3

In [None]:
# Write one parquet file per (country, state_province) pair to
# 'silver/<country>/<state_province>.parquet' in the same bucket.
countries = bronze['country'].unique()

for country in countries:
    country_rows = bronze.loc[bronze['country'] == country]

    # groupby visits each distinct state exactly once. Iterating the raw
    # 'state_province' column instead would rebuild and re-upload the same
    # file once per row belonging to that state.
    # NOTE: groupby skips rows whose state_province is missing, so no empty
    # 'nan.parquet' object is uploaded for them.
    # sort=False keeps states in order of first appearance.
    for state_province, silver in country_rows.groupby('state_province', sort=False):
        # buffering a parquet file in memory; the context manager releases
        # the buffer even if serialisation or the upload raises
        with BytesIO() as parquet_buffer:
            silver.to_parquet(parquet_buffer, index=False)

            # adding data into AWS S3
            temp_s3_file_name = f'silver/{country}/{state_province}.parquet'
            s3.put_object(
                Bucket=bucket_name,
                Key=temp_s3_file_name,
                Body=parquet_buffer.getvalue(),
            )