In [17]:
import boto3
import os
import pandas as pd
from datetime import datetime

import threading
import io
import sys

In [2]:
from dotenv import load_dotenv
# Load the local .env file, then read the AWS settings from the environment.
# Each value is None when the corresponding variable is not set.
load_dotenv(dotenv_path='.env')

AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_DEFAULT_REGION = os.environ.get("AWS_DEFAULT_REGION")

In [4]:
# Open a boto3 session with the explicit region and credentials read above,
# then obtain the S3 resource handle used by the rest of the notebook.
session = boto3.Session(
    region_name=AWS_DEFAULT_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

s3 = session.resource(service_name="s3")

In [18]:
# Accounts and export kinds; together with the date stamp they form the
# `<account>-<type>-<today>.json` file names built in push_data_to_s3.
accounts = ['ic.campinas', 'ic.saobernardo']
types = ['details', 'posts']
today = datetime.today().date()

# S3 bucket names. NOTE: 'intagram-analytics' is spelled the way the bucket
# is actually listed by S3 (see the bucket listing output further down).
raw_bucket = 'instagram-raw'
analytics_bucket = 'intagram-analytics'

In [6]:
class UploadProgressPercentage(object):
    """Progress reporter that can be passed as a boto3 transfer ``Callback``.

    Every call adds the number of bytes just transferred to a running total
    and writes one progress line (file name, bytes seen, total size and
    percentage) to ``sys.stdout``.
    """

    def __init__(self, filename):
        """Remember the local file whose upload is being tracked.

        Args:
            filename: Path of the local file. Its size on disk is read once
                here and used as the 100% mark.

        Raises:
            OSError: If ``filename`` does not exist or its size cannot be read.
        """
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Account for ``bytes_amount`` more bytes and print the progress.

        Args:
            bytes_amount: Number of bytes transferred since the previous call.
        """
        # To simplify we'll assume this is hooked up
        # to a single filename.
        # The lock keeps the counter update and the write together in case the
        # callback is invoked from more than one thread.
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # Empty file: nothing to transfer, so report it as complete
                # instead of dividing by zero.
                percentage = 100.0
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%) \n" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

# Push Data to s3
def push_data_to_s3(s3, bucket_name, accounts, types, today, data_dir='../data'):
    """Upload one JSON file per (account, type) pair to an S3 bucket.

    For every combination the local file
    ``<data_dir>/<account>/<account>-<type>-<today>.json`` is uploaded under
    the object key ``<account>/<account>-<type>-<today>.json``, reporting
    progress through ``UploadProgressPercentage``.

    Args:
        s3: Object exposing ``Bucket(name=...)`` (the boto3 S3 resource in
            this notebook).
        bucket_name: Name of the destination bucket.
        accounts: Iterable of account names.
        types: Iterable of data kinds that appear in the file names.
        today: Date stamp interpolated into the file names.
        data_dir: Local directory that holds the per-account folders.
            Defaults to ``'../data'``, the location previously hard-coded.

    Raises:
        OSError: If one of the expected local files cannot be opened; the
            remaining files are then not uploaded.
    """
    bucket = s3.Bucket(name=bucket_name)
    for account in accounts:
        # Named `data_type` so the builtin `type` is not shadowed.
        for data_type in types:
            # The object key doubles as the path relative to `data_dir`.
            file_name = f'{account}/{account}-{data_type}-{today}.json'
            file_path = f'{data_dir}/{file_name}'

            with open(file_path, 'rb') as data:
                bucket.upload_fileobj(
                    data,
                    file_name,
                    Callback=UploadProgressPercentage(file_path),
                )

In [9]:
# Handle for the raw-data bucket that the manual upload below writes to.
bucket = s3.Bucket(raw_bucket)

In [14]:
# One-off manual upload: object key in the bucket and the local source file.
# Note the local folder uses a hyphen ('ic-campinas') while the key prefix
# uses an underscore ('ic_campinas').
file_name = 'ic_campinas/ic_campinas-posts-2023-05-06.json'
file_path = '../data/ic-campinas/ic_campinas-posts-2023-05-06.json'

In [15]:
# Stream the local file to the bucket under `file_name`, printing a progress
# line for each chunk reported by the transfer.
with open(file_path, mode='rb') as data:
    bucket.upload_fileobj(
        Fileobj=data,
        Key=file_name,
        Callback=UploadProgressPercentage(file_path),
    )

../data/ic-campinas/ic_campinas-posts-2023-05-06.json  95011 / 95011.0  (100.00%) 


In [16]:
# Print out bucket names.
# The loop variable is deliberately not called `bucket`: that name holds the
# upload target created earlier in the notebook, and rebinding it here would
# make a re-run of the upload cell write to whichever bucket was listed last.
for listed_bucket in s3.buckets.all():
    print(listed_bucket.name)

instagram-raw
intagram-analytics


In [None]:
# Check which objects are present in the raw bucket.
# Uses the shared `raw_bucket` setting instead of repeating the bucket name.
for obj in s3.Bucket(raw_bucket).objects.all():
    print(obj)