In [8]:
import os
import json

import boto3
import pandas as pd
from dotenv import load_dotenv

# Load key=value pairs from a local .env file into the process environment so
# that the os.getenv(...) lookups used in this notebook can resolve them.
load_dotenv()

def get_s3_client(region_name="ap-northeast-2"):
    """Create a boto3 S3 client from credentials held in environment variables.

    Args:
        region_name: AWS region the client targets. Defaults to
            "ap-northeast-2", the value that used to be hard-coded, so calls
            without arguments behave exactly as before.

    Returns:
        The client object returned by ``boto3.client("s3", ...)``.

    Notes:
        The access key id is read from ``ACCESS_ID`` and the secret access key
        from ``ACCESS_KEY``. ``os.getenv`` returns ``None`` for an unset
        variable, and that ``None`` is handed to boto3 unchanged.
    """
    return boto3.client(
        "s3",
        aws_access_key_id=os.getenv("ACCESS_ID"),
        aws_secret_access_key=os.getenv("ACCESS_KEY"),
        region_name=region_name,
    )

def get_s3_data(paginator, bucket_name, key):
    """List the base file names of the objects stored under an S3 prefix.

    Args:
        paginator: Object whose ``paginate(Bucket=..., Prefix=...)`` yields
            listing pages (the notebook passes a ``list_objects_v2`` paginator).
        bucket_name: Name of the bucket to list.
        key: Key prefix that restricts the listing.

    Returns:
        A list with the last "/"-separated segment of every listed key, in
        listing order. Keys ending in "/" produce an empty segment and are
        skipped. An empty list is returned when nothing matches the prefix.
    """
    file_list = []

    for page in paginator.paginate(Bucket=bucket_name, Prefix=key):
        # A page without any objects carries no "Contents" entry, so fall back
        # to an empty list instead of raising KeyError.
        for item in page.get("Contents", []):
            file_name = item["Key"].split(sep="/")[-1]
            if file_name:
                file_list.append(file_name)

    return file_list

def extract_s3_data(s3_client, bucket_name, key):
    """Fetch a single S3 object and parse its body as UTF-8 encoded JSON.

    Args:
        s3_client: Client exposing ``get_object(Bucket=..., Key=...)``.
        bucket_name: Bucket that holds the object.
        key: Full key of the object to download.

    Returns:
        The Python value produced by ``json.loads`` for the object's content.
    """
    response = s3_client.get_object(Bucket=bucket_name, Key=key)
    raw_bytes = response["Body"].read()
    return json.loads(raw_bytes.decode("utf-8"))

In [7]:
# Read the bucket settings once. os.environ[...] raises KeyError right here when
# a variable is missing, instead of passing None on to the S3 calls below.
BUCKET_NAME = os.environ["BUCKET_NAME"]
ACCOUNT_KEY = os.environ["ACCOUNT_KEY"]

client = get_s3_client()
paginator = client.get_paginator("list_objects_v2")

# Base file names of the objects found under the account prefix.
account_file_list = get_s3_data(paginator, BUCKET_NAME, ACCOUNT_KEY)

# Download and parse every account file, in the same order as account_file_list.
account_list = [
    extract_s3_data(client, BUCKET_NAME, f"{ACCOUNT_KEY}/{file_name}")
    for file_name in account_file_list
]

# Preview a few records only; dumping the full list floods the notebook output.
account_list[:3]


[{'address': '0xc9fbcdf9ee25ddc38cff91449d52a79234f05220',
  'username': '_______byLSD',
  'profile_image_url': 'https://i.seadn.io/gcs/files/d0db4bda124b4e3451ecb49e1e9aa248.png?w=500&auto=format',
  'banner_image_url': '',
  'website': '',
  'social_media_accounts': [],
  'bio': '',
  'joined_date': '2022-12-07'},
 {'address': '0x5d4e840aee5c438934a4dbf8687d9624bca637d8',
  'username': 'GLOWA',
  'profile_image_url': 'https://i.seadn.io/gae/uINcT6nwbY8FPNhDZUMsoOAwfnJTKhIOJ1ii6t6ylH3lz9KWNbrfv6nD8w4VOvzmpI4mZWte1uzYjDjO5Llb3YOgzo7ERImo4M_ObZw?w=500&auto=format',
  'banner_image_url': 'https://i.seadn.io/gae/2DqiXTXj1I1sZbCiCnDf6iNWbxz2_bSy2BGde5rN4oPAg_Xwq5J8n4TOS2lg8UebT6UZKGkyjK64k_wRabFEc_uDIb-YGRH2qsY_3A?w=500&auto=format',
  'website': '',
  'social_media_accounts': [],
  'bio': '',
  'joined_date': '2021-02-02'},
 {'address': '0xef30a51e6eb1fb07a5529cc4a9b29f90d7070cfd',
  'username': 'P0ny_Deployer',
  'profile_image_url': 'https://i.seadn.io/gcs/files/83a0e7f060774b2e2bff6dc4

In [9]:
# Tabulate the downloaded account records (one row per record) and display the frame.
df_account = pd.DataFrame(data=account_list)
df_account

Unnamed: 0,address,username,profile_image_url,banner_image_url,website,social_media_accounts,bio,joined_date
0,0xc9fbcdf9ee25ddc38cff91449d52a79234f05220,_______byLSD,https://i.seadn.io/gcs/files/d0db4bda124b4e345...,,,[],,2022-12-07
1,0x5d4e840aee5c438934a4dbf8687d9624bca637d8,GLOWA,https://i.seadn.io/gae/uINcT6nwbY8FPNhDZUMsoOA...,https://i.seadn.io/gae/2DqiXTXj1I1sZbCiCnDf6iN...,,[],,2021-02-02
2,0xef30a51e6eb1fb07a5529cc4a9b29f90d7070cfd,P0ny_Deployer,https://i.seadn.io/gcs/files/83a0e7f060774b2e2...,,,[],,2022-07-14
3,0xc1deb3e48461dddc4a4791b11f89d22079d036fe,0010,https://i.seadn.io/gcs/files/a5986d8ea22422d61...,https://i.seadn.io/gae/NUGgbHIEEfSArSKpMgk9FuN...,,[],Artist,2021-03-18
4,0x911765e6cbf6f1dfc7c1bacc01be7e32612aa541,yungbloomy,https://i.seadn.io/gcs/files/9817e61e1f84e358f...,https://i.seadn.io/gcs/files/6c39b3e213520b629...,http://bloomybtw.com/,[],,2021-10-09
...,...,...,...,...,...,...,...,...
1590,0x28f1c63f2efa738c97e3a134cec7be0187ac05d1,GarageXYZ,https://i.seadn.io/gae/h3h8lDVDpZbnKXmvDkI1k8T...,https://i.seadn.io/gcs/files/0ecfa6fb5d6a43298...,https://mygarage.xyz/,"[{'platform': 'instagram', 'username': 'Garage...",GarageXYZ offers enthusiasts the chance to fin...,2022-03-09
1591,0x762ba8627a1f4ffc4b85595d3c0a7cae4b363499,Typical_Friends_Official,https://i.seadn.io/gcs/files/993d8f8a2ffda4695...,https://i.seadn.io/gcs/files/05d23f3bab9be149e...,https://typicalfriends.com/,[],A one stop digital collectibles shop,2023-02-26
1592,0xe78277ad3e7f14360a2161c84474e8b409fb2b68,FlowersForSociety,https://i.seadn.io/gae/vOIPwKJaiKQtsrYTtmXtH8z...,https://i.seadn.io/gae/K1wIFs9JHaVuGNZryzIbP_c...,www.flowersforsociety.com,"[{'platform': 'instagram', 'username': '@flowe...",„Flowers for Society“ is a new footwear metave...,2021-12-16
1593,0xa2e02bb03387d9a771e3176857851f263159470d,dreamingdigitally,https://i.seadn.io/gae/K_wquBySHD_yIzKP83Rt8a-...,https://i.seadn.io/gae/KEOpo7J9D0NCYf5JVIQ7Eh3...,https://lynkfire.com/dreamingdigitally,[],Multidisciplinary Canadian Artist & Architect 🌈✨,2021-06-25


In [11]:
# Export the account table to an Excel workbook, leaving out the index column.
df_account.to_excel(excel_writer="account.xlsx", index=False)

In [13]:
# Tag every account record with the name of the S3 file it was loaded from.
# The records were already downloaded into account_list, in the same order as
# account_file_list, so they are reused here instead of fetching every object
# from S3 a second time.
assert len(account_list) == len(account_file_list), (
    "account_list is out of sync with account_file_list"
)

data_list = [
    # Build a new dict per record so the entries of account_list stay unmodified.
    {**account, "file_name": file_name}
    for file_name, account in zip(account_file_list, account_list)
]

In [14]:
# Tabulate the records collected in data_list and export them to Excel without
# the index column.
df_account_v2 = pd.DataFrame(data=data_list)
df_account_v2.to_excel(excel_writer="account_v2.xlsx", index=False)