In [None]:
import io
import json
import os
import posixpath

import boto3
import pandas as pd

from dotenv import load_dotenv

# Load variables from a .env file into the process environment. override=True
# makes values from .env replace variables that are already set, so the
# credentials read via os.getenv further down come from .env when it defines them.
load_dotenv(override=True)

In [None]:
# S3 client shared by every cell below. The key pair is read from the process
# environment; when a variable is unset the lookup yields None.
# NOTE(review): AWS_SESSION_TOKEN is not forwarded here, so temporary (STS)
# credentials would not work with this explicit key pair — confirm that
# long-term keys are intended.
s3_client = boto3.client(
    "s3",
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
)

In [None]:
# Run configuration — every value is an empty placeholder and must be filled in
# before the cells below do anything useful.

# S3 bucket used for all list/get/upload calls (sources and results share it).
BUCKET_NAME = ""
# Key prefix under which the per-network source folders are listed.
BASE_SOURCE_KEY = ""
# NOTE(review): not referenced by any cell visible here — destination keys are
# derived from the source key by string replacement instead. Confirm whether
# this constant is still needed.
BASE_DESTINATION_KEY = ""

# Network names; each one is appended to BASE_SOURCE_KEY as "<network>/" when
# listing that network's folders.
NETWORKS = []
# Key fragments appended to every folder prefix to select the objects to
# convert (presumably date partitions — confirm).
DAYS = []

In [None]:
# Bulk conversion: for every network -> folder -> day prefix, download each JSON
# object, coerce the numeric columns, and upload the frame as gzip-compressed
# parquet under the mirrored "opensea_v2" prefix.

# Columns coerced to numeric dtypes before the frame is written as parquet.
NUMERIC_COLUMNS = (
    "volume",
    "volumeUsd",
    "volumeKrw",
    "floorprice",
    "floorpriceEth",
    "floorpriceUsd",
    "floorpriceKrw",
    "totalSupply",
)

# list_objects_v2 returns at most 1,000 entries per call; the paginator follows
# the continuation tokens so folders/objects past the first page are not
# silently skipped.
list_paginator = s3_client.get_paginator("list_objects_v2")

for network in NETWORKS:
    # S3 keys always use "/" as separator; posixpath keeps that true on every
    # platform (os.path.join would emit "\\" on Windows).
    network_prefix = posixpath.join(BASE_SOURCE_KEY, f"{network}/")

    folders = [
        common_prefix["Prefix"]
        for page in list_paginator.paginate(
            Bucket=BUCKET_NAME, Prefix=network_prefix, Delimiter="/"
        )
        for common_prefix in page.get("CommonPrefixes", [])
    ]

    for folder in folders:
        for day in DAYS:
            source_path = posixpath.join(folder, day)
            # NOTE(review): this rewrites every "opensea" occurrence in the
            # prefix and BASE_DESTINATION_KEY is never used — confirm that this
            # is the intended source -> destination mapping.
            dest_path = source_path.replace("opensea", "opensea_v2")

            # Snapshot the full key list before uploading anything, so objects
            # written by this loop can never show up in a later listing page.
            source_keys = [
                s3_object["Key"]
                for page in list_paginator.paginate(
                    Bucket=BUCKET_NAME, Prefix=source_path
                )
                for s3_object in page.get("Contents", [])
            ]

            for source_key in source_keys:
                try:
                    # Only the last path segment is kept: anything between
                    # source_path and the file name is dropped in the destination.
                    file_name = source_key.split("/")[-1]

                    body = s3_client.get_object(Bucket=BUCKET_NAME, Key=source_key)["Body"]
                    df = pd.DataFrame(json.loads(body.read().decode("utf-8")))

                    # errors="raise" aborts this file on any unparseable value
                    # instead of writing NaNs into the parquet output.
                    for column in NUMERIC_COLUMNS:
                        df[column] = pd.to_numeric(df[column], errors="raise")

                    # Serialise in memory: no scratch "temp.parquet" is left in
                    # the working directory or shared between runs.
                    buffer = io.BytesIO()
                    df.to_parquet(buffer, compression="gzip")
                    buffer.seek(0)

                    s3_client.upload_fileobj(
                        buffer,
                        BUCKET_NAME,
                        posixpath.join(dest_path, file_name.replace(".json", ".parquet")),
                    )
                except Exception as exc:
                    # Best-effort batch: report the failing key and carry on.
                    # The "Error processing <key> <message>" layout is kept
                    # as-is; a later cell reads ./data/errors.csv split on
                    # spaces with the key in column 2 (presumably this output —
                    # confirm).
                    print(f"Error processing {source_key}", exc)

In [None]:
# Load the error log: space-separated, no header row, so columns are numbered.
df_errors = pd.read_csv("./data/errors.csv", sep=" ", header=None)

# Column 2 holds the values later used as S3 object keys; keep the ones that
# mention "polygon". na=False treats rows with a missing column 2 as
# non-matching (a NaN in the mask would make the indexing raise), and
# regex=False matches the text literally.
error_keys = df_errors[2]
df_errors_polygon = error_keys.loc[
    error_keys.str.contains("polygon", na=False, regex=False)
]

In [None]:
# Retry the JSON -> parquet conversion for every key selected into
# df_errors_polygon. Failures are reported per key and do not stop the loop.
for file_key in df_errors_polygon:
    try:
        response = s3_client.get_object(Bucket=BUCKET_NAME, Key=file_key)
        df = pd.DataFrame(json.loads(response["Body"].read().decode("utf-8")))

        # errors="raise" aborts this file on any unparseable value instead of
        # writing NaNs into the parquet output.
        for column in (
            "volume",
            "volumeUsd",
            "volumeKrw",
            "floorprice",
            "floorpriceEth",
            "floorpriceUsd",
            "floorpriceKrw",
            "totalSupply",
        ):
            df[column] = pd.to_numeric(df[column], errors="raise")

        # Build the destination key the same way the bulk conversion cell does:
        # "opensea" -> "opensea_v2" applies to the directory part only, and only
        # the ".json" extension of the file name is swapped. A blanket
        # replace("json", "parquet") on the whole key would also rewrite any
        # folder or file name that merely contains "json".
        key_dir, _, file_name = file_key.rpartition("/")
        dest_key = posixpath.join(
            key_dir.replace("opensea", "opensea_v2"),
            file_name.replace(".json", ".parquet"),
        )

        # Serialise in memory: no scratch "temp.parquet" is left in the working
        # directory or shared between runs.
        buffer = io.BytesIO()
        df.to_parquet(buffer, compression="gzip")
        buffer.seek(0)

        s3_client.upload_fileobj(buffer, BUCKET_NAME, dest_key)
    except Exception as exc:
        print(f"Error processing {file_key}", exc)