### 数据1

In [62]:
import numpy as np
import pandas as pd

# Read bank.csv into a DataFrame and display per-column summary statistics.
df = pd.read_csv(filepath_or_buffer="bank.csv")
df.describe()

Unnamed: 0,Date,Currency,Account Number,CR Amount,DB Amount,Ref Num
count,882,882,882,882,882,882
unique,80,2,1,150,473,882
top,2010/10/23,USD,abc123,$0.00,$0.00,def123
freq,22,878,882,681,201,1


- Currency

In [63]:
# List the distinct currency codes present in the raw data.
print(df['Currency'].unique())
# The listing shows a mistyped 'USDD' among the codes; map it back to 'USD',
# then expose the column under its lower-case output name.
df['Currency'] = df['Currency'].replace({'USDD': 'USD'})
df = df.rename(columns={'Currency': 'currency'})

['USD' 'USDD']


-  Amount and Type

In [64]:
# Strip "$" signs and thousands separators so both amount columns become floats.
# The pattern is a raw string: in a plain literal "\$" is an invalid escape
# sequence, which recent Python versions warn about.
df['CR Amount'] = df['CR Amount'].replace(r'[\$,]', '', regex=True).astype(float)
df['DB Amount'] = df['DB Amount'].replace(r'[\$,]', '', regex=True).astype(float)

# Signed net movement per row: DB Amount minus CR Amount.
df['amount'] = df['DB Amount'] - df['CR Amount']

# A negative net value (CR larger than DB) is labelled TRANSFER_IN; every other
# row, including a zero net value, is labelled TRANSFER_OUT.
df['type'] = np.where(df['amount'] < 0, 'TRANSFER_IN', 'TRANSFER_OUT')

# Keep only the magnitude and render it as a "$1,234.56"-style string.
df['amount'] = df['amount'].abs().map('${:,.2f}'.format)

- Datetime

In [65]:
# Parse the Date column and render each date as a timestamp string.
# NOTE(review): the time-of-day is hard-coded to 17:00:00Z for every row;
# confirm that this fixed time is what the output format requires.
df['datetime'] = df['Date'].pipe(pd.to_datetime).dt.strftime('%Y-%m-%dT17:00:00Z')

- Result

In [66]:
# Keep only the four output columns, in their final order, and preview them.
result_df = df[
    [
        'datetime',
        'currency',
        'amount',
        'type',
    ]
]
result_df.head()

Unnamed: 0,datetime,currency,amount,type
0,2008-11-23T17:00:00Z,USD,"$1,000.00",TRANSFER_IN
1,2023-08-15T17:00:00Z,USD,"$2,000.00",TRANSFER_IN
2,2023-08-15T17:00:00Z,USD,$10.00,TRANSFER_OUT
3,2023-08-15T17:00:00Z,USD,$500.00,TRANSFER_OUT
4,2023-08-16T17:00:00Z,USD,"$100,000.00",TRANSFER_IN


### 数据2
Remark：任务文档里的交易费用TRANSFER_FEE似乎应该单列一列，而不该归在type里，因此新加了一个字段TRANSFER_FEE.

In [67]:
import requests
from datetime import datetime, timezone, timedelta

def get_transaction_data(wallet_address, start_date, end_date, local_timezone):
    """Fetch a wallet's transaction list from the bkcscan API as a DataFrame.

    Parameters
    ----------
    wallet_address : str
        Address sent to the API; it is also compared case-insensitively with
        each transaction's ``to`` field to decide the transfer direction.
    start_date, end_date : str
        Window bounds in ``YYYY-MM-DD`` form. Each is converted to the epoch
        second of midnight at the start of that day in the ``local_timezone``
        offset and sent as ``starttimestamp`` / ``endtimestamp``.
    local_timezone : int or float
        UTC offset in hours, used both for the window bounds and for the
        ``datetime`` column.

    Returns
    -------
    pandas.DataFrame
        One row per transaction with the columns ``type``, ``txHash``,
        ``datetime``, ``contactIdentity``, ``currency``, ``amount`` and
        ``TRANSFER_FEE``. The columns are present even when no transaction
        is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    ValueError
        If the response body does not carry a list under ``"result"``.
    """
    api_endpoint = "https://www.bkcscan.com/api?module=account&action=txlist"
    columns = ["type", "txHash", "datetime", "contactIdentity",
               "currency", "amount", "TRANSFER_FEE"]

    local_tz = timezone(timedelta(hours=local_timezone))

    def _to_epoch(date_str):
        # Attach the requested offset explicitly; a naive datetime's
        # .timestamp() would silently use the machine's own timezone.
        midnight = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=local_tz)
        return int(midnight.timestamp())

    # API parameters
    params = {
        "address": wallet_address,
        "starttimestamp": _to_epoch(start_date),
        "endtimestamp": _to_epoch(end_date),
    }

    # Make API request; the timeout keeps a stalled connection from hanging
    # the notebook, and HTTP error statuses are surfaced immediately.
    response = requests.get(api_endpoint, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    records = data.get("result") if isinstance(data, dict) else None
    if not isinstance(records, list):
        message = data.get("message") if isinstance(data, dict) else None
        raise ValueError(
            f"Unexpected response from {api_endpoint}: "
            f"message={message!r}, result={records!r}"
        )

    wallet_lower = wallet_address.lower()

    # transaction data
    transactions = []
    for transaction in records:
        # "to" may be missing or null; treat that as "not addressed to this wallet".
        recipient = transaction.get("to")
        is_incoming = (recipient or "").lower() == wallet_lower
        tx_type = "TRANSFER_IN" if is_incoming else "TRANSFER_OUT"

        moment = datetime.fromtimestamp(int(transaction["timeStamp"]), tz=local_tz)
        # NOTE(review): the value is rendered in the local offset but still
        # carries a literal "Z" suffix, which conventionally denotes UTC.
        # The format is kept unchanged here; confirm what consumers expect.
        datetime_str = moment.strftime('%Y-%m-%dT%H:%M:%SZ')

        contact_identity = transaction["from"] if is_incoming else recipient

        # The fee is not given directly, so it is derived from gasPrice * gasUsed.
        fee = round(int(transaction["gasPrice"]) * int(transaction["gasUsed"]) / 1e18, 8)

        transactions.append({
            "type": tx_type,
            "txHash": transaction["hash"],
            "datetime": datetime_str,
            "contactIdentity": contact_identity,
            "currency": "KUB",
            "amount": int(transaction["value"]) / 1e18,  # Convert wei to KUB
            "TRANSFER_FEE": fee,
        })

    return pd.DataFrame(transactions, columns=columns)

# Test wallet address and query window
wallet_address = "0x5Cf6c83A471ECd030A67C6C1AFdD530bCD08e32D"
start_date, end_date = "2021-01-01", "2021-12-31"
local_timezone = 8  # UTC offset in hours

result = get_transaction_data(
    wallet_address=wallet_address,
    start_date=start_date,
    end_date=end_date,
    local_timezone=local_timezone,
)

In [68]:
# Preview the first five fetched transactions.
result.head(n=5)

Unnamed: 0,type,txHash,datetime,contactIdentity,currency,amount,TRANSFER_FEE
0,TRANSFER_OUT,0xc6fc26b51380b7d7cef961a434c2a6ea60cccf55df96...,2021-06-10T17:16:33Z,0x2527b10580f8cbc68452f16f33c6c3c3d45729f9,KUB,14.0,0.005406
1,TRANSFER_OUT,0xb684ebf4e94c7d9f3f6b46ef4d86406de9c48971df9e...,2021-06-10T16:45:06Z,0x085aef5be089ac245bce436a3620f289e3f57e5e,KUB,0.0,0.002225
2,TRANSFER_OUT,0x75e1031ab120ccaf639c069abfb533a7717785826b89...,2021-06-09T21:23:35Z,0x085aef5be089ac245bce436a3620f289e3f57e5e,KUB,0.0,0.002225
3,TRANSFER_OUT,0xa62a4cb86b0eb5ff50a901f881e0c6d896a07d3a7a44...,2021-06-09T21:23:19Z,0x8cae3f2079e4bb82122aafee7678e3bf86389486,KUB,6.0,0.007006
4,TRANSFER_OUT,0x75ecb0002652b8d67b21210a87b6578c595e1022db7c...,2021-06-09T20:27:05Z,0x2009a60434dc8c8f772c9969d64868bdc2bf17b2,KUB,0.0,0.00222
