### Prepare event data

In [1]:
import pandas as pd

In [2]:
# Get Ethereum and Polygon subsets
users = pd.read_csv('../data/subsets.csv')
polygon_addresses = users.loc[users['Polygon'] == 1, 'Address'].tolist()
ethereum_addresses = users.loc[users['Ethereum'] == 1, 'Address'].tolist()

### ERC-721

In [3]:
# Ethereum
# Create isSetFrom and isSetTo column
ethereum_721 = pd.read_csv('../data/ethereum_erc721.csv')
ethereum_721['isSetFrom'] = ethereum_721['from'].isin(ethereum_addresses)
ethereum_721['isSetTo'] = ethereum_721['to'].isin(ethereum_addresses)
# Drop duplicates
ethereum_721 = ethereum_721.drop_duplicates()
#ethereum_721.to_csv('ethereum_erc721_final.csv', index=False)
#ethereum_721


# Polygon
# Create isSetFrom and isSetTo column
polygon_721 = pd.read_csv('../data/polygon_erc721.csv')
polygon_721['isSetFrom'] = polygon_721['from'].isin(polygon_addresses)
polygon_721['isSetTo'] = polygon_721['to'].isin(polygon_addresses)
# Drop duplicates
polygon_721 = polygon_721.drop_duplicates()
#polygon_721.to_csv('polygon_erc721_final.csv', index=False)
#polygon_721

erc721 = pd.concat([ethereum_721,polygon_721], ignore_index=True)
#erc721.to_csv('../data/erc721.csv', index=False)
print(len(ethereum_721), len(polygon_721), len(erc721))

4618018 7550153 12168171


### ERC-20

In [4]:
# Ethereum
# Create isSetFrom and isSetTo column
ethereum_20 = pd.read_csv('../data/ethereum_erc20.csv')
ethereum_20['isSetFrom'] = ethereum_20['from'].isin(ethereum_addresses)
ethereum_20['isSetTo'] = ethereum_20['to'].isin(ethereum_addresses)
# Drop duplicates
ethereum_20 = ethereum_20.drop_duplicates()
#ethereum_20.to_csv('ethereum_erc20_final.csv', index=False)
#ethereum_20

# Polygon
# Create isSetFrom and isSetTo column
polygon_20 = pd.read_csv('../data/polygon_erc20.csv')
polygon_20['isSetFrom'] = polygon_20['from'].isin(polygon_addresses)
polygon_20['isSetTo'] = polygon_20['to'].isin(polygon_addresses)
# Drop duplicates
polygon_20 = polygon_20.drop_duplicates()
#polygon_721.to_csv('polygon_erc721_final.csv', index=False)
#polygon_721


# eth20 = pd.read_csv('ethereum_erc20_final.csv')
# pol20 = pd.read_csv('polygon_erc20_final.csv')
erc20 = pd.concat([ethereum_20,polygon_20], ignore_index=True)
erc20['value'] = erc20['value'].apply(int)
erc20['value'] = erc20['value'] / (10 ** erc20['tokenDecimal'])
erc20 = erc20.drop('tokenDecimal', axis=1)

#erc20.to_csv('../data/erc20.csv', index=False)
print(len(ethereum_20), len(polygon_20), len(erc20))

3214814 10946021 14160835


### ERC-1155

In [5]:
## Ethereum
# Create isSetFrom and isSetTo column
ethereum_1155 = pd.read_csv('../data/ethereum_erc721.csv')
ethereum_1155['isSetFrom'] = ethereum_1155['from'].isin(ethereum_addresses)
ethereum_1155['isSetTo'] = ethereum_1155['to'].isin(ethereum_addresses)
#erc721.to_csv('ethereum_erc721_isSet.csv', index=False)
# Drop duplicates
ethereum_1155 = ethereum_1155.drop_duplicates()
#ethereum_1155.to_csv('ethereum_erc1155_final.csv', index=False)
#ethereum_1155


# Polygon
# Create isSetFrom and isSetTo column
polygon_1155 = pd.read_csv('../data/polygon_erc1155.csv')
polygon_1155['isSetFrom'] = polygon_1155['from'].isin(polygon_addresses)
polygon_1155['isSetTo'] = polygon_1155['to'].isin(polygon_addresses)
# Drop duplicates
polygon_1155 = polygon_1155.drop_duplicates()
#polygon_721.to_csv('polygon_erc721_final.csv', index=False)
#polygon_721


# eth1155 = pd.read_csv('ethereum_erc1155_final.csv')
# pol1155 = pd.read_csv('polygon_erc1155_final.csv')
erc1155 = pd.concat([ethereum_1155, polygon_1155], ignore_index=True)
erc1155 = erc1155.rename(columns={'tokenValue': 'value'})

#erc1155.to_csv('../data/erc1155.csv', index=False)
print(len(ethereum_1155), len(polygon_1155), len(erc1155))

  exec(code_obj, self.user_global_ns, self.user_ns)


4618018 4361067 8979085


### Combine the tokenType dataframes

In [6]:
#erc20 = pd.read_csv('../../data/erc20.csv')
#erc721 = pd.read_csv('../../data/erc721.csv')
#erc1155 = pd.read_csv('../../data/erc1155.csv')

# Concatenate the three DataFrames
events = pd.concat([erc20, erc721, erc1155], ignore_index=True)
print(len(events))

if len(events) == len(erc20) + len(erc721) + len(erc1155):
    print('Length check passed')
# Save the combined DataFrame to a new CSV file
#events.to_csv('../../data/events.csv', index=False)
#events

35308091
Length check passed


### Add isSet

In [7]:
#events = pd.read_csv('events.csv')

# Filter rows where both 'isSetFrom' and 'isSetTo' are True
both_true_df = events[(events['isSetFrom'] == True) & (events['isSetTo'] == True)]

# Create two copies of both_true_df
df1 = both_true_df.copy()
df2 = both_true_df.copy()

# In df1, set 'isSet' to 'from', and in df2, set 'isSet' to 'to'
df1['isSet'] = 'from'
df2['isSet'] = 'to'

# Rows where only one of 'isSetFrom' and 'isSetTo' is True
only_from_true_df = events[(events['isSetFrom'] == True) & (events['isSetTo'] == False)].copy()
only_from_true_df['isSet'] = 'from'

only_to_true_df = events[(events['isSetFrom'] == False) & (events['isSetTo'] == True)].copy()
only_to_true_df['isSet'] = 'to'

# Concatenate all the DataFrames together
events_inSet = pd.concat([df1, df2, only_from_true_df, only_to_true_df])
events_inSet = events_inSet.drop(['isSetFrom','isSetTo'], axis=1)

# Sort by the original index for continuity of data
events_inSet = events_inSet.sort_index()

# Create userAddress column
events_inSet['userAddress'] = events_inSet.apply(lambda row: row['from'] if row['isSet'] == 'from' else row['to'], axis=1)

# Save as csv
events_inSet.to_csv('../data/transfer_events.csv')

### Filter event data (not needed for this work!)

In [33]:
events_inSet = pd.read_csv('../data/events_inSet.csv')
events_inSet.drop(['Unnamed: 0'], inplace=True, axis=1)

# Create userAddress column
events_inSet['userAddress'] = events_inSet.apply(lambda row: row['from'] if row['isSet'] == 'from' else row['to'], axis=1)

# Apply filter criteria
df = events_inSet[(events_inSet['isSet']=='to') & (events_inSet['tokenType']!=20) & (events_inSet['contractAddress']!= '0x2953399124f0cbb46d2cbacd8a89cf0599974963')]
# Sort by userAddress
df = df.sort_values(by='userAddress').reset_index(drop=True)
# Save as csv
df.to_csv('../../data/nftsReceived.csv')

# Print number of observations and unique users
unique_addresses = len(df.groupby('userAddress'))
observations = len(df)
print(f'Observations: {observations}\nUnique addresses: {unique_addresses}')

  events_inSet = pd.read_csv('../../data/events_inSet.csv')


Observations: 6287852
Unique addresses: 108273


### (Merge CSV files)

In [None]:
files = ['../data/transactions_polygon_0-20000.csv', '../data/transactions_polygon_20001-40000.csv', '../data/transactions_polygon_40001-60000.csv', '../data/transactions_polygon_60001-80000.csv', '../data/transactions_polygon_80001-100000.csv', '../data/transactions_polygon_100001-120000.csv', '../data/transactions_polygon_120001-129987.csv']

dfs = [pd.read_csv(file) for file in files]
df = pd.concat(dfs, axis=0, ignore_index=True)
df.to_csv('../data/polygon_transactions.csv', index=False)