In [66]:
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests


In [5]:
def analyze_user_activity(user_activity_data):
    """
    Analyzes user activity data and returns descriptive statistics.

    Every record is counted as a trade. The buy/sell counts only cover records
    whose ``side`` is exactly ``"BUY"`` or ``"SELL"``, so they need not add up
    to the total.

    :param user_activity_data: List of JSON responses from the activity endpoint.
        Each record must provide ``timestamp`` (treated as epoch seconds),
        ``side``, ``size``, ``usdcSize``, ``price`` and ``title``.
    :return: Dictionary containing key statistics, or None when there is no data.
    :raises KeyError: If one of the keys listed above is absent from the data.
    """
    # Convert to DataFrame
    df = pd.DataFrame(user_activity_data)

    if df.empty:
        return None

    # Order chronologically so that consecutive rows are consecutive trades.
    df = df.sort_values(by="timestamp")
    timestamps = df["timestamp"]

    # Convert timestamps to a human-readable format. datetime.utcfromtimestamp()
    # is deprecated since Python 3.12, so build a timezone-aware UTC datetime
    # instead; float() turns the numpy scalar into a plain Python number first.
    time_format = '%Y-%m-%d %H:%M:%S UTC'
    first_trade_time = datetime.fromtimestamp(
        float(timestamps.min()), tz=timezone.utc
    ).strftime(time_format)
    last_trade_time = datetime.fromtimestamp(
        float(timestamps.max()), tz=timezone.utc
    ).strftime(time_format)

    # Average gap between consecutive trades, in seconds. With a single record
    # diff() yields only NaN, so the mean is NaN and "N/A" is reported.
    avg_time_between_trades = timestamps.diff().mean()
    if pd.isna(avg_time_between_trades):
        avg_time_between_trades_str = "N/A"
    else:
        avg_time_between_trades_str = str(timedelta(seconds=float(avg_time_between_trades)))

    # Compute descriptive statistics
    stats = {
        "Total Trades": len(df),
        "Total Buy Trades": int((df["side"] == "BUY").sum()),
        "Total Sell Trades": int((df["side"] == "SELL").sum()),
        "Total Size Traded": df["size"].sum(),
        "Average Trade Size": df["size"].mean(),
        "Largest Trade Size": df["size"].max(),
        # NOTE: sums usdcSize over every record, BUY and SELL alike.
        "Total USDC Spent": df["usdcSize"].sum(),
        "Average Price": df["price"].mean(),
        "Markets Traded": df["title"].nunique(),
        "First Trade Time": first_trade_time,
        "Last Trade Time": last_trade_time,
        "Average Time Between Trades": avg_time_between_trades_str,
    }

    return stats


In [147]:
def generate_user_statistics(user_ids, base_url, limit=500, timeout=30):
    """
    Fetches activity data for multiple users and compiles statistics into a table.

    One GET request is sent per user to ``<base_url>/activity``. A user whose
    request fails, returns a non-200 status, returns an unparseable body, or
    has no activity is reported on stdout (failures only) and left out of the
    result, so one bad request no longer aborts the whole run.

    :param user_ids: List of user IDs to analyze.
    :param base_url: The base URL of the API (a trailing slash is tolerated).
    :param limit: Maximum number of activity records requested per user.
    :param timeout: Seconds to wait for each HTTP response before giving up.
    :return: DataFrame containing statistics for all users, one row per user,
        with the user's ID in the "User ID" column.
    """
    # Strip a trailing slash so "https://host/" does not become "https://host//activity".
    endpoint = f"{base_url.rstrip('/')}/activity"

    user_stats = []
    for user in user_ids:
        try:
            # `params` lets requests build and URL-encode the query string;
            # without `timeout` a stalled connection would block forever.
            response = requests.get(
                endpoint,
                params={"user": user, "limit": limit},
                headers={"Accept": "application/json"},
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"Error fetching activity for {user}: {response.text}")
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching activity for {user}: {exc}")
            continue

        stats = analyze_user_activity(data)
        if stats:
            stats["User ID"] = user
            user_stats.append(stats)

    return pd.DataFrame(user_stats)


In [10]:
def fetch_users_from_trades(base_url, limit=500, timeout=30):
    """
    Fetches unique user IDs from the trades API.

    Sends one GET request to ``<base_url>/trades`` and collects the
    ``proxyWallet`` value of every returned trade that has one.

    :param base_url: The base URL of the API (a trailing slash is tolerated).
    :param limit: The number of trades to fetch.
    :param timeout: Seconds to wait for the HTTP response before giving up.
    :return: Set of unique user IDs; an empty set if the request fails or the
        server answers with a non-200 status (the error is printed).
    """
    # Strip a trailing slash so "https://host/" does not become "https://host//trades".
    url = f"{base_url.rstrip('/')}/trades"

    try:
        # Without `timeout` a stalled connection would block the notebook forever.
        response = requests.get(
            url,
            params={"limit": limit},
            headers={"Accept": "application/json"},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"Error fetching users from trades: {exc}")
        return set()

    if response.status_code == 200:
        data = response.json()
        return {trade["proxyWallet"] for trade in data if "proxyWallet" in trade}

    print(f"Error fetching users from trades: {response.text}")
    return set()

In [80]:
def read_users_from_file(file_path):
    """
    Load the "User ID" column of a CSV file as a plain Python list.

    Problems are reported on stdout rather than raised: a missing file and any
    error while reading or parsing both produce an empty list.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        list: The values of the "User ID" column, or an empty list when the
        file does not exist or cannot be read.
    """
    users = []
    try:
        if os.path.exists(file_path):
            # Parse the CSV and pull the single column we care about.
            users = pd.read_csv(file_path)["User ID"].tolist()
        else:
            print(f"File {file_path} does not exist.")
    except Exception as e:
        # Covers unreadable files as well as a missing "User ID" column.
        print(f"Error reading file {file_path}: {e}")
    return users

In [None]:
# Example usage
example_user_ids = ["0xf373fe97c47d5cbe936bf4ae433e2abb10c3002d", "0xa1234b56789cdef0123456789abcdef123456789"]
# Base URL of the Polymarket data API. No trailing slash: the helper functions
# append "/trades" and "/activity" themselves, so a trailing slash here would
# produce request URLs containing "//".
pm_base_url = "https://data-api.polymarket.com"
# Seed set of wallet addresses taken from the most recent trades.
initial_user_ids = fetch_users_from_trades(pm_base_url)



In [136]:
### WRITE DATA

# Resolve the data directory from the PM_DATA_DIR environment variable rather
# than one developer's absolute home path; the default is "<home>/data".
data_dir = os.environ.get("PM_DATA_DIR", os.path.join(os.path.expanduser("~"), "data"))
file_path = os.path.join(data_dir, "pm_users2.csv")
os.makedirs(data_dir, exist_ok=True)
print(file_path)

# `users` is only defined by the READ/update cells further down, so on a fresh
# kernel this cell used to fail with NameError. When it is missing, fall back
# to the wallets fetched above merged with whatever is already saved, so a
# top-to-bottom run never replaces the accumulated file with a smaller set.
try:
    users_to_save = set(users)
except NameError:
    users_to_save = set(initial_user_ids) | set(read_users_from_file(file_path))

# Sort so the file content is stable between runs (set order is arbitrary).
df_new_users = pd.DataFrame(sorted(users_to_save, key=str), columns=["User ID"])
df_new_users.to_csv(file_path, index=False)

/Users/kai_brusch/data/pm_users2.csv


In [139]:
### READ DATA

# Reload the saved wallet addresses from disk; building a set drops duplicates.
users = {user_id for user_id in read_users_from_file(file_path)}
print(len(users))

1834


In [148]:

# Merge wallets seen in the latest trades into the locally known set and
# report the size before and after.
print(f"Local Users before update: {len(users)}")
new_users = fetch_users_from_trades(pm_base_url)
users = users | new_users
print(f"Users after update {len(users)}")

Local Users before update: 2085
Users after update 2110


In [127]:
# Build the per-user statistics table; this issues one API request per user.
df = generate_user_statistics(user_ids=list(users), base_url=pm_base_url)

In [156]:
# Overall average size per trade across all users: total size traded divided
# by total number of trades. Series.sum() is vectorised, whereas the builtin
# sum() iterates over the Series element by element in Python.
df['Total Size Traded'].sum() / df['Total Trades'].sum()


398.2857357360702

In [150]:
# Display the per-user statistics table returned by generate_user_statistics.
df

Unnamed: 0,Total Trades,Total Buy Trades,Total Sell Trades,Total Size Traded,Average Trade Size,Largest Trade Size,Total USDC Spent,Average Price,Markets Traded,First Trade Time,Last Trade Time,Average Time Between Trades,User ID
0,42,22,4,68388.173694,1628.289850,33955.250000,539.581084,0.448238,18,2025-02-14 02:52:22 UTC,2025-03-16 22:31:11 UTC,18:02:24.609756,0xd0a03fb98504a31160cd8849d5c0716bc476e32a
1,84,41,31,11942.459718,142.172140,5500.000000,10970.009807,0.127988,40,2024-11-26 06:01:21 UTC,2025-03-16 21:55:33 UTC,"1 day, 7:59:55.807229",0x50d0c63a2a3db69d87a771232b50a2628c61f845
2,500,404,68,4929.851801,9.859704,259.000000,2878.857667,0.586468,83,2025-03-09 05:02:48 UTC,2025-03-16 23:21:29 UTC,0:22:24.130261,0x9edc2ce62c68b25eca4d0799d8212f9f67d1b088
3,12,8,3,198.000000,16.500000,44.000000,135.466000,0.734667,7,2025-03-14 13:19:11 UTC,2025-03-16 23:21:29 UTC,5:16:34.363636,0xd559de9ffd76c116d70ba8c4b9defacafe228dca
4,4,2,2,125.960444,31.490111,32.415185,122.433552,0.972000,1,2025-03-16 22:34:05 UTC,2025-03-16 22:41:19 UTC,0:02:24.666667,0xbb550d1ce83a0bb0ae7f585a7960816a6b4adc31
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,8,8,0,368.118079,46.014760,250.000000,49.999961,0.438250,8,2025-03-16 22:59:27 UTC,2025-03-16 23:19:33 UTC,0:02:52.285714,0xc8644d9a62d9c7285c13bcfa281c9c9415390a81
1588,133,97,29,77546.900405,583.059402,5807.000000,58113.147114,0.627348,16,2024-10-24 22:14:51 UTC,2025-03-16 22:46:55 UTC,"1 day, 2:00:14.575758",0x16d56c02164ae5d8061958dda65ec852a9825e0c
1589,15,11,4,1338.368120,89.224541,1000.000000,912.140148,0.928124,11,2025-03-03 16:20:26 UTC,2025-03-16 22:51:39 UTC,22:45:05.214286,0x74618a339c414ed4854861b26308c19cc334e282
1590,28,14,9,15493.558948,553.341391,4660.000000,500.228490,0.639321,13,2024-12-29 23:28:13 UTC,2025-03-16 22:41:19 UTC,"2 days, 20:24:55.777778",0x87a5f961f0b13c507e766c7040fea55d0599c154


In [129]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Numeric columns of the per-user statistics frame `df`, one histogram each.
columns_to_plot = [
    'Total Trades', 'Total Buy Trades', 'Total Sell Trades',
    'Total Size Traded', 'Average Trade Size', 'Largest Trade Size',
    'Total USDC Spent', 'Average Price', 'Markets Traded'
]

# Nine metrics laid out on a 3x3 grid, each panel titled with its column name.
fig = make_subplots(
    rows=3, cols=3,
    subplot_titles=columns_to_plot,
    vertical_spacing=0.1,
    horizontal_spacing=0.1
)

# Fill the grid row by row: divmod maps the 0-based position onto
# (row, column) offsets, and plotly expects both to be 1-based.
for position, metric in enumerate(columns_to_plot):
    row_offset, col_offset = divmod(position, 3)
    fig.add_trace(
        go.Histogram(x=df[metric], name=metric, showlegend=False),
        row=row_offset + 1,
        col=col_offset + 1
    )

# Overall figure title and size; the legend is hidden because the subplot
# titles already name each metric.
fig.update_layout(
    title_text="Histograms of Trading Metrics",
    height=900,
    width=1200,
    showlegend=False
)

fig.show()

In [108]:
import plotly.express as px

# Distribution of the number of trades per user; bin width is left to plotly.
fig = px.histogram(
    data_frame=df,
    x="Total Trades",
    title="Total Trades Distribution (Auto Bins)",
    labels={"Total Trades": "Number of Trades"},
)
fig.show()

In [160]:
# Rows for users whose "Total USDC Spent" is below 4000.
df.loc[df["Total USDC Spent"].lt(4000)]

Unnamed: 0,Total Trades,Total Buy Trades,Total Sell Trades,Total Size Traded,Average Trade Size,Largest Trade Size,Total USDC Spent,Average Price,Markets Traded,First Trade Time,Last Trade Time,Average Time Between Trades,User ID
0,42,22,4,68388.173694,1628.289850,33955.250000,539.581084,0.448238,18,2025-02-14 02:52:22 UTC,2025-03-16 22:31:11 UTC,18:02:24.609756,0xd0a03fb98504a31160cd8849d5c0716bc476e32a
2,500,404,68,4929.851801,9.859704,259.000000,2878.857667,0.586468,83,2025-03-09 05:02:48 UTC,2025-03-16 23:21:29 UTC,0:22:24.130261,0x9edc2ce62c68b25eca4d0799d8212f9f67d1b088
3,12,8,3,198.000000,16.500000,44.000000,135.466000,0.734667,7,2025-03-14 13:19:11 UTC,2025-03-16 23:21:29 UTC,5:16:34.363636,0xd559de9ffd76c116d70ba8c4b9defacafe228dca
4,4,2,2,125.960444,31.490111,32.415185,122.433552,0.972000,1,2025-03-16 22:34:05 UTC,2025-03-16 22:41:19 UTC,0:02:24.666667,0xbb550d1ce83a0bb0ae7f585a7960816a6b4adc31
6,22,9,13,346.000000,15.727273,67.000000,53.395000,0.191045,4,2025-03-14 14:52:11 UTC,2025-03-16 21:59:15 UTC,2:37:28.761905,0x0550e615e3d4f9d312478085ee163cfb7262de94
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1585,22,11,11,3313.000000,150.590909,391.000000,1910.942000,0.770909,11,2025-02-24 01:26:52 UTC,2025-03-16 22:36:37 UTC,23:51:53.571429,0x6be981a44febf02152b3c2db44c3fc245ab24699
1587,8,8,0,368.118079,46.014760,250.000000,49.999961,0.438250,8,2025-03-16 22:59:27 UTC,2025-03-16 23:19:33 UTC,0:02:52.285714,0xc8644d9a62d9c7285c13bcfa281c9c9415390a81
1589,15,11,4,1338.368120,89.224541,1000.000000,912.140148,0.928124,11,2025-03-03 16:20:26 UTC,2025-03-16 22:51:39 UTC,22:45:05.214286,0x74618a339c414ed4854861b26308c19cc334e282
1590,28,14,9,15493.558948,553.341391,4660.000000,500.228490,0.639321,13,2024-12-29 23:28:13 UTC,2025-03-16 22:41:19 UTC,"2 days, 20:24:55.777778",0x87a5f961f0b13c507e766c7040fea55d0599c154


In [162]:
import plotly.graph_objects as go

# Histogram of "Total USDC Spent" per user, restricted to users below 3000 so
# the long tail does not flatten the bulk of the distribution.
fig = go.Figure()
fig.add_trace(go.Histogram(
    # Single .loc lookup instead of chained df[mask][column] indexing.
    x=df.loc[df['Total USDC Spent'] < 3000, 'Total USDC Spent'],
    nbinsx=100,  # Upper bound on the number of bins plotly will use
    name='Total USDC Spent'  # was 'Total Trades', which mislabelled this trace
))
fig.update_layout(
    # The old title claimed "30 Bins" although nbinsx is 100.
    title='Total USDC Spent Distribution (Below 3000, Up to 100 Bins)',
    xaxis_title='Total USDC Spent',
    yaxis_title='Frequency'
)
fig.show()

In [168]:
# Distribution of total size traded per user, with a rug plot in the margin;
# every column of `df` is made available in the hover tooltip.
fig = px.histogram(
    df,
    x="Total Size Traded",
    marginal="rug",  # alternatives: "box", "violin", "histogram"
    hover_data=df.columns,
)
fig.show()

In [167]:
# Histogram binned on "Total USDC Spent" with "Total Size Traded" supplied as
# y, plus a rug plot in the margin; every column of `df` is shown on hover.
fig = px.histogram(
    df,
    x="Total USDC Spent",
    y="Total Size Traded",
    marginal="rug",  # alternatives: "box", "violin", "histogram"
    hover_data=df.columns,
)
fig.show()

In [134]:
import plotly.graph_objects as go

# Histogram of the average trade size per user.
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=df['Average Trade Size'],
    nbinsx=100,  # Upper bound on the number of bins plotly will use
    name='Average Trade Size'  # was 'Total Trades', which mislabelled this trace
))
fig.update_layout(
    # The old title claimed "30 Bins" although nbinsx is 100.
    title='Average Trade Size Distribution (Up to 100 Bins)',
    xaxis_title='Average Trade Size',
    yaxis_title='Frequency'
)
fig.show()

In [119]:
# Display the per-user statistics table held in `df`.
df

Unnamed: 0,Total Trades,Total Buy Trades,Total Sell Trades,Total Size Traded,Average Trade Size,Largest Trade Size,Total USDC Spent,Average Price,Markets Traded,First Trade Time,Last Trade Time,Average Time Between Trades,User ID
0,42,22,4,68388.173694,1628.289850,33955.250000,539.581084,0.448238,18,2025-02-14 02:52:22 UTC,2025-03-16 22:31:11 UTC,18:02:24.609756,0xd0a03fb98504a31160cd8849d5c0716bc476e32a
1,84,41,31,11942.459718,142.172140,5500.000000,10970.009807,0.127988,40,2024-11-26 06:01:21 UTC,2025-03-16 21:55:33 UTC,"1 day, 7:59:55.807229",0x50d0c63a2a3db69d87a771232b50a2628c61f845
2,500,403,69,4936.851801,9.873704,259.000000,2881.627667,0.585908,83,2025-03-09 04:51:04 UTC,2025-03-16 22:49:35 UTC,0:22:21.705411,0x9edc2ce62c68b25eca4d0799d8212f9f67d1b088
3,4,2,2,125.960444,31.490111,32.415185,122.433552,0.972000,1,2025-03-16 22:34:05 UTC,2025-03-16 22:41:19 UTC,0:02:24.666667,0xbb550d1ce83a0bb0ae7f585a7960816a6b4adc31
4,22,9,13,346.000000,15.727273,67.000000,53.395000,0.191045,4,2025-03-14 14:52:11 UTC,2025-03-16 21:59:15 UTC,2:37:28.761905,0x0550e615e3d4f9d312478085ee163cfb7262de94
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1144,500,259,190,128695.020007,257.390040,10000.000000,73497.126472,0.483949,72,2025-03-08 16:24:50 UTC,2025-03-16 22:47:01 UTC,0:23:51.124248,0x0f37cb80dee49d55b5f6d9e595d52591d6371410
1145,133,97,29,77546.900405,583.059402,5807.000000,58113.147114,0.627348,16,2024-10-24 22:14:51 UTC,2025-03-16 22:46:55 UTC,"1 day, 2:00:14.575758",0x16d56c02164ae5d8061958dda65ec852a9825e0c
1146,15,11,4,1338.368120,89.224541,1000.000000,912.140148,0.928124,11,2025-03-03 16:20:26 UTC,2025-03-16 22:51:39 UTC,22:45:05.214286,0x74618a339c414ed4854861b26308c19cc334e282
1147,28,14,9,15493.558948,553.341391,4660.000000,500.228490,0.639321,13,2024-12-29 23:28:13 UTC,2025-03-16 22:41:19 UTC,"2 days, 20:24:55.777778",0x87a5f961f0b13c507e766c7040fea55d0599c154
