# Wallet Data Parser

This notebook reads `scraped_wallet_data.json`, parses the data into a Pandas DataFrame, and cleans the values for numerical analysis.

In [31]:
import pandas as pd
import json
import re
import numpy as np

## Helper Functions for Data Cleaning

In [32]:
def clean_numeric_value(value):
    if isinstance(value, (int, float)):
        return value
    if value is None or str(value).strip().lower() == 'n/a' or 'error' in str(value).strip().lower():
        return np.nan
    
    s_value = str(value).strip()
    # Replace common problematic space characters and currency symbols
    s_value = s_value.replace('\u00A0', ' ').replace('Â', '').replace('\xa0', ' ') # Handle non-breaking space and its common mis-encoding
    s_value = s_value.replace('$', '').replace(',', '')
    
    # Handle percentage first if it's a standalone percentage
    if s_value.endswith('%'):
        try:
            return float(s_value[:-1]) / 100.0
        except ValueError:
            return np.nan
    
    # Handle 'K' for thousands, 'M' for millions
    if 'K' in s_value.upper():
        s_value = s_value.upper().replace('K', '')
        try:
            return float(s_value) * 1000
        except ValueError:
            return np.nan
    elif 'M' in s_value.upper():
        s_value = s_value.upper().replace('M', '')
        try:
            return float(s_value) * 1000000
        except ValueError:
            return np.nan
    
    # Handle general numeric values (including leading +/-)
    try:
        return float(s_value)
    except ValueError:
        return np.nan

def split_value_percentage(value_str, base_col_name, suffix):
    """Splits strings like '+1 (33.33%)' or '1 (33.33%)' into two numeric columns."""
    abs_col = f"{base_col_name}_abs{suffix}"
    pct_col = f"{base_col_name}_pct{suffix}"
    
    if value_str is None or str(value_str).strip().lower() == 'n/a' or 'error' in str(value_str).strip().lower():
        return {abs_col: np.nan, pct_col: np.nan}
    
    # Replace non-breaking spaces and its common mis-encoding before regex
    cleaned_value_str = str(value_str).strip().replace('\u00A0', ' ').replace('Â', '').replace('\xa0', ' ')
    
    # Regex to capture optional sign/dollar, then number, then optional percentage in brackets
    match = re.match(r'([+-]?\$?[\d,\.]+)\s*\(([^)]+%)\)', cleaned_value_str)
    if match:
        abs_val_str, pct_val_str = match.groups()
        return {
            abs_col: clean_numeric_value(abs_val_str),
            pct_col: clean_numeric_value(pct_val_str)
        }
    else:
        # If no percentage part, assume the whole string is the absolute value
        return {abs_col: clean_numeric_value(value_str), pct_col: np.nan}

def parse_bal_value(value_str, suffix):
    """Parses '3.51 SOL ($507,23)' into SOL and USD values."""
    sol_col = f"bal_sol{suffix}"
    usd_col = f"bal_usd{suffix}"
    
    if value_str is None or str(value_str).strip().lower() == 'n/a' or 'error' in str(value_str).strip().lower():
        return {sol_col: np.nan, usd_col: np.nan}
    
    # Handle non-breaking spaces by replacing them with regular spaces and its common mis-encoding
    cleaned_value_str = str(value_str).strip().replace('\u00A0', ' ').replace('Â', '').replace('\xa0', ' ')
    # Updated regex to handle K/M in USD value
    match = re.match(r'([\d,\.]+)\s*SOL\s*\(\$([\d,\.KM]+)\)', cleaned_value_str)
    if match:
        sol_val_str, usd_val_str = match.groups()
        return {
            sol_col: clean_numeric_value(sol_val_str),
            usd_col: clean_numeric_value(usd_val_str)
        }
    else:
        return {sol_col: np.nan, usd_col: np.nan}

def parse_txs_value(value_str, suffix):
    """Parses '145/181' into current and total TXs."""
    current_col = f"txs_buy{suffix}"
    total_col = f"txs_sell{suffix}"
    
    if value_str is None or str(value_str).strip().lower() == 'n/a' or 'error' in str(value_str).strip().lower():
        return {current_col: np.nan, total_col: np.nan}
    
    parts = str(value_str).strip().split('/')
    if len(parts) == 2:
        return {
            current_col: clean_numeric_value(parts[0]),
            total_col: clean_numeric_value(parts[1])
        }
    else:
        return {current_col: np.nan, total_col: np.nan}

def parse_duration_value(value_str, suffix):
    """Parses '2h', '1d' into hours."""
    col_name = f"avgDuration_hours{suffix}"
    
    if value_str is None or str(value_str).strip().lower() == 'n/a' or 'error' in str(value_str).strip().lower():
        return {col_name: np.nan}
    
    cleaned_value_str = str(value_str).strip().lower()
    match_h = re.match(r'(\d+)\s*h', cleaned_value_str)
    match_d = re.match(r'(\d+)\s*d', cleaned_value_str)

    if match_h:
        return {col_name: clean_numeric_value(match_h.group(1))}
    elif match_d:
        return {col_name: clean_numeric_value(match_d.group(1)) * 24} # Convert days to hours
    else:
        return {col_name: np.nan}

## Load and Process Data

In [33]:
file_path = 'scraped_wallet_data.json'
try:
    with open(file_path, 'r') as f:
        data = json.load(f)
except FileNotFoundError:
    print(f"Error: {file_path} not found.")
    data = []
except json.JSONDecodeError:
    print(f"Error: Could not decode JSON from {file_path}.")
    data = []

processed_data = []
if isinstance(data, list):
    for record in data:
        processed_record = {'address': record.get('address')}
        if 'timestamp' in record:
             processed_record['timestamp'] = record.get('timestamp')

        for key, value in record.items():
            if key == 'address' or key == 'timestamp':
                continue
            
            base_name = key
            suffix = ''
            for s in ['_7d', '_1d', '_30d', '_all']:
                if key.endswith(s):
                    base_name = key[:-len(s)]
                    suffix = s
                    break
            
            if base_name == 'totalPnL':
                processed_record.update(split_value_percentage(value, base_name, suffix))
            elif base_name == 'bal':
                processed_record.update(parse_bal_value(value, suffix))
            elif base_name == 'txs7D': # Note: base_name is 'txs7D', but output columns are 'txs_buy', 'txs_sell'
                processed_record.update(parse_txs_value(value, suffix))
            elif base_name == 'avgDuration7D': # Note: base_name is 'avgDuration7D', but output column is 'avgDuration_hours'
                processed_record.update(parse_duration_value(value, suffix))
            elif base_name.startswith('dist') or base_name in ['blacklist', 'soldBought', 'didntBuy', 'buySell5Secs']:
                processed_record.update(split_value_percentage(value, base_name, suffix))
            else:
                # Default numeric cleaning for other fields
                processed_record[key] = clean_numeric_value(value)
        
        processed_data.append(processed_record)

df = pd.DataFrame(processed_data)

# Reorder columns to have address and timestamp first, if they exist
cols = list(df.columns)
if 'timestamp' in cols:
    cols.insert(0, cols.pop(cols.index('timestamp')))
if 'address' in cols:
    cols.insert(0, cols.pop(cols.index('address')))
df = df[cols]

## Display DataFrame Info and Head

In [34]:
if not df.empty:
    print("DataFrame Info:")
    df.info()
    print("\nDataFrame Head:")
    display(df.head())
else:
    print("DataFrame is empty. Check if scraped_wallet_data.json exists and contains data.")

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1092 entries, 0 to 1091
Columns: 131 entries, address to error
dtypes: float64(129), object(2)
memory usage: 1.1+ MB

DataFrame Head:


Unnamed: 0,address,timestamp,pnlPercentage_7d,pnlAbsolute_7d,winRate_7d,totalPnL_abs_7d,totalPnL_pct_7d,unrealizedProfits_7d,bal_sol_7d,bal_usd_7d,txs_buy_7d,txs_sell_7d,avgDuration_hours_7d,totalCost7D_7d,tokenAvgCost7D_7d,tokenAvgRealizedProfits7D_7d,distOver500_abs_7d,distOver500_pct_7d,dist200To500_abs_7d,dist200To500_pct_7d,dist0To200_abs_7d,dist0To200_pct_7d,dist0ToMinus50_abs_7d,dist0ToMinus50_pct_7d,distMinus50_abs_7d,distMinus50_pct_7d,blacklist_abs_7d,blacklist_pct_7d,soldBought_abs_7d,soldBought_pct_7d,didntBuy_abs_7d,didntBuy_pct_7d,buySell5Secs_abs_7d,buySell5Secs_pct_7d,pnlPercentage_1d,pnlAbsolute_1d,winRate_1d,totalPnL_abs_1d,totalPnL_pct_1d,unrealizedProfits_1d,bal_sol_1d,bal_usd_1d,txs_buy_1d,txs_sell_1d,avgDuration_hours_1d,totalCost7D_1d,tokenAvgCost7D_1d,tokenAvgRealizedProfits7D_1d,distOver500_abs_1d,distOver500_pct_1d,dist200To500_abs_1d,dist200To500_pct_1d,dist0To200_abs_1d,dist0To200_pct_1d,dist0ToMinus50_abs_1d,dist0ToMinus50_pct_1d,distMinus50_abs_1d,distMinus50_pct_1d,blacklist_abs_1d,blacklist_pct_1d,soldBought_abs_1d,soldBought_pct_1d,didntBuy_abs_1d,didntBuy_pct_1d,buySell5Secs_abs_1d,buySell5Secs_pct_1d,pnlPercentage_30d,pnlAbsolute_30d,winRate_30d,totalPnL_abs_30d,totalPnL_pct_30d,unrealizedProfits_30d,bal_sol_30d,bal_usd_30d,txs_buy_30d,txs_sell_30d,avgDuration_hours_30d,totalCost7D_30d,tokenAvgCost7D_30d,tokenAvgRealizedProfits7D_30d,distOver500_abs_30d,distOver500_pct_30d,dist200To500_abs_30d,dist200To500_pct_30d,dist0To200_abs_30d,dist0To200_pct_30d,dist0ToMinus50_abs_30d,dist0ToMinus50_pct_30d,distMinus50_abs_30d,distMinus50_pct_30d,blacklist_abs_30d,blacklist_pct_30d,soldBought_abs_30d,soldBought_pct_30d,didntBuy_abs_30d,didntBuy_pct_30d,buySell5Secs_abs_30d,buySell5Secs_pct_30d,pnlPercentage_all,pnlAbsolute_all,winRate_all,totalPnL_abs_all,totalPnL_pct_all,unrealizedProfits_all,bal_sol_all,bal_usd_all,txs_buy_all,txs_sell_all,avgDuration_hours_all,totalCost7D_all,tokenAvgCost7D_all,tokenAvgRealizedProfits7D_all,distOver500_abs_all,distOver500_pct_all,dist200To500_abs_all,dist200To500_pct_all,dist0To200_abs_all,dist0To200_pct_all,dist0ToMinus50_abs_all,dist0ToMinus50_pct_all,distMinus50_abs_all,distMinus50_pct_all,blacklist_abs_all,blacklist_pct_all,soldBought_abs_all,soldBought_pct_all,didntBuy_abs_all,didntBuy_pct_all,buySell5Secs_abs_all,buySell5Secs_pct_all,error
0,5w86UhoyacntqKPiyXmVkfp1gMRKrPifKMfsQLHGAdzC,2025-06-14T19:21:55.454Z,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.0,0.0,1430.0,164.0,11.0,80900.0,8089.72,766.68,2.0,0.2,0.0,,5.0,0.5,1.0,0.1,2.0,0.2,0.0,0.0,0.0,0.0,6.0,0.6,6.0,0.6,0.1252,3472.27,0.5714,7666.84,0.2461,0.0,0.0,0.0,1430.0,164.0,11.0,80900.0,8089.72,766.68,2.0,0.2,0.0,,5.0,0.5,1.0,0.1,2.0,0.2,0.0,0.0,0.0,0.0,6.0,0.6,6.0,0.6,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.0,0.0,1430.0,164.0,11.0,80900.0,8089.72,766.68,2.0,0.2,0.0,,5.0,0.5,1.0,0.1,2.0,0.2,0.0,0.0,0.0,0.0,6.0,0.6,6.0,0.6,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.0,0.0,1430.0,164.0,11.0,80900.0,8089.72,766.68,2.0,0.2,0.0,,5.0,0.5,1.0,0.1,2.0,0.2,0.0,0.0,0.0,0.0,6.0,0.6,6.0,0.6,
1,5q1cM2LAqQfp7BzbHX4aYpsVetG6qcdx8z6GVGmsDBYo,2025-06-14T19:21:58.087Z,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,CidkBSVNKewZDDDXge53X4TT7PfgE1D9jgAQziMCrj5E,2025-06-14T19:22:00.960Z,0.916,9957.2,0.9,9957.2,0.916,0.0,0.0,0.0,201.0,121.0,5.0,21000.0,1908.51,905.2,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,1.1913,9033.51,0.9,9957.2,0.916,0.0,0.0,0.0,201.0,121.0,5.0,21000.0,1908.51,905.2,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,0.916,9957.2,0.9,9957.2,0.916,0.0,0.0,0.0,201.0,121.0,5.0,21000.0,1908.51,905.2,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,0.916,9957.2,0.9,9957.2,0.916,0.0,0.0,0.0,201.0,121.0,5.0,21000.0,1908.51,905.2,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,
3,Bvqh6h6qtBLYvxkSJTGCKXXMD6yTREYzvKYBrL7w1rX1,2025-06-14T19:22:03.389Z,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,5s7CExB5DXwRxXZ3PrLn2dc8XaT2DDmxtBN6HyCz9TKb,2025-06-14T19:22:06.101Z,0.2913,2199.56,0.875,2199.56,0.2913,0.0,0.0,0.0,152.0,122.0,2.0,17300.0,1574.01,199.96,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,0.2995,2064.26,0.875,2199.56,0.2913,0.0,0.0,0.0,152.0,122.0,2.0,17300.0,1574.01,199.96,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,0.2913,2199.56,0.875,2199.56,0.2913,0.0,0.0,0.0,152.0,122.0,2.0,17300.0,1574.01,199.96,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,0.2913,2199.56,0.875,2199.56,0.2913,0.0,0.0,0.0,152.0,122.0,2.0,17300.0,1574.01,199.96,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5,5.0,0.5,


In [38]:
pd.set_option('display.max_rows', 10)
df

Unnamed: 0,address,timestamp,pnlPercentage_7d,pnlAbsolute_7d,winRate_7d,totalPnL_abs_7d,totalPnL_pct_7d,unrealizedProfits_7d,bal_sol_7d,bal_usd_7d,txs_buy_7d,txs_sell_7d,avgDuration_hours_7d,totalCost7D_7d,tokenAvgCost7D_7d,tokenAvgRealizedProfits7D_7d,distOver500_abs_7d,distOver500_pct_7d,dist200To500_abs_7d,dist200To500_pct_7d,dist0To200_abs_7d,dist0To200_pct_7d,dist0ToMinus50_abs_7d,dist0ToMinus50_pct_7d,distMinus50_abs_7d,distMinus50_pct_7d,blacklist_abs_7d,blacklist_pct_7d,soldBought_abs_7d,soldBought_pct_7d,didntBuy_abs_7d,didntBuy_pct_7d,buySell5Secs_abs_7d,buySell5Secs_pct_7d,pnlPercentage_1d,pnlAbsolute_1d,winRate_1d,totalPnL_abs_1d,totalPnL_pct_1d,unrealizedProfits_1d,bal_sol_1d,bal_usd_1d,txs_buy_1d,txs_sell_1d,avgDuration_hours_1d,totalCost7D_1d,tokenAvgCost7D_1d,tokenAvgRealizedProfits7D_1d,distOver500_abs_1d,distOver500_pct_1d,dist200To500_abs_1d,dist200To500_pct_1d,dist0To200_abs_1d,dist0To200_pct_1d,dist0ToMinus50_abs_1d,dist0ToMinus50_pct_1d,distMinus50_abs_1d,distMinus50_pct_1d,blacklist_abs_1d,blacklist_pct_1d,soldBought_abs_1d,soldBought_pct_1d,didntBuy_abs_1d,didntBuy_pct_1d,buySell5Secs_abs_1d,buySell5Secs_pct_1d,pnlPercentage_30d,pnlAbsolute_30d,winRate_30d,totalPnL_abs_30d,totalPnL_pct_30d,unrealizedProfits_30d,bal_sol_30d,bal_usd_30d,txs_buy_30d,txs_sell_30d,avgDuration_hours_30d,totalCost7D_30d,tokenAvgCost7D_30d,tokenAvgRealizedProfits7D_30d,distOver500_abs_30d,distOver500_pct_30d,dist200To500_abs_30d,dist200To500_pct_30d,dist0To200_abs_30d,dist0To200_pct_30d,dist0ToMinus50_abs_30d,dist0ToMinus50_pct_30d,distMinus50_abs_30d,distMinus50_pct_30d,blacklist_abs_30d,blacklist_pct_30d,soldBought_abs_30d,soldBought_pct_30d,didntBuy_abs_30d,didntBuy_pct_30d,buySell5Secs_abs_30d,buySell5Secs_pct_30d,pnlPercentage_all,pnlAbsolute_all,winRate_all,totalPnL_abs_all,totalPnL_pct_all,unrealizedProfits_all,bal_sol_all,bal_usd_all,txs_buy_all,txs_sell_all,avgDuration_hours_all,totalCost7D_all,tokenAvgCost7D_all,tokenAvgRealizedProfits7D_all,distOver500_abs_all,distOver500_pct_all,dist200To500_abs_all,dist200To500_pct_all,dist0To200_abs_all,dist0To200_pct_all,dist0ToMinus50_abs_all,dist0ToMinus50_pct_all,distMinus50_abs_all,distMinus50_pct_all,blacklist_abs_all,blacklist_pct_all,soldBought_abs_all,soldBought_pct_all,didntBuy_abs_all,didntBuy_pct_all,buySell5Secs_abs_all,buySell5Secs_pct_all,error
0,5w86UhoyacntqKPiyXmVkfp1gMRKrPifKMfsQLHGAdzC,2025-06-14T19:21:55.454Z,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.000,0.00,1430.0,164.0,11.0,80900.0,8089.72,766.680,2.0,0.2,0.0,,5.0,0.5000,1.0,0.1000,2.0,0.2000,0.0,0.0,0.0,0.0,6.0,0.6000,6.0,0.6000,0.1252,3472.27,0.5714,7666.84,0.2461,0.0,0.000,0.00,1430.0,164.0,11.0,80900.0,8089.72,766.680,2.0,0.2,0.0,,5.0,0.5000,1.0,0.1000,2.0,0.2000,0.0,0.0,0.0,0.0,6.0,0.6000,6.0,0.6000,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.000,0.00,1430.0,164.0,11.0,80900.0,8089.72,766.680,2.0,0.2,0.0,,5.0,0.5000,1.0,0.1000,2.0,0.2000,0.0,0.0,0.0,0.0,6.0,0.6000,6.0,0.6000,0.2461,7666.84,0.5714,7666.84,0.2461,0.0,0.000,0.00,1430.0,164.0,11.0,80900.0,8089.72,766.680,2.0,0.2,0.0,,5.0,0.5000,1.0,0.1000,2.0,0.2000,0.0,0.0,0.0,0.0,6.0,0.6000,6.0,0.6000,
1,5q1cM2LAqQfp7BzbHX4aYpsVetG6qcdx8z6GVGmsDBYo,2025-06-14T19:21:58.087Z,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,
2,CidkBSVNKewZDDDXge53X4TT7PfgE1D9jgAQziMCrj5E,2025-06-14T19:22:00.960Z,0.9160,9957.20,0.9000,9957.20,0.9160,0.0,0.000,0.00,201.0,121.0,5.0,21000.0,1908.51,905.200,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,1.1913,9033.51,0.9000,9957.20,0.9160,0.0,0.000,0.00,201.0,121.0,5.0,21000.0,1908.51,905.200,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,0.9160,9957.20,0.9000,9957.20,0.9160,0.0,0.000,0.00,201.0,121.0,5.0,21000.0,1908.51,905.200,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,0.9160,9957.20,0.9000,9957.20,0.9160,0.0,0.000,0.00,201.0,121.0,5.0,21000.0,1908.51,905.200,0.0,,3.0,0.2727,7.0,0.6364,0.0,,1.0,0.0909,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,
3,Bvqh6h6qtBLYvxkSJTGCKXXMD6yTREYzvKYBrL7w1rX1,2025-06-14T19:22:03.389Z,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.00,0.0000,,,0.0,0.000,0.00,0.0,0.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,
4,5s7CExB5DXwRxXZ3PrLn2dc8XaT2DDmxtBN6HyCz9TKb,2025-06-14T19:22:06.101Z,0.2913,2199.56,0.8750,2199.56,0.2913,0.0,0.000,0.00,152.0,122.0,2.0,17300.0,1574.01,199.960,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,0.2995,2064.26,0.8750,2199.56,0.2913,0.0,0.000,0.00,152.0,122.0,2.0,17300.0,1574.01,199.960,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,0.2913,2199.56,0.8750,2199.56,0.2913,0.0,0.000,0.00,152.0,122.0,2.0,17300.0,1574.01,199.960,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,0.2913,2199.56,0.8750,2199.56,0.2913,0.0,0.000,0.00,152.0,122.0,2.0,17300.0,1574.01,199.960,0.0,,1.0,0.0909,9.0,0.8182,1.0,0.0909,0.0,,0.0,0.0,0.0,0.0,5.0,0.5000,5.0,0.5000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,C87SwY5oNi5eYSxj1MzhY7nDJy5BEVuTRdud6SMckJXk,2025-06-14T20:57:37.073Z,0.0128,666.30,0.0000,666.30,0.0127,0.0,0.203,29.04,8010.0,8139.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0128,666.30,0.0000,666.30,0.0127,0.0,0.203,29.04,8010.0,8139.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0128,666.30,0.0000,666.30,0.0127,0.0,0.203,29.04,8010.0,8139.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0127,666.30,0.0000,666.30,0.0127,0.0,0.203,29.04,8010.0,8139.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,
1088,FXbvf5sJFWXmABBaPK7fpBBZt9bJPJ3YsnCGBXedZjLw,2025-06-14T20:57:40.304Z,0.0069,98.56,0.5946,591.08,0.0117,0.0,0.201,28.67,3347.0,3273.0,72.0,14800.0,55.79,0.002,0.0,,0.0,,160.0,0.6038,105.0,0.3962,0.0,,0.0,0.0,0.0,0.0,201.0,0.7585,201.0,0.7585,0.0030,8.07,0.5946,591.08,0.0117,0.0,0.201,28.67,3347.0,3273.0,72.0,14800.0,55.79,0.002,0.0,,0.0,,160.0,0.6038,105.0,0.3962,0.0,,0.0,0.0,0.0,0.0,201.0,0.7585,201.0,0.7585,0.0117,591.08,0.5946,591.08,0.0117,0.0,0.201,28.67,3347.0,3273.0,72.0,14800.0,55.79,0.002,0.0,,0.0,,160.0,0.6038,105.0,0.3962,0.0,,0.0,0.0,0.0,0.0,201.0,0.7585,201.0,0.7585,0.0117,591.08,0.5946,591.08,0.0117,0.0,0.201,28.67,3347.0,3273.0,72.0,14800.0,55.79,0.002,0.0,,0.0,,160.0,0.6038,105.0,0.3962,0.0,,0.0,0.0,0.0,0.0,201.0,0.7585,201.0,0.7585,
1089,3q2i23mpHmZjMYhzDHCfeVwVQX2ZTdp83b4ZEqTivXSJ,2025-06-14T20:57:43.531Z,0.0086,386.78,0.0000,533.36,0.0101,0.0,0.201,28.62,6592.0,6666.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,42.0,0.2800,42.0,0.2800,0.0041,68.90,0.0000,533.36,0.0101,0.0,0.201,28.62,6592.0,6666.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,42.0,0.2800,42.0,0.2800,0.0102,533.36,0.0000,533.36,0.0101,0.0,0.201,28.62,6592.0,6666.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,42.0,0.2800,42.0,0.2800,0.0101,533.36,0.0000,533.36,0.0101,0.0,0.201,28.62,6592.0,6666.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,0.0,0.0,42.0,0.2800,42.0,0.2800,
1090,B4rW5RpGhJ7fUNF2GQpSRDLDzEyB3vKpch6Zr1Xps4kE,2025-06-14T20:57:46.890Z,0.0098,499.73,0.0000,499.73,0.0098,0.0,0.205,29.23,7719.0,7917.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0098,499.73,0.0000,499.73,0.0098,0.0,0.205,29.23,7719.0,7917.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0098,499.73,0.0000,499.73,0.0098,0.0,0.205,29.23,7719.0,7917.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,0.0098,499.73,0.0000,499.73,0.0098,0.0,0.205,29.23,7719.0,7917.0,,0.0,0.00,0.000,0.0,,0.0,,0.0,,0.0,,0.0,,,,,,,,,,


In [42]:
pnl_mask = (
    (df["pnlPercentage_1d"] > 0.1) 
    & (df["pnlPercentage_7d"] > 0.1)
    & (df["pnlPercentage_30d"] > 0.1)
    & (df["pnlPercentage_all"] > 0.1)
    & (df["pnlAbsolute_1d"] > -100)
    & (df["pnlAbsolute_7d"] > 3000)
    & (df["unrealizedProfits_7d"] > -100)
    & (df["winRate_1d"] > 0.5)
    & (df["winRate_7d"] > 0.65)
    & (df["winRate_30d"] > 0.65)
    & (df["winRate_all"] > 0.65)
    & (df["txs_buy_7d"] > 70)
)

phising_mask = (df["buySell5Secs_pct_7d"] < 0.1)

filtered_df = df[pnl_mask & phising_mask]
filtered_df


Unnamed: 0,address,timestamp,pnlPercentage_7d,pnlAbsolute_7d,winRate_7d,totalPnL_abs_7d,totalPnL_pct_7d,unrealizedProfits_7d,bal_sol_7d,bal_usd_7d,txs_buy_7d,txs_sell_7d,avgDuration_hours_7d,totalCost7D_7d,tokenAvgCost7D_7d,tokenAvgRealizedProfits7D_7d,distOver500_abs_7d,distOver500_pct_7d,dist200To500_abs_7d,dist200To500_pct_7d,dist0To200_abs_7d,dist0To200_pct_7d,dist0ToMinus50_abs_7d,dist0ToMinus50_pct_7d,distMinus50_abs_7d,distMinus50_pct_7d,blacklist_abs_7d,blacklist_pct_7d,soldBought_abs_7d,soldBought_pct_7d,didntBuy_abs_7d,didntBuy_pct_7d,buySell5Secs_abs_7d,buySell5Secs_pct_7d,pnlPercentage_1d,pnlAbsolute_1d,winRate_1d,totalPnL_abs_1d,totalPnL_pct_1d,unrealizedProfits_1d,bal_sol_1d,bal_usd_1d,txs_buy_1d,txs_sell_1d,avgDuration_hours_1d,totalCost7D_1d,tokenAvgCost7D_1d,tokenAvgRealizedProfits7D_1d,distOver500_abs_1d,distOver500_pct_1d,dist200To500_abs_1d,dist200To500_pct_1d,dist0To200_abs_1d,dist0To200_pct_1d,dist0ToMinus50_abs_1d,dist0ToMinus50_pct_1d,distMinus50_abs_1d,distMinus50_pct_1d,blacklist_abs_1d,blacklist_pct_1d,soldBought_abs_1d,soldBought_pct_1d,didntBuy_abs_1d,didntBuy_pct_1d,buySell5Secs_abs_1d,buySell5Secs_pct_1d,pnlPercentage_30d,pnlAbsolute_30d,winRate_30d,totalPnL_abs_30d,totalPnL_pct_30d,unrealizedProfits_30d,bal_sol_30d,bal_usd_30d,txs_buy_30d,txs_sell_30d,avgDuration_hours_30d,totalCost7D_30d,tokenAvgCost7D_30d,tokenAvgRealizedProfits7D_30d,distOver500_abs_30d,distOver500_pct_30d,dist200To500_abs_30d,dist200To500_pct_30d,dist0To200_abs_30d,dist0To200_pct_30d,dist0ToMinus50_abs_30d,dist0ToMinus50_pct_30d,distMinus50_abs_30d,distMinus50_pct_30d,blacklist_abs_30d,blacklist_pct_30d,soldBought_abs_30d,soldBought_pct_30d,didntBuy_abs_30d,didntBuy_pct_30d,buySell5Secs_abs_30d,buySell5Secs_pct_30d,pnlPercentage_all,pnlAbsolute_all,winRate_all,totalPnL_abs_all,totalPnL_pct_all,unrealizedProfits_all,bal_sol_all,bal_usd_all,txs_buy_all,txs_sell_all,avgDuration_hours_all,totalCost7D_all,tokenAvgCost7D_all,tokenAvgRealizedProfits7D_all,distOver500_abs_all,distOver500_pct_all,dist200To500_abs_all,dist200To500_pct_all,dist0To200_abs_all,dist0To200_pct_all,dist0ToMinus50_abs_all,dist0ToMinus50_pct_all,distMinus50_abs_all,distMinus50_pct_all,blacklist_abs_all,blacklist_pct_all,soldBought_abs_all,soldBought_pct_all,didntBuy_abs_all,didntBuy_pct_all,buySell5Secs_abs_all,buySell5Secs_pct_all,error
924,EoLUwibS7saCh9Ju8b66W17EmqjbnyVu4a4ThPKU4kv6,2025-06-14T20:48:22.690Z,1.3851,980500.0,0.7009,,,7600000.0,0.099,14.24,129.0,190.0,24.0,1300000.0,10700.0,8309.55,40.0,0.339,6.0,0.0508,29.0,0.2458,15.0,0.1271,28.0,0.2373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.6826,157100.0,0.7009,,,7600000.0,0.099,14.24,129.0,190.0,24.0,1300000.0,10700.0,8309.55,40.0,0.339,6.0,0.0508,29.0,0.2458,15.0,0.1271,28.0,0.2373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2382,1200000.0,0.7009,,,7600000.0,0.099,14.24,129.0,190.0,24.0,1300000.0,10700.0,8309.55,40.0,0.339,6.0,0.0508,29.0,0.2458,15.0,0.1271,28.0,0.2373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2382,1200000.0,0.7009,,,7600000.0,0.099,14.24,129.0,190.0,24.0,1300000.0,10700.0,8309.55,40.0,0.339,6.0,0.0508,29.0,0.2458,15.0,0.1271,28.0,0.2373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
955,5k3gHNWXjMwFhjmVvhEff4a9hur11Yqf7TkL3S3RWCHg,2025-06-14T20:50:04.671Z,1.6376,1100000.0,0.72,,,955100.0,0.099,14.24,124.0,184.0,24.0,1000000.0,9433.2,9471.0,41.0,0.3694,9.0,0.0811,22.0,0.1982,14.0,0.1261,25.0,0.2252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3487,184300.0,0.72,,,955100.0,0.099,14.24,124.0,184.0,24.0,1000000.0,9433.2,9471.0,41.0,0.3694,9.0,0.0811,22.0,0.1982,14.0,0.1261,25.0,0.2252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3758,1100000.0,0.72,,,955100.0,0.099,14.24,124.0,184.0,24.0,1000000.0,9433.2,9471.0,41.0,0.3694,9.0,0.0811,22.0,0.1982,14.0,0.1261,25.0,0.2252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3758,1100000.0,0.72,,,955100.0,0.099,14.24,124.0,184.0,24.0,1000000.0,9433.2,9471.0,41.0,0.3694,9.0,0.0811,22.0,0.1982,14.0,0.1261,25.0,0.2252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1022,AB66dViodr6qHMkknoAVTqBXyMZH3415XBCJsAZxVM9p,2025-06-14T20:54:05.494Z,0.3055,7014.54,0.6803,7014.54,0.3055,0.0,90.86,13000.0,133.0,132.0,,22600.0,183.94,57.03,0.0,,2.0,0.0163,82.0,0.6667,39.0,0.3171,0.0,,0.0,0.0,0.0,0.0,7.0,0.068,7.0,0.068,0.5103,4126.09,0.6803,7014.54,0.3055,0.0,90.86,13000.0,133.0,132.0,,22600.0,183.94,57.03,0.0,,2.0,0.0163,82.0,0.6667,39.0,0.3171,0.0,,0.0,0.0,0.0,0.0,7.0,0.068,7.0,0.068,0.3055,7014.54,0.6803,7014.54,0.3055,0.0,90.86,13000.0,133.0,132.0,,22600.0,183.94,57.03,0.0,,2.0,0.0163,82.0,0.6667,39.0,0.3171,0.0,,0.0,0.0,0.0,0.0,7.0,0.068,7.0,0.068,0.3055,7014.54,0.6803,7014.54,0.3055,0.0,90.86,13000.0,133.0,132.0,,22600.0,183.94,57.03,0.0,,2.0,0.0163,82.0,0.6667,39.0,0.3171,0.0,,0.0,0.0,0.0,0.0,7.0,0.068,7.0,0.068,
1037,3XWVVqGQJyioff7S4iYy7KNRLCpGNoF9YScMj9oxWThd,2025-06-14T20:54:54.018Z,0.2461,5659.05,0.6613,5762.75,0.2506,103.69,81.52,11600.0,135.0,136.0,,22600.0,181.1,45.27,0.0,,3.0,0.024,80.0,0.64,42.0,0.336,0.0,,0.0,0.0,0.0,0.0,9.0,0.0874,9.0,0.0874,0.3138,2670.98,0.6613,5762.75,0.2506,103.69,81.52,11600.0,135.0,136.0,,22600.0,181.1,45.27,0.0,,3.0,0.024,80.0,0.64,42.0,0.336,0.0,,0.0,0.0,0.0,0.0,9.0,0.0874,9.0,0.0874,0.2461,5659.05,0.6613,5762.75,0.2506,103.69,81.52,11600.0,135.0,136.0,,22600.0,181.1,45.27,0.0,,3.0,0.024,80.0,0.64,42.0,0.336,0.0,,0.0,0.0,0.0,0.0,9.0,0.0874,9.0,0.0874,0.2461,5659.05,0.6613,5762.75,0.2506,103.69,81.52,11600.0,135.0,136.0,,22600.0,181.1,45.27,0.0,,3.0,0.024,80.0,0.64,42.0,0.336,0.0,,0.0,0.0,0.0,0.0,9.0,0.0874,9.0,0.0874,
1042,C25BvAMzHJuwhSGhtoEGNQTnY1Egkucv42Hrr3Fcfj6b,2025-06-14T20:55:10.469Z,0.1099,5154.28,0.815,5154.28,0.1034,0.0,23.01,3280.79,197.0,214.0,,49100.0,245.39,25.77,0.0,,1.0,0.005,162.0,0.81,37.0,0.185,0.0,,0.0,0.0,0.0,0.0,10.0,0.0855,10.0,0.0855,0.1152,4344.85,0.815,5154.28,0.1034,0.0,23.01,3280.79,197.0,214.0,,49100.0,245.39,25.77,0.0,,1.0,0.005,162.0,0.81,37.0,0.185,0.0,,0.0,0.0,0.0,0.0,10.0,0.0855,10.0,0.0855,0.1099,5154.28,0.815,5154.28,0.1034,0.0,23.01,3280.79,197.0,214.0,,49100.0,245.39,25.77,0.0,,1.0,0.005,162.0,0.81,37.0,0.185,0.0,,0.0,0.0,0.0,0.0,10.0,0.0855,10.0,0.0855,0.1034,5154.28,0.815,5154.28,0.1034,0.0,23.01,3280.79,197.0,214.0,,49100.0,245.39,25.77,0.0,,1.0,0.005,162.0,0.81,37.0,0.185,0.0,,0.0,0.0,0.0,0.0,10.0,0.0855,10.0,0.0855,
