# Polymarket Trade Analysis
Parquet dosyasindaki trade verilerini inceleme


In [44]:
import pandas as pd
import numpy as np

# Load the partial trade export, then report its row count and column names.
df = pd.read_parquet('Data/trades_0x6031b6ee_partial.parquet')
print(
    f"Toplam satir sayisi: {len(df):,}",
    f"\nKolonlar: {list(df.columns)}",
    sep="\n",
)


Toplam satir sayisi: 1,500

Kolonlar: ['timestamp', 'trader', 'side', 'market_slug', 'event_slug', 'market_question', 'outcome', 'outcome_index', 'amount', 'price', 'pnl', 'market_resolved', 'winning_outcome', 'condition_id', 'asset', 'transaction_hash']


In [45]:
# Basic statistics: total row count followed by the number of distinct values
# in each identifier-like column.
print("=" * 50, "TEMEL ISTATISTIKLER", "=" * 50, sep="\n")
print(f"Toplam trade sayisi: {len(df):,}")
print(
    f"Benzersiz market sayisi: {df['market_question'].nunique()}",
    f"Benzersiz market_slug sayisi: {df['market_slug'].nunique()}",
    f"Benzersiz condition_id sayisi: {df['condition_id'].nunique()}",
    f"Benzersiz transaction_hash sayisi: {df['transaction_hash'].nunique()}",
    sep="\n",
)


TEMEL ISTATISTIKLER
Toplam trade sayisi: 1,500
Benzersiz market sayisi: 30
Benzersiz market_slug sayisi: 30
Benzersiz condition_id sayisi: 30
Benzersiz transaction_hash sayisi: 1500


In [46]:
# Date-range analysis: first/last trade time and the span between them.
print("=" * 50, "TARIH ARALIGI", "=" * 50, sep="\n")

# Normalise the column to datetime in place; later cells use the `.dt`
# accessor on it.
df['timestamp'] = pd.to_datetime(df['timestamp'])
min_date, max_date = df['timestamp'].min(), df['timestamp'].max()
date_range = max_date - min_date

print(
    f"Ilk trade: {min_date}",
    f"Son trade: {max_date}",
    f"Toplam sure: {date_range}",
    f"Toplam saat: {date_range.total_seconds() / 3600:.2f} saat",
    f"Toplam gun: {date_range.total_seconds() / 86400:.2f} gun",
    sep="\n",
)


TARIH ARALIGI
Ilk trade: 2025-12-08 10:32:03
Son trade: 2025-12-08 13:06:21
Toplam sure: 0 days 02:34:18
Toplam saat: 2.57 saat
Toplam gun: 0.11 gun


In [47]:
# Duplicate check: find transaction hashes that occur on more than one row.
print("=" * 50, "DUPLIKE KONTROL", "=" * 50, sep="\n")
tx_counts = df['transaction_hash'].value_counts()
duplicates = tx_counts.loc[tx_counts.gt(1)]

# "Duplicate rows" counts only the surplus copies: occurrences minus one per
# repeated hash.
print(
    f"Tekrar eden transaction_hash sayisi: {len(duplicates)}",
    f"Toplam duplike satir: {duplicates.sum() - len(duplicates)}",
    sep="\n",
)

if not duplicates.empty:
    print(f"\nEn cok tekrar eden 10 transaction:")
    print(duplicates.head(10))


DUPLIKE KONTROL
Tekrar eden transaction_hash sayisi: 0
Toplam duplike satir: 0


In [48]:
# Show every row that shares the first repeated transaction hash.
# Does nothing when the previous cell found no repeated hashes.
if not duplicates.empty:
    sample_tx = duplicates.index[0]
    print(f"Ornek duplike transaction: {sample_tx}")
    print("\nAyni transaction hash'e sahip satirlar:")
    display(df.loc[df['transaction_hash'].eq(sample_tx)])


In [49]:
# Per-trader analysis: number of distinct traders and rows per trader.
print("=" * 50, "TRADER ANALIZI", "=" * 50, sep="\n")
trader_counts = df['trader'].value_counts()
print(
    f"Benzersiz trader sayisi: {len(trader_counts)}",
    f"\nTrader dagilimi:",
    trader_counts,
    sep="\n",
)


TRADER ANALIZI
Benzersiz trader sayisi: 1

Trader dagilimi:
trader
0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d    1500
Name: count, dtype: int64


In [50]:
# Side (BUY/SELL) distribution: row count per value of the `side` column.
print("=" * 50, "SIDE DAGILIMI", "=" * 50, sep="\n")
print(df.loc[:, 'side'].value_counts())


SIDE DAGILIMI
side
BUY    1500
Name: count, dtype: int64


In [51]:
# Most active markets: the 20 market questions with the most rows.
print("=" * 50, "EN AKTIF 20 MARKET", "=" * 50, sep="\n")
market_counts = df['market_question'].value_counts()
# value_counts() is already sorted descending, so the first 20 are the busiest.
print(market_counts.iloc[:20])


EN AKTIF 20 MARKET
market_question
Bitcoin Up or Down - December 8, 6:45AM-7:00AM ET     115
Bitcoin Up or Down - December 8, 6AM ET                94
Bitcoin Up or Down - December 8, 6:15AM-6:30AM ET      92
Bitcoin Up or Down - December 8, 6:00AM-6:15AM ET      86
Bitcoin Up or Down - December 8, 7AM ET                77
Bitcoin Up or Down - December 8, 7:45AM-8:00AM ET      72
Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET      68
Bitcoin Up or Down - December 8, 7:15AM-7:30AM ET      65
Bitcoin Up or Down - December 8, 7:30AM-7:45AM ET      61
Ethereum Up or Down - December 8, 7:15AM-7:30AM ET     61
Bitcoin Up or Down - December 8, 6:30AM-6:45AM ET      58
Ethereum Up or Down - December 8, 6:15AM-6:30AM ET     56
Ethereum Up or Down - December 8, 5:45AM-6:00AM ET     56
Bitcoin Up or Down - December 8, 5:45AM-6:00AM ET      54
Bitcoin Up or Down - December 8, 8:00AM-8:15AM ET      48
Ethereum Up or Down - December 8, 6:00AM-6:15AM ET     48
Ethereum Up or Down - December 8, 7AM

In [52]:
# Hourly trade distribution: bucket trades by the hour they occurred in and
# report the mean / max / min bucket size plus the full per-hour table.
print("=" * 50)
print("SAATLIK TRADE DAGILIMI")
print("=" * 50)
# Truncate each timestamp to the start of its hour. The lowercase 'h' alias is
# used because the uppercase 'H' alias is deprecated in recent pandas and emits
# a FutureWarning on every run. Requires df['timestamp'] to be datetime-typed
# (converted in the date-range cell).
df['hour'] = df['timestamp'].dt.floor('h')
hourly = df.groupby('hour').size()
# Hours without any trade do not appear in `hourly`, so the mean and min below
# are computed over hours that contain at least one trade.
print(f"Ortalama saatlik trade: {hourly.mean():.0f}")
print(f"Max saatlik trade: {hourly.max()}")
print(f"Min saatlik trade: {hourly.min()}")
print(f"\nSaatlik dagilim:")
print(hourly)


SAATLIK TRADE DAGILIMI
Ortalama saatlik trade: 375
Max saatlik trade: 635
Min saatlik trade: 85

Saatlik dagilim:
hour
2025-12-08 10:00:00    264
2025-12-08 11:00:00    635
2025-12-08 12:00:00    516
2025-12-08 13:00:00     85
dtype: int64


  df['hour'] = df['timestamp'].dt.floor('H')


In [53]:
# First 10 trades: the rows with the smallest timestamps, limited to the
# columns that matter for a quick visual check.
print("=" * 50, "ILK 10 TRADE (en eski)", "=" * 50, sep="\n")
display(
    df.nsmallest(10, 'timestamp').loc[
        :,
        ['timestamp', 'side', 'market_question', 'outcome', 'amount', 'price', 'transaction_hash'],
    ]
)


ILK 10 TRADE (en eski)


Unnamed: 0,timestamp,side,market_question,outcome,amount,price,transaction_hash
1499,2025-12-08 10:32:03,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Up,10.0,0.38,0xca571c55fc6b2a6fa609c9ff6b37ae88010fef39b4f2...
1497,2025-12-08 10:32:05,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,7.74,0.59,0x643d0050fa70132ad0a7e6c67d796e1394b0635d37b8...
1498,2025-12-08 10:32:05,BUY,"Ethereum Up or Down - December 8, 5:30AM-5:45A...",Down,10.0,0.52,0x8c3d97460ba3a8c757e3396ff797db1cc6329114fdca...
1492,2025-12-08 10:32:11,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,5.0,0.58,0x795fc74d80931e961ca04a67cc144474a28175f2abfe...
1493,2025-12-08 10:32:11,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,16.0,0.61,0x868ee866a36edc5fbae8297b71b23f08d58a9a083173...
1494,2025-12-08 10:32:11,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,16.0,0.61,0xd2c27426bc63ba26fa2c08ca3ae4508cd73680dacea3...
1495,2025-12-08 10:32:11,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,16.0,0.61,0x98011ce323cbb07d7cdc82166e5e67a3ffe4b24e7fd5...
1496,2025-12-08 10:32:11,BUY,"Bitcoin Up or Down - December 8, 5:30AM-5:45AM ET",Down,10.0,0.61,0xbc40919bcd579a0780a7172197bcf807fa5784f8776a...
1491,2025-12-08 10:32:13,BUY,"Ethereum Up or Down - December 8, 5:30AM-5:45A...",Up,12.0,0.43,0x18379deb490aa497386459e7d026abdfe8db854c348d...
1490,2025-12-08 10:32:15,BUY,"Ethereum Up or Down - December 8, 5:30AM-5:45A...",Up,3.0,0.43,0x384e8ce987842c72717552a3d9c7634c06bfdbf217d6...


In [54]:
# Last 10 trades: the rows with the largest timestamps, limited to the
# columns that matter for a quick visual check.
print("=" * 50, "SON 10 TRADE (en yeni)", "=" * 50, sep="\n")
display(
    df.nlargest(10, 'timestamp').loc[
        :,
        ['timestamp', 'side', 'market_question', 'outcome', 'amount', 'price', 'transaction_hash'],
    ]
)


SON 10 TRADE (en yeni)


Unnamed: 0,timestamp,side,market_question,outcome,amount,price,transaction_hash
0,2025-12-08 13:06:21,BUY,"Bitcoin Up or Down - December 8, 8:00AM-8:15AM ET",Down,5.0,0.23,0xb7531137cae968a4d7412a7887e0357b1e6b6f82f970...
1,2025-12-08 13:06:21,BUY,"Ethereum Up or Down - December 8, 8:00AM-8:15A...",Down,6.0,0.16,0xba60baca73b76c31d4c36c45e6eb6ef7ce6ccf81c16d...
2,2025-12-08 13:06:21,BUY,"Bitcoin Up or Down - December 8, 8:00AM-8:15AM ET",Down,16.0,0.23,0x596f6668daa4d9d57ccfb512876ee5fd6fcd3386a8cc...
3,2025-12-08 13:06:19,BUY,"Bitcoin Up or Down - December 8, 8AM ET",Down,16.0,0.39,0x5c0d152c07e5d836f8eaeac89391f2e3de38d1bcc4c1...
4,2025-12-08 13:06:19,BUY,"Bitcoin Up or Down - December 8, 8AM ET",Down,6.02,0.39,0x865f67ad5fc2c12f7f1ec2eba6ca1742b12a57e2ab14...
5,2025-12-08 13:06:19,BUY,"Bitcoin Up or Down - December 8, 8:00AM-8:15AM ET",Down,4.0,0.22,0x45ecc212808942bdddc32e66d9d187e0f83a0adc3a00...
6,2025-12-08 13:06:19,BUY,"Ethereum Up or Down - December 8, 8AM ET",Down,12.0,0.32,0x5cb2b6b682c4bc960b93251bba5f5dee7db551e3ba6a...
7,2025-12-08 13:06:17,BUY,"Bitcoin Up or Down - December 8, 8:00AM-8:15AM ET",Down,5.0,0.23,0xe4823b91254f87ec8ac11f59d5118399f4b743839ed5...
8,2025-12-08 13:06:15,BUY,"Ethereum Up or Down - December 8, 8:00AM-8:15A...",Up,12.0,0.85,0x7689a5d2f92ccaa406e2e3adebacc100f126b9ada473...
9,2025-12-08 13:06:15,BUY,"Ethereum Up or Down - December 8, 8:00AM-8:15A...",Up,12.0,0.85,0xb8ec1847c005455e99736e6f920618aca05e50623175...


In [55]:
# RESULT: number of unique trades once rows sharing a transaction_hash are
# collapsed to their first occurrence.
print("=" * 50)
print("SONUC")
print("=" * 50)
unique_trades = df.drop_duplicates(subset=['transaction_hash'])
total_rows = len(df)
duplicate_rows = total_rows - len(unique_trades)
# Guard the ratio: an empty frame would otherwise raise ZeroDivisionError.
# With no rows there are no duplicates, so the ratio is reported as 0.
duplicate_pct = duplicate_rows / total_rows * 100 if total_rows else 0.0
print(f"Toplam satir: {total_rows:,}")
print(f"Benzersiz transaction_hash: {len(unique_trades):,}")
print(f"Duplike satir sayisi: {duplicate_rows:,}")
print(f"Duplike orani: {duplicate_pct:.1f}%")


SONUC
Toplam satir: 1,500
Benzersiz transaction_hash: 1,500
Duplike satir sayisi: 0
Duplike orani: 0.0%


In [56]:
# If duplicates exist, inspect their pattern.
# Same timestamp + same market + same side = really the same trade?
if len(unique_trades) < len(df):
    print("=" * 50, "DUPLIKE PATTERN ANALIZI", "=" * 50, sep="\n")

    # Rows that are exact copies of another row across every column.
    full_duplicates = df.loc[df.duplicated(keep=False)]
    print(f"Tam duplike satir sayisi (tum kolonlar ayni): {len(full_duplicates)}")

    # Rows that match on the key trade fields, whatever their transaction_hash.
    key_cols = ['timestamp', 'trader', 'side', 'market_slug', 'outcome', 'amount', 'price']
    partial_duplicates = df.loc[df.duplicated(subset=key_cols, keep=False)]
    print(f"Anahtar kolonlar ayni olan satir sayisi: {len(partial_duplicates)}")

    if not partial_duplicates.empty:
        print("\nOrnek duplike grup:")
        sample = partial_duplicates.iloc[:5]
        display(sample[[*key_cols, 'transaction_hash']])
