In [1]:
import requests
import pandas as pd
import numpy as np
import json
import time
import datetime
from pprint import pprint

In [2]:
# Price-feed ids ("0x"-prefixed hex) that get_prices sends as the `ids` query parameters
# to the Pyth benchmarks endpoint.
# NOTE(review): presumably the PYUSD/USD and USDC/USD feeds (going by the variable names) — confirm
# against the Pyth feed list.
pyusdc_feed_id = "0xc1da1b73d7f01e7ddd54b3766cf7fcd644395ad14f70aa706ec5384c59e76692"
usdc_feed_id = "0xeaa020c61cc479712813461ce153894a96a6c00b21ed0cfc2798d1f9a9e9c94a"
# Shared HTTP session; get_prices captures it as the default value of its `session` argument,
# so this cell must run before the cell that defines get_prices.
session = requests.Session()

In [3]:
def get_prices(timestamp: int, feed_token_a: str, feed_token_b: str, retries: int=3, backoff: float = 2.0, session=session):
  """Fetch spot and EMA prices for two price feeds at a given timestamp.

  Queries ``https://benchmarks.pyth.network/v1/updates/price/<timestamp>`` with both
  feed ids and scales every returned integer price by ``10 ** expo``.

  Args:
    timestamp: Value appended to the endpoint path (the caller passes ``block_time``).
    feed_token_a: Feed id of the first token, optionally "0x"-prefixed.
    feed_token_b: Feed id of the second token, optionally "0x"-prefixed.
    retries: Maximum number of request attempts (429 responses count as attempts).
    backoff: Base of the exponential sleep (``backoff ** attempt`` seconds) used
      after a timeout or connection error.
    session: Object exposing ``get(url, timeout=...)``; defaults to the shared session.

  Returns:
    A 4-tuple ``(token_price_a, token_price_b, token_ema_a, token_ema_b)``.
    On ANY failure (retries exhausted, rate-limited on every attempt, unexpected
    error, missing or malformed payload) the tuple is ``(0.0, 0.0, 0.0, 0.0)``, so
    callers can always index or unpack the result.
  """
  url = "https://benchmarks.pyth.network/v1/updates/price/"
  search = str(timestamp) + "?ids=" + feed_token_a + "&ids=" + feed_token_b + "&encoding=hex&parsed=true"
  url += search

  # Single failure sentinel so every exit path has the same shape.
  no_price = (0.0, 0.0, 0.0, 0.0)

  def _feed_key(feed_id):
    # The response lists ids without the "0x" prefix; compare case-insensitively.
    feed_id = feed_id.lower()
    return feed_id[2:] if feed_id.startswith("0x") else feed_id

  def _scaled(entry):
    # Prices arrive as integer strings plus a (usually negative) base-10 exponent.
    return int(entry['price']) * 10 ** (entry['expo'])

  data = None  # stays None if every attempt was rate-limited
  for attempt in range(retries):
    try:
      res = session.get(url, timeout=20)
      if res.status_code == 429:
        # Retry-After may be an HTTP-date instead of seconds; fall back to 60s then.
        try:
          wait = int(res.headers.get("Retry-After", 60))
        except (TypeError, ValueError):
          wait = 60
        print(f"⚠️ Rate-limited. Waiting {wait}s before retrying...")
        time.sleep(wait)
        continue
      res.raise_for_status()
      data = res.json()
      break
    except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
      print(f"⚠️ Attempt {attempt + 1}/{retries} failed: {e}")
      if attempt < retries - 1:
        sleep_time = backoff ** attempt
        print(f"⏳ Retrying in {sleep_time}s...")
        time.sleep(sleep_time)
      else:
        print("❌ All retries failed, skipping this timestamp.")
        return no_price
    except Exception as e:
      print(f"❌ Unexpected error: {e}")
      return no_price

  if data is None:
    # Loop ended without a successful response (e.g. 429 on every attempt, or retries <= 0).
    print("❌ All retries failed, skipping this timestamp.")
    return no_price

  if not isinstance(data, dict) or not data.get('parsed', None):
    print("Price fetch failed :)")
    return no_price

  try:
    parsed = {str(item['id']).lower(): item for item in data['parsed']}

    price_a = parsed[_feed_key(feed_token_a)]
    price_b = parsed[_feed_key(feed_token_b)]

    token_price_a = _scaled(price_a['price'])
    token_price_b = _scaled(price_b['price'])

    token_ema_a = _scaled(price_a['ema_price'])
    token_ema_b = _scaled(price_b['ema_price'])
  except (KeyError, TypeError, ValueError) as e:
    # A feed is missing from the response or the payload is malformed.
    print(f"❌ Unexpected error: {e}")
    return no_price

  return token_price_a, token_price_b, token_ema_a, token_ema_b

In [5]:
def add_prices(df, token_feed_a, token_feed_b, fetch_prices=None, delay=0.4,
               checkpoint_every=5000,
               output_dir="/content/drive/MyDrive/colosseum_hackathon/datasets/pyusd_usdc_pool",
               filename_prefix="pyusd_usdc_with_price", verbose=True):
  """Add spot and EMA price columns to ``df``, one lookup per distinct ``block_time``.

  ``df`` is modified IN PLACE (four float columns are added/overwritten) and also
  returned. Rows are addressed by position, so the frame may have any index
  (non-contiguous or duplicated labels included).

  Args:
    df: DataFrame with a ``block_time`` column convertible to ``int``.
    token_feed_a: Feed id forwarded to the fetcher for token A.
    token_feed_b: Feed id forwarded to the fetcher for token B.
    fetch_prices: Callable ``(timestamp, feed_a, feed_b) -> 4-sequence``; defaults
      to the notebook's ``get_prices`` (looked up when the function is called).
    delay: Seconds to sleep after each real fetch (cache hits do not sleep).
    checkpoint_every: Write a CSV checkpoint of all rows processed so far every
      this many rows; ``0``/``None`` disables checkpointing.
    output_dir: Directory that receives the checkpoint files.
    filename_prefix: Checkpoints are named ``<prefix>_<batch_num>.csv``.
    verbose: Print one "Processing row" line per row when true.

  Returns:
    The same ``df`` object, with ``token_price_a``, ``token_price_b``,
    ``token_ema_a`` and ``token_ema_b`` filled in. Rows whose timestamp is
    invalid or whose fetch failed keep ``0.0`` in all four columns.
  """
  if fetch_prices is None:
    fetch_prices = get_prices

  price_cols = ['token_price_a', 'token_price_b', 'token_ema_a', 'token_ema_b']
  no_price = (0.0, 0.0, 0.0, 0.0)

  for col in price_cols:
    df[col] = 0.0
  col_positions = [df.columns.get_loc(col) for col in price_cols]

  batch_num = 0
  cache = {}

  for pos, (idx, raw_ts) in enumerate(df['block_time'].items()):
    if verbose:
      print(f"Processing row {idx}")

    try:
      ts = int(raw_ts)
    except (TypeError, ValueError):
      print(f"⚠️ Row {idx}: invalid block_time {raw_ts!r}, leaving prices at 0.0")
      result = no_price
    else:
      if ts not in cache:
        fetched = fetch_prices(ts, token_feed_a, token_feed_b)
        # The fetcher may signal failure with a non-sequence (e.g. None or 1).
        if not (isinstance(fetched, (tuple, list)) and len(fetched) == 4):
          print(f"⚠️ Row {idx}: no prices for timestamp {ts}, leaving prices at 0.0")
          fetched = no_price
        cache[ts] = tuple(fetched)
        if delay and delay > 0:
          time.sleep(delay)
      result = cache[ts]

    # Positional writes: correct even when index labels repeat or are not 0..n-1.
    for col_pos, value in zip(col_positions, result):
      df.iat[pos, col_pos] = value

    if checkpoint_every and (pos + 1) % checkpoint_every == 0:
      filename = f"{filename_prefix}_{batch_num}.csv"
      df.iloc[:pos + 1].to_csv(f"{output_dir}/{filename}", index=False)
      print(f"✅ Saved progress at {pos + 1}")
      batch_num += 1

  return df


In [6]:
# Load the batch to enrich (batch 9 of the PYUSD/USDC pool data, per the file name).
# add_prices reads its `block_time` column, so that column must be present.
df = pd.read_csv("/content/drive/MyDrive/colosseum_hackathon/datasets/pyusd_usdc_pool/pyusd_usdc_batch_9.csv")

In [None]:
# Long-running: one HTTP request (plus a 0.4s pause) per distinct block_time.
# add_prices adds the price columns to `df` in place and returns that same object,
# so `pyusd_usdc_df` and `df` refer to the same DataFrame afterwards.
pyusd_usdc_df = add_prices(df, pyusdc_feed_id, usdc_feed_id)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing row 4165
Processing row 4166
Processing row 4167
Processing row 4168
Processing row 4169
Processing row 4170
Processing row 4171
Processing row 4172
Processing row 4173
Processing row 4174
Processing row 4175
Processing row 4176
Processing row 4177
Processing row 4178
Processing row 4179
Processing row 4180
Processing row 4181
Processing row 4182
Processing row 4183
Processing row 4184
Processing row 4185
Processing row 4186
Processing row 4187
Processing row 4188
Processing row 4189
Processing row 4190
Processing row 4191
Processing row 4192
Processing row 4193
Processing row 4194
Processing row 4195
Processing row 4196
Processing row 4197
Processing row 4198
Processing row 4199
Processing row 4200
Processing row 4201
Processing row 4202
Processing row 4203
Processing row 4204
Processing row 4205
Processing row 4206
Processing row 4207
Processing row 4208
Processing row 4209
Processing row 4210
Processing row 

In [21]:
# NOTE(review): `sol_pengu_df` is not defined in any cell visible here, and this cell's
# execution count (21) is out of sequence — it looks like leftover kernel state from another
# run. On a fresh kernel this would raise NameError. The variable name (sol_pengu) also does
# not match the target file (sol_usdc_swaps.csv); confirm which frame is meant, or remove the cell.
sol_pengu_df.to_csv("/content/drive/MyDrive/colosseum_hackathon/datasets/sol_usdc_pool/sol_usdc_swaps.csv", index=False)