In [None]:
import requests
import pandas as pd
from pymongo import MongoClient
from dotenv import load_dotenv
import os

# Populate the process environment from a local .env file (if one is found)
# so that the os.getenv lookups in the configuration cell can see those values.
load_dotenv()

In [None]:
# --- Configuration -----------------------------------------------------------
# The MongoDB connection string is taken from the environment only; abort
# right away when it is absent or empty instead of failing later on connect.
MONGO_URL = os.environ.get('MONGO_URL')
if not MONGO_URL:
    raise ValueError("MONGO_URL environment variable is not set")

# Remote endpoint serving the index series.
INDEX_API_URL = "https://price.collectorcrypt.com/api/indexes/modern"

# Database and collection names used for the market query.
DB_NAME = "gemrate"
MARKET_INFO_COLLECTION = "ebay_graded_items"

# Output CSV files written by the cells further down.
INDEX_FILE = "index.csv"
MARKET_FILE = "market.csv"

# Shared handles: client -> database -> collection.
client = MongoClient(MONGO_URL)
db = client[DB_NAME]
collection = db[MARKET_INFO_COLLECTION]

In [None]:
# Server-side time budget for the aggregation: 100 minutes, in milliseconds.
AGGREGATE_TIMEOUT_MS = 100 * 60 * 1000

# Aggregation pipeline: select auction listings that carry a hybrid specid and
# gemrate data, then trim every document down to the fields exported below.
pipeline = [
    {
        "$match": {
            # The two dotted-path predicates can only match when their parent
            # documents ("gemrate_hybrid_data", "item_data") exist, so no
            # separate $exists checks on the parents are needed.
            "gemrate_hybrid_data.specid": {"$exists": True},
            "item_data.format": "auction",
            "gemrate_data": {"$exists": True},
        }
    },
    {
        "$project": {
            "gemrate_hybrid_data.specid": 1,
            "item_data.date": 1,
            "grading_company": 1,
            "gemrate_data.grade": 1,
            "item_data.price": 1,
            "item_data.number_of_bids": 1,
            "item_data.seller_name": 1,
            "item_data.best_offer_accepted": 1,
            "_id": 1
        }
    }
]

results = collection.aggregate(
    pipeline,
    maxTimeMS=AGGREGATE_TIMEOUT_MS,
    allowDiskUse=True,
)

# Flatten the nested sub-documents into dotted columns (e.g. "item_data.price")
# in a single pass; building an intermediate DataFrame and converting it back
# to dicts first would only create a second full copy of the result set.
df = pd.json_normalize(list(results))
print(f"Processing complete. Rows loaded: {len(df)}")

# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Write the flattened market rows to MARKET_FILE; index=False keeps the
# DataFrame's row index out of the CSV.
df.to_csv(MARKET_FILE, index=False)

In [None]:
def fetch_index_data(url, timeout=30, session=None):
    """
    Downloads index price data from the API and returns it as a DataFrame.

    Args:
        url: API endpoint URL.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to the HTTP GET call so a stalled connection cannot
            block the notebook forever. Defaults to 30.
        session: Optional object exposing a requests-compatible
            ``get(url, timeout=...)`` method (for example a
            ``requests.Session``). When omitted, the ``requests`` module
            itself is used.

    Returns:
        DataFrame built from the JSON payload, with 'date' parsed to
        datetimes and the payload's 'value' column renamed to 'index_value'.

    Raises:
        ValueError: If the payload does not contain both 'date' and 'value'
            fields (this includes an empty payload).
        Exception: Whatever ``raise_for_status()`` raises for an unsuccessful
            response (``requests.HTTPError`` with the default client).
    """
    http = requests if session is None else session
    response = http.get(url, timeout=timeout)
    response.raise_for_status()
    index_data = response.json()

    index_df = pd.DataFrame(index_data)

    # Fail with a descriptive message rather than an opaque KeyError when the
    # API returns nothing or changes its field names.
    missing = [name for name in ("date", "value") if name not in index_df.columns]
    if missing:
        raise ValueError(
            f"Index API response from {url} is missing expected field(s): {missing}"
        )

    index_df['date'] = pd.to_datetime(index_df['date'])
    index_df = index_df.rename(columns={'value': 'index_value'})

    print(f"Downloaded {len(index_df)} index data points")
    print(f"Index date range: {index_df['date'].min()} to {index_df['date'].max()}")

    return index_df

# Download the index series, persist it to INDEX_FILE and preview the first rows.
index_df = fetch_index_data(INDEX_API_URL)
index_df.to_csv(INDEX_FILE, index=False)
# Report the configured path rather than a hard-coded file name so the message
# stays accurate if INDEX_FILE is changed.
print(f"Saved to {INDEX_FILE}")
index_df.head()

Downloaded 86 index data points
Index date range: 2025-09-08 00:00:00 to 2025-12-04 00:00:00
Saved to index.csv


Unnamed: 0,date,index_value
0,2025-09-08,1000.0
1,2025-09-09,1022.056562
2,2025-09-10,1039.675686
3,2025-09-11,1031.257319
4,2025-09-12,944.969578
