# Prosperity 3 Daily VWAP Price Analysis

This notebook extracts unique daily VWAP prices by product and day from Prosperity 3 data.

In [10]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

## Loading and Processing Prosperity 3 Data

First, we'll define functions to load and process the price data from Prosperity 3.

In [11]:
# Define the data path
datapath = "Prosperity 3 Data/"

def load_price_data(round_num, day_num):
    """
    Load price data for a specific round and day.
    
    Parameters:
        round_num (int): Round number
        day_num (int): Day number
        
    Returns:
        pd.DataFrame: DataFrame with price data
    """
    filename = f"Round {round_num}/prices_round_{round_num}_day_{day_num}.csv"
    filepath = os.path.join(datapath, filename)
    
    if not os.path.exists(filepath):
        print(f"Data file {filename} not found in {datapath}")
        return None
    
    # Load the data into a pandas DataFrame
    data = pd.read_csv(filepath, sep=';')
    
    # Add day information
    if 'day' in data.columns:
        # Use existing day column
        data['DAY'] = data['day']
    else:
        data['DAY'] = day_num
    
    return data

def load_all_price_data(round_num):
    """
    Load all price data for a specific round.
    
    Parameters:
        round_num (int): Round number
        
    Returns:
        pd.DataFrame: DataFrame with all price data
    """
    all_data = pd.DataFrame()
    
    for day_num in range(-2, 1):
        data = load_price_data(round_num, day_num)
        if data is not None:
            # Add day offset to timestamp for continuity
            data['timestamp'] += np.power(10, 6) * (day_num+2)
            all_data = pd.concat([all_data, data])
    
    return all_data

In [12]:
def calculate_vwap(data, product):
    """
    Calculate the Volume-Weighted Average Price (VWAP) for a specific product.
    
    Parameters:
        data (pd.DataFrame): DataFrame with price data
        product (str): Product name
        
    Returns:
        pd.Series: VWAP series
    """
    # Filter for the specific product
    product_data = data[data['product'] == product].copy().fillna(0)
    if len(product_data) == 0:
        print(f"No data found for product {product}")
        return pd.DataFrame()
    
    # Check if mid_price is already in the dataframe
    if 'mid_price' not in product_data.columns:
        # Calculate mid price
        product_data['mid_price'] = (product_data['ask_price_1'] + product_data['bid_price_1']) / 2
    
    # Calculate volume - handle missing columns gracefully
    volume_cols = ['ask_volume_1', 'ask_volume_2', 'ask_volume_3', 'bid_volume_1', 'bid_volume_2', 'bid_volume_3']
    for col in volume_cols:
        if col not in product_data.columns:
            product_data[col] = 0
    
    product_data['volume'] = (
        product_data['ask_volume_1'] + product_data['ask_volume_2'] + product_data['ask_volume_3'] +
        product_data['bid_volume_1'] + product_data['bid_volume_2'] + product_data['bid_volume_3']
    )
    dolvol = 0

    for i in range(1, 4):
        dolvol += product_data[f'ask_price_{i}'] * product_data[f'ask_volume_{i}']
        dolvol += product_data[f'bid_price_{i}'] * product_data[f'bid_volume_{i}']
    # Calculate VWAP
    product_data['price_volume'] = dolvol
    vwap = product_data.groupby('timestamp')['price_volume'].sum() / product_data.groupby('timestamp')['volume'].sum()
    
    # Add day information
    day_info = product_data.groupby('timestamp')['DAY'].first()
    vwap_df = pd.DataFrame({'vwap': vwap, 'day': day_info})
    
    return vwap_df

### Load Prosperity 3 Data

In [13]:
# Load Prosperity 3 data
print("Loading Prosperity 3 data...")
try:
    prosperity3_data = load_all_price_data(3)
    if len(prosperity3_data) > 0:
        print(f"Loaded {len(prosperity3_data)} rows of data")
        
        # Get unique products
        prosperity3_products = prosperity3_data['product'].unique()
        print(f"Unique products in Prosperity 3: {prosperity3_products}")
    else:
        print("No data found for Prosperity 3")
        prosperity3_products = []
except Exception as e:
    print(f"Error loading Prosperity 3 data: {e}")
    prosperity3_data = pd.DataFrame()
    prosperity3_products = []

Loading Prosperity 3 data...
Data file Round 3/prices_round_3_day_-2.csv not found in Prosperity 3 Data/
Data file Round 3/prices_round_3_day_-1.csv not found in Prosperity 3 Data/
Loaded 140000 rows of data
Unique products in Prosperity 3: ['VOLCANIC_ROCK_VOUCHER_10500' 'DJEMBES' 'CROISSANTS' 'JAMS'
 'VOLCANIC_ROCK_VOUCHER_10000' 'KELP' 'VOLCANIC_ROCK_VOUCHER_9750'
 'PICNIC_BASKET1' 'PICNIC_BASKET2' 'VOLCANIC_ROCK_VOUCHER_9500'
 'RAINFOREST_RESIN' 'VOLCANIC_ROCK_VOUCHER_10250' 'SQUID_INK'
 'VOLCANIC_ROCK']


In [14]:
# Calculate VWAP for each product in Prosperity 3
prosperity3_vwap = {}

if len(prosperity3_products) > 0:
    for product in prosperity3_products:
        print(f"Calculating VWAP for {product}...")
        vwap_df = calculate_vwap(prosperity3_data, product)
        if not vwap_df.empty:
            prosperity3_vwap[product] = vwap_df
            print(f"  Calculated VWAP with {len(vwap_df)} data points")
        else:
            print(f"  No VWAP data for {product}")
else:
    print("No products found in Prosperity 3 data")

Calculating VWAP for VOLCANIC_ROCK_VOUCHER_10500...
  Calculated VWAP with 10000 data points
Calculating VWAP for DJEMBES...
  Calculated VWAP with 10000 data points
Calculating VWAP for CROISSANTS...
  Calculated VWAP with 10000 data points
Calculating VWAP for JAMS...
  Calculated VWAP with 10000 data points
Calculating VWAP for VOLCANIC_ROCK_VOUCHER_10000...
  Calculated VWAP with 10000 data points
Calculating VWAP for KELP...
  Calculated VWAP with 10000 data points
Calculating VWAP for VOLCANIC_ROCK_VOUCHER_9750...
  Calculated VWAP with 10000 data points
Calculating VWAP for PICNIC_BASKET1...
  Calculated VWAP with 10000 data points
Calculating VWAP for PICNIC_BASKET2...
  Calculated VWAP with 10000 data points
Calculating VWAP for VOLCANIC_ROCK_VOUCHER_9500...
  Calculated VWAP with 10000 data points
Calculating VWAP for RAINFOREST_RESIN...
  Calculated VWAP with 10000 data points
Calculating VWAP for VOLCANIC_ROCK_VOUCHER_10250...
  Calculated VWAP with 10000 data points
Calcul

In [16]:
# Create a timestamp-level VWAP dataframe for Prosperity 3
prosperity3_timestamp_df = pd.DataFrame()

if len(prosperity3_vwap) > 0:
    # First, collect all unique timestamps across all products
    all_timestamps = set()
    for product, vwap_df in prosperity3_vwap.items():
        all_timestamps.update(vwap_df.index)
    
    # Create a DataFrame with all timestamps as index
    prosperity3_timestamp_df = pd.DataFrame(index=sorted(list(all_timestamps)))
    
    # Add each product's VWAP data
    for product, vwap_df in prosperity3_vwap.items():
        # For each day, create a column with product_d# naming
        for day in vwap_df['day'].unique():
            day_num = int(day) + 3  # Convert from -2,-1,0 to 1,2,3
            col_name = f"{product.lower()}_d{day_num}"
            
            # Get VWAP values for this product and day
            day_data = vwap_df[vwap_df['day'] == day]
            prosperity3_timestamp_df[col_name] = day_data['vwap']
    
    print(f"Created timestamp VWAP dataframe with {len(prosperity3_timestamp_df.columns)} columns")
    print(f"Number of timestamps: {len(prosperity3_timestamp_df)}")
    
    # Save the data to CSV
    prosperity3_timestamp_df.to_csv('prosperity3_timestamp_vwap.csv')
    print("Saved Prosperity 3 timestamp VWAP data to prosperity3_timestamp_vwap.csv")
else:
    print("No VWAP data available for Prosperity 3")

Created timestamp VWAP dataframe with 14 columns
Number of timestamps: 10000
Saved Prosperity 3 timestamp VWAP data to prosperity3_timestamp_vwap.csv


## Compare with Prosperity 1 and 2 Data

In [19]:
# Load the Prosperity 1 and 2 data
try:
    prosperity12_daily_df = pd.read_csv('prosperity12_daily_vwap.csv')
    print("Loaded Prosperity 1 and 2 daily VWAP data")
    print(f"Shape: {prosperity12_daily_df.shape}")
    
    # Display the first few rows
    prosperity12_daily_df.head()
except FileNotFoundError:
    print("Prosperity 1 and 2 data file not found. Please run the other notebook first.")

Loaded Prosperity 1 and 2 daily VWAP data
Shape: (1, 10)


In [18]:
# Find common products between Prosperity 1, 2, and 3
if 'prosperity12_daily_df' in locals() and not prosperity12_daily_df.empty:
    p12_products = set([col.split('_d')[0] for col in prosperity12_daily_df.columns])
    
    if not prosperity3_daily_df_renamed.empty:
        p3_products = set([col.split('_d')[0] for col in prosperity3_daily_df_renamed.columns])
        
        common_products = p12_products.intersection(p3_products)
        print(f"Common products: {common_products}")
        
        p12_only = p12_products - p3_products
        print(f"Products only in Prosperity 1 & 2: {p12_only}")
        
        p3_only = p3_products - p12_products
        print(f"Products only in Prosperity 3: {p3_only}")
    else:
        print("No Prosperity 3 data available for comparison")
        print(f"Products in Prosperity 1 & 2: {p12_products}")
else:
    print("No Prosperity 1 & 2 data available for comparison")

NameError: name 'prosperity3_daily_df_renamed' is not defined