# 01_data_exploration.ipynb

## Project: Bitcoin in a 3-Asset Portfolio (BTC, S&P 500, Gold)

### Main Objective:
The objective of this notebook is to build and prepare two datasets that will be used in the replication of Philipp Schottler's study on Bitcoin in a diversified portfolio.

This notebook is focused on data import, cleaning, transformation, and aggregation to a monthly frequency.

---

## Study Replication:
Philipp Schottler analyzes the role of Bitcoin within a diversified portfolio consisting of Bitcoin, the S&P 500, and Gold. His study focuses on the period between September 2014 and November 2021.

---

## This notebook will create two datasets:

1. `final_df_study`: Dataset restricted to the original study period → from September 2014 to November 2021.

2. `final_df_extended`: Extended dataset including all available data → from September 2014 to December 2024 (latest available date for Gold data).

---

## This notebook covers:
- Import and cleaning of data for:
  - Bitcoin (BTC)
  - S&P 500
  - Gold (XAU/USD)
  - CPI (Inflation Index)

- Transformation of the data to monthly frequency.

- Calculation of monthly average prices for each asset.

- Creation of the final datasets ready for:
  - Study Replication
  - Extended Analysis (including recent data)

---

## The full analysis will be developed in the next notebook:

> `02_study_replication.ipynb`

---


In [47]:
import os
import time
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

In [48]:
# Crear carpeta temporal Spark
temp_path = os.path.join(os.getcwd(), 'spark-temp')
os.makedirs(temp_path, exist_ok=True)

# Definir variables de entorno
os.environ['JAVA_HOME'] = os.environ['CONDA_PREFIX'] + '\Library'
os.environ['SPARK_LOCAL_DIRS'] = temp_path

print('JAVA_HOME:', os.environ.get('JAVA_HOME'))
print('SPARK_LOCAL_DIRS:', os.environ.get('SPARK_LOCAL_DIRS'))

JAVA_HOME: C:\Users\TESTER\anaconda3\envs\btc_portfolio\Library
SPARK_LOCAL_DIRS: C:\Users\TESTER\Desktop\Laboral\GIT\btc-3-asset-portfolio-extension\notebooks\spark-temp


In [49]:
# Crear Spark Session y medir tiempo
start_time = time.time()

spark = SparkSession.builder \
.appName('btcproject') \
.config('spark.driver.memory', '512m') \
.config('spark.executor.memory', '512m') \
.config('spark.local.dir', temp_path) \
.getOrCreate()

spark.sparkContext.setLogLevel('ERROR')

end_time = time.time()
print('Spark Version:', spark.version)
print(f'Tiempo total en crear SparkSession: {round(end_time - start_time, 2)} segundos')

Spark Version: 3.5.4
Tiempo total en crear SparkSession: 0.01 segundos


# Data Cleaning
## Btc Dataset

In [50]:
df_btc = spark.read \
.option("header", True) \
.option("sep", ";") \
.option("inferSchema", True) \
.csv('../data/BTC_All_graph_coinmarketcap.csv')

df_btc.show(5)
df_btc.printSchema()
df_btc.select(
    F.min("timestamp").alias("Fecha Minima"),
    F.max("timestamp").alias("Fecha Maxima")
).show()

+----+--------------+--------------+--------------+--------------+------+---------------+-------------------+
|name|          open|          high|           low|         close|volume|      marketCap|          timestamp|
+----+--------------+--------------+--------------+--------------+------+---------------+-------------------+
|2781|135.3000030518|147.4880065918|132.1000061035|         139.0|   0.0|  1.542813125E9|2013-04-01 02:00:00|
|2781|         139.0|139.8899993896| 79.0999984741|         129.0|   0.0|    1.4478702E9|2013-05-01 02:00:00|
|2781|128.8150024414|129.7799987793|          88.5| 96.6139984131|   0.0|1.09695777135E9|2013-06-01 02:00:00|
|2781| 97.5100021362|  111.34400177| 65.5260009766|106.0899963379|   0.0| 1.2187778335E9|2013-07-01 02:00:00|
|2781|106.2129974365|140.8899993896|101.2109985352|135.3500061035|   0.0|1.57467205125E9|2013-08-01 02:00:00|
+----+--------------+--------------+--------------+--------------+------+---------------+-------------------+
only showi

In [51]:
df_btc.select([
    F.count(F.when(F.col(c).isNull(), c)).alias(f"{c}_nulls") 
    for c in ["open", "high", "low", "close"]
]).show()

+----------+----------+---------+-----------+
|open_nulls|high_nulls|low_nulls|close_nulls|
+----------+----------+---------+-----------+
|         0|         0|        0|          0|
+----------+----------+---------+-----------+



In [52]:
monthly_df_btc = (
    df_btc
    .groupBy(F.date_trunc("month", "timestamp").alias("month"))
    .agg(
        F.avg("close").alias("avg_close_btc"),
        F.avg("high").alias("avg_high_btc"),
        F.avg("low").alias("avg_low_btc"),
        F.avg("volume").alias("avg_volume_btc"),
        F.avg("marketCap").alias("avg_marketCap_btc")
    )
    .withColumn("month", F.col("month").cast("date"))
)


monthly_df_btc_study = monthly_df_btc.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2021-11-01")
)

monthly_df_btc_extended = monthly_df_btc.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_btc_new = monthly_df_btc.filter(
    (F.col("month") >= "2021-11-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_btc_study.show()

+----------+---------------+----------------+----------------+-------------------+------------------+
|     month|  avg_close_btc|    avg_high_btc|     avg_low_btc|     avg_volume_btc| avg_marketCap_btc|
+----------+---------------+----------------+----------------+-------------------+------------------+
|2018-09-01|        6625.56|         7388.43|         6197.52|          4.00228E9|   1.1460851947E11|
|2020-01-01|  9350.52936518|   9553.12613251|   6914.99590793|  2.943248971913E10|1.7011277816132E11|
|2015-05-01| 230.1900024414|  247.8040008545|  228.5729980469|          1.47308E7|   3.27375642525E9|
|2016-02-01| 437.6969909668|  448.0459899902|  367.9570007324|          6.06947E7|     6.681444705E9|
|2020-05-01|  9461.05891806|   9996.74335304|   8374.32297508|  2.777329029851E10|1.7399715192968E11|
|2016-05-01| 531.3859863281|  553.9600219727|  437.3890075684|           1.3845E8|   8.29303575505E9|
|2018-05-01| 7494.169921875|          9964.5| 7090.6801757813|      5.127130112E9|

## SP 500 Dataset

In [53]:
df_sp500 = spark.read \
.option("header", True) \
.option("inferSchema", True) \
.csv('../data/spy_sp500.csv')

df_sp500.show(5)
df_sp500.printSchema()
df_sp500.select(
    F.min("Date").alias("Fecha Minima"),
    F.max("Date").alias("Fecha Maxima")
).show()

+----------+------------------+------------------+------------------+------------------+-------+---+-------+----+-----+----+
|      Date|              Open|              High|               Low|             Close| Volume|Day|Weekday|Week|Month|Year|
+----------+------------------+------------------+------------------+------------------+-------+---+-------+----+-----+----+
|1993-01-29|24.543517320288366|24.543517320288366|24.421410268943646|24.526073455810547|1003200| 29|      4|   4|    1|1993|
|1993-02-01|24.543515257916205|24.700510025024414|24.543515257916205|24.700510025024414| 480500|  1|      0|   5|    2|1993|
|1993-02-02|24.683072202385436|24.770291538789625|24.630740600542925| 24.75284767150879| 201300|  2|      1|   5|    2|1993|
|1993-02-03|24.787723774025462|25.031937801355763| 24.77027991493044|25.014493942260742| 529400|  3|      2|   5|    2|1993|
|1993-02-04|25.101717743608685|25.171493192513783|24.822615947988297| 25.11916160583496| 531500|  4|      3|   5|    2|1993|


In [54]:
df_sp500.select([
    F.count(F.when(F.col(c).isNull(), c)).alias(f"{c}_nulls") 
    for c in ["open", "high", "low", "close"]
]).show()

+----------+----------+---------+-----------+
|open_nulls|high_nulls|low_nulls|close_nulls|
+----------+----------+---------+-----------+
|         0|         0|        0|          0|
+----------+----------+---------+-----------+



In [55]:
monthly_df_sp500 = (
    df_sp500
    .groupBy(F.date_trunc("month", "Date").alias("month"))
    .agg(
        F.avg("Close").alias("avg_close_sp500"),
        F.avg("High").alias("avg_high_sp500"),
        F.avg("Low").alias("avg_low_sp500"),
        F.avg("Volume").alias("avg_volume_sp500")
    )
    .withColumn("month", F.col("month").cast("date"))
    .orderBy(F.desc("month"))
)

monthly_df_sp500_study = monthly_df_sp500.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2021-11-01")
)

monthly_df_sp500_extended = monthly_df_sp500.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_sp500_new = monthly_df_sp500.filter(
    (F.col("month") >= "2021-11-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_sp500_study.show()


+----------+------------------+------------------+------------------+--------------------+
|     month|   avg_close_sp500|    avg_high_sp500|     avg_low_sp500|    avg_volume_sp500|
+----------+------------------+------------------+------------------+--------------------+
|2021-11-01| 444.1805172874814|446.05744520951407| 442.4339263312481|6.3588166666666664E7|
|2021-10-01|424.12422543480284|425.63512179095517|421.67282235552295|           7.18412E7|
|2021-09-01| 422.2905680338542|424.83313808331746|420.64819539532715| 8.312188571428572E7|
|2021-08-01|422.57470148259944|423.56206086341194| 420.8363430779173| 5.700006363636363E7|
|2021-07-01| 413.6143362862723|414.81123224994406| 411.3215066278909| 6.771927142857143E7|
|2021-06-01|  401.328969782049| 402.3121021813759|  399.807237304437| 5.827965454545455E7|
|2021-05-01|394.15084686279295|395.94878713156766|392.14634263989615|         7.7361795E7|
|2021-04-01| 391.2456984747024|392.24783102138616| 389.4652461354201|  6.96241238095238E7|

## Gold USD 

In [56]:
df_gold = spark.read \
.option("header", True) \
.option("sep", ",") \
.option("inferSchema", True) \
.csv('../data/XAU_USD Historical Data.csv')

df_gold.show(5)
df_gold.printSchema()
df_gold.select(
    F.min("Date").alias("Fecha Minima"),
    F.max("Date").alias("Fecha Maxima")
).show()

+----------+--------+--------+--------+--------+----+--------+
|      Date|   Price|    Open|    High|     Low|Vol.|Change %|
+----------+--------+--------+--------+--------+----+--------+
|04/11/2025|3,224.27|3,189.01|3,245.36|3,185.23|NULL|   1.11%|
|04/10/2025|3,188.98|3,083.75|3,191.43|3,071.03|NULL|   3.47%|
|04/09/2025|3,082.18|2,984.08|3,099.87|2,970.01|NULL|   3.30%|
|04/08/2025|2,983.78|2,983.39|3,022.84|2,974.45|NULL|   0.04%|
|04/07/2025|2,982.54|3,038.02|3,055.88|2,956.60|NULL|  -1.80%|
+----------+--------+--------+--------+--------+----+--------+
only showing top 5 rows

root
 |-- Date: string (nullable = true)
 |-- Price: string (nullable = true)
 |-- Open: string (nullable = true)
 |-- High: string (nullable = true)
 |-- Low: string (nullable = true)
 |-- Vol.: string (nullable = true)
 |-- Change %: string (nullable = true)

+------------+------------+
|Fecha Minima|Fecha Maxima|
+------------+------------+
|  01/01/2014|  12/31/2024|
+------------+------------+



In [57]:
df_gold = (
    df_gold
    .withColumn("Date", F.to_date(F.col("Date"), "MM/dd/yyyy"))
    .withColumn("Price", F.regexp_replace(F.col("Price"), ",", "").cast("double"))
    .withColumn("Open", F.regexp_replace(F.col("Open"), ",", "").cast("double"))
    .withColumn("High", F.regexp_replace(F.col("High"), ",", "").cast("double"))
    .withColumn("Low", F.regexp_replace(F.col("Low"), ",", "").cast("double"))
)

df_gold.printSchema()
df_gold.show(5)

root
 |-- Date: date (nullable = true)
 |-- Price: double (nullable = true)
 |-- Open: double (nullable = true)
 |-- High: double (nullable = true)
 |-- Low: double (nullable = true)
 |-- Vol.: string (nullable = true)
 |-- Change %: string (nullable = true)

+----------+-------+-------+-------+-------+----+--------+
|      Date|  Price|   Open|   High|    Low|Vol.|Change %|
+----------+-------+-------+-------+-------+----+--------+
|2025-04-11|3224.27|3189.01|3245.36|3185.23|NULL|   1.11%|
|2025-04-10|3188.98|3083.75|3191.43|3071.03|NULL|   3.47%|
|2025-04-09|3082.18|2984.08|3099.87|2970.01|NULL|   3.30%|
|2025-04-08|2983.78|2983.39|3022.84|2974.45|NULL|   0.04%|
|2025-04-07|2982.54|3038.02|3055.88| 2956.6|NULL|  -1.80%|
+----------+-------+-------+-------+-------+----+--------+
only showing top 5 rows



In [58]:
monthly_df_gold = (
    df_gold
    .groupBy(F.date_trunc("month", "Date").alias("month"))
    .agg(
        F.avg("Price").alias("avg_price_gold"),
        F.avg("Open").alias("avg_open_gold"),
        F.avg("High").alias("avg_high_gold"),
        F.avg("Low").alias("avg_low_gold")
    )
    .withColumn("month", F.col("month").cast("date"))
    .orderBy("month")
)

monthly_df_gold_study = monthly_df_gold.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2021-11-01")
)

monthly_df_gold_extended = monthly_df_gold.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_gold_new = monthly_df_gold.filter(
    (F.col("month") >= "2021-11-01") & (F.col("month") <= "2024-12-31")
)

monthly_df_gold_study.show(5)


+----------+------------------+------------------+------------------+------------------+
|     month|    avg_price_gold|     avg_open_gold|     avg_high_gold|      avg_low_gold|
+----------+------------------+------------------+------------------+------------------+
|2014-09-01|1238.0895454545455|1241.5922727272725|1247.2545454545452| 1232.622727272727|
|2014-10-01|1223.1108695652176|1224.8213043478263|1231.7330434782612|1215.9695652173912|
|2014-11-01|         1176.8785|1177.3314999999998|1187.0754999999997|1166.0729999999999|
|2014-12-01|1199.4636363636364|1198.7154545454546|1212.0322727272726|1188.0995454545455|
|2015-01-01|1251.5736363636365| 1247.011818181818|1259.1463636363637|           1238.46|
+----------+------------------+------------------+------------------+------------------+
only showing top 5 rows



## CPI dataset

### This DataFrame has:
### 1) observation_date: The date.
### 2) CPIAUCSL_NBD20140101: The CPI rebased so January 1, 2014 = 100.
### A value of 100.11008 on 2014-02-01 means prices increased by ~0.11008% since 2014-01-01.



In [59]:
df_cpi = spark.read \
.option("header", True) \
.option("inferSchema", True) \
.csv('../data/CPI_US.csv')

df_cpi.show(5)
df_cpi.printSchema()
df_cpi.select(
    F.min("observation_date").alias("Fecha Minima"),
    F.max("observation_date").alias("Fecha Maxima")
).show()

+----------------+--------------------+
|observation_date|CPIAUCSL_NBD20140101|
+----------------+--------------------+
|      2014-01-01|               100.0|
|      2014-02-01|           100.11008|
|      2014-03-01|           100.31451|
|      2014-04-01|           100.50151|
|      2014-05-01|           100.69277|
+----------------+--------------------+
only showing top 5 rows

root
 |-- observation_date: date (nullable = true)
 |-- CPIAUCSL_NBD20140101: double (nullable = true)

+------------+------------+
|Fecha Minima|Fecha Maxima|
+------------+------------+
|  2014-01-01|  2025-03-01|
+------------+------------+



In [60]:
monthly_df_cpi = (
    df_cpi
    .withColumnRenamed("observation_date", "month")
    .withColumnRenamed("CPIAUCSL_NBD20140101", "avg_cpi")
    .orderBy(F.desc("month"))
)

monthly_df_cpi_study = monthly_df_cpi.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2021-11-01")
)

monthly_df_cpi_extended = monthly_df_cpi.filter(
    (F.col("month") >= "2014-09-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_cpi_new = monthly_df_cpi.filter(
    (F.col("month") >= "2021-11-01") & (F.col("month") <= "2024-12-01")
)

monthly_df_cpi_study.show(5)


+----------+---------+
|     month|  avg_cpi|
+----------+---------+
|2021-11-01|118.50328|
|2021-10-01|117.52746|
|2021-09-01|116.42838|
|2021-08-01|115.92261|
|2021-07-01|115.58813|
+----------+---------+
only showing top 5 rows



# Join Final

In [61]:
# Final Dataset - Study Period (2014-09 to 2021-11)
final_df_study = (
    monthly_df_btc_study
    .join(monthly_df_sp500_study, on="month", how="inner")
    .join(monthly_df_gold_study, on="month", how="inner")
    .join(monthly_df_cpi_study, on="month", how="inner")
    .orderBy("month")
)

# Final Dataset - Extended Period (2014-09 to 2024-12)
final_df_extended = (
    monthly_df_btc_extended
    .join(monthly_df_sp500_extended, on="month", how="inner")
    .join(monthly_df_gold_extended, on="month", how="inner")
    .join(monthly_df_cpi_extended, on="month", how="inner")
    .orderBy("month")
)


final_df_study.toPandas().to_csv("../data/final_df_study.csv", index=False)
final_df_extended.toPandas().to_csv("../data/final_df_extended.csv", index=False)


In [62]:
print("Study Dataset (2014-09 to 2021-11)")
final_df_study.toPandas()

Study Dataset (2014-09 to 2021-11)


Unnamed: 0,month,avg_close_btc,avg_high_btc,avg_low_btc,avg_volume_btc,avg_marketCap_btc,avg_close_sp500,avg_high_sp500,avg_low_sp500,avg_volume_sp500,avg_price_gold,avg_open_gold,avg_high_gold,avg_low_gold,avg_cpi
0,2014-09-01,386.944000,493.928009,372.239990,3.470730e+07,5.158621e+09,166.232205,166.869427,165.517781,1.003077e+08,1238.089545,1241.592273,1247.254545,1232.622727,100.93035
1,2014-10-01,338.321014,411.697998,289.295990,1.254540e+07,4.549893e+09,161.773412,162.867610,160.568778,1.714377e+08,1223.110870,1224.821304,1231.733043,1215.969565,100.91037
2,2014-11-01,378.046997,457.092987,320.626007,9.194440e+06,5.125958e+09,171.014914,171.322306,170.375505,8.268904e+07,1176.878500,1177.331500,1187.075500,1166.073000,100.72039
3,2014-12-01,320.192993,384.037994,304.231995,1.394290e+07,4.377511e+09,172.147657,173.362337,171.450040,1.384169e+08,1199.463636,1198.715455,1212.032273,1188.099545,100.40971
4,2015-01-01,217.464005,320.434998,171.509995,2.334820e+07,2.997692e+09,170.200545,171.452795,168.986943,1.591753e+08,1251.573636,1247.011818,1259.146364,1238.460000,99.77007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,2021-07-01,41626.195676,42235.547709,29360.955838,8.779790e+11,7.814314e+11,413.614336,414.811232,411.321507,6.771927e+07,1805.646364,1804.011818,1814.727273,1795.146818,115.58813
83,2021-08-01,47166.687945,50482.076408,37458.003993,1.065642e+12,8.868699e+11,422.574701,423.562061,420.836343,5.700006e+07,1786.160000,1786.463182,1796.674545,1773.033182,115.92261
84,2021-09-01,43790.895625,52853.763796,39787.609798,1.102140e+12,8.246192e+11,422.290568,424.833138,420.648195,8.312189e+07,1777.277727,1780.397727,1791.654545,1767.758636,116.42838
85,2021-10-01,61318.957767,66930.387271,43320.022979,1.153078e+12,1.156486e+12,424.124225,425.635122,421.672822,7.184120e+07,1776.299524,1775.510000,1786.997619,1765.320952,117.52746


In [63]:

print("Extended Dataset (2014-09 to 2024-12)")
final_df_extended.toPandas()

Extended Dataset (2014-09 to 2024-12)


Unnamed: 0,month,avg_close_btc,avg_high_btc,avg_low_btc,avg_volume_btc,avg_marketCap_btc,avg_close_sp500,avg_high_sp500,avg_low_sp500,avg_volume_sp500,avg_price_gold,avg_open_gold,avg_high_gold,avg_low_gold,avg_cpi
0,2014-09-01,386.944000,493.928009,372.239990,3.470730e+07,5.158621e+09,166.232205,166.869427,165.517781,1.003077e+08,1238.089545,1241.592273,1247.254545,1232.622727,100.93035
1,2014-10-01,338.321014,411.697998,289.295990,1.254540e+07,4.549893e+09,161.773412,162.867610,160.568778,1.714377e+08,1223.110870,1224.821304,1231.733043,1215.969565,100.91037
2,2014-11-01,378.046997,457.092987,320.626007,9.194440e+06,5.125958e+09,171.014914,171.322306,170.375505,8.268904e+07,1176.878500,1177.331500,1187.075500,1166.073000,100.72039
3,2014-12-01,320.192993,384.037994,304.231995,1.394290e+07,4.377511e+09,172.147657,173.362337,171.450040,1.384169e+08,1199.463636,1198.715455,1212.032273,1188.099545,100.40971
4,2015-01-01,217.464005,320.434998,171.509995,2.334820e+07,2.997692e+09,170.200545,171.452795,168.986943,1.591753e+08,1251.573636,1247.011818,1259.146364,1238.460000,99.77007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,2024-08-01,58969.898366,65593.244771,49121.237378,1.076531e+12,1.164523e+12,543.052790,546.400317,539.118593,5.657268e+07,2471.187727,2468.971818,2487.876364,2450.023636,133.50915
120,2024-09-01,63329.498129,66480.694710,52598.699662,9.352285e+11,1.251462e+12,557.957886,560.639557,554.611596,5.225307e+07,2571.027143,2565.121429,2583.977143,2552.207619,133.81515
121,2024-10-01,70215.185633,73577.209658,58895.207808,1.049276e+12,1.388608e+12,575.450625,577.757217,573.013605,4.243777e+07,2691.375217,2686.455652,2703.706522,2672.630435,134.11819
122,2024-11-01,96449.055813,99655.501079,66803.649996,2.214093e+12,1.908653e+12,589.617047,591.443891,586.573798,4.509215e+07,2649.864762,2654.383810,2674.145714,2630.376667,134.49432


In [64]:
final_df_new = (
    monthly_df_btc_new
    .join(monthly_df_sp500_new, on="month", how="inner")
    .join(monthly_df_gold_new, on="month", how="inner")
    .join(monthly_df_cpi_new, on="month", how="inner")
    .orderBy("month")
)

final_df_new.printSchema()

final_df_new.toPandas().to_csv('../data/final_df_new.csv', index=False)


root
 |-- month: date (nullable = true)
 |-- avg_close_btc: double (nullable = true)
 |-- avg_high_btc: double (nullable = true)
 |-- avg_low_btc: double (nullable = true)
 |-- avg_volume_btc: double (nullable = true)
 |-- avg_marketCap_btc: double (nullable = true)
 |-- avg_close_sp500: double (nullable = true)
 |-- avg_high_sp500: double (nullable = true)
 |-- avg_low_sp500: double (nullable = true)
 |-- avg_volume_sp500: double (nullable = true)
 |-- avg_price_gold: double (nullable = true)
 |-- avg_open_gold: double (nullable = true)
 |-- avg_high_gold: double (nullable = true)
 |-- avg_low_gold: double (nullable = true)
 |-- avg_cpi: double (nullable = true)

