# Data Pipelining With Polygon

## Python Imports

In [1]:
# Standard Library
import datetime
import io
import os
import random
import sys
import time
import warnings

from datetime import datetime, timedelta
from pathlib import Path

# Data Handling
import numpy as np
import pandas as pd

# Data Visualization
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib.ticker import FormatStrFormatter, FuncFormatter, MultipleLocator

# Data Sources
import yfinance as yf

# Statistical Analysis
import statsmodels.api as sm

# Machine Learning
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Suppress warnings
warnings.filterwarnings("ignore")

## Add Directories To Path

In [2]:
# Make the website's `src` directory importable so `settings.py` can be loaded.
# The website root is taken to be three levels above the current working directory.
# NOTE(review): this depends on where the notebook is executed from — confirm if it is moved.
current_directory = Path.cwd()
website_base_directory = current_directory.parent.parent.parent
src_directory = website_base_directory / "src"
if str(src_directory) not in sys.path:
    sys.path.append(str(src_directory))

# Import settings.py (must come after the sys.path change above, which is why
# this import is not in the top-of-notebook import cell)
from settings import config

# Add the configured source directory to the path (skipped if already present)
SOURCE_DIR = config("SOURCE_DIR")
if str(Path(SOURCE_DIR)) not in sys.path:
    sys.path.append(str(Path(SOURCE_DIR)))

# Read the other configured directories
BASE_DIR = config("BASE_DIR")
CONTENT_DIR = config("CONTENT_DIR")
POSTS_DIR = config("POSTS_DIR")
PAGES_DIR = config("PAGES_DIR")
PUBLIC_DIR = config("PUBLIC_DIR")
DATA_DIR = config("DATA_DIR")
DATA_MANUAL_DIR = config("DATA_MANUAL_DIR")

# Print system path so the resulting import search order is visible
for i, path in enumerate(sys.path):
    print(f"{i}: {path}")

0: /usr/lib/python313.zip
1: /usr/lib/python3.13
2: /usr/lib/python3.13/lib-dynload
3: 
4: /home/jared/python-virtual-envs/general_313/lib/python3.13/site-packages
5: /home/jared/Cloud_Storage/Dropbox/Websites/jaredszajkowski.github.io_congo/src


## Track Index Dependencies

In [3]:
# Start from an empty dependency-tracking file; later cells pass `dep_file`
# to export_track_md_deps
dep_file = Path("index_dep.txt")
dep_file.write_text(data="")

0

## Python Functions

In [4]:
from export_track_md_deps import export_track_md_deps
from polygon_fetch_full_history import polygon_fetch_full_history
from polygon_pull_data import polygon_pull_data

## Function Usage

### Polygon Fetch Full History

In [5]:
from load_api_keys import load_api_keys
from polygon import RESTClient

# Load API keys from the environment
api_keys = load_api_keys()

# Open client connection
client = RESTClient(api_key=api_keys["POLYGON_KEY"])

# Empty, explicitly typed frame passed as the "existing history" so the fetch
# starts with no prior rows. Kept under its own name so `df` only ever refers
# to the fetched result.
empty_history_df = pd.DataFrame({
    'Date': pd.Series(dtype="datetime64[ns]"),
    'open': pd.Series(dtype="float64"),
    'high': pd.Series(dtype="float64"),
    'low': pd.Series(dtype="float64"),
    'close': pd.Series(dtype="float64"),
    'volume': pd.Series(dtype="float64"),
    'vwap': pd.Series(dtype="float64"),
    'transactions': pd.Series(dtype="int64"),
    'otc': pd.Series(dtype="object")
})

# Example usage - daily bars (timespan="day", multiplier=1) from 2025-01-01 onward
df = polygon_fetch_full_history(
    client=client,
    ticker="AMZN",
    timespan="day",
    multiplier=1,
    adjusted=True,
    existing_history_df=empty_history_df,
    current_start=datetime(2025, 1, 1),
    free_tier=True,
    verbose=True,
)

Pulling day data for 2025-01-01 00:00:00 thru 2025-06-30 00:00:00 for AMZN...



New data:
                   Date     open     high      low   close       volume  \
0   2025-01-02 05:00:00  222.030  225.150  218.190  220.22   33956579.0   
1   2025-01-03 05:00:00  222.505  225.360  221.620  224.19   27515606.0   
2   2025-01-06 05:00:00  226.780  228.835  224.840  227.61   31849831.0   
3   2025-01-07 05:00:00  227.900  228.381  221.460  222.11   28084164.0   
4   2025-01-08 05:00:00  223.185  223.520  220.200  222.13   25033292.0   
..                  ...      ...      ...      ...     ...          ...   
117 2025-06-24 04:00:00  212.135  214.340  211.045  212.77   38378757.0   
118 2025-06-25 04:00:00  214.615  216.030  211.110  211.99   31755698.0   
119 2025-06-26 04:00:00  213.120  218.035  212.010  217.12   50480814.0   
120 2025-06-27 04:00:00  219.920  223.300  216.740  223.30  119217138.0   
121 2025-06-30 04:00:00  223.520  223.820  219.120  219.39   58887780.0   

         vwap  transactions   otc  
0    221.2745        449631  None  
1    223.7050    

Pulling day data for 2025-06-29 04:00:00 thru 2025-12-26 04:00:00 for AMZN...

New data:
                   Date     open     high     low   close      volume  \
0   2025-06-30 04:00:00  223.520  223.820  219.12  219.39  58887780.0   
1   2025-07-01 04:00:00  219.500  221.875  217.93  220.46  39256830.0   
2   2025-07-02 04:00:00  219.730  221.600  219.06  219.92  30894178.0   
3   2025-07-03 04:00:00  221.820  224.010  221.36  223.41  29632353.0   
4   2025-07-07 04:00:00  223.000  224.290  222.37  223.47  36604139.0   
..                  ...      ...      ...     ...     ...         ...   
120 2025-12-18 05:00:00  225.705  229.225  224.41  226.76  50267599.0   
121 2025-12-19 05:00:00  226.760  229.125  225.58  227.35  85539391.0   
122 2025-12-22 05:00:00  228.610  229.480  226.71  228.43  32189837.0   
123 2025-12-23 05:00:00  229.055  232.445  228.73  232.14  29230205.0   
124 2025-12-24 05:00:00  232.130  232.950  231.33  232.38  11392602.0   

         vwap  transactions   otc 

Pulling day data for 2025-12-23 05:00:00 thru 2026-06-21 05:00:00 for AMZN...

New data:
                 Date     open     high     low   close      volume      vwap  \
0 2025-12-23 05:00:00  229.055  232.445  228.73  232.14  29230205.0  231.3567   
1 2025-12-24 05:00:00  232.130  232.950  231.33  232.38  11392602.0  232.3350   
2 2025-12-26 05:00:00  232.035  232.990  231.18  232.52  15945174.0  232.4646   
3 2025-12-29 05:00:00  231.940  232.600  230.77  232.07  19791422.0  231.8925   

   transactions   otc  
0        435723  None  
1        193487  None  
2        277601  None  
3        352623  None  
Combined data:
                   Date     open     high     low   close      volume  \
0   2025-01-02 05:00:00  222.030  225.150  218.19  220.22  33956579.0   
1   2025-01-03 05:00:00  222.505  225.360  221.62  224.19  27515606.0   
2   2025-01-06 05:00:00  226.780  228.835  224.84  227.61  31849831.0   
3   2025-01-07 05:00:00  227.900  228.381  221.46  222.11  28084164.0   
4   2

In [6]:
# Placeholder to paste into index_temp.md:
#   <!-- INSERT_polygon_fetch_full_history_HERE -->
# The first rows of the fetched frame are rendered as a markdown table and
# handed to export_track_md_deps together with the dependency file.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_fetch_full_history.md",
    content=df.head().to_markdown(floatfmt=".5f"),
)

✅ Exported and tracked: polygon_fetch_full_history.md


### Polygon Pull Data

In [7]:
# Capture "now" once so year, month, and day all come from the same instant
# (separate datetime.now() calls could straddle midnight).
now = datetime.now()
current_year, current_month, current_day = now.year, now.month, now.day

# Start the pull window two years before today, at midnight.
try:
    start_date = datetime(current_year - 2, current_month, current_day)
except ValueError:
    # Only reachable on Feb 29: the year two years earlier is never a leap
    # year, so fall back to Feb 28 instead of crashing.
    start_date = datetime(current_year - 2, current_month, 28)

# Example usage - daily
df = polygon_pull_data(
    base_directory=DATA_DIR,
    ticker="AMZN",
    source="Polygon",
    asset_class="Equities",
    start_date=start_date,
    timespan="day",
    multiplier=1,
    adjusted=True,
    force_existing_check=True,
    free_tier=True,
    verbose=True,
    excel_export=True,
    pickle_export=True,
    output_confirmation=True,
)

# Pause for 12 seconds before any further API calls to avoid hitting rate limits.
# NOTE(review): 12 s matches a 5-requests-per-minute cap — presumably the
# free-tier limit; confirm against the current Polygon plan.
time.sleep(12)

File found...updating the AMZN day data.
Existing data:
                   Date     open     high       low   close      volume  \
0   2023-07-28 04:00:00  129.690  133.010  129.3300  132.21  46269781.0   
1   2023-07-31 04:00:00  133.200  133.870  132.3800  133.68  41901516.0   
2   2023-08-01 04:00:00  133.550  133.690  131.6199  131.69  42250989.0   
3   2023-08-02 04:00:00  130.154  130.230  126.8200  128.21  50988614.0   
4   2023-08-03 04:00:00  127.480  129.840  126.4100  128.91  90855736.0   
..                  ...      ...      ...       ...     ...         ...   
601 2025-12-18 05:00:00  225.705  229.225  224.4100  226.76  50267599.0   
602 2025-12-19 05:00:00  226.760  229.125  225.5800  227.35  85539391.0   
603 2025-12-22 05:00:00  228.610  229.480  226.7100  228.43  32189837.0   
604 2025-12-23 05:00:00  229.055  232.445  228.7300  232.14  29230205.0   
605 2025-12-24 05:00:00  232.130  232.950  231.3300  232.38  11392602.0   

         vwap  transactions   otc  
0    13

New data:
                   Date     open     high     low   close      volume  \
0   2024-01-02 05:00:00  151.540  152.380  148.39  149.93  47339424.0   
1   2024-01-03 05:00:00  149.200  151.050  148.33  148.47  49425495.0   
2   2024-01-04 05:00:00  145.590  147.380  144.05  144.57  56039807.0   
3   2024-01-05 05:00:00  144.690  146.590  144.53  145.24  45153147.0   
4   2024-01-08 05:00:00  146.740  149.400  146.15  149.10  46757053.0   
..                  ...      ...      ...     ...     ...         ...   
118 2024-06-21 04:00:00  187.800  189.275  185.86  189.08  72931754.0   
119 2024-06-24 04:00:00  189.330  191.000  185.33  185.57  50610379.0   
120 2024-06-25 04:00:00  186.810  188.840  185.42  186.34  45898475.0   
121 2024-06-26 04:00:00  186.920  194.800  186.26  193.61  65103893.0   
122 2024-06-27 04:00:00  195.005  199.840  194.20  197.85  74397491.0   

         vwap  transactions   otc  
0    149.8527        476433  None  
1    149.4058        416955  None  
2    

Pulling day data for 2024-06-26 04:00:00 thru 2024-12-23 04:00:00 for AMZN...

New data:
                   Date     open      high     low   close      volume  \
0   2024-06-26 04:00:00  186.920  194.8000  186.26  193.61  65103893.0   
1   2024-06-27 04:00:00  195.005  199.8400  194.20  197.85  74397491.0   
2   2024-06-28 04:00:00  197.730  198.8500  192.50  193.25  76930192.0   
3   2024-07-01 04:00:00  193.490  198.2957  192.82  197.20  41192011.0   
4   2024-07-02 04:00:00  197.280  200.4300  195.93  200.00  45600013.0   
..                  ...      ...       ...     ...     ...         ...   
120 2024-12-16 05:00:00  230.230  233.0000  228.01  232.93  37552096.0   
121 2024-12-17 05:00:00  232.390  232.7300  227.85  231.15  35948131.0   
122 2024-12-18 05:00:00  230.770  231.3999  220.11  220.52  43281443.0   
123 2024-12-19 05:00:00  224.910  226.0900  222.92  223.29  39918739.0   
124 2024-12-20 05:00:00  219.840  226.2100  218.73  224.92  88279184.0   

         vwap  transac

Pulling day data for 2024-12-19 05:00:00 thru 2025-06-17 05:00:00 for AMZN...

New data:
                   Date     open    high       low   close      volume  \
0   2024-12-19 05:00:00  224.910  226.09  222.9200  223.29  39918739.0   
1   2024-12-20 05:00:00  219.840  226.21  218.7300  224.92  88279184.0   
2   2024-12-23 05:00:00  225.010  226.88  223.9000  225.06  28070007.0   
3   2024-12-24 05:00:00  226.940  229.14  226.1300  229.05  15007497.0   
4   2024-12-26 05:00:00  228.500  228.50  226.6706  227.05  16174500.0   
..                  ...      ...     ...       ...     ...         ...   
117 2025-06-11 04:00:00  217.410  218.40  212.8900  213.20  39325981.0   
118 2025-06-12 04:00:00  211.780  213.58  211.3300  213.24  27639991.0   
119 2025-06-13 04:00:00  209.960  214.05  209.6200  212.10  29337763.0   
120 2025-06-16 04:00:00  212.310  217.06  211.6000  216.10  33284158.0   
121 2025-06-17 04:00:00  215.195  217.41  214.5600  214.82  32086262.0   

         vwap  transac

Pulling day data for 2025-06-16 04:00:00 thru 2025-12-13 04:00:00 for AMZN...

New data:
                   Date     open    high       low   close      volume  \
0   2025-06-16 04:00:00  212.310  217.06  211.6000  216.10  33284158.0   
1   2025-06-17 04:00:00  215.195  217.41  214.5600  214.82  32086262.0   
2   2025-06-18 04:00:00  215.090  217.96  212.3400  212.52  44360509.0   
3   2025-06-20 04:00:00  214.680  214.89  208.2709  209.69  75350733.0   
4   2025-06-23 04:00:00  209.790  210.39  207.3101  208.47  37311725.0   
..                  ...      ...     ...       ...     ...         ...   
121 2025-12-08 05:00:00  229.590  230.83  226.2700  226.89  35019072.0   
122 2025-12-09 05:00:00  226.840  228.57  225.1100  227.92  25840496.0   
123 2025-12-10 05:00:00  228.805  232.42  228.4600  231.78  38790432.0   
124 2025-12-11 05:00:00  230.710  232.11  228.6901  230.28  28248522.0   
125 2025-12-12 05:00:00  229.870  230.08  225.1200  226.19  35636939.0   

         vwap  transac

Pulling day data for 2025-12-11 05:00:00 thru 2026-06-09 05:00:00 for AMZN...

New data:
                  Date     open     high       low   close      volume  \
0  2025-12-11 05:00:00  230.710  232.110  228.6901  230.28  28248522.0   
1  2025-12-12 05:00:00  229.870  230.080  225.1200  226.19  35636939.0   
2  2025-12-15 05:00:00  227.930  227.930  221.5000  222.54  47286085.0   
3  2025-12-16 05:00:00  223.035  223.660  221.1304  222.56  39293678.0   
4  2025-12-17 05:00:00  224.655  225.190  220.9900  221.27  44030569.0   
5  2025-12-18 05:00:00  225.705  229.225  224.4100  226.76  50267599.0   
6  2025-12-19 05:00:00  226.760  229.125  225.5800  227.35  85539391.0   
7  2025-12-22 05:00:00  228.610  229.480  226.7100  228.43  32189837.0   
8  2025-12-23 05:00:00  229.055  232.445  228.7300  232.14  29230205.0   
9  2025-12-24 05:00:00  232.130  232.950  231.3300  232.38  11392602.0   
10 2025-12-26 05:00:00  232.035  232.990  231.1800  232.52  15945174.0   
11 2025-12-29 05:00:00 

Exporting AMZN day data to Pickle...
The first and last date of day data for AMZN is: 


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
0,2023-07-28 04:00:00,129.69,133.01,129.33,132.21,46269781.0,131.8837,413438,


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
607,2025-12-29 05:00:00,231.94,232.6,230.77,232.07,19791422.0,231.8925,352623,


Number of rows after data update: 608
Number of rows added during update: 2
Polygon data complete for AMZN day data.
--------------------


In [8]:
# Placeholder to paste into index_temp.md:
#   <!-- INSERT_polygon_pull_data_HERE -->
# The first rows of the pulled frame are rendered as a markdown table and
# handed to export_track_md_deps together with the dependency file.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_pull_data.md",
    content=df.head().to_markdown(floatfmt=".5f"),
)

✅ Exported and tracked: polygon_pull_data.md
