# Data Pipelining With Polygon

## Python Imports

In [1]:
# Standard Library
import datetime
import io
import os
import random
import sys
import time
import warnings

from datetime import datetime, timedelta
from pathlib import Path

# Data Handling
import numpy as np
import pandas as pd

# Data Visualization
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib.ticker import FormatStrFormatter, FuncFormatter, MultipleLocator

# Data Sources
import yfinance as yf

# Statistical Analysis
import statsmodels.api as sm

# Machine Learning
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Suppress warnings
# NOTE: with no category or module filter, this silences every warning for the
# rest of the session, including deprecation notices raised by imported libraries.
warnings.filterwarnings("ignore")

## Add Directories To Path

In [2]:
# Add the source subdirectory to the system path so that `config` can be
# imported from settings.py. The notebook is assumed to run three directory
# levels below the website base directory.
current_directory = Path.cwd()
website_base_directory = current_directory.parent.parent.parent
src_directory = website_base_directory / "src"

# Only append when missing so that re-running this cell does not add duplicates
if str(src_directory) not in sys.path:
    sys.path.append(str(src_directory))

# Import settings.py (must come after the path update above, otherwise the
# module cannot be found)
from settings import config

# Add the configured source directory to the path
SOURCE_DIR = config("SOURCE_DIR")
source_dir_entry = str(Path(SOURCE_DIR))
if source_dir_entry not in sys.path:
    sys.path.append(source_dir_entry)

# Read the other configured directories
BASE_DIR = config("BASE_DIR")
CONTENT_DIR = config("CONTENT_DIR")
POSTS_DIR = config("POSTS_DIR")
PAGES_DIR = config("PAGES_DIR")
PUBLIC_DIR = config("PUBLIC_DIR")
DATA_DIR = config("DATA_DIR")
DATA_MANUAL_DIR = config("DATA_MANUAL_DIR")

# Print system path so the effective import search order is visible in the output
for i, path in enumerate(sys.path):
    print(f"{i}: {path}")

0: /usr/lib/python313.zip
1: /usr/lib/python3.13
2: /usr/lib/python3.13/lib-dynload
3: 
4: /home/jared/python-virtual-envs/general_313/lib/python3.13/site-packages
5: /home/jared/Cloud_Storage/Dropbox/Websites/jaredszajkowski.github.io_congo/src


## Track Index Dependencies

In [3]:
# Create file to track markdown dependencies
# The path is relative, so the file is written to the current working directory.
dep_file = Path("index_dep.txt")
# Writing an empty string creates the file or truncates an existing one, so each
# run starts with an empty list; the call returns the number of characters written (0).
dep_file.write_text("")

0

## Python Functions

In [4]:
from export_track_md_deps import export_track_md_deps
from polygon_fetch_full_history import polygon_fetch_full_history
from polygon_pull_data import polygon_pull_data

## Function Usage

### Polygon Fetch Full History

In [5]:
from load_api_keys import load_api_keys
from polygon import RESTClient

# Load API keys from the environment
api_keys = load_api_keys()

# Get the environment variable for where data is stored
DATA_DIR = config("DATA_DIR")

# Open client connection
client = RESTClient(api_key=api_keys["POLYGON_KEY"])

# Create an empty, explicitly typed DataFrame to pass as the existing history.
# It has its own name so the seed frame is not confused with the fetched result.
empty_history_df = pd.DataFrame({
    'Date': pd.Series(dtype="datetime64[ns]"),
    'open': pd.Series(dtype="float64"),
    'high': pd.Series(dtype="float64"),
    'low': pd.Series(dtype="float64"),
    'close': pd.Series(dtype="float64"),
    'volume': pd.Series(dtype="float64"),
    'vwap': pd.Series(dtype="float64"),
    'transactions': pd.Series(dtype="int64"),
    'otc': pd.Series(dtype="object")
})

# Example usage - daily bars for AMZN starting 2025-01-01
# (timespan="day" with multiplier=1; verbose=True prints progress for each pull)
df = polygon_fetch_full_history(
    client=client,
    ticker="AMZN",
    timespan="day",
    multiplier=1,
    adjusted=True,
    existing_history_df=empty_history_df,
    current_start=datetime(2025, 1, 1),
    free_tier=True,
    verbose=True,
)

Pulling day data for 2025-01-01 00:00:00 thru 2025-06-30 00:00:00 for AMZN...



New data:
                   Date     open     high      low   close       volume  \
0   2025-01-02 05:00:00  222.030  225.150  218.190  220.22   33956579.0   
1   2025-01-03 05:00:00  222.505  225.360  221.620  224.19   27515606.0   
2   2025-01-06 05:00:00  226.780  228.835  224.840  227.61   31849831.0   
3   2025-01-07 05:00:00  227.900  228.381  221.460  222.11   28084164.0   
4   2025-01-08 05:00:00  223.185  223.520  220.200  222.13   25033292.0   
..                  ...      ...      ...      ...     ...          ...   
117 2025-06-24 04:00:00  212.135  214.340  211.045  212.77   38378757.0   
118 2025-06-25 04:00:00  214.615  216.030  211.110  211.99   31755698.0   
119 2025-06-26 04:00:00  213.120  218.035  212.010  217.12   50480814.0   
120 2025-06-27 04:00:00  219.920  223.300  216.740  223.30  119217138.0   
121 2025-06-30 04:00:00  223.520  223.820  219.120  219.39   58887780.0   

         vwap  transactions   otc  
0    221.2745        449631  None  
1    223.7050    

Pulling day data for 2025-06-29 04:00:00 thru 2025-12-26 04:00:00 for AMZN...



New data:
                   Date     open     high     low   close      volume  \
0   2025-06-30 04:00:00  223.520  223.820  219.12  219.39  58887780.0   
1   2025-07-01 04:00:00  219.500  221.875  217.93  220.46  39256830.0   
2   2025-07-02 04:00:00  219.730  221.600  219.06  219.92  30894178.0   
3   2025-07-03 04:00:00  221.820  224.010  221.36  223.41  29632353.0   
4   2025-07-07 04:00:00  223.000  224.290  222.37  223.47  36604139.0   
..                  ...      ...      ...     ...     ...         ...   
120 2025-12-18 05:00:00  225.705  229.225  224.41  226.76  50267599.0   
121 2025-12-19 05:00:00  226.760  229.125  225.58  227.35  85539391.0   
122 2025-12-22 05:00:00  228.610  229.480  226.71  228.43  32189837.0   
123 2025-12-23 05:00:00  229.055  232.445  228.73  232.14  29230205.0   
124 2025-12-24 05:00:00  232.130  232.950  231.33  232.38  11392602.0   

         vwap  transactions   otc  
0    220.6316        673189  None  
1    220.1508        544150  None  
2    

Pulling day data for 2025-12-23 05:00:00 thru 2026-06-21 05:00:00 for AMZN...



New data:
                 Date     open     high     low   close      volume      vwap  \
0 2025-12-23 05:00:00  229.055  232.445  228.73  232.14  29230205.0  231.3567   
1 2025-12-24 05:00:00  232.130  232.950  231.33  232.38  11392602.0  232.3350   

   transactions   otc  
0        435723  None  
1        193487  None  
Combined data:
                   Date     open     high     low   close      volume  \
0   2025-01-02 05:00:00  222.030  225.150  218.19  220.22  33956579.0   
1   2025-01-03 05:00:00  222.505  225.360  221.62  224.19  27515606.0   
2   2025-01-06 05:00:00  226.780  228.835  224.84  227.61  31849831.0   
3   2025-01-07 05:00:00  227.900  228.381  221.46  222.11  28084164.0   
4   2025-01-08 05:00:00  223.185  223.520  220.20  222.13  25033292.0   
..                  ...      ...      ...     ...     ...         ...   
241 2025-12-18 05:00:00  225.705  229.225  224.41  226.76  50267599.0   
242 2025-12-19 05:00:00  226.760  229.125  225.58  227.35  85539391.0   
24

In [6]:
# Copy this <!-- INSERT_polygon_fetch_full_history_HERE --> to index_temp.md
# Render the fetched history as a markdown table with five decimal places,
# then hand it to the export helper together with the dependency file.
fetch_history_markdown = df.to_markdown(floatfmt=".5f")
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_fetch_full_history.md",
    content=fetch_history_markdown,
)

✅ Exported and tracked: polygon_fetch_full_history.md


### Polygon Pull Data

In [7]:
# Take a single timestamp so that year, month, and day always describe the
# same date, even if the cell happens to run across midnight.
today = datetime.now()
current_year = today.year
current_month = today.month
current_day = today.day

# Start two calendar years back. When run on February 29 the same day does not
# exist two years earlier (datetime raises ValueError), so fall back to
# February 28 of that year.
try:
    start_date = datetime(current_year - 2, current_month, current_day)
except ValueError:
    start_date = datetime(current_year - 2, 2, 28)

# Seconds to pause after the pull before any further API calls.
# NOTE(review): 12 s presumably spaces requests for a 5-calls-per-minute
# free-tier limit - confirm against the current Polygon plan.
RATE_LIMIT_PAUSE_SECONDS = 12

# Example usage - daily
df = polygon_pull_data(
    base_directory=DATA_DIR,
    ticker="AMZN",
    source="Polygon",
    asset_class="Equities",
    start_date=start_date,
    timespan="day",
    multiplier=1,
    adjusted=True,
    force_existing_check=True,
    free_tier=True,
    verbose=True,
    excel_export=True,
    pickle_export=True,
    output_confirmation=True,
)

time.sleep(RATE_LIMIT_PAUSE_SECONDS)  # Sleep for 12 seconds to avoid hitting rate limits

File found...updating the AMZN day data.
Existing data:
                   Date     open      high       low   close      volume  \
0   2023-07-28 04:00:00  129.690  133.0100  129.3300  132.21  46269781.0   
1   2023-07-31 04:00:00  133.200  133.8700  132.3800  133.68  41901516.0   
2   2023-08-01 04:00:00  133.550  133.6900  131.6199  131.69  42250989.0   
3   2023-08-02 04:00:00  130.154  130.2300  126.8200  128.21  50988614.0   
4   2023-08-03 04:00:00  127.480  129.8400  126.4100  128.91  90855736.0   
..                  ...      ...       ...       ...     ...         ...   
584 2025-11-24 05:00:00  222.555  227.3300  222.2700  226.28  54318223.0   
585 2025-11-25 05:00:00  226.380  230.5200  223.8000  229.67  39379339.0   
586 2025-11-26 05:00:00  230.740  231.7474  228.7700  229.16  38497719.0   
587 2025-11-28 05:00:00  231.240  233.2850  230.2200  233.22  20250425.0   
588 2025-12-01 05:00:00  233.220  235.7970  232.2500  233.88  42903594.0   

         vwap  transactions   o

New data:
                   Date     open     high     low   close      volume  \
0   2023-12-27 05:00:00  153.560  154.780  153.12  153.34  31434733.0   
1   2023-12-28 05:00:00  153.720  154.080  152.95  153.38  27057002.0   
2   2023-12-29 05:00:00  153.100  153.890  151.03  151.94  39823204.0   
3   2024-01-02 05:00:00  151.540  152.380  148.39  149.93  47339424.0   
4   2024-01-03 05:00:00  149.200  151.050  148.33  148.47  49425495.0   
..                  ...      ...      ...     ...     ...         ...   
117 2024-06-14 04:00:00  183.080  183.720  182.23  183.66  25456410.0   
118 2024-06-17 04:00:00  182.520  185.000  181.22  184.06  35601907.0   
119 2024-06-18 04:00:00  183.735  184.290  181.43  182.81  36659157.0   
120 2024-06-20 04:00:00  182.910  186.510  182.72  186.10  44726779.0   
121 2024-06-21 04:00:00  187.800  189.275  185.86  189.08  72931754.0   

         vwap  transactions   otc  
0    153.6438        311463  None  
1    153.5269        301925  None  
2    

Pulling day data for 2024-06-20 04:00:00 thru 2024-12-17 04:00:00 for AMZN...

New data:
                   Date    open     high       low   close      volume  \
0   2024-06-20 04:00:00  182.91  186.510  182.7200  186.10  44726779.0   
1   2024-06-21 04:00:00  187.80  189.275  185.8600  189.08  72931754.0   
2   2024-06-24 04:00:00  189.33  191.000  185.3300  185.57  50610379.0   
3   2024-06-25 04:00:00  186.81  188.840  185.4200  186.34  45898475.0   
4   2024-06-26 04:00:00  186.92  194.800  186.2600  193.61  65103893.0   
..                  ...     ...      ...       ...     ...         ...   
120 2024-12-10 05:00:00  226.09  229.060  224.2002  225.04  31199864.0   
121 2024-12-11 05:00:00  226.41  231.200  226.2600  230.26  35385785.0   
122 2024-12-12 05:00:00  229.83  231.090  227.6300  228.97  28204084.0   
123 2024-12-13 05:00:00  228.40  230.200  225.8608  227.46  28768080.0   
124 2024-12-16 05:00:00  230.23  233.000  228.0100  232.93  37552096.0   

         vwap  transac

Pulling day data for 2024-12-15 05:00:00 thru 2025-06-13 05:00:00 for AMZN...



New data:
                   Date    open      high     low   close      volume  \
0   2024-12-16 05:00:00  230.23  233.0000  228.01  232.93  37552096.0   
1   2024-12-17 05:00:00  232.39  232.7300  227.85  231.15  35948131.0   
2   2024-12-18 05:00:00  230.77  231.3999  220.11  220.52  43281443.0   
3   2024-12-19 05:00:00  224.91  226.0900  222.92  223.29  39918739.0   
4   2024-12-20 05:00:00  219.84  226.2100  218.73  224.92  88279184.0   
..                  ...     ...       ...     ...     ...         ...   
118 2025-06-09 04:00:00  214.75  217.8500  212.88  216.98  38102502.0   
119 2025-06-10 04:00:00  216.78  217.6900  214.15  217.61  31303317.0   
120 2025-06-11 04:00:00  217.41  218.4000  212.89  213.20  39325981.0   
121 2025-06-12 04:00:00  211.78  213.5800  211.33  213.24  27639991.0   
122 2025-06-13 04:00:00  209.96  214.0500  209.62  212.10  29337763.0   

         vwap  transactions   otc  
0    231.5484        430659  None  
1    230.7897        433266  None  
2    

Pulling day data for 2025-06-12 04:00:00 thru 2025-12-09 04:00:00 for AMZN...

New data:
                   Date     open    high      low   close      volume  \
0   2025-06-12 04:00:00  211.780  213.58  211.330  213.24  27639991.0   
1   2025-06-13 04:00:00  209.960  214.05  209.620  212.10  29337763.0   
2   2025-06-16 04:00:00  212.310  217.06  211.600  216.10  33284158.0   
3   2025-06-17 04:00:00  215.195  217.41  214.560  214.82  32086262.0   
4   2025-06-18 04:00:00  215.090  217.96  212.340  212.52  44360509.0   
..                  ...      ...     ...      ...     ...         ...   
119 2025-12-02 05:00:00  235.005  238.97  233.550  234.42  45785363.0   
120 2025-12-03 05:00:00  233.350  233.38  230.610  232.38  35495058.0   
121 2025-12-04 05:00:00  232.770  233.50  226.800  229.11  45680903.0   
122 2025-12-05 05:00:00  230.320  231.24  228.547  229.53  33117277.0   
123 2025-12-08 05:00:00  229.590  230.83  226.270  226.89  35019072.0   

         vwap  transactions   otc 

Pulling day data for 2025-12-07 05:00:00 thru 2026-06-05 05:00:00 for AMZN...

New data:
                  Date     open     high       low   close      volume  \
0  2025-12-08 05:00:00  229.590  230.830  226.2700  226.89  35019072.0   
1  2025-12-09 05:00:00  226.840  228.570  225.1100  227.92  25840496.0   
2  2025-12-10 05:00:00  228.805  232.420  228.4600  231.78  38790432.0   
3  2025-12-11 05:00:00  230.710  232.110  228.6901  230.28  28248522.0   
4  2025-12-12 05:00:00  229.870  230.080  225.1200  226.19  35636939.0   
5  2025-12-15 05:00:00  227.930  227.930  221.5000  222.54  47286085.0   
6  2025-12-16 05:00:00  223.035  223.660  221.1304  222.56  39293678.0   
7  2025-12-17 05:00:00  224.655  225.190  220.9900  221.27  44030569.0   
8  2025-12-18 05:00:00  225.705  229.225  224.4100  226.76  50267599.0   
9  2025-12-19 05:00:00  226.760  229.125  225.5800  227.35  85539391.0   
10 2025-12-22 05:00:00  228.610  229.480  226.7100  228.43  32189837.0   
11 2025-12-23 05:00:00 

Exporting AMZN day data to Pickle...
The first and last date of day data for AMZN is: 


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
0,2023-07-28 04:00:00,129.69,133.01,129.33,132.21,46269781.0,131.8837,413438,


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
605,2025-12-24 05:00:00,232.13,232.95,231.33,232.38,11392602.0,232.335,193487,


Number of rows after data update: 606
Number of rows added during update: 17
Polygon data complete for AMZN day data.
--------------------


In [8]:
# Copy this <!-- INSERT_polygon_pull_data_HERE --> to index_temp.md
# Render the pulled data as a markdown table with five decimal places,
# then hand it to the export helper together with the dependency file.
pull_data_markdown = df.to_markdown(floatfmt=".5f")
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_pull_data.md",
    content=pull_data_markdown,
)

✅ Exported and tracked: polygon_pull_data.md
