# Data Pipelining With Polygon

## Python Imports

In [1]:
# Standard Library
import datetime
import io
import os
import random
import sys
import time
import warnings

from datetime import datetime, timedelta
from pathlib import Path

# Data Handling
import numpy as np
import pandas as pd

# Data Visualization
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib.ticker import FormatStrFormatter, FuncFormatter, MultipleLocator

# Data Sources
import yfinance as yf

# Statistical Analysis
import statsmodels.api as sm

# Machine Learning
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Suppress warnings
warnings.filterwarnings("ignore")

## Add Directories To Path

In [2]:
# Make "<site root>/src" importable so settings.py (which provides `config`)
# can be loaded. The site root is taken to be three levels above the current
# working directory, so this cell depends on where the kernel was started.
current_directory = Path.cwd()
website_base_directory = current_directory.parent.parent.parent
src_directory = website_base_directory / "src"
if str(src_directory) not in sys.path:
    sys.path.append(str(src_directory))

# Import settings.py (only resolvable once src_directory is on sys.path)
from settings import config

# Read each configured directory exactly once
BASE_DIR = config("BASE_DIR")
CONTENT_DIR = config("CONTENT_DIR")
POSTS_DIR = config("POSTS_DIR")
PAGES_DIR = config("PAGES_DIR")
PUBLIC_DIR = config("PUBLIC_DIR")
SOURCE_DIR = config("SOURCE_DIR")
DATA_DIR = config("DATA_DIR")
DATA_MANUAL_DIR = config("DATA_MANUAL_DIR")

# Add the configured source directory to the path, skipping it when it is
# already present (e.g. when it is the same folder as src_directory)
source_path = str(Path(SOURCE_DIR))
if source_path not in sys.path:
    sys.path.append(source_path)

# Print system path
for i, path in enumerate(sys.path):
    print(f"{i}: {path}")

0: /usr/lib/python313.zip
1: /usr/lib/python3.13
2: /usr/lib/python3.13/lib-dynload
3: 
4: /home/jared/python-virtual-envs/general_313/lib/python3.13/site-packages
5: /home/jared/Cloud_Storage/Dropbox/Websites/jaredszajkowski.github.io/src


## Track Index Dependencies

In [3]:
# Dependency tracker for the markdown exports: bind the path and write an
# empty string to it in one step, which creates the file or truncates an
# existing one. The expression's value (characters written) is the cell output.
(dep_file := Path("index_dep.txt")).write_text("")

0

## Python Functions

In [4]:
from export_track_md_deps import export_track_md_deps
from polygon_fetch_full_history import polygon_fetch_full_history
from polygon_pull_data import polygon_pull_data

## Function Usage

### Polygon Fetch Full History

In [5]:
from load_api_keys import load_api_keys
from polygon import RESTClient

# Load API keys from the environment
api_keys = load_api_keys()

# Open client connection
client = RESTClient(api_key=api_keys["POLYGON_KEY"])

# Column name -> dtype for an empty price history. Passing a typed, empty
# frame as the existing history means the fetch starts from no prior rows.
history_schema = {
    "Date": "datetime64[ns]",
    "open": "float64",
    "high": "float64",
    "low": "float64",
    "close": "float64",
    "volume": "float64",
    "vwap": "float64",
    "transactions": "int64",
    "otc": "object",
}
empty_history_df = pd.DataFrame(
    {column: pd.Series(dtype=dtype) for column, dtype in history_schema.items()}
)

# Example usage - daily bars (timespan="day", multiplier=1) from 2025-01-01.
# The seed frame keeps its own name so `df` only ever refers to the result.
df = polygon_fetch_full_history(
    client=client,
    ticker="AMZN",
    timespan="day",
    multiplier=1,
    adjusted=True,
    existing_history_df=empty_history_df,
    current_start=datetime(2025, 1, 1),
    free_tier=True,
    verbose=True,
)

Pulling day data for 2025-01-01 00:00:00 thru 2025-06-30 00:00:00 for AMZN...



New data:
                   Date     open     high      low   close       volume  \
0   2025-01-02 05:00:00  222.030  225.150  218.190  220.22   33956579.0   
1   2025-01-03 05:00:00  222.505  225.360  221.620  224.19   27515606.0   
2   2025-01-06 05:00:00  226.780  228.835  224.840  227.61   31849831.0   
3   2025-01-07 05:00:00  227.900  228.381  221.460  222.11   28084164.0   
4   2025-01-08 05:00:00  223.185  223.520  220.200  222.13   25033292.0   
..                  ...      ...      ...      ...     ...          ...   
117 2025-06-24 04:00:00  212.135  214.340  211.045  212.77   38378757.0   
118 2025-06-25 04:00:00  214.615  216.030  211.110  211.99   31755698.0   
119 2025-06-26 04:00:00  213.120  218.035  212.010  217.12   50480814.0   
120 2025-06-27 04:00:00  219.920  223.300  216.740  223.30  119217138.0   
121 2025-06-30 04:00:00  223.520  223.820  219.120  219.39   58887780.0   

         vwap  transactions   otc  
0    221.2745        449631  None  
1    223.7050    

Pulling day data for 2025-06-29 04:00:00 thru 2025-12-26 04:00:00 for AMZN...

New data:
                  Date     open      high     low   close      volume  \
0  2025-06-30 04:00:00  223.520  223.8200  219.12  219.39  58887780.0   
1  2025-07-01 04:00:00  219.500  221.8750  217.93  220.46  39256830.0   
2  2025-07-02 04:00:00  219.730  221.6000  219.06  219.92  30894178.0   
3  2025-07-03 04:00:00  221.820  224.0100  221.36  223.41  29632353.0   
4  2025-07-07 04:00:00  223.000  224.2900  222.37  223.47  36604139.0   
..                 ...      ...       ...     ...     ...         ...   
94 2025-11-11 05:00:00  248.410  249.7499  247.23  249.10  23563960.0   
95 2025-11-12 05:00:00  250.235  250.3700  243.75  244.20  31190063.0   
96 2025-11-13 05:00:00  243.050  243.7500  236.50  237.58  41401638.0   
97 2025-11-14 05:00:00  235.060  238.7300  232.89  234.69  38956619.0   
98 2025-11-17 05:00:00  233.250  234.6000  229.19  232.87  59918908.0   

        vwap  transactions   otc  

In [6]:
# Placeholder to copy into index_temp.md:
#   <!-- INSERT_polygon_fetch_full_history_HERE -->
# The fetched history is rendered as a markdown table with floats shown to
# five decimal places and handed to the export helper together with the
# dependency-tracking file.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_fetch_full_history.md",
    content=df.to_markdown(floatfmt=".5f"),
)

✅ Exported and tracked: polygon_fetch_full_history.md


### Polygon Pull Data

In [7]:
# Take one "now" snapshot so year, month and day cannot disagree if the cell
# happens to run across midnight.
now = datetime.now()
current_year = now.year
current_month = now.month
current_day = now.day

# Start two years back, at midnight. A leap year minus two is never a leap
# year, so 29 February has no counterpart there and datetime() would raise
# ValueError; fall back to 28 February in that one case.
start_day = 28 if (current_month, current_day) == (2, 29) else current_day
start_date = datetime(current_year - 2, current_month, start_day)

# Example usage - daily
df = polygon_pull_data(
    base_directory=DATA_DIR,
    ticker="AMZN",
    source="Polygon",
    asset_class="Equities",
    start_date=start_date,
    timespan="day",
    multiplier=1,
    adjusted=True,
    force_existing_check=True,
    free_tier=True,
    verbose=True,
    excel_export=True,
    pickle_export=True,
    output_confirmation=True,
)

time.sleep(12)  # Sleep for 12 seconds to avoid hitting rate limits

File found...updating the AMZN day data.
Existing data:
                   Date     open      high       low   close      volume  \
0   2023-07-28 04:00:00  129.690  133.0100  129.3300  132.21  46269781.0   
1   2023-07-31 04:00:00  133.200  133.8700  132.3800  133.68  41901516.0   
2   2023-08-01 04:00:00  133.550  133.6900  131.6199  131.69  42250989.0   
3   2023-08-02 04:00:00  130.154  130.2300  126.8200  128.21  50988614.0   
4   2023-08-03 04:00:00  127.480  129.8400  126.4100  128.91  90855736.0   
..                  ...      ...       ...       ...     ...         ...   
574 2025-11-10 05:00:00  248.340  251.7500  245.5900  248.40  36476474.0   
575 2025-11-11 05:00:00  248.410  249.7499  247.2300  249.10  23563960.0   
576 2025-11-12 05:00:00  250.235  250.3700  243.7500  244.20  31190063.0   
577 2025-11-13 05:00:00  243.050  243.7500  236.5000  237.58  41401638.0   
578 2025-11-14 05:00:00  235.060  238.7300  232.8900  234.69  38956619.0   

         vwap  transactions   o

New data:
                   Date    open      high      low   close      volume  \
0   2023-11-20 05:00:00  145.13  146.6300  144.725  146.13  41978766.0   
1   2023-11-21 05:00:00  143.91  144.0500  141.500  143.90  71225992.0   
2   2023-11-22 05:00:00  144.57  147.7400  144.570  146.71  45700002.0   
3   2023-11-24 05:00:00  146.70  147.2000  145.320  146.74  22378379.0   
4   2023-11-27 05:00:00  147.53  149.2600  146.880  147.73  53762428.0   
..                  ...     ...       ...      ...     ...         ...   
118 2024-05-10 04:00:00  189.16  189.8920  186.930  187.48  34141771.0   
119 2024-05-13 04:00:00  188.00  188.3100  185.360  186.57  24898613.0   
120 2024-05-14 04:00:00  183.82  187.7200  183.450  187.07  38698155.0   
121 2024-05-15 04:00:00  185.97  186.7193  182.730  185.99  75459927.0   
122 2024-05-16 04:00:00  185.60  187.3100  183.460  183.63  38834450.0   

         vwap  transactions   otc  
0    145.9368        380926  None  
1    143.0430        566880  

Pulling day data for 2024-05-15 04:00:00 thru 2024-11-11 04:00:00 for AMZN...

New data:
                   Date    open      high       low   close      volume  \
0   2024-05-15 04:00:00  185.97  186.7193  182.7300  185.99  75459927.0   
1   2024-05-16 04:00:00  185.60  187.3100  183.4600  183.63  38834450.0   
2   2024-05-17 04:00:00  183.76  185.3000  183.3500  184.70  33175655.0   
3   2024-05-20 04:00:00  184.34  186.6650  183.2800  183.54  30511768.0   
4   2024-05-21 04:00:00  182.30  183.2600  180.7500  183.15  50839129.0   
..                  ...     ...       ...       ...     ...         ...   
119 2024-11-04 05:00:00  196.45  197.3300  194.3101  195.78  38492062.0   
120 2024-11-05 05:00:00  196.04  199.8200  195.9900  199.50  30564784.0   
121 2024-11-06 05:00:00  200.01  207.5500  199.1400  207.09  72292167.0   
122 2024-11-07 05:00:00  207.44  212.2500  207.1900  210.05  52878383.0   
123 2024-11-08 05:00:00  209.72  209.9633  207.4400  208.18  36075846.0   

         v

Pulling day data for 2024-11-07 05:00:00 thru 2025-05-06 05:00:00 for AMZN...

New data:
                   Date     open        high     low   close      volume  \
0   2024-11-07 05:00:00  207.440  212.250000  207.19  210.05  52878383.0   
1   2024-11-08 05:00:00  209.720  209.963300  207.44  208.18  36075846.0   
2   2024-11-11 05:00:00  208.500  209.650000  205.59  206.84  35456012.0   
3   2024-11-12 05:00:00  208.370  209.540000  206.01  208.91  38942918.0   
4   2024-11-13 05:00:00  209.400  215.090000  209.14  214.10  46212943.0   
..                  ...      ...         ...     ...     ...         ...   
117 2025-04-30 04:00:00  182.170  185.050000  178.85  184.42  55176543.0   
118 2025-05-01 04:00:00  190.630  191.807112  187.50  190.20  74265963.0   
119 2025-05-02 04:00:00  191.435  192.880000  186.40  189.98  77903487.0   
120 2025-05-05 04:00:00  186.510  188.180000  185.53  186.35  35217469.0   
121 2025-05-06 04:00:00  184.570  187.930000  183.85  185.01  29314055.0   

Pulling day data for 2025-05-05 04:00:00 thru 2025-11-01 04:00:00 for AMZN...

New data:
                   Date     open     high     low   close       volume  \
0   2025-05-05 04:00:00  186.510  188.180  185.53  186.35   35217469.0   
1   2025-05-06 04:00:00  184.570  187.930  183.85  185.01   29314055.0   
2   2025-05-07 04:00:00  185.560  190.990  185.01  188.71   44002926.0   
3   2025-05-08 04:00:00  191.430  194.330  188.82  192.08   41043620.0   
4   2025-05-09 04:00:00  193.375  194.690  191.16  193.06   29663143.0   
..                  ...      ...      ...     ...     ...          ...   
121 2025-10-27 04:00:00  227.660  228.400  225.54  226.97   38266995.0   
122 2025-10-28 04:00:00  228.215  231.485  226.21  229.25   47099924.0   
123 2025-10-29 04:00:00  231.672  232.820  227.76  230.30   52035936.0   
124 2025-10-30 04:00:00  227.060  228.440  222.75  222.86  102252888.0   
125 2025-10-31 04:00:00  250.100  250.500  243.98  244.22  166340683.0   

         vwap  transac

Pulling day data for 2025-10-30 04:00:00 thru 2026-04-28 04:00:00 for AMZN...

New data:
                  Date     open      high     low   close       volume  \
0  2025-10-30 04:00:00  227.060  228.4400  222.75  222.86  102252888.0   
1  2025-10-31 04:00:00  250.100  250.5000  243.98  244.22  166340683.0   
2  2025-11-03 05:00:00  255.360  258.6000  252.90  254.00   95997714.0   
3  2025-11-04 05:00:00  250.380  257.0100  248.66  249.32   51546311.0   
4  2025-11-05 05:00:00  249.030  251.0000  246.16  250.20   40610602.0   
5  2025-11-06 05:00:00  249.155  250.3800  242.17  243.04   46004201.0   
6  2025-11-07 05:00:00  242.900  244.9000  238.49  244.41   46374294.0   
7  2025-11-10 05:00:00  248.340  251.7500  245.59  248.40   36476474.0   
8  2025-11-11 05:00:00  248.410  249.7499  247.23  249.10   23563960.0   
9  2025-11-12 05:00:00  250.235  250.3700  243.75  244.20   31190063.0   
10 2025-11-13 05:00:00  243.050  243.7500  236.50  237.58   41401638.0   
11 2025-11-14 05:00:00 

Exporting AMZN day data to Pickle...
The first and last date of day data for AMZN is: 


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
0,2023-07-28 04:00:00,129.69,133.01,129.33,132.21,46269781.0,131.8837,413438,


Unnamed: 0,Date,open,high,low,close,volume,vwap,transactions,otc
579,2025-11-17 05:00:00,233.25,234.6,229.19,232.87,59918908.0,231.891,893852,


Number of rows after data update: 580
Number of rows added during update: 1
Polygon data complete for AMZN day data.
--------------------


In [8]:
# Placeholder to copy into index_temp.md:
#   <!-- INSERT_polygon_pull_data_HERE -->
# The updated history is rendered as a markdown table with floats shown to
# five decimal places and handed to the export helper together with the
# dependency-tracking file.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="polygon_pull_data.md",
    content=df.to_markdown(floatfmt=".5f"),
)

✅ Exported and tracked: polygon_pull_data.md
