In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import psycopg2
import os
from dotenv import load_dotenv
from psycopg2.extras import execute_values

In [2]:
# Symbols to fetch in a single batched request.
tickers = ["PFE", "MSFT", "AAPL", "GOOG", "AMZN"]

# Pull price data for every symbol over the 2023-10-05 .. 2023-10-10 window.
data = yf.download(
    tickers=tickers,
    start="2023-10-05",
    end="2023-10-10",
)

# Preview the first two rows of the downloaded frame.
data.head(2)

[*********************100%%**********************]  5 of 5 completed


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,AMZN,GOOG,MSFT,PFE,AAPL,AMZN,GOOG,MSFT,PFE,...,AAPL,AMZN,GOOG,MSFT,PFE,AAPL,AMZN,GOOG,MSFT,PFE
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-10-05,174.910004,125.959999,135.990005,319.359985,33.470001,174.910004,125.959999,135.990005,319.359985,33.470001,...,173.789993,126.709999,136.130005,319.089996,33.450001,48527900,39660600,15922900,16965600,21279400
2023-10-06,177.490005,127.959999,138.729996,327.26001,33.130001,177.490005,127.959999,138.729996,327.26001,33.130001,...,173.800003,124.160004,134.940002,316.549988,33.52,57224100,46795900,20819300,25645500,23750600


In [3]:
# Six months of AAPL history. The date index is copied into a trailing "Date"
# column and then replaced by a plain integer index.
apple = yf.Ticker("AAPL")
hist_ap = (
    apple.history(period="6mo")
    .assign(Date=lambda frame: frame.index)
    .reset_index(drop=True)
)
hist_ap.head(2)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date
0,160.780194,161.617898,159.344119,159.663254,50133100,0.0,0.0,2023-04-12 00:00:00-04:00
1,161.189081,165.347703,160.979647,165.108353,68445600,0.0,0.0,2023-04-13 00:00:00-04:00


In [4]:
# Six months of AMZN history. The date index is copied into a trailing "Date"
# column and then replaced by a plain integer index.
amazon = yf.Ticker("AMZN")
hist_az = (
    amazon.history(period="6mo")
    .assign(Date=lambda frame: frame.index)
    .reset_index(drop=True)
)
hist_az.head(2)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date
0,100.400002,100.510002,97.709999,97.830002,56735000,0.0,0.0,2023-04-12 00:00:00-04:00
1,98.949997,102.57,98.709999,102.400002,67925100,0.0,0.0,2023-04-13 00:00:00-04:00


In [5]:
# Six months of GOOG history. The date index is copied into a trailing "Date"
# column and then replaced by a plain integer index.
google = yf.Ticker("GOOG")
hist_gl = (
    google.history(period="6mo")
    .assign(Date=lambda frame: frame.index)
    .reset_index(drop=True)
)
hist_gl.head(2)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date
0,107.389999,107.586998,104.970001,105.220001,22761600,0.0,0.0,2023-04-12 00:00:00-04:00
1,106.470001,108.264999,106.440002,108.190002,21650700,0.0,0.0,2023-04-13 00:00:00-04:00


In [6]:
# Read connection settings from the environment (populated from a local .env
# file, if present) so that no credentials live in the notebook itself.
load_dotenv()

CODER_REDSHIFT_HOST = os.environ.get('CODER_REDSHIFT_HOST')
CODER_REDSHIFT_DB = os.environ.get('CODER_REDSHIFT_DB')
CODER_REDSHIFT_USER = os.environ.get('CODER_REDSHIFT_USER')
CODER_REDSHIFT_PASS = os.environ.get('CODER_REDSHIFT_PASS')
CODER_REDSHIFT_PORT = os.environ.get('CODER_REDSHIFT_PORT')

try:
    conn = psycopg2.connect(
        host=CODER_REDSHIFT_HOST,
        dbname=CODER_REDSHIFT_DB,
        user=CODER_REDSHIFT_USER,
        password=CODER_REDSHIFT_PASS,
        port=CODER_REDSHIFT_PORT,
    )
except psycopg2.Error as e:
    print("Unable to connect to Redshift.")
    print(e)
    # Stop here: every later cell needs `conn`. Continuing would either fail
    # with a NameError or silently reuse a stale connection left in the kernel
    # by a previous run.
    raise
else:
    print("Connected to Redshift successfully!")

Connected to Redshift successfully!


In [7]:
# Inspect the column dtypes; cargar_en_redshift maps these dtype names to SQL column types.
hist_az.dtypes

Open                                     float64
High                                     float64
Low                                      float64
Close                                    float64
Volume                                     int64
Dividends                                float64
Stock Splits                             float64
Date            datetime64[ns, America/New_York]
dtype: object

In [8]:
def cargar_en_redshift(conn, table_name, dataframe):
    """Create ``table_name`` if it does not exist and insert every row of ``dataframe``.

    The table schema is derived from the DataFrame dtypes:
    ``float64`` -> ``FLOAT``, ``int64`` -> ``BIGINT``, any ``datetime64``
    (naive or timezone-aware) -> ``TIMESTAMP``, anything else -> ``VARCHAR(255)``.

    Args:
        conn: Open database connection (a psycopg2 connection in this notebook).
        table_name: Target table. It is interpolated into the SQL text verbatim,
            so it must come from trusted code, never from user input.
        dataframe: Data to load. Its column names become the (double-quoted)
            column names of the table and of the INSERT statement.

    Failures while creating the table or inserting rows are printed and the
    transaction is rolled back instead of raising.
    """
    type_map = {
        'float64': 'FLOAT',
        # int64 needs an 8-byte column; a 4-byte INT overflows above 2_147_483_647.
        'int64': 'BIGINT',
    }

    def sql_type(dtype):
        # Match every datetime64 flavour (any unit, any timezone), not only one
        # specific timezone string.
        dtype_name = str(dtype)
        if dtype_name.startswith('datetime64'):
            return 'TIMESTAMP'
        return type_map.get(dtype_name, 'VARCHAR(255)')

    def quote_ident(name):
        # Double-quote the identifier (needed for names such as "Stock Splits")
        # and escape any embedded double quote.
        return '"' + str(name).replace('"', '""') + '"'

    cols = list(dataframe.columns)
    print(cols)
    quoted_cols = [quote_ident(name) for name in cols]

    # "column" SQL_TYPE pairs for the CREATE TABLE statement.
    column_defs = [
        f'{quoted} {sql_type(dtype)}'
        for quoted, dtype in zip(quoted_cols, dataframe.dtypes)
    ]
    table_schema = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(column_defs)});"

    # The column list comes from the DataFrame itself, so the function works for
    # any frame, not just one fixed set of columns.
    insert_sql = f"INSERT INTO {table_name} ({', '.join(quoted_cols)}) VALUES %s"

    cur = conn.cursor()
    try:
        cur.execute(table_schema)

        # itertuples keeps each column's own type and yields plain Python
        # scalars; DataFrame.values would upcast mixed columns and hand
        # numpy integers to the driver for all-integer frames.
        values = list(dataframe.itertuples(index=False, name=None))

        # The driver opens the transaction implicitly on the first statement;
        # commit through the connection, as drop_table does.
        execute_values(cur, insert_sql, values)
        conn.commit()
        print('Proceso terminado')
    except Exception as e:
        print(f"Error: {e}")
        conn.rollback()  # Undo the partial load on error
    finally:
        cur.close()

def drop_table(conn, table_name):
    """Drop ``table_name`` if it exists and commit the change.

    Args:
        conn: Open database connection (a psycopg2 connection in this notebook).
        table_name: Table to drop. It is interpolated into the SQL text
            verbatim, so it must come from trusted code, never from user input.

    Failures are printed and the transaction is rolled back instead of raising.
    """
    cur = conn.cursor()
    try:
        cur.execute(f"DROP TABLE IF EXISTS {table_name}")
        conn.commit()
        print('Proceso terminado')
    except Exception as e:
        print(f"Error: {e}")
        conn.rollback()  # Rollback the transaction on error
    finally:
        # Release the cursor on both the success and the error path.
        cur.close()

In [9]:
# Start from a clean slate: remove each target table before it is reloaded.
for table_name in ('amazon', 'apple', 'google'):
    drop_table(conn=conn, table_name=table_name)

Proceso terminado
Proceso terminado
Proceso terminado


In [10]:
# Create the 'amazon' table if needed and insert the AMZN history rows.
cargar_en_redshift(conn=conn, table_name='amazon', dataframe=hist_az)

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'Date']
Proceso terminado


In [11]:
# Create the 'apple' table if needed and insert the AAPL history rows.
cargar_en_redshift(conn=conn, table_name='apple', dataframe=hist_ap)

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'Date']
Proceso terminado


In [12]:
# Create the 'google' table if needed and insert the GOOG history rows.
cargar_en_redshift(conn=conn, table_name='google', dataframe=hist_gl)

['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'Date']
Proceso terminado
