In [1]:
import datetime
import os
from urllib.parse import quote

import mlflow
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine


* 'schema_extra' has been renamed to 'json_schema_extra'



# 1. Connection Settings

In [2]:
# Load variables from the .env file into the process environment
load_dotenv()

# Host of the EC2 instance, read from the environment; two endpoints are
# derived from it, one on port 8000 and the MLflow one on port 5000.
EC2_TRACKING_SERVER_HOST = os.getenv('EC2_TRACKING_SERVER_HOST')
EC2_ENDPOINT = f"http://{EC2_TRACKING_SERVER_HOST}:8000"
MLFLOW_ENDPOINT = f"http://{EC2_TRACKING_SERVER_HOST}:5000"

# Parameters for the RDS PostgreSQL instance
PG_HOST = os.getenv('PG_HOST')
PG_PORT = os.getenv('PG_PORT')
PG_DATABASE = os.getenv('PG_DATABASE')
PG_USER = os.getenv('PG_USER')
PG_PASSWORD = os.getenv('PG_PASSWORD')

# Create the PostgreSQL (psycopg2 driver) connection string.
# The user name and password are percent-encoded so that characters such as
# '@', ':', '/' or '%' inside the credentials cannot corrupt the URL structure.
# str() keeps the previous rendering ("None") when a variable is not set.
_pg_user = quote(str(PG_USER), safe='')
_pg_password = quote(str(PG_PASSWORD), safe='')
db_url = f'postgresql+psycopg2://{_pg_user}:{_pg_password}@{PG_HOST}:{PG_PORT}/{PG_DATABASE}'

In [3]:
def select_from_rds(query):
    """Run ``query`` against the database at ``db_url`` and return the result.

    Args:
        query: SQL passed unchanged to ``pd.read_sql``.

    Returns:
        pandas.DataFrame holding the rows returned by the query.

    Raises:
        Whatever ``create_engine``, ``engine.connect`` or ``pd.read_sql`` raise;
        the connection and the engine are released in every case.
    """
    engine = create_engine(db_url)
    try:
        # The context manager closes the connection even if read_sql fails.
        with engine.connect() as connection:
            return pd.read_sql(query, connection)
    finally:
        # A fresh engine is built on every call, so drop its pool here instead
        # of leaving pooled connections behind.
        engine.dispose()

# 2. Experiment Setup

In [4]:
# Point MLflow at the tracking server URL held in MLFLOW_ENDPOINT
mlflow.set_tracking_uri(MLFLOW_ENDPOINT)

In [5]:
def get_experiment_id(experiment_name, artifact_location="s3://mlflow-artifacts-krystianpi"):
    """Return the ID of the MLflow experiment ``experiment_name``, creating it if needed.

    The experiment is looked up by name first, so only a genuinely missing
    experiment triggers creation. Errors raised by MLflow (unreachable
    tracking server, rejected request, ...) propagate to the caller instead of
    being reported as "experiment exists".

    Args:
        experiment_name: Name of the experiment to look up or create.
        artifact_location: Artifact location passed to
            ``mlflow.create_experiment`` when the experiment is created.
            Not used when the experiment already exists.

    Returns:
        The experiment ID as returned by MLflow.
    """
    existing = mlflow.get_experiment_by_name(experiment_name)
    if existing is not None:
        print(f'Experiment {experiment_name} exists')
        return existing.experiment_id

    print('Trying to create an experiment...')
    return mlflow.create_experiment(experiment_name, artifact_location=artifact_location)

In [6]:
# Minute-resolution timestamp used to make the run name unique per execution
today = f'{datetime.datetime.now():%Y-%m-%d-%H-%M}'
run_name = 'test_run_' + today

# ID of the experiment the run is logged under.
# NOTE: the name `id` shadows the Python builtin; it is kept because the
# cell that starts the run reads it.
id = get_experiment_id('test_notebook')

Trying to create an experiment...


# 3. Experiment 

In [7]:
# Start a run under the selected experiment; the body is intentionally empty,
# so nothing is logged inside the run.
with mlflow.start_run(
    experiment_id=id,
    run_name=run_name,
) as run:
    pass