### Load Data

### Load Model

In [8]:
import mlflow
import mlflow.tensorflow
import tempfile
from mlflow.client import MlflowClient
import joblib
from datetime import timedelta
import pandas as pd
import numpy as np
from src.db.database import engine

MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
features = ['return', 'return_3d_avg', 'return_5d_avg', 'return_7d_avg', 'sentiment_fill_1_1d']

def load_model(ticker: str):
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    with tempfile.TemporaryDirectory() as tmp:
        loaded_model = mlflow.tensorflow.load_model(f"models:/model-{ticker}/latest", tmp)
    return loaded_model

def load_scaler(ticker: str):
    client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
    run_id = client.get_registered_model(f'model-{ticker}').latest_versions[0].run_id
    with tempfile.TemporaryDirectory() as tmp:
        path = client.download_artifacts(run_id=run_id, path='scaler', dst_path=tmp) 
        print(path)
        scaler = joblib.load(path + f"/scaler-{ticker}.pkl")
    return scaler

def load_data(ticker: str, n_days: int = 30):
    sql = f"""
        SELECT "Date", "return" AS return, "sentiment" AS sentiment, "company" AS company
        FROM data_stock_price
        WHERE "company" = '{ticker}'
        ORDER BY "Date" DESC
        LIMIT {n_days}
        ;
    """
    df = pd.read_sql(sql, engine)
    
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True)
    return df

def processing_data(df: pd.DataFrame):
    df.sort_index(inplace=True)
    df['sentiment_fill_1_1d'] = df['sentiment']
    df['return_3d_avg'] = df['return'].rolling(window=3, min_periods=1).mean()
    df['return_5d_avg'] = df['return'].rolling(window=5, min_periods=1).mean()
    df['return_7d_avg'] = df['return'].rolling(window=7, min_periods=1).mean()
    
    df = df[features]
    return df

def change_last_sentiment(df: pd.DataFrame, sentiment: float):
    df.loc[df.index[-1], 'sentiment'] = sentiment
    return df

def transform_data(df, scaler):
    scaled_data = scaler.transform(df[-30:])
    sequence = scaled_data.reshape(1, 30, len(features))
    return sequence
    
def reshape_y_value(y_value, n_features):
    return np.concatenate([y_value.reshape(-1, 1), np.zeros((len(y_value), n_features-1))], axis=1)

def create_predict_dataframe(*, ticker: str, sentiment: float, return_value: float, index: list):
    df = pd.DataFrame({
        "company": ticker,
        "sentiment": sentiment,
        "return": return_value,
    }, index=index)
    return df

In [9]:
def predict(ticker: str, sentiment: float = 0, n_days: int = 1):
    model = load_model(ticker=ticker)
    scaler = load_scaler(ticker=ticker)
    raw_data = load_data(ticker=ticker, n_days=40)
    raw_data = change_last_sentiment(raw_data, sentiment=sentiment)
    last_date = raw_data.index[-1].to_pydatetime()
    return_data = pd.DataFrame()
    
    for i in range(n_days):
        processed_data = processing_data(df=raw_data)
        scaled_data = transform_data(df=processed_data, scaler=scaler)    
        predict_value = model.predict(scaled_data)
        original_predict_value = scaler.inverse_transform(reshape_y_value(y_value = predict_value, n_features = len(features)))[:, 0]
        predict_date = last_date + timedelta(i+1)
        predict_data = create_predict_dataframe(ticker=ticker, sentiment=sentiment, return_value=original_predict_value, index=[predict_date])
        return_data = pd.concat([return_data, predict_data])
        raw_data = pd.concat([raw_data, predict_data])
        
    return return_data

In [10]:
a = predict('TLKM', -0.4, 3)

print(a)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 65.85it/s]   
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 27.59it/s]


/var/folders/l2/dlhzwvkx7dl40tcq669ct3km0000gn/T/tmp76fdkkpy/scaler
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
           company  sentiment    return
2025-04-12    TLKM       -0.4 -0.038873
2025-04-13    TLKM       -0.4 -0.082365
2025-04-14    TLKM       -0.4 -0.166554


In [28]:
pd.concat([pd.DataFrame(), pd.DataFrame({'q': 1}, index=[0])])

Unnamed: 0,q
0,1


In [9]:
df = load_data('BBCA', 0.1)

array([0.20902435])

In [28]:
import datetime 
last_date = df_raw.index[-1].to_pydatetime()

In [42]:
new_df = pd.DataFrame({
    "sentiment": 0.12, "return": 1, "company": "BBCA"
}, index=[last_date + datetime.timedelta(1)])

In [43]:
pd.concat([df_raw, new_df])

Unnamed: 0,return,sentiment,company
2025-03-03,4.451039,-0.117302,BBCA
2025-03-04,0.568182,0.445104,BBCA
2025-03-05,1.694915,0.0783,BBCA
2025-03-06,-0.277778,0.169492,BBCA
2025-03-07,-0.557103,-0.027778,BBCA
2025-03-10,0.0,-0.05571,BBCA
2025-03-11,0.0,-0.0739,BBCA
2025-03-12,2.240896,0.0055,BBCA
2025-03-13,-1.643836,0.22409,BBCA
2025-03-14,-2.506964,-0.164384,BBCA


In [73]:
scaler = load_scaler('BBCA')

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 15.22it/s]


/var/folders/l2/dlhzwvkx7dl40tcq669ct3km0000gn/T/tmpqs2w5h7v/scaler


In [62]:
load_bca_model = load_model('BBCA')

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 54.81it/s]   


/var/folders/l2/dlhzwvkx7dl40tcq669ct3km0000gn/T/tmpiei7r1jj


  saveable.load_own_variables(weights_store.get(inner_path))


In [17]:
from mlflow.client import MlflowClient

client = MlflowClient(tracking_uri="http://127.0.0.1:5000")


In [27]:
import os

os.getcwd()

'/Users/oz/Learning/Langgraph/stock_analyst'

In [29]:
from tempfile import TemporaryDirectory
import time
with TemporaryDirectory(dir=f"{os.getcwd()}") as tmp:
    time.sleep(10)

In [31]:
import joblib
run_id = client.get_registered_model('model-BBCA').latest_versions[0].run_id
scaler = joblib.load(client.download_artifacts(run_id=run_id, path='scaler', dst_path='./artifacts') + "/scaler-BBCA.pkl")

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 32.86it/s]


In [32]:
scaler

In [76]:
a.close()

In [37]:
import pandas as pd
import numpy as np
import psycopg2

db_params = {
        'dbname': 'postgres',
        'user': 'user',
        'password': 'password',
        'host': 'localhost',
        'port': '5432'
    }
    
# Fetch data from PostgreSQL
try:
    conn = psycopg2.connect(**db_params)
    query = """
        SELECT "Date", "return" AS return, "sentiment" AS sentiment, "company" AS company
        FROM data_stock_price
        WHERE "company" = 'ANTM'
        ORDER BY "Date" DESC
        LIMIT 50
        ;
    """
    df = pd.read_sql(query, conn)
    df.set_index('Date', inplace=True)
    df['sentiment_fill_1_1d'] = df['sentiment']
    df['return_3d_avg'] = df['return'].rolling(window=3, min_periods=1).mean()
    df['return_5d_avg'] = df['return'].rolling(window=5, min_periods=1).mean()
    df['return_7d_avg'] = df['return'].rolling(window=7, min_periods=1).mean()
    features = ['return', 'return_3d_avg', 'return_5d_avg', 'return_7d_avg', 'sentiment_fill_1_1d']
    df = df[features]
    df.sort_index(inplace=True)
    
except Exception as e:
    print(f"Error fetching data: {e}")
    
finally:
    conn.close()

  df = pd.read_sql(query, conn)


In [52]:
df[-1:]

Unnamed: 0_level_0,return,return_3d_avg,return_5d_avg,return_7d_avg,sentiment_fill_1_1d
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-04-11,3.98773,3.98773,3.98773,3.98773,1.050847


In [55]:
df.loc[df.index[-1], 'sentiment_fill_1_1d'] = 0.12
df[-1:]

Unnamed: 0_level_0,return,return_3d_avg,return_5d_avg,return_7d_avg,sentiment_fill_1_1d
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-04-11,3.98773,3.98773,3.98773,3.98773,0.12


In [38]:
scaled = scaler.transform(df)

In [43]:
sequence = scaled[-30:].reshape(1, 30, 5)

In [63]:
y_pred = load_bca_model.predict(sequence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step


In [49]:
def reshape_y_value(y_value, n_features):
    return np.concatenate([y_value.reshape(-1, 1), np.zeros((len(y_value), n_features-1))], axis=1)

In [64]:
reshape_y_value(y_pred, 5)

array([[0.30828035, 0.        , 0.        , 0.        , 0.        ]])

array([[0.30828035]], dtype=float32)

In [51]:
scaler.inverse_transform(reshape_y_value(y_pred, 5))[:, 0]

array([-0.55643569])

In [None]:
scaler.inverse_transform(np.concatenate([y_test.reshape(-1, 1), np.zeros((len(y_test), len(features)-1))], axis=1))[:, 0]

In [4]:
import pandas as pd
import numpy as np
import psycopg2

def predict_stock_return(loaded_model, scaler):
    # Database connection parameters (update with your credentials)
    db_params = {
        'dbname': 'postgres',
        'user': 'user',
        'password': 'password',
        'host': 'localhost',
        'port': '5432'
    }
    
    # Fetch data from PostgreSQL
    try:
        conn = psycopg2.connect(**db_params)
        query = """
            SELECT "Date", "return" AS return, "sentiment" AS sentiment, "company" AS company
            FROM data_stock_price
            WHERE "RBC company" = 'ANTM'
            ORDER BY "Date";
        """
        df = pd.read_sql(query, conn)
        
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None
    finally:
        conn.close()

    # Process data
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    
    # Forward fill sentiment
    df['sentiment_fill_1_1d'] = df['sentiment'].fillna(method='ffill')
    
    # Calculate rolling averages
    df['return_3d_avg'] = df['return'].rolling(window=3, min_periods=1).mean()
    df['return_5d_avg'] = df['return'].rolling(window=5, min_periods=1).mean()
    df['return_7d_avg'] = df['return'].rolling(window=7, min_periods=1).mean()
    
    # Select and prepare features
    features = ['return', 'return_3d_avg', 'return_5d_avg', 'return_7d_avg', 'sentiment_fill_1_1d']
    data = df[features].ffill().dropna()
    
    if len(data) < 30:
        print("Insufficient data for prediction. Need at least 30 days.")
        return None
    
    # Scale features
    scaled_data = scaler.transform(data)
    
    # Create sequence for prediction
    sequence = scaled_data[-30:].reshape(1, 30, len(features))
    
    # Predict
    predicted_scaled = loaded_model.predict(sequence)
    
    # Inverse transform the prediction
    dummy = np.zeros((1, len(features)))
    dummy[0, 0] = predicted_scaled[0][0]
    predicted_return = scaler.inverse_transform(dummy)[0][0]
    
    return predicted_return

# Example usage:
# Assuming loaded_model and scaler are already defined in the environment
# predicted_return = predict_stock_return(loaded_model, scaler)
# print(f"Predicted Return: {predicted_return}")