# Model
## Using Random Forest

In [101]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os

## Pull Data from SQL

In [99]:
def pull_from_sql(table_name="full_stock_calculations"):
    """Load every row of ``table_name`` from PostgreSQL into a DataFrame.

    Connection settings are read from the ``DB_USER``, ``DB_PASSWORD``,
    ``DB_HOST``, ``DB_PORT`` and ``DB_NAME`` environment variables.

    Args:
        table_name: Name of the table to read. It is interpolated directly
            into the SQL text, so it must come from trusted code and never
            from user-supplied input.

    Returns:
        A ``pandas.DataFrame`` holding the table contents, or ``None`` when
        connecting or querying failed (the error is printed).
    """
    # Bound before the ``try`` so the ``finally`` clause can tell whether an
    # engine was ever created. Without this, a failure inside
    # ``create_engine`` would surface as an UnboundLocalError from the
    # cleanup code and hide the real error (and the ``None`` return).
    engine = None
    try:
        user = os.getenv("DB_USER")
        password = os.getenv("DB_PASSWORD")
        host = os.getenv("DB_HOST")
        port = os.getenv("DB_PORT")
        db_name = os.getenv("DB_NAME")

        engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}")

        # Read data into DataFrame
        query = f"SELECT * FROM {table_name};"
        df = pd.read_sql_query(query, engine)

        print("Successfully pulled SQL Data")
        return df

    except Exception as e:
        print(f"Error pulling data from SQL: {e}")
        return None
    finally:
        # Release pooled connections, but only if the engine exists.
        if engine is not None:
            engine.dispose()


## Training Models, then saving via joblib

In [90]:
def train_stock_models(full_data, min_rows=2):
    """Train and save one next-row price model per stock symbol.

    For every distinct value in ``full_data['symbol']`` this fits a
    ``RandomForestRegressor`` that predicts the following row's
    ``current_price`` from the feature columns, prints the mean squared
    error and R-squared on a 20% hold-out split, and writes the model to
    ``stock_price_predictor_<symbol>.joblib`` (relative to the current
    working directory).

    Args:
        full_data: DataFrame containing a ``symbol`` column, a
            ``current_price`` column and the feature columns listed in
            ``feature_columns`` below.
        min_rows: Minimum number of usable rows (after removing rows with
            missing features or target) a symbol needs in order to be
            trained. Symbols with fewer rows are skipped with a message
            instead of aborting the whole run. The default of 2 is the
            smallest count for which an 80/20 split leaves both the
            training and the test set non-empty.

    Returns:
        None. Results are reported via ``print`` and the saved model files.
    """
    feature_columns = ['open_price', 'percent_change', 'cumulative_return',
                       'ma_10_day', 'ma_30_day', 'volatility_30_day',
                       'ema_10', 'ema_30']

    symbols = full_data['symbol'].unique()

    for symbol in symbols:
        print(f"Training model for {symbol}...")

        # Filter data
        symbol_data = full_data[full_data['symbol'] == symbol].copy()

        # Shift target: each row is paired with the next row's price.
        # NOTE(review): this is only "the next period" if the rows arrive in
        # chronological order — confirm the query/table guarantees that.
        symbol_data['target'] = symbol_data['current_price'].shift(-1)

        # Only drop rows that are missing something the model actually uses;
        # NaNs in unrelated columns must not throw away training data.
        symbol_data = symbol_data.dropna(subset=feature_columns + ['target'])

        # train_test_split raises when a split would be empty; skip this
        # symbol rather than aborting training for all remaining symbols.
        if len(symbol_data) < min_rows:
            print(f"Skipping {symbol}: only {len(symbol_data)} usable rows "
                  f"(need at least {min_rows}).")
            continue

        # Define features and target variable
        features = symbol_data[feature_columns]
        target = symbol_data['target']

        # Split the data into training and testing sets
        # NOTE(review): train_test_split shuffles by default, so hold-out
        # rows are interleaved in time with training rows and the scores
        # below may be optimistic for time-series data.
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

        # Train the model
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Evaluate the model
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        print(f"Mean Squared Error for {symbol}: {mse}")
        print(f"R-squared for {symbol}: {r2}")

        # Save the model
        joblib.dump(model, f'stock_price_predictor_{symbol}.joblib')
        print(f"Model for {symbol} saved as stock_price_predictor_{symbol}.joblib")


## Main 

In [None]:
def main():
    """Pull the stock data from SQL and train one model per symbol."""
    # Load data into a DataFrame
    full_data = pull_from_sql()

    # pull_from_sql signals failure by returning None (it has already printed
    # the error); training on None would only crash with an unrelated
    # TypeError, so stop here with a clear message instead.
    if full_data is None:
        print("No data available; skipping model training.")
        return

    # Run and train models
    train_stock_models(full_data)


if __name__ == "__main__":
    main()