In [1]:
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

## 1. Flask Configuration

In [3]:
from flask import Flask, request, jsonify

In [5]:
# Create the Flask application object; the database configuration and the
# /process_data route defined further down are attached to this instance.
app = Flask(__name__)

## 2. Models

In [10]:
from abc import ABC, abstractmethod
from statsmodels.tsa.ar_model import AutoReg
from sklearn.model_selection import TimeSeriesSplit
from itertools import product
from sklearn.metrics import r2_score

In [22]:
def create_model(model_type, data):
    """Build the forecasting model that corresponds to ``model_type``.

    Parameters
    ----------
    model_type : str
        Either ``'AR'`` or ``'ARIMA'``.
    data : object
        Input data handed unchanged to the model constructor.

    Returns
    -------
    Model
        A new, untrained model instance.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names. Previously the
        function fell through and returned ``None``, which only surfaced later
        as an ``AttributeError`` at the call site.
    """
    if model_type == 'AR':
        return AR_model(data)
    elif model_type == 'ARIMA':
        # NOTE(review): ARIMA_model must be defined before this branch is used;
        # confirm it exists elsewhere in the notebook.
        return ARIMA_model(data)
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'AR' or 'ARIMA'"
    )

In [23]:
class Model(ABC):
    """Common interface shared by the forecasting models in this notebook.

    A concrete subclass receives its input data at construction time and has
    to implement both ``train`` and ``forecast``.
    """

    def __init__(self, data):
        # Keep a reference to the input so subclasses can train on it.
        self.data = data

    @abstractmethod
    def train(self):
        """Train the model (abstract; every subclass supplies its own)."""

    @abstractmethod
    def forecast(self, forecast_days):
        """Predict with the trained model (abstract; every subclass supplies its own)."""

### 2.1 AR Model

In [24]:
class AR_model(Model):
    """Autoregressive (AR) forecaster for the 'Close' column of a DataFrame.

    The trend term and the lag order are selected by a grid search that is
    scored with the mean R2 over a 3-fold time-series cross-validation. The
    winning configuration is then refit on every non-NaN 'Close' observation.
    """

    def __init__(self, data):
        """Store the input frame and initialise the model state.

        Parameters
        ----------
        data : pandas.DataFrame
            Price history with a 'Close' column.
        """
        super().__init__(data)  # the base class stores the frame on ``self.data``
        self.trained_model = None  # fitted AutoReg results, set by train()
        self.model_type = 'AR'

    def _close_history(self):
        """Return the rows of ``self.data`` whose 'Close' value is not NaN."""
        return self.data.dropna(subset=['Close'])

    def train(self, max_lag=None):
        """Grid-search the AR hyper-parameters and fit the best model.

        Parameters
        ----------
        max_lag : int, optional
            Largest lag order to try. ``None`` (the default) keeps the full
            search space ``1 .. len(data) - 1``; pass a small value to bound
            the run time of the search.

        Returns
        -------
        The fitted AutoReg results object, also stored on
        ``self.trained_model``. If the final fit fails, the error is printed
        and the previous value of ``self.trained_model`` (``None`` on a fresh
        instance) is returned.

        Raises
        ------
        ValueError
            If there are too few observations to cross-validate, or if no
            (trend, lags) combination could be evaluated on every fold.
        """
        # Handle NaNs
        data = self._close_history()

        n_splits = 3
        if len(data) < n_splits + 1:
            raise ValueError(
                f"Need at least {n_splits + 1} non-NaN 'Close' observations "
                f"for {n_splits}-fold time-series cross-validation, got {len(data)}"
            )

        # Define parameter grid for tuning
        trends = ['n', 'c', 't', 'ct']
        min_lag = 1
        highest_lag = len(data) - 1
        if max_lag is not None:
            highest_lag = min(highest_lag, max_lag)
        lags_range = range(min_lag, highest_lag + 1)

        # The folds do not depend on the candidate parameters, so slice once.
        tscv = TimeSeriesSplit(n_splits=n_splits)  # Time series cross-validation
        close = data['Close']
        folds = [
            (close.iloc[train_index].values, close.iloc[val_index].values)
            for train_index, val_index in tscv.split(data)
        ]

        best_r2 = -float('inf')
        best_params = None

        # Silence fitting warnings only for the duration of the search instead
        # of changing the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            for trend, lags in product(trends, lags_range):
                r2_sum = 0.0  # must be reset for every candidate
                evaluated_on_all_folds = True
                for train_values, val_values in folds:
                    try:
                        model = AutoReg(train_values, lags=lags, trend=trend).fit()
                        predictions = model.predict(
                            start=len(train_values),
                            end=len(train_values) + len(val_values) - 1,
                        )
                        r2_sum += r2_score(val_values, predictions)
                    except Exception:
                        # Typically the lag order is too large for this fold.
                        # A candidate that cannot be scored on every fold is
                        # dropped so partial sums never bias the average.
                        evaluated_on_all_folds = False
                        break

                if not evaluated_on_all_folds:
                    continue

                # Average R2 score across folds
                avg_r2 = r2_sum / n_splits

                # Update best parameters if better R2 found
                if avg_r2 > best_r2:
                    best_r2 = avg_r2
                    best_params = (trend, lags)

            if best_params is None:
                raise ValueError(
                    "No AR configuration could be evaluated on every "
                    "cross-validation fold; provide more data"
                )

            best_trend, best_lags = best_params

            print(f"Best R2 score: {best_r2:.4f}")
            print(f"Best parameters: trend={best_params[0]}, lags={best_params[1]}")

            # Fit the best model on the entire dataset
            try:
                self.trained_model = AutoReg(
                    data['Close'].values, lags=best_lags, trend=best_trend
                ).fit()
            except Exception as e:
                print(f'Model training failed with the error message: {e}')

        return self.trained_model

    def forecast(self, forecast_days):
        """Forecast the next ``forecast_days`` values after the training data.

        Parameters
        ----------
        forecast_days : int
            Number of steps to predict; must be at least 1.

        Returns
        -------
        The predictions produced by the fitted model's ``predict`` call.

        Raises
        ------
        RuntimeError
            If ``train()`` has not produced a fitted model yet.
        ValueError
            If ``forecast_days`` is smaller than 1.
        """
        if self.trained_model is None:
            raise RuntimeError("The model has not been trained; call train() first")
        if forecast_days < 1:
            raise ValueError("forecast_days must be at least 1")

        # The model was fitted on the NaN-free series, so the first
        # out-of-sample index is the length of that series, not len(self.data).
        start = len(self._close_history())
        end = start + forecast_days - 1
        forecast_prices = self.trained_model.predict(start=start, end=end)

        return forecast_prices


## 3. Database for User and Model Tables

In [21]:
from flask_sqlalchemy import SQLAlchemy
import pickle

In [None]:
# Read the connection string from the environment so real credentials never
# have to be committed with the notebook. The placeholder default keeps the
# previous behaviour when SQLALCHEMY_DATABASE_URI is not set.
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
    'SQLALCHEMY_DATABASE_URI',
    'postgresql://username:password@localhost/dbname',
)
db = SQLAlchemy(app)

### 3.1 Users Table

In [None]:
class Users(db.Model):
    """Database model describing one application user account.

    Every column except the primary key is declared ``nullable=False``;
    ``user_id`` and ``email`` are additionally declared unique.
    """
    id = db.Column(db.Integer, primary_key=True)  # integer primary key
    user_id = db.Column(db.String(50), unique=True, nullable=False)  # unique identifier, at most 50 characters
    email = db.Column(db.String(120), unique=True, nullable=False)  # unique e-mail address, at most 120 characters
    # NOTE(review): presumably holds a password hash rather than plaintext;
    # confirm that callers hash before storing.
    password = db.Column(db.String(255), nullable=False)
    account_type = db.Column(db.String(20), nullable=False)  # basic or premium

In [None]:
# Example of adding a new user.
# The model class is called ``Users`` (the former ``User`` raised NameError).
# The session is used inside an application context, which Flask-SQLAlchemy
# requires whenever no request is being handled.
with app.app_context():
    new_user = Users(user_id='12345', email='example@email.com', password='hashed_password', account_type='premium')
    db.session.add(new_user)
    db.session.commit()

### 3.2 Trained Models Table

In [None]:
class TrainedModels(db.Model):
    """Database model caching one fitted forecasting model.

    A row is identified by the ticker symbol, the model type and the date
    window the model was trained on; the fitted model itself is stored as a
    pickle blob.
    """
    id = db.Column(db.Integer, primary_key=True)
    symbol = db.Column(db.String(20), nullable=False)
    model_type = db.Column(db.String(50), nullable=False)
    start_date = db.Column(db.String(50), nullable=False)  # start of the training window, stored as text
    end_date = db.Column(db.String(50), nullable=False)  # end of the training window, stored as text (required)
    # pickle.dumps() yields bytes, so the column must be binary; a Text column
    # cannot hold the blob reliably.
    # NOTE(review): a database created with the former Text column needs a
    # schema migration.
    trained_model = db.Column(db.LargeBinary)

    def save_trained_model(self, trained_model):
        """
        Pickle ``trained_model`` onto this row and persist the row.

        The row is added to the session before committing so the call also
        works for instances that were never added; adding an instance that is
        already in the session is harmless.
        """
        self.trained_model = pickle.dumps(trained_model)
        db.session.add(self)
        db.session.commit()

    def load_trained_model(self):
        """
        Return the unpickled model stored on this row, or ``None`` if empty.

        SECURITY: unpickling executes code embedded in the blob, so this is
        only safe while the table is written exclusively by this application.
        """
        if self.trained_model is None:
            return None
        return pickle.loads(self.trained_model)

In [8]:
def find_model(model_type, start_date, end_date, symbol):
    """Look up a stored model that matches all four criteria.

    Returns the first ``TrainedModels`` row produced by the filtered query,
    or ``False`` when the query yields nothing.
    """
    criteria = dict(
        model_type=model_type,
        start_date=start_date,
        end_date=end_date,
        symbol=symbol,
    )
    match = TrainedModels.query.filter_by(**criteria).first()
    return match if match else False

In [None]:
# Example of filtering models by symbol.
# Run inside an application context: Flask-SQLAlchemy needs one for queries
# issued outside a request.
with app.app_context():
    models_aapl = TrainedModels.query.filter_by(symbol='AAPL').all()

## 4. Fetch Historical Data

In [17]:
def fetch_data(symbol, start_date, end_date):
    """Download the price history of ``symbol`` between the two dates.

    The 'Adj Close' column is removed when the download contains it. Any
    exception is reported on stdout and turns the result into ``None``.
    """
    try:
        # Fetch historical price data
        prices = yf.download(symbol, start=start_date, end=end_date)

        # The adjusted close is not used downstream; drop it when present.
        if 'Adj Close' in prices.columns:
            prices = prices.drop(columns=['Adj Close'])
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

    return prices

## 5. Main: Forecast Endpoint

In [None]:
def _error_response(message, status):
    """Build the JSON error payload returned by ``process_data``."""
    return jsonify({'error': message, 'processed': False}), status


@app.route('/process_data', methods=['POST'])
def process_data():
    """Forecast prices for the symbol described in the JSON request body.

    Expected JSON fields
    --------------------
    symbol : str
        Ticker to download.
    data_length : int
        Number of days of history, counted back from today.
    forecast_days : int
        Number of steps to forecast.
    model_type : str
        Model name understood by ``create_model``.

    Returns
    -------
    ``({'forecast_prices': [...], 'processed': True}, 200)`` on success.
    Otherwise a JSON error with status 400 (bad input), 502 (no price data
    could be downloaded) or 500 (the model could not be fitted).
    """
    # Data from Firebase
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return _error_response('Request body must be a JSON object.', 400)

    required = ('symbol', 'data_length', 'forecast_days', 'model_type')
    missing = [key for key in required if key not in payload]
    if missing:
        return _error_response(f"Missing required field(s): {', '.join(missing)}", 400)

    symbol = payload['symbol']
    model_type = payload['model_type']
    try:
        data_length = int(payload['data_length'])
        forecast_days = int(payload['forecast_days'])
    except (TypeError, ValueError):
        return _error_response('data_length and forecast_days must be integers.', 400)
    if data_length < 1 or forecast_days < 1:
        return _error_response('data_length and forecast_days must be at least 1.', 400)

    # Set start and end date
    now = datetime.now()
    start_date = (now - timedelta(days=data_length)).strftime("%Y-%m-%d")
    end_date = now.strftime("%Y-%m-%d")

    # Check if model already exists
    cached = find_model(model_type=model_type, start_date=start_date, end_date=end_date, symbol=symbol)
    if cached and cached.trained_model is not None:
        # The row stores the pickled object returned by train(), not a Model
        # wrapper, so it has to be unpickled before it can forecast.
        # SECURITY: pickle.loads executes code embedded in the blob; this is
        # acceptable only because the table is written by this service alone.
        fitted = pickle.loads(cached.trained_model)
        # NOTE(review): assumes the stored object is a statsmodels results
        # object exposing forecast(steps=...), as AutoReg results do.
        forecast_prices = fitted.forecast(steps=forecast_days)
    else:
        price_data = fetch_data(symbol, start_date, end_date)
        if price_data is None or price_data.empty:
            return _error_response(f"No price data could be downloaded for {symbol}.", 502)

        try:
            model = create_model(model_type, price_data)
        except ValueError as exc:
            return _error_response(str(exc), 400)
        if model is None:
            return _error_response(f"Unsupported model_type: {model_type}", 400)

        fitted = model.train()
        if fitted is None:
            return _error_response('Model training failed.', 500)

        # Add the row to the session first so the commit performed by
        # save_trained_model actually persists it.
        new_model = TrainedModels(symbol=symbol, model_type=model_type, start_date=start_date, end_date=end_date)
        db.session.add(new_model)
        new_model.save_trained_model(fitted)

        # Forecast through the Model wrapper, whose forecast() accepts
        # ``forecast_days``; the object returned by train() does not.
        forecast_prices = model.forecast(forecast_days=forecast_days)

    # Example: Data to send to Firebase
    processed_data = {
        # numpy arrays are not JSON serialisable; convert to a list of floats.
        'forecast_prices': np.asarray(forecast_prices, dtype=float).tolist(),
        'processed': True
    }

    return jsonify(processed_data), 200

In [None]:
if __name__ == '__main__':
    # The interactive debugger lets a client run code on the server, so it is
    # opt-in (FLASK_DEBUG=1) instead of always on. The reloader is disabled
    # because it restarts the process, which does not work inside a notebook
    # kernel.
    debug_enabled = os.environ.get('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug_enabled, use_reloader=False)