<a href="https://colab.research.google.com/github/gitcoder27/Stock_AI/blob/main/stock_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Remove every entry from pip's local cache.
!pip cache purge

Files removed: 14


In [12]:
# System-level headers and build tools (Python/zlib/JPEG dev files, cmake, swig).
# NOTE(review): presumably needed to compile a Python dependency from source --
# confirm which package requires them, or drop this cell.
!apt-get install -y python3-dev zlib1g-dev libjpeg-dev cmake swig

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libjpeg-dev is already the newest version (8c-2ubuntu8).
libjpeg-dev set to manually installed.
python3-dev is already the newest version (3.8.2-0ubuntu2).
python3-dev set to manually installed.
cmake is already the newest version (3.16.3-1ubuntu1.20.04.1).
zlib1g-dev is already the newest version (1:1.2.11.dfsg-2ubuntu1.5).
zlib1g-dev set to manually installed.
The following additional packages will be installed:
  swig4.0
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 23 not upgraded.
Need to get 1,086 kB of archives.
After this operation, 5,413 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 swig4.0 amd64 4.0.1-5build1 [1,081 kB]
Get:2 http://archive.ubuntu.com/ubuntu focal/universe amd64 swig all 4.0.1

In [16]:
# Pin the versions recorded in this notebook's install log so a fresh runtime
# resolves the same packages. The environment code below uses the classic
# `gym` API (reset() returns only the observation, step() returns a 4-tuple),
# which is what stable_baselines3 1.x works with.
# %pip (instead of a bare/shell pip) installs into the running kernel's environment.
%pip install optuna==3.1.0 pandas numpy ta==0.10.2 stable_baselines3==1.7.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.1.0-py3-none-any.whl (365 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.3/365.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting ta
  Downloading ta-0.10.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting stable_baselines3
  Downloading stable_baselines3-1.7.0-py3-none-any.whl (171 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.8/171.8 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Collecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.10.2-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Collecting importlib-metadata~=4.1

In [None]:
import optuna
import pandas as pd
import numpy as np
from ta import add_all_ta_features
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Number of environment steps used for every PPO training run
# (kept small so that a full run finishes in reasonable time).
total_timesteps = 100000

# Feature columns fed to the agent, one string literal per column.
# Every entry must be followed by a comma: adjacent string literals without
# one are silently concatenated by Python into a single (bogus) column name.
# NOTE(review): "momentum_rsi_9" and "trend_supertrend" do not look like
# columns produced by ta.add_all_ta_features -- confirm they exist in the data.
selected_features = [
    "open",
    "high",
    "low",
    "close",
    "momentum_rsi",
    "momentum_rsi_9",
    "trend_macd",
    "momentum_stoch",
    "trend_supertrend",
]

# Read the data
def read_data(fileName):
    """Load a candle CSV, add technical indicators and min-max scale the features.

    Args:
        fileName: Path of a CSV file with at least "open", "high", "low" and
            "close" columns. A "volume" column is used when present.

    Returns:
        pandas.DataFrame holding the columns listed in ``selected_features``,
        each scaled to the [0, 1] range, with incomplete rows removed and a
        fresh 0..n-1 index.

    Raises:
        KeyError: if a selected feature is not available after the
            indicators have been computed.
    """
    df = pd.read_csv(fileName)

    # ta expects the *name* of the volume column. When the CSV carries no
    # volume data, add a constant placeholder so the call is well-formed.
    # Volume-based indicators are meaningless in that case and must not be
    # listed in selected_features.
    if "volume" not in df.columns:
        df["volume"] = 0.0

    # Compute technical indicators
    df = add_all_ta_features(df, open="open", high="high", low="low", close="close", volume="volume")

    # Select desired features (fail early with the full list of missing names)
    missing = [name for name in selected_features if name not in df.columns]
    if missing:
        raise KeyError(f"Selected features not found in the data: {missing}")
    df = df[selected_features]

    # Indicators are undefined during their warm-up period; rows containing
    # NaN would otherwise reach the agent as NaN observations.
    df = df.dropna().reset_index(drop=True)

    # Normalize the data. A constant column has a zero range; dividing by 1
    # instead maps it to 0 rather than producing NaN/inf.
    col_min = df.min()
    col_range = (df.max() - col_min).replace(0, 1)
    df_norm = (df - col_min) / col_range

    return df_norm

# Create the sliding window dataset
def create_sliding_window(df_norm, lookback_window=60):
    """Cut a feature frame into (window, following close) pairs.

    For every row position ``end`` from ``lookback_window`` up to the last
    row, one sample is produced: the ``lookback_window`` rows immediately
    before ``end`` as input, and the "close" value of row ``end`` as target.

    Args:
        df_norm: DataFrame of features; must contain a "close" column.
        lookback_window: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` where ``X`` stacks all windows along a new first
        axis and ``y`` is the array of matching targets.
    """
    target_positions = range(lookback_window, len(df_norm))

    windows = [
        df_norm.iloc[end - lookback_window:end].values
        for end in target_positions
    ]
    targets = [df_norm.iloc[end]["close"] for end in target_positions]

    return np.stack(windows), np.array(targets)

# Build the training set: load and min-max scale the training candles, then
# cut them into look-back windows (X) paired with the "close" value of the
# row that follows each window (y).
df_norm = read_data("candle_data_train_copy.csv")
X, y = create_sliding_window(df_norm)

# Creating the environment
class StockTradingEnv(gym.Env):
    """Gym environment that replays pre-computed sliding windows.

    Each observation is one entry of ``X``. The three discrete actions are
    0 = Buy, 1 = Sell, 2 = Hold. The reward compares the percentage change
    between two consecutive entries of ``y`` against ``take_profit``.

    Args:
        X: Array-like of observations; the first axis indexes time steps and
            the remaining axes define the observation shape. Values are
            expected to lie in [0, 1] (the bounds of the observation space).
        y: Array-like of prices, indexed by the same time steps as ``X``.
        take_profit: Percentage move required for a Buy (upwards) or a Sell
            (downwards) to be rewarded.
            NOTE(review): the default of 60 is a very large one-step move,
            especially on min-max scaled prices -- confirm the intended unit.

    Raises:
        ValueError: if the data is too short to take a single step.
    """

    def __init__(self, X, y, take_profit=60):
        super(StockTradingEnv, self).__init__()
        # Store observations in the dtype advertised by observation_space.
        self.X = np.asarray(X, dtype=np.float32)
        self.y = np.asarray(y, dtype=np.float64)
        self.take_profit = take_profit

        # After advancing to step t, step() reads X[t] and the reward reads
        # y[t] and y[t + 1]; the last usable t is bounded by both arrays.
        # Ending the episode there avoids indexing past the end of y.
        self._last_step = min(len(self.X) - 1, len(self.y) - 2)
        if self._last_step < 1:
            raise ValueError(
                "StockTradingEnv needs at least 2 observations in X and 3 prices in y"
            )

        self.action_space = spaces.Discrete(3)  # Buy, Sell, Hold
        # Derive the observation shape from the data instead of hard-coding
        # the window length and feature count.
        self.observation_space = spaces.Box(low=0, high=1, shape=self.X.shape[1:], dtype=np.float32)

        self.reset()

    def step(self, action):
        """Advance one time step.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            observation at the new step and ``info`` is an empty dict.
        """
        # Compute the next state based on the action and the current state
        self.current_step += 1
        state = self.X[self.current_step]
        reward = self._get_reward(action)
        done = self.current_step >= self._last_step

        return state, reward, done, {}

    def reset(self):
        """Rewind to the first time step and return its observation."""
        self.current_step = 0
        return self.X[self.current_step]

    def render(self, mode='human'):
        # Optional: Implement a rendering function if you want to visualize the environment
        pass

    def _get_reward(self, action):
        """Score ``action`` against the price change following the current step.

        Returns:
            1 when a Buy is followed by a rise of at least ``take_profit``
            percent or a Sell by a fall of at least ``take_profit`` percent,
            -1 for any other Buy/Sell, and 0 for Hold.
        """
        # Compute the reward based on the action and the current state
        current_price = self.y[self.current_step]
        next_price = self.y[self.current_step + 1]
        if current_price == 0:
            # A relative change from a zero price is undefined (min-max
            # scaled prices are exactly 0 at their minimum); treat as no move.
            profit = 0.0
        else:
            profit = (next_price - current_price) / current_price * 100

        if action == 0:  # Buy
            return 1 if profit >= self.take_profit else -1
        if action == 1:  # Sell
            return 1 if profit <= -self.take_profit else -1
        return 0  # Hold

# Training environment built from the training windows and targets.
env = StockTradingEnv(X, y)

# implement PPO algo
# DummyVecEnv takes a list of environment factories; the single training
# environment is wrapped so PPO receives a vectorized environment.
vec_env = DummyVecEnv([lambda: env])

# Train a PPO agent with the "MlpPolicy" policy for total_timesteps steps.
model = PPO("MlpPolicy", vec_env, verbose=1)
model.learn(total_timesteps=total_timesteps)

# Evaluate the agent
def evaluate_agent(model, env, num_episodes=10):
    """Play complete episodes with ``model`` and average the episode returns.

    Args:
        model: Object whose ``predict(state)`` returns ``(action, extra)``.
        env: Environment whose ``reset()`` returns the first state and whose
            ``step(action)`` returns ``(state, reward, done, info)``.
        num_episodes: How many episodes to play.

    Returns:
        The mean, over all episodes, of the summed per-step rewards.
    """
    def play_one_episode():
        # Sum rewards from reset until the environment reports done.
        observation = env.reset()
        total = 0
        while True:
            chosen_action, _ = model.predict(observation)
            observation, step_reward, finished, _ = env.step(chosen_action)
            total += step_reward
            if finished:
                return total

    return np.mean([play_one_episode() for _ in range(num_episodes)])

# Create a new environment for evaluation with new data (e.g., X_test, y_test)
# The test file goes through the same read_data / create_sliding_window
# pipeline as the training file. Note that read_data scales each file with
# that file's own column minima and maxima.
df_norm_test = read_data("candle_data_test_copy.csv")
X_test, y_test = create_sliding_window(df_norm_test)

eval_env = StockTradingEnv(X_test, y_test)

# Mean episode reward of the trained model on the evaluation environment
# (evaluate_agent plays 10 episodes by default).
mean_reward = evaluate_agent(model, eval_env)
print("Mean reward:", mean_reward)

# fine tune the model
def objective(trial):
    """Optuna objective: train PPO with sampled hyperparameters and score it.

    Args:
        trial: The ``optuna`` trial used to sample the hyperparameters.

    Returns:
        Mean episode reward reported by ``evaluate_agent`` for the freshly
        trained model (higher is better).
    """
    # Define hyperparameters for the PPO agent.
    # suggest_float (with log=True for log-scaled ranges) replaces
    # suggest_loguniform / suggest_uniform, which are deprecated in Optuna 3.x.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    gamma = trial.suggest_float("gamma", 0.9, 1.0)
    n_steps = trial.suggest_int("n_steps", 16, 256)
    ent_coef = trial.suggest_float("ent_coef", 1e-5, 1, log=True)
    clip_range = trial.suggest_float("clip_range", 0.1, 0.4)

    # Every trial gets its own environment and its own untrained model.
    env = StockTradingEnv(X, y)
    vec_env = DummyVecEnv([lambda: env])

    model = PPO("MlpPolicy", vec_env, learning_rate=learning_rate, gamma=gamma, n_steps=n_steps, ent_coef=ent_coef, clip_range=clip_range, verbose=0)
    model.learn(total_timesteps=total_timesteps)

    # NOTE(review): the score is computed on the training windows (X, y);
    # confirm whether a held-out validation set should be used instead.
    return evaluate_agent(model, env)

# Reduce the number of trials in Optuna
# Each trial runs objective(), which trains a new PPO model for
# total_timesteps steps, so the total cost grows linearly with this number.
n_trials_optuna = 25

# direction="maximize": the study looks for the hyperparameters that give
# the highest value returned by objective (the mean episode reward).
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=n_trials_optuna)

ModuleNotFoundError: ignored

# New section