In [47]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import nltk
import yfinance as yf

from nltk.sentiment import SentimentIntensityAnalyzer


In [91]:
# Build the VADER analyzer once; the MCD cell reuses `sia`.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    # The analyzer loads the "vader_lexicon" resource on construction; on a
    # fresh environment it has not been downloaded yet.
    nltk.download("vader_lexicon")
    sia = SentimentIntensityAnalyzer()

ticker = yf.Ticker("YUM")

# Collect the summary text of every news item.  Items whose "content" or
# "summary" is missing / not a string are skipped instead of raising.
summaryList = []
for article in ticker.news or []:
    summary = (article.get("content") or {}).get("summary")
    if isinstance(summary, str):
        summaryList.append(summary)

# One VADER score dict per summary; only the 'compound' value is averaged.
totalScore = [sia.polarity_scores(text) for text in summaryList]
compoundScore = [s['compound'] for s in totalScore]

# With no usable news there is nothing to average; fall back to a neutral
# 0.0 rather than dividing by zero.
avgCoundScoreYUM = sum(compoundScore) / len(compoundScore) if compoundScore else 0.0
print(compoundScore)
print(round(avgCoundScoreYUM,4))



[0.1585, 0.8519, 0.0, 0.4215, 0.7964, 0.0, 0.2263, 0.0, 0.4215, 0.2263]
0.3102


In [89]:
# Same sentiment pipeline as the YUM cell, for MCD.  Relies on `sia` (the
# SentimentIntensityAnalyzer instance) created in the YUM cell.
ticker = yf.Ticker("MCD")

# Collect the summary text of every news item.  Items whose "content" or
# "summary" is missing / not a string are skipped instead of raising.
summaryList = []
for article in ticker.news or []:
    summary = (article.get("content") or {}).get("summary")
    if isinstance(summary, str):
        summaryList.append(summary)

# One VADER score dict per summary; only the 'compound' value is averaged.
totalScore = [sia.polarity_scores(text) for text in summaryList]
compoundScore = [s['compound'] for s in totalScore]

# With no usable news there is nothing to average; fall back to a neutral
# 0.0 rather than dividing by zero.
avgCoundScoreMCD = sum(compoundScore) / len(compoundScore) if compoundScore else 0.0

print(compoundScore)
print(round(avgCoundScoreMCD,4))

[0.0, 0.0, 0.2263, -0.6705, -0.4939, 0.0, 0.2732, 0.2023, 0.6705, 0.3818]
0.059


In [97]:
import yfinance as yf
import numpy as np
import pandas as pd

# Daily closes from 2020-01-01 onward (no end date is passed to the download).
tickers   = ["YUM", "MCD"]
start_day = "2020-01-01"

raw = yf.download(tickers, start=start_day, progress=False)["Close"]

# Log spread = ln(P_YUM) - ln(P_MCD).
#   > 0 : YUM is priced above MCD;  < 0 : YUM is priced below MCD.
#   Example: a spread of 0.1 means P_YUM / P_MCD = e^0.1 ~= 1.105,
#   i.e. YUM is roughly 10.5% more expensive than MCD.
log_price = np.log(raw)
spread    = log_price["YUM"] - log_price["MCD"]

# Rolling statistics, all over the same window.
win  = 30
roll = spread.rolling(win)

# Recent average: the "normal" level for a mean-reversion strategy.  A mean
# over the whole history reacts too slowly when the series shifts, hence the
# rolling mean.
spread_MA = roll.mean()

# Volatility over the same window: the scale used to judge whether a given
# move in the spread is large or small.
spread_STD = roll.std(ddof=0)

# Normalised distance from the rolling mean.  Thresholds such as +/-2 sigma
# give simple long/short entry and exit rules, and an RL model can read the
# distance directly from this one number.
Z_score = spread.sub(spread_MA).div(spread_STD)

# Sentiment inputs, rounded once and reused below.
score_yum = round(avgCoundScoreYUM,4)
score_mcd = round(avgCoundScoreMCD,4)
diffScore = score_yum - score_mcd

# Feature table.  "price" is the log spread itself (same values as "spread").
#   Long(1):  profit when the spread (= price) shrinks.
#   Short(2): profit when the spread widens.
#   NOTE(review): PairTradingEnv.step rewards a long with (exit - entry) and a
#   short with (entry - exit), i.e. a long gains when "price" rises — confirm
#   which direction the two lines above are meant to describe.
# The three sentiment columns hold one scalar each, so every row carries the
# same value: they are a snapshot of the current news, not a history.
df = (
    pd.DataFrame({
        "spread"    : spread,
        "spread_MA" : spread_MA,
        "spread_STD": spread_STD,
        "Z_score"   : Z_score,
        "price"     : spread,
    })
    .assign(
        compondScoreYUM=score_yum,
        compondScoreMCD=score_mcd,
        diffScore=diffScore,
    )
    .dropna()   # drops the warm-up rows where the rolling window is incomplete
)

In [99]:
# Show the feature table that is passed to the environment (pandas elides the
# middle rows of a long frame when it is printed).
print(df)

              spread  spread_MA  spread_STD   Z_score     price  \
Date                                                              
2020-02-13 -0.699376  -0.682026    0.015843 -1.095164 -0.699376   
2020-02-14 -0.695480  -0.683479    0.014988 -0.800720 -0.695480   
2020-02-18 -0.703085  -0.685199    0.014154 -1.263583 -0.703085   
2020-02-19 -0.689587  -0.686077    0.013571 -0.258649 -0.689587   
2020-02-20 -0.699661  -0.687300    0.013077 -0.945246 -0.699661   
...              ...        ...         ...       ...       ...   
2025-04-25 -0.765610  -0.716598    0.046749 -1.048417 -0.765610   
2025-04-28 -0.762781  -0.720042    0.046216 -0.924775 -0.762781   
2025-04-29 -0.755869  -0.723242    0.045251 -0.721010 -0.755869   
2025-04-30 -0.753662  -0.726446    0.043868 -0.620418 -0.753662   
2025-05-01 -0.746988  -0.729109    0.042591 -0.419786 -0.746988   

            compondScoreYUM  compondScoreMCD  diffScore  
Date                                                     
2020-02-13  

In [113]:

class PairTradingEnv(gym.Env):
    """Trading environment that walks row by row through a pair-spread table.

    Actions (``spaces.Discrete(3)``):

    * ``0`` - labelled "hold", but it actually goes flat: any open position
      is closed at the new row's price.  An open position is only kept by
      repeating the action that opened it.
    * ``1`` - long: closes an open short, then opens a long if flat.
    * ``2`` - short: closes an open long, then opens a short if flat.

    ``self.position`` stores ``0`` (flat), ``1`` (long) or ``-1`` (short);
    note that short is ``-1`` here, not the action code ``2``.

    The reward is realised P&L only: it is non-zero only on the step where a
    position is closed.  A position that is still open when the episode ends
    is never added to the reward.

    The class uses the classic ``gym`` call signatures: ``reset()`` returns
    the observation only and ``step()`` returns ``(obs, reward, done, info)``.
    """

    metadata = {'render.modes': ['human']}

    # Columns, in order, that make up one observation vector.
    _OBS_COLUMNS = ['spread', 'spread_MA', 'spread_STD', 'Z_score', 'price']

    def __init__(self, data):
        """
        data: pandas DataFrame, one row per time step, with columns
            - 'spread'     : log price spread
            - 'spread_MA'  : moving average of the spread
            - 'spread_STD' : standard deviation of the spread
            - 'Z_score'    : (spread - MA) / STD
            - 'price'      : price used for entries, exits and rewards
            - 'diffScore'  : only read by sample_policy()

        Raises:
            ValueError: if ``data`` has no rows.
        """
        super(PairTradingEnv, self).__init__()

        # Fail here with a clear message; otherwise the first reset() would
        # fail with an IndexError on row 0.
        if len(data) == 0:
            raise ValueError("PairTradingEnv needs at least one row of data")

        # Positional index so that current_step can address rows directly.
        self.data = data.reset_index(drop=True)
        self.n_steps = len(self.data)
        self.current_step = 0

        # action: 0-flat ("hold"), 1-Long, 2-Short
        self.action_space = spaces.Discrete(3)

        # Unbounded box over [spread, spread_MA, spread_STD, Z_score, price];
        # the bounds are created as float32 to match the declared dtype.
        n_features = len(self._OBS_COLUMNS)
        self.observation_space = spaces.Box(
            low=np.full(n_features, -np.inf, dtype=np.float32),
            high=np.full(n_features, np.inf, dtype=np.float32),
            dtype=np.float32,
        )

        # Current position: 0-flat, 1-Long, -1-Short
        self.position = 0
        self.entry_price = 0

    def sample_policy(self, z_threshold=1.0, sentiment_threshold=0.1):
        """Rule-based action for the current row, combining Z-score and news.

        Reads 'Z_score' and 'diffScore' from the row at ``current_step``.

        Args:
            z_threshold: absolute Z-score that must be exceeded to trade.
            sentiment_threshold: absolute 'diffScore' that must be reached,
                with the sign opposite to the Z-score.

        Returns:
            2 (short) if Z_score > z_threshold and diffScore <= -sentiment_threshold,
            1 (long)  if Z_score < -z_threshold and diffScore >= sentiment_threshold,
            0 otherwise.
        """
        row = self.data.iloc[self.current_step]
        z = float(row['Z_score'])
        diffScore = float(row['diffScore'])

        if z > z_threshold and diffScore <= -sentiment_threshold:
            return 2          # Short
        if z < -z_threshold and diffScore >= sentiment_threshold:
            return 1          # Long
        return 0              # Flat ("hold")

    def reset(self):
        """Rewind to the first row, clear the position, return the first observation."""
        self.current_step = 0
        self.position = 0
        self.entry_price = 0
        return self._next_observation()

    def _next_observation(self):
        """Return the observation columns of the current row as a float32 array."""
        row = self.data.iloc[self.current_step]
        return row[self._OBS_COLUMNS].values.astype(np.float32)

    def _open_position(self, direction, price):
        """Record a new position (1 = long, -1 = short) entered at ``price``."""
        self.position = direction
        self.entry_price = price

    def _close_position(self, price):
        """Close the open position at ``price`` and return its realised P&L."""
        if self.position > 0:
            pnl = price - self.entry_price      # long gains when price rises
        else:
            pnl = self.entry_price - price      # short gains when price falls
        self.position = 0
        self.entry_price = 0
        return pnl

    def step(self, action):
        """
        Advance one row and apply ``action`` at that row's price.

        action: int, {0: flat ("hold"), 1: Long, 2: Short}; any other value
            is handled like 0.

        Returns:
            (obs, reward, done, info) where obs is the observation of the new
            row, reward is the P&L realised on this step (0.0 if nothing was
            closed), done is True once the last row has been reached and info
            is an empty dict.
        """
        reward = 0.0
        info = {}

        # Move to the next row, but never past the final one: stepping again
        # after `done` (or on a one-row frame) stays on the last row instead
        # of raising IndexError.
        last_index = self.n_steps - 1
        self.current_step = min(self.current_step + 1, last_index)
        done = self.current_step >= last_index

        current_price = self.data.iloc[self.current_step]['price']

        if action == 1:  # Long
            if self.position < 0:
                reward += self._close_position(current_price)  # book the short
            if self.position == 0:
                self._open_position(1, current_price)
        elif action == 2:  # Short
            if self.position > 0:
                reward += self._close_position(current_price)  # book the long
            if self.position == 0:
                self._open_position(-1, current_price)
        else:  # 0: go flat
            if self.position != 0:
                reward += self._close_position(current_price)

        obs = self._next_observation()
        return obs, reward, done, info

    def render(self, mode='human', close=False):
        """Print the current step index, position and price (mode/close are unused)."""
        print(f"Step: {self.current_step}, Position: {self.position}, Price: {self.data.iloc[self.current_step]['price']}")


In [115]:
from gymnasium import make

# Build the environment on the feature table `df`.
env = PairTradingEnv(df)

state = env.reset()
print("Initial State:", state)

# Roll the environment forward until it reports `done`, summing the rewards.
total_reward = 0
done = False
while True:
    # sample_policy() is the rule-based policy defined on the environment
    # (Z-score plus news diffScore); it is not a random action.
    action = env.sample_policy()
    state, reward, done, _ = env.step(action)
    total_reward += reward
    print('-',reward)
    env.render()
    if done:
        break


Initial State: [-0.6993762  -0.68202597  0.01584259 -1.0951644  -0.6993762 ]
- 0.0
Step: 1, Position: 1, Price: -0.6954799583305258
- -0.007604598686534736
Step: 2, Position: 0, Price: -0.7030845570170605
- 0.0
Step: 3, Position: 1, Price: -0.6895871866670609
- -0.01007428535221333
Step: 4, Position: 0, Price: -0.6996614720192742
- 0.0
Step: 5, Position: 0, Price: -0.706799412633317
- 0.0
Step: 6, Position: 1, Price: -0.7278547481410715
- 0.0
Step: 7, Position: 1, Price: -0.7443922212715757
- 0.0
Step: 8, Position: 1, Price: -0.739522587127813
- 0.0
Step: 9, Position: 1, Price: -0.7460515886616612
- 0.0
Step: 10, Position: 1, Price: -0.7553223912269997
- 0.0
Step: 11, Position: 1, Price: -0.7614835014256052
- 0.0
Step: 12, Position: 1, Price: -0.7596337526921841
- 0.0
Step: 13, Position: 1, Price: -0.7608703695619798
- 0.0
Step: 14, Position: 1, Price: -0.7579314366451113
- 0.0
Step: 15, Position: 1, Price: -0.7775098008651344
- 0.0
Step: 16, Position: 1, Price: -0.7811502251730742
- 0

In [117]:
# Sum of the per-step rewards accumulated by the rollout loop.
print("Total Reward:", total_reward)

Total Reward: 0.10716475963429595


In [35]:
# Download again without selecting "Close" to inspect the full column layout.
# NOTE: this rebinds `raw`, which the spread cell defines as the "Close"
# prices only.
raw = yf.download(["YUM", "MCD"], start="2020-01-01", progress=False)
print(raw.columns)   

MultiIndex([( 'Close', 'MCD'),
            ( 'Close', 'YUM'),
            (  'High', 'MCD'),
            (  'High', 'YUM'),
            (   'Low', 'MCD'),
            (   'Low', 'YUM'),
            (  'Open', 'MCD'),
            (  'Open', 'YUM'),
            ('Volume', 'MCD'),
            ('Volume', 'YUM')],
           names=['Price', 'Ticker'])


In [23]:
# Print both frames for a quick visual check.
# NOTE(review): the saved output below shows `df` with only 5 columns (no
# sentiment columns) and this cell's execution count is lower than the cells
# above it, so the output looks stale — re-run after a kernel restart to confirm.
print(df)
print(raw)

              spread  spread_MA  spread_STD   Z_score     price
Date                                                           
2020-02-13 -0.699376  -0.682026    0.015843 -1.095174 -0.699376
2020-02-14 -0.695480  -0.683479    0.014988 -0.800715 -0.695480
2020-02-18 -0.703085  -0.685199    0.014154 -1.263594 -0.703085
2020-02-19 -0.689587  -0.686077    0.013571 -0.258654 -0.689587
2020-02-20 -0.699661  -0.687300    0.013077 -0.945223 -0.699661
...              ...        ...         ...       ...       ...
2025-04-21 -0.787156  -0.703192    0.042818 -1.960943 -0.787156
2025-04-22 -0.788363  -0.706253    0.045435 -1.807192 -0.788363
2025-04-23 -0.778858  -0.709824    0.046772 -1.475970 -0.778858
2025-04-24 -0.762068  -0.713152    0.046819 -1.044789 -0.762068
2025-04-25 -0.765610  -0.716598    0.046749 -1.048417 -0.765610

[1307 rows x 5 columns]
Ticker             MCD         YUM
Date                              
2020-01-02  177.814438   92.652519
2020-01-03  177.185684   92.362297
202