In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
#PATH="/kaggle/input/airfares-enhanced/Enhanced_Dataset_with_Dynamic_Pricing.csv"
PATH="airfares-enhanced.csv"

In [3]:
import pandas as pd
df=pd.read_csv(PATH)
df.head(5)

Unnamed: 0.1,Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,...,travel_date,month,day_of_week,is_weekend,seasonality,is_festival,demand_index,competitor_price,seats_left,adjusted_price
0,0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,...,2023-01-02,1,0,0,Medium,0,1.0,6004.29,5,8598.78
1,1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,...,2023-01-02,1,0,0,Medium,0,1.0,6057.64,5,8598.78
2,2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,...,2023-01-02,1,0,0,Medium,0,1.0,6392.52,5,8603.11
3,3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,...,2023-01-02,1,0,0,Medium,0,1.0,5908.67,5,8006.17
4,4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,...,2023-01-02,1,0,0,Medium,0,1.0,5623.71,5,8006.17


# Imports

In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import gym
from gym import spaces
from sbx import PPO, TD3, SAC
from scipy.special import expit

# Data Processing

In [5]:
df = pd.read_csv(PATH)
df.fillna(method='ffill', inplace=True)

# Categorical Encoding
categorical_cols = ['flight','airline','source_city','departure_time','stops',
                   'arrival_time','destination_city','class','seasonality']
for col in categorical_cols:
    df[col] = df[col].astype('category').cat.codes

# Handle dates
df['travel_date']=pd.to_datetime(df['travel_date'])
period = 365.25
df['day_sin'] = np.sin(df['travel_date'].dt.day * (2 * np.pi / period))
df['day_cos'] = np.cos(df['travel_date'].dt.day * (2 * np.pi / period))
df['day_of_month'] = df['travel_date'].dt.month
df['week_of_year'] = df['travel_date'].dt.isocalendar().week
df=df.drop("travel_date", axis=1)

target = 'price'

# Normalize features
scaler = MinMaxScaler()
num_features = ['duration','demand_index','competitor_price',
               'seats_left','adjusted_price']
df[num_features] = scaler.fit_transform(df[num_features])

# For the environment, save min and max price for scaling actions
min_price = df[target].min()
max_price = df[target].max()


  df.fillna(method='ffill', inplace=True)


In [6]:
for col in df.columns:
    print(col, df[col].dtype)

Unnamed: 0 int64
airline int8
flight int16
source_city int8
departure_time int8
stops int8
arrival_time int8
destination_city int8
class int8
duration float64
days_left int64
price int64
month int64
day_of_week int64
is_weekend int64
seasonality int8
is_festival int64
demand_index float64
competitor_price float64
seats_left float64
adjusted_price float64
day_sin float64
day_cos float64
day_of_month int32
week_of_year UInt32


# 2. Airfare Prediction Class

In [7]:
class AirfarePricingEnv(gym.Env):
    def __init__(self, data, min_price, max_price):
        super().__init__()
        self.data = data.reset_index(drop=True)
        self.current_idx = 0
        self.min_price = min_price
        self.max_price = max_price
        self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)  # normalized price
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(data.shape[1] - 1,), dtype=np.float32
        )
        # Example coefficients for the demand model (customize as needed)
        self.beta = np.random.uniform(-1, 1, size=(data.shape[1] - 1))
        self.gamma = -2.0  # price sensitivity

    def reset(self):
        self.current_idx = 0
        return self.data.iloc[self.current_idx, :-1].values.astype(np.float32)

    def step(self, action):
        price = float(action[0]) * (self.max_price - self.min_price) + self.min_price
        X = self.data.iloc[self.current_idx, :-1].values

        # exponent_value = -np.dot(self.beta, X) - self.gamma * price
        # if abs(exponent_value) > 700:
        #     print(f"Large exponent detected: {exponent_value}")
        #     print(f"Beta max/min: {np.max(self.beta)}/{np.min(self.beta)}")
        #     print(f"X max/min: {np.max(X)}/{np.min(X)}")
        #     print(f"Price: {price}")
        
        # Simulate purchase probability (logistic demand model)
        prob_purchase = expit(np.dot(self.beta, X) + self.gamma * price)
        #prob_purchase = 1 / (1 + np.exp(-np.dot(self.beta, X) - self.gamma * price))
        reward = price * prob_purchase
        self.current_idx += 1
        done = self.current_idx >= len(self.data)
        next_state = (
            self.data.iloc[self.current_idx, :-1].values.astype(np.float32)
            if not done
            else np.zeros_like(X, dtype=np.float32)
        )
        return next_state, reward, done, {}


# 3.Metrics Class

In [8]:
class Metrics:
    def __init__(self, model_name):
        self.rewards = []
        self.model_name = model_name
        self.metrics_dir = f'metrics/{model_name}'
        os.makedirs(self.metrics_dir, exist_ok=True)
    
    def log_reward(self, reward):
        self.rewards.append(reward)
    
    def save_and_plot(self):
        rewards = np.array(self.rewards)
        np.save(f'{self.metrics_dir}/rewards.npy', rewards)
        window = min(100, len(rewards))
        moving_avg = np.convolve(rewards, np.ones(window) / window, mode='valid')
        plt.figure(figsize=(10, 5))
        plt.plot(moving_avg)
        plt.title(f'Moving Average Reward - {self.model_name}')
        plt.xlabel('Episode')
        plt.ylabel('Reward')
        plt.tight_layout()
        plt.savefig(f'{self.metrics_dir}/reward_curve.png')
        plt.close()


# 4. Define training

In [9]:
def train_and_evaluate(model_class, model_name, env, total_timesteps=100_000):
    print(f"Training {model_name}...")
    model = model_class(
        'MlpPolicy', 
        env, 
        verbose=1,
        n_steps=512,
        batch_size=64,
        n_epochs=10
    )
    model.learn(total_timesteps=total_timesteps, progress_bar=True)
    model.save(f'{model_name}_airfare')
    print(f"Evaluating {model_name}...")
    metrics = Metrics(model_name)
    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, _ = env.step(action)
        metrics.log_reward(reward)
    metrics.save_and_plot()
    print(f"{model_name} metrics saved in metrics/{model_name}/")
    return metrics

from stable_baselines3.common.env_util import make_vec_env

# 5.Execution

In [11]:
os.makedirs('models', exist_ok=True)
# Prepare the environment
#env = AirfarePricingEnv(df, min_price, max_price)
env = make_vec_env(AirfarePricingEnv, n_envs=16, env_kwargs={
    'data': df, 
    'min_price': min_price, 
    'max_price': max_price
})
# Train and evaluate each model
model_classes = {'ppo': PPO, 'td3': TD3, 'sac': SAC}
all_metrics = {}
for name, cls in model_classes.items():
    # Re-instantiate the environment for each model for a fresh start
    env = AirfarePricingEnv(df, min_price, max_price)
    metrics = train_and_evaluate(cls, name, env)
    all_metrics[name] = metrics

# Optionally, plot all moving averages for comparison
plt.figure(figsize=(10, 6))
for name, metrics in all_metrics.items():
    rewards = np.array(metrics.rewards)
    window = min(100, len(rewards))
    moving_avg = np.convolve(rewards, np.ones(window) / window, mode='valid')
    plt.plot(moving_avg, label=name.upper())
plt.title('Moving Average Reward Comparison')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.legend()
plt.tight_layout()
plt.savefig('metrics/reward_comparison.png')
plt.close()
print("All done! Metrics and models saved.")



Training ppo...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 514  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 398         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013157856 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.42       |
|    explained_variance   | -1.31e-05   |
|    learning_rate        | 0.0003      |
|    loss                 | 3.88e+07    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0183     |
|    std                  | 0.998       |
|    value_loss           | 8.34e+07    |
----------------------------------



Evaluating td3...
td3 metrics saved in metrics/td3/
Training a2c...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




------------------------------------
| time/                 |          |
|    fps                | 356      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.4     |
|    explained_variance | -44.7    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.00222  |
|    std                | 0.985    |
|    value_loss         | 1.06e-05 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 374      |
|    iterations         | 200      |
|    time_elapsed       | 2        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.41    |
|    explained_variance | -3.26    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | -0.0166  |
|