In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# STEP 1: Load your dataset
df = pd.read_csv("Clean_Dataset.csv")  # Replace with your actual file name

# STEP 2: Drop rows where 'price' is missing
df = df.dropna(subset=['price'])

# STEP 3: One-hot encode categorical features
df_encoded = pd.get_dummies(df, drop_first=True)

# STEP 4: Separate features and target
X = df_encoded.drop(columns='price')
y = df_encoded['price']

# STEP 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# STEP 6: Train the model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# STEP 7: Evaluate the model
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))

# STEP 8: Predict on a real sample input
sample_input = X_test.iloc[0:1]
predicted_price = model.predict(sample_input)[0]
print(f"\n✅ Predicted flight price: ₹{predicted_price:,.2f}")


MAE: 479.567903345725
MSE: 1523491.1659101115
R²: 0.9130988760613329

✅ Predicted flight price: ₹10,681.16


In [10]:
# STEP 1: Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# STEP 2: Load and prepare data
df = pd.read_csv('Clean_Dataset.csv')
df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
df.dropna(inplace=True)  # Ensure no NaNs

# STEP 3: Encode categorical features
df_encoded = pd.get_dummies(df, drop_first=True)

# STEP 4: Split into features and target
X = df_encoded.drop(columns='price')
y = df_encoded['price']

# STEP 5: Scale the target
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1)).ravel()

# STEP 6: Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_scaled, test_size=0.2, random_state=42)

# STEP 7: Train model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# STEP 8: Evaluate
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))

# STEP 9: Save model, features, and y_scaler
joblib.dump(model, 'best_random_forest_model.pkl')
joblib.dump(y_scaler, 'y_scaler.pkl')
joblib.dump(X.columns.tolist(), 'model_features.pkl')  # Ensure prediction matches feature order

print("✅ Model, scaler, and features saved successfully.")


MAE: 0.15649433129255028
MSE: 0.14563837372540642
R²: 0.8594452267436237
✅ Model, scaler, and features saved successfully.
