**Customer Travel Satisfaction -- Big Data Project**

Extracting the Dataset

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Read the raw trip records and parse both trip-date columns as day-first
# datetimes; the parsed values overwrite the columns of the same name.
travel_pro = pd.read_csv('Train.csv').assign(**{
    'Departure Date': lambda frame: pd.to_datetime(frame['Departure Date'], dayfirst=True),
    'Return Date': lambda frame: pd.to_datetime(frame['Return Date'], dayfirst=True),
})

# Calendar month numbers that make up each season
season_months = dict(
    Winter=[12, 1, 2],
    Spring=[3, 4, 5],
    Summer=[6, 7, 8],
    Autumn=[9, 10, 11],
)

# Function to check if any date in the trip overlaps with season months
def overlaps_season(start_date, end_date, season_mths):
    """Return 1 if the trip touches any calendar month in ``season_mths``, else 0.

    The trip is the inclusive span from ``start_date`` to ``end_date``. Only
    the calendar months it covers matter, so they are derived arithmetically
    from the two endpoints instead of generating one timestamp per trip day.

    Args:
        start_date: Trip start; any value ``pd.Timestamp`` accepts.
        end_date: Trip end (inclusive); any value ``pd.Timestamp`` accepts.
        season_mths: Iterable of month numbers (1-12) belonging to the season.

    Returns:
        int: 1 when at least one covered month is in ``season_mths``; 0
        otherwise, including when either date is missing (None/NaT) or the
        end precedes the start.
    """
    # With a missing endpoint the trip span is unknown: report no overlap
    # rather than raising in the middle of a row-wise apply.
    if pd.isna(start_date) or pd.isna(end_date):
        return 0

    start = pd.Timestamp(start_date)
    end = pd.Timestamp(end_date)
    if start > end:
        # An inverted range covers no dates at all.
        return 0

    # Number of month boundaries crossed; a span of 11 or more already
    # covers every calendar month, so there is no need to go further.
    month_span = min((end.year - start.year) * 12 + (end.month - start.month), 11)
    trip_months = {
        (start.month - 1 + offset) % 12 + 1 for offset in range(month_span + 1)
    }
    return int(not trip_months.isdisjoint(season_mths))

# Add one 0/1 indicator column per season, named after the season itself
for season, months in season_months.items():
    travel_pro[season] = travel_pro.apply(
        lambda trip, season_mths=months: overlaps_season(
            trip['Departure Date'],
            trip['Return Date'],
            season_mths,
        ),
        axis=1,
    )

# Show the first rows with the new season columns
travel_pro.head(20)

Unnamed: 0,Trip ID,User ID,Destination,Departure Date,Return Date,Travel Type,Transportation Mode,Hotel Rating,Total Cost ($),Customer_Satisfaction,Winter,Spring,Summer,Autumn
0,TRIP001,USER6303,Sydney,2024-07-28,2024-09-14,Cultural,Bus,4,2297.38,1,0,0,1,1
1,TRIP002,USER1398,Tokyo,2025-01-04,2025-02-20,Leisure,Flight,3,4171.03,1,1,0,0,0
2,TRIP003,USER8452,New York,2024-09-14,2025-03-06,Business,Cruise,4,695.11,3,1,1,0,1
3,TRIP004,USER5289,Sydney,2024-07-06,2024-08-14,Adventure,Flight,2,1371.81,3,0,0,1,0
4,TRIP005,USER9783,New York,2024-08-26,2024-11-25,Cultural,Bus,2,691.52,7,0,0,1,1
5,TRIP006,USER1457,Sydney,2024-04-27,2024-08-11,Beach,Train,2,2651.58,9,0,1,1,0
6,TRIP007,USER4699,Bangkok,2024-04-22,2024-11-09,Family,Train,1,4744.11,10,0,1,1,1
7,TRIP008,USER8584,Paris,2025-02-17,2025-04-10,Beach,Car,1,2926.97,3,1,1,0,0
8,TRIP009,USER5911,Dubai,2024-12-28,2025-02-03,Adventure,Train,3,2872.18,1,1,0,0,0
9,TRIP010,USER4150,Paris,2025-02-01,2025-02-23,Business,Flight,2,3524.44,4,1,0,0,0


In [None]:
# Build a combined-season label for each trip, then one-hot encode that label
def season_combinations(season_row):
    """Build a label naming every season flagged on the given row.

    For each key of ``season_months`` whose value in ``season_row`` equals 1,
    the lower-cased season name is collected; the names are then sorted
    alphabetically and joined with '-'. A row with no flagged season yields
    an empty string.
    """
    flagged = []
    for season_name in season_months:
        if season_row[season_name] == 1:
            flagged.append(season_name.lower())
    flagged.sort()
    return '-'.join(flagged)

# Label every trip with the combination of seasons it touches
travel_pro['season_combination'] = travel_pro.apply(season_combinations, axis=1)

# One integer 0/1 column per distinct combination label, prefixed with "sc_"
season_combination_dummies = pd.get_dummies(
    travel_pro['season_combination'],
    prefix='sc',
    dtype=int,
)

# Append the indicator columns to the right of the existing columns
travel_pro_2 = pd.concat(
    [travel_pro, season_combination_dummies],
    axis='columns',
)

travel_pro_2.head(10)

Unnamed: 0,Trip ID,User ID,Destination,Departure Date,Return Date,Travel Type,Transportation Mode,Hotel Rating,Total Cost ($),Customer_Satisfaction,...,sc_autumn-spring-summer-winter,sc_autumn-spring-winter,sc_autumn-summer,sc_autumn-summer-winter,sc_autumn-winter,sc_spring,sc_spring-summer,sc_spring-winter,sc_summer,sc_winter
0,TRIP001,USER6303,Sydney,2024-07-28,2024-09-14,Cultural,Bus,4,2297.38,1,...,0,0,1,0,0,0,0,0,0,0
1,TRIP002,USER1398,Tokyo,2025-01-04,2025-02-20,Leisure,Flight,3,4171.03,1,...,0,0,0,0,0,0,0,0,0,1
2,TRIP003,USER8452,New York,2024-09-14,2025-03-06,Business,Cruise,4,695.11,3,...,0,1,0,0,0,0,0,0,0,0
3,TRIP004,USER5289,Sydney,2024-07-06,2024-08-14,Adventure,Flight,2,1371.81,3,...,0,0,0,0,0,0,0,0,1,0
4,TRIP005,USER9783,New York,2024-08-26,2024-11-25,Cultural,Bus,2,691.52,7,...,0,0,1,0,0,0,0,0,0,0
5,TRIP006,USER1457,Sydney,2024-04-27,2024-08-11,Beach,Train,2,2651.58,9,...,0,0,0,0,0,0,1,0,0,0
6,TRIP007,USER4699,Bangkok,2024-04-22,2024-11-09,Family,Train,1,4744.11,10,...,0,0,0,0,0,0,0,0,0,0
7,TRIP008,USER8584,Paris,2025-02-17,2025-04-10,Beach,Car,1,2926.97,3,...,0,0,0,0,0,0,0,1,0,0
8,TRIP009,USER5911,Dubai,2024-12-28,2025-02-03,Adventure,Train,3,2872.18,1,...,0,0,0,0,0,0,0,0,0,1
9,TRIP010,USER4150,Paris,2025-02-01,2025-02-23,Business,Flight,2,3524.44,4,...,0,0,0,0,0,0,0,0,0,1


In [None]:
# Add a binary satisfaction column: scores 0-4 -> 0 (not satisfied), scores 5-10 -> 1 (satisfied)
def customer_sat_summary(row):
    """Collapse a row's satisfaction score into a binary flag.

    Returns 0 when ``row["Customer_Satisfaction"]`` is below 5 and 1 for
    every other value.
    """
    return 0 if row["Customer_Satisfaction"] < 5 else 1

# Binary satisfaction flag: 0 for scores below 5, 1 otherwise. This is one
# vectorised comparison on the score column instead of a Python call per row.
# Negating "< 5" (rather than testing ">= 5") keeps the result identical to
# customer_sat_summary, including NaN scores, which map to 1.
travel_pro_2['cust_sat_summary'] = (
    ~(travel_pro_2['Customer_Satisfaction'] < 5)
).astype(int)

travel_pro_2.head(10)

Unnamed: 0,Trip ID,User ID,Destination,Departure Date,Return Date,Travel Type,Transportation Mode,Hotel Rating,Total Cost ($),Customer_Satisfaction,...,sc_autumn-summer-winter,sc_autumn-winter,sc_spring,sc_spring-summer,sc_spring-winter,sc_summer,sc_winter,cs_summaries,cust_sat_summaries,cust_sat_summary
0,TRIP001,USER6303,Sydney,2024-07-28,2024-09-14,Cultural,Bus,4,2297.38,1,...,0,0,0,0,0,0,0,0,0,0
1,TRIP002,USER1398,Tokyo,2025-01-04,2025-02-20,Leisure,Flight,3,4171.03,1,...,0,0,0,0,0,0,1,0,0,0
2,TRIP003,USER8452,New York,2024-09-14,2025-03-06,Business,Cruise,4,695.11,3,...,0,0,0,0,0,0,0,0,0,0
3,TRIP004,USER5289,Sydney,2024-07-06,2024-08-14,Adventure,Flight,2,1371.81,3,...,0,0,0,0,0,1,0,0,0,0
4,TRIP005,USER9783,New York,2024-08-26,2024-11-25,Cultural,Bus,2,691.52,7,...,0,0,0,0,0,0,0,1,1,1
5,TRIP006,USER1457,Sydney,2024-04-27,2024-08-11,Beach,Train,2,2651.58,9,...,0,0,0,1,0,0,0,1,1,1
6,TRIP007,USER4699,Bangkok,2024-04-22,2024-11-09,Family,Train,1,4744.11,10,...,0,0,0,0,0,0,0,1,1,1
7,TRIP008,USER8584,Paris,2025-02-17,2025-04-10,Beach,Car,1,2926.97,3,...,0,0,0,0,1,0,0,0,0,0
8,TRIP009,USER5911,Dubai,2024-12-28,2025-02-03,Adventure,Train,3,2872.18,1,...,0,0,0,0,0,0,1,0,0,0
9,TRIP010,USER4150,Paris,2025-02-01,2025-02-23,Business,Flight,2,3524.44,4,...,0,0,0,0,0,0,1,0,0,0
