In [None]:
# Q1. Load the flight price dataset and examine its dimensions. How many rows and columns does the dataset have?

import pandas as pd

# Read the CSV into a DataFrame; the relative path is resolved against the working directory.
df = pd.read_csv("flight_price_data.csv")

# DataFrame.shape is a (row_count, column_count) tuple; pick each element out by position.
rows = df.shape[0]
cols = df.shape[1]
print(f"The dataset has {rows} rows and {cols} columns.")


In [None]:
# Q2. What is the distribution of flight prices in the dataset? Create a histogram to visualize the distribution.

import matplotlib.pyplot as plt

# Draw the histogram on an explicit Axes: 30 bins, black bar outlines, slightly transparent fill.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['Price'], bins=30, edgecolor='black', alpha=0.7)

# Title and axis labels so the figure can be read on its own.
ax.set_title('Distribution of Flight Prices')
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Q3. What is the range of prices in the dataset? What is the minimum and maximum price?

# Take one reference to the price column and read both extremes from it.
prices = df['Price']
min_price, max_price = prices.min(), prices.max()

# The range is the distance between the largest and the smallest price.
price_range = max_price - min_price

print(f"The minimum price is {min_price}, the maximum price is {max_price}, and the price range is {price_range}.")


In [None]:
# Q4. How does the price of flights vary by airline? Create a boxplot to compare the prices of different airlines.

import seaborn as sns

# One box per airline (x-axis) summarising that airline's prices (y-axis), drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='Airline', y='Price', data=df, ax=ax)
ax.set(title='Flight Price Distribution by Airline', xlabel='Airline', ylabel='Price')

# Tilt the airline labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


In [None]:
# Q5. Are there any outliers in the dataset? Identify any potential outliers using a boxplot and describe how they may impact your analysis.

# Create a boxplot to visually identify outliers (points drawn beyond the whiskers).
# The series is passed explicitly as `x=`: handing it over positionally is
# version-dependent in seaborn (a deprecation warning in some releases, interpreted
# as `data` in others, which puts the values on the y-axis). Naming the axis keeps
# the box horizontal so that it matches the 'Price' x-axis label set below.
plt.figure(figsize=(10, 6))
sns.boxplot(x=df['Price'])
plt.title('Boxplot of Flight Prices')
plt.xlabel('Price')
plt.show()

# Get the statistics to detect outliers: first/third quartile and interquartile range
Q1 = df['Price'].quantile(0.25)
Q3 = df['Price'].quantile(0.75)
IQR = Q3 - Q1

# Define outlier bounds: 1.5 * IQR below Q1 and above Q3
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Identify outliers: rows whose price falls outside [lower_bound, upper_bound].
# Extreme prices like these can distort mean-based summaries and correlation
# estimates, so keep them in mind when reading averages computed from 'Price'.
outliers = df[(df['Price'] < lower_bound) | (df['Price'] > upper_bound)]
print(f"Potential outliers:\n{outliers}")


In [None]:
# Q6. You are working for a travel agency, and your boss has asked you to analyze the Flight Price dataset to identify the peak travel season. What features would you analyze to identify the peak season, and how would you present your findings to your boss?

# Parse the 'Date' column as datetimes and store the month number in a new 'Month' column on df.
df['Month'] = pd.to_datetime(df['Date']).dt.month

# Mean price for every month that appears in the data.
avg_price_by_month = df.groupby('Month')['Price'].mean()

# Bar chart of the monthly means on an explicit Axes, with the month labels kept upright.
fig, ax = plt.subplots(figsize=(10, 6))
avg_price_by_month.plot.bar(ax=ax, color='skyblue', rot=0)
ax.set(title='Average Flight Price by Month', xlabel='Month', ylabel='Average Price')
plt.show()


In [None]:
# Q7. You are a data analyst for a flight booking website, and you have been asked to analyze the Flight Price dataset to identify any trends in flight prices. What features would you analyze to identify these trends, and what visualizations would you use to present your findings to your team?

# Store each row's weekday number (0-6, labelled Mon-Sun on the plot) in a new 'Day of Week' column on df.
flight_dates = pd.to_datetime(df['Date'])
df['Day of Week'] = flight_dates.dt.dayofweek

# Mean price for each weekday present in the data.
avg_price_by_day = df.groupby('Day of Week')['Price'].mean()

# Line chart of the weekday means with a marker at every point.
fig, ax = plt.subplots(figsize=(10, 6))
avg_price_by_day.plot.line(ax=ax, marker='o', color='green')
ax.set_title('Average Flight Price by Day of Week')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Average Price')

# Replace the numeric ticks 0..6 with weekday abbreviations.
ax.set_xticks(range(7))
ax.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
plt.show()


In [None]:
# Q8. You are a data scientist working for an airline company, and you have been asked to analyze the Flight Price dataset to identify the factors that affect flight prices. What features would you analyze to identify these factors, and how would you present your findings to the management team?

import seaborn as sns

# Pairwise correlations between price and the candidate factors.
# 'Month' and 'Day of Week' are added to df by the Q6 and Q7 cells, so those cells must run first.
factor_columns = ['Price', 'Duration', 'Distance', 'Month', 'Day of Week']
corr_matrix = df[factor_columns].corr()

# Annotated heatmap of the correlation matrix; each cell shows the coefficient to two decimals.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix for Factors Affecting Flight Prices')
plt.show()
