# Aircraft Price Analysis & Prediction Dataset

## Frame the Problem and look at the big picture

### About Dataset

A dataset for aircraft price prediction

model_name: Name of the aircraft model.

engine_type: Type of the engine.

engine_power: Power of the engine. (hp or lbs)

max_speed: Maximum speed of the aircraft. (Knots)

cruise_speed: Cruise speed of the aircraft. (Knots)

stall_speed: Minimum speed of the aircraft to prevent stalling. (Knots)

fuel_tank: Fuel tank capacity of the aircraft. (gal)

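all_eng_roc: All Engine Rate of Climb. The rate at which the aircraft gains altitude with all engines operating at full power. (the source lists the unit as feet; rate of climb is normally given in feet per minute)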

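out_eng_roc: Out Engine Rate of Climb. The rate at which the aircraft gains altitude with one engine out of operation. (the source lists the unit as feet; rate of climb is normally given in feet per minute)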

takeoff_distance: The minimum distance required for an aircraft to take off. (feet)

landing_distance: The minimum distance required for an aircraft to land. (feet)

empty_weight: Empty weight of the aircraft. (lbs)

length: Length of the aircraft. (inch)

wing_span: Wing span of the aircraft. (inch)

range: Range of the aircraft. (nmi)

price: Price of the aircraft.

## Import Libraries, Get Data and Split Data 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error
import joblib

In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
aircraft = pd.read_csv('aircraft_price.csv')

In [None]:
# Preview the first five rows.
aircraft.head()

## Explore the Data to Get Insights

In [None]:
# Total number of cells (rows x columns).
aircraft.size

In [None]:
# (number of rows, number of columns).
aircraft.shape

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
aircraft.describe()

In [None]:
# Column dtypes and non-null counts; useful for spotting missing values.
aircraft.info()

In [None]:
# Frequency of each distinct model name.
aircraft['model_name'].value_counts()

In [None]:
# Frequency of each distinct engine type.
aircraft['engine_type'].value_counts()

In [None]:
# Separate the target column from the predictors.
y = aircraft['price']
X = aircraft.drop(columns = 'price')

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

In [None]:
# Work on a fresh frame that rejoins the training features with their target,
# so exploration never modifies the training split itself.
explore_df = X_train_full.assign(price = y_train_full)

In [None]:
## Visualize Numerical Features

# Use the same 14pt size for every text element of the figures.
plt.rc('font', size = 14)
plt.rc('axes', titlesize = 14, labelsize = 14)
plt.rc('legend', fontsize = 14)
plt.rc(('xtick', 'ytick'), labelsize = 14)

# One 50-bin histogram per numeric column, laid out on a single 12x12 figure.
explore_df.hist(figsize = (12, 12), bins = 50)
plt.show()

In [None]:
## Visualize Categorical Feature

plt.figure(figsize = (6, 4))
# Bars are ordered by the index of value_counts(), i.e. most frequent engine type first.
sns.countplot(
    data = explore_df,
    x = 'engine_type',
    order = explore_df['engine_type'].value_counts().index,
)
plt.title('Distribution of Engine Type')
plt.show()

In [None]:
## Looking for Correlations

# Pairwise correlations between the numeric columns only (text columns are skipped).
corr_matrix = explore_df.corr(numeric_only = True)

# Annotated heatmap of the full correlation matrix.
plt.figure(figsize = (14, 8))
sns.heatmap(corr_matrix, annot = True, cmap = 'YlGnBu')
plt.show()

In [None]:
# Correlation of every numeric column with price, strongest positive first.
corr_matrix['price'].sort_values(ascending = False)

In [None]:
from pandas.plotting import scatter_matrix

# Pairwise scatter plots for a handful of features.
# NOTE(review): 'price' is not in this list, so the grid only shows
# feature-to-feature relationships; add it if the goal is to inspect how these
# features relate to the target.
attributes = ['cruise_speed', 'max_speed', 'length', 'stall_speed', 'takeoff_distance']
scatter_matrix(explore_df[attributes], figsize = (12, 8))
plt.show()

In [None]:
# Cruise speed against price; semi-transparent markers make dense regions stand out.
explore_df.plot.scatter(x = 'cruise_speed', y = 'price', alpha = 0.5, grid = True)
plt.show()

In [None]:
# List the column names available in the exploration frame.
explore_df.columns

In [None]:
## Compute Again Correlation Matrix
# NOTE(review): no visible cell adds or changes columns in explore_df before this
# point, so this reproduces the earlier matrix unless features are engineered in between.
corr_matrix = explore_df.corr(numeric_only = True)

In [None]:
# Correlation of every numeric column with price, strongest positive first.
corr_matrix['price'].sort_values(ascending = False)

In [None]:
from sklearn import set_config
# Show estimators as HTML diagrams when they are displayed in the notebook.
set_config(display = 'diagram')

In [None]:
# Numeric branch: fill missing values with the column median, then standardize
# each column with StandardScaler.
num_pipeline = make_pipeline(
    SimpleImputer(strategy = 'median'),
    StandardScaler()
)

In [None]:
# Display the numeric pipeline.
num_pipeline

In [None]:
# Categorical branch: fill missing values with the most frequent category, then
# one-hot encode. handle_unknown = 'ignore' makes categories unseen during fit
# encode as all zeros at transform time instead of raising an error.
cat_pipeline = make_pipeline(
    SimpleImputer(strategy = 'most_frequent'),
    OneHotEncoder(handle_unknown = 'ignore')
)

In [None]:
# Display the categorical pipeline.
cat_pipeline

In [None]:
# Route columns by dtype: numeric columns go through num_pipeline and object
# (string) columns through cat_pipeline. Columns matching neither selector are
# dropped, which is make_column_transformer's default for the remainder.
preprocessing = make_column_transformer(
    (num_pipeline, make_column_selector(dtype_include = np.number)),
    (cat_pipeline, make_column_selector(dtype_include = object)),
)

In [None]:
# X_train_full was built from `aircraft.drop('price', axis = 1)`, so it has no
# 'price' column and dropping it again would raise KeyError. Take plain copies
# of the training features and labels instead, so later steps cannot modify
# the original split.
# Note: this rebinds `aircraft` from the full raw dataset to the training features.
aircraft        = X_train_full.copy()
aircraft_prices = y_train_full.copy()