# 🏎️ Elite Sports Cars Advanced Analysis

This notebook contains advanced analysis of the Elite Sports Cars dataset, including:
- Statistical Analysis
- Feature Engineering
- Market Segmentation
- Price Prediction Model
- Performance Metrics

In [None]:
# Import required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Set style
# matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8' and
# later releases removed the old alias, so a hard-coded 'seaborn' raises OSError
# on current installs. Use the new name when this matplotlib provides it and
# fall back to the legacy name otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')

In [None]:
# Read the Elite Sports Cars CSV into `df`; the relative path is resolved
# against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='../data/Elite Sports Cars in Data.csv')

# Show the first five rows as the cell output
df.head(n=5)

## 1. Statistical Analysis

In [None]:
# Report the frame's dimensions as a (rows, columns) tuple
print("Dataset Shape:", (len(df.index), len(df.columns)))

# Header line; the describe() table below is rendered as the cell output
print("\nBasic Statistics:")
df.describe()

## 2. Feature Engineering

In [None]:
# Derived ratio columns, each added to `df` in place.

# Horsepower divided by Weight
df['Power_to_Weight'] = df['Horsepower'].div(df['Weight'])

# Price divided by Horsepower
df['Price_per_HP'] = df['Price'].div(df['Horsepower'])

# (Horsepower + Top_Speed) divided by Price, then scaled by 1000
df['Performance_Score'] = (
    df['Horsepower']
    .add(df['Top_Speed'])
    .div(df['Price'])
    .mul(1000)
)

## 3. Market Segmentation

In [None]:
# Perform market segmentation: cluster the cars on price and performance and
# store the resulting cluster id for every row in df['Segment'].
# (KMeans is imported in the notebook's import cell.)

# Select features for clustering
features = ['Price', 'Horsepower', 'Top_Speed']
X = df[features]

# Scale the features. KMeans works on Euclidean distances, so without
# standardisation the column with the largest numeric range would dominate.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply KMeans clustering.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (and emits a FutureWarning on the releases in between), so leaving it
# implicit makes the segments depend on the installed version.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
df['Segment'] = kmeans.fit_predict(X_scaled)

# Visualize segments.
# Cluster ids are labels, not magnitudes. Plotting from a copy whose 'Segment'
# column is cast to str makes plotly assign one discrete colour per segment
# instead of a continuous colour bar; df['Segment'] itself stays integer.
fig = px.scatter_3d(df.assign(Segment=df['Segment'].astype(str)),
                    x='Price', y='Horsepower', z='Top_Speed',
                    color='Segment',
                    category_orders={'Segment': [str(i) for i in range(kmeans.n_clusters)]},
                    hover_data=['Brand', 'Model'])
fig.show()

## 4. Price Prediction Model

In [None]:
# Prepare data for price prediction: predict 'Price' from five numeric specs.
features = ['Horsepower', 'Torque', 'Engine_Size', 'Top_Speed', 'Acceleration_0_100']
X = df[features]
y = df['Price']

# Split the data: 20% of the rows are held out for evaluation; the fixed seed
# keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split. Previously X_test / y_test were created but
# never used, so nothing showed how well the model predicts unseen cars.
y_pred = model.predict(X_test)
test_r2 = model.score(X_test, y_test)        # coefficient of determination (R^2)
test_mae = np.mean(np.abs(y_test - y_pred))  # mean absolute error, in Price units
print(f"Test R^2: {test_r2:.3f}")
print(f"Test MAE: {test_mae:,.2f}")

# Feature importance, sorted so the most influential feature is plotted first
importance = pd.DataFrame({
    'Feature': features,
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(data=importance, x='Importance', y='Feature')
plt.title('Feature Importance for Price Prediction')
plt.show()