IMPORT REQUIRED LIBRARIES

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

SUPPRESS WARNINGS

In [20]:
# REMOVE WARNINGS
import warnings
warnings.filterwarnings('ignore')

READ THE DATA USING PANDAS

In [4]:
# Load the BMW sales CSV (path is relative to the working directory)
# and print a plain-text preview of the first five rows.
df = pd.read_csv("BMW_Car_Sales_Classification.csv")
print(df.head(n=5))

      Model  Year         Region  Color Fuel_Type Transmission  Engine_Size_L  \
0  5 Series  2016           Asia    Red    Petrol       Manual            3.5   
1        i8  2013  North America    Red    Hybrid    Automatic            1.6   
2  5 Series  2022  North America   Blue    Petrol    Automatic            4.5   
3        X3  2024    Middle East   Blue    Petrol    Automatic            1.7   
4  7 Series  2020  South America  Black    Diesel       Manual            2.1   

   Mileage_KM  Price_USD  Sales_Volume Sales_Classification  
0      151748      98740          8300                 High  
1      121671      79219          3428                  Low  
2       10991     113265          6994                  Low  
3       27255      60971          4047                  Low  
4      122131      49898          3080                  Low  


In [5]:
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own; wrapping it in print() appends a stray
# "None" line after the report.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Model                 50000 non-null  object 
 1   Year                  50000 non-null  int64  
 2   Region                50000 non-null  object 
 3   Color                 50000 non-null  object 
 4   Fuel_Type             50000 non-null  object 
 5   Transmission          50000 non-null  object 
 6   Engine_Size_L         50000 non-null  float64
 7   Mileage_KM            50000 non-null  int64  
 8   Price_USD             50000 non-null  int64  
 9   Sales_Volume          50000 non-null  int64  
 10  Sales_Classification  50000 non-null  object 
dtypes: float64(1), int64(4), object(6)
memory usage: 4.2+ MB
None


In [6]:
# Sunburst nested as fuel type -> transmission -> colour, with sector
# sizes driven by the Price_USD column.
fig = px.sunburst(
    df,
    path=["Fuel_Type", "Transmission", "Color"],
    values="Price_USD",
    title="Fuel Type and Transmission",
)
fig.show()

In [7]:
# Mean mileage for each distinct engine size.
df_mean = df.groupby("Engine_Size_L")["Mileage_KM"].mean().reset_index()

# The bars show an average of Mileage_KM, so the title and axis labels say so
# and give the units (km, litres) instead of "per km" / "per L".
fig = px.bar(
    df_mean,
    x="Engine_Size_L",
    y="Mileage_KM",
    title="AVERAGE MILEAGE VS ENGINE SIZE",
    labels={
        "Mileage_KM": "Average Mileage (km)",
        "Engine_Size_L": "Engine Size (L)",
    },
)
fig.show()

In [8]:
# Keep a reproducible 10% random sample (fixed seed) and renumber the rows.
# NOTE: this rebinds `df`, so every cell below works on the sample, and
# re-running this cell without reloading the CSV samples the sample again.
df = (
    df
    .sample(frac=0.1, random_state=10)
    .reset_index(drop=True)
)
print(df.shape)

(5000, 11)


In [9]:
# Preview the first ten rows of the sampled frame.
df.head(n=10)

Unnamed: 0,Model,Year,Region,Color,Fuel_Type,Transmission,Engine_Size_L,Mileage_KM,Price_USD,Sales_Volume,Sales_Classification
0,7 Series,2012,Africa,Silver,Petrol,Automatic,2.8,166334,113412,8298,High
1,M3,2016,Europe,Blue,Petrol,Automatic,3.7,77039,118570,941,Low
2,M3,2010,North America,Blue,Petrol,Manual,3.3,55432,111237,4339,Low
3,M5,2024,North America,Black,Petrol,Manual,3.1,101064,96471,962,Low
4,M3,2021,Asia,Silver,Petrol,Automatic,3.4,128077,109564,4924,Low
5,i8,2015,South America,White,Diesel,Manual,3.0,180573,43925,9293,High
6,X6,2014,Asia,Blue,Hybrid,Automatic,3.6,23068,98979,3672,Low
7,7 Series,2024,Africa,Red,Petrol,Automatic,4.7,8413,86558,9109,High
8,M5,2012,Middle East,White,Electric,Automatic,2.7,135948,80741,1383,Low
9,i8,2020,Asia,Silver,Hybrid,Manual,3.5,80738,51179,4950,Low


In [10]:
# Histogram of listing prices, with one colour per model.
fig = px.histogram(
    df,
    x="Price_USD",
    color="Model",
    nbins=200,
    title="PRICE DISTRIBUTION IN $",
)
fig.show()

In [11]:
# Number of rows per model, as a two-column frame: Model, Count.
df_model_count = (
    df["Model"]
    .value_counts()
    .rename_axis("Model")
    .reset_index(name="Count")
)

# Bar chart of the counts, with each bar annotated and the x tick labels
# rotated by -45 degrees.
fig = px.bar(
    df_model_count,
    x="Model",
    y="Count",
    color="Model",
    text="Count",
    title="Number of Cars per BMW Model",
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [12]:
# The five models with the most rows in the frame.
top_models = df["Model"].value_counts().nlargest(5).index
# Keep only the rows belonging to those models.
df_top_models = df.loc[df["Model"].isin(top_models)]

# Price histogram for those models; bars are semi-transparent.
fig = px.histogram(
    df_top_models,
    x="Price_USD",
    color="Model",
    nbins=200,
    opacity=0.7,
    title="Price Distribution in USD (Top 5 BMW Models)",
)
fig.show()

In [13]:
# Total sales volume per model, highest first, limited to ten rows.
df_model_sales = (
    df.groupby("Model")["Sales_Volume"]
    .sum()
    .reset_index()
    .sort_values(by="Sales_Volume", ascending=False)
    .head(10)
)

fig = px.bar(
    df_model_sales,
    x="Model",
    y="Sales_Volume",
    color="Model",
    text="Sales_Volume",
    title="Top 10 Best-Selling BMW Models",
)
fig.show()

In [15]:
# PARALLEL COORDINATES PLOT
# Lines are coloured by Price_USD. No `dimensions` argument is passed,
# so the choice of axes is left to Plotly Express.
fig = px.parallel_coordinates(
    df,
    color="Price_USD",
    title="PARALLEL PLOT",
)
fig.show()

In [21]:
# TOP SELLING COLORS
# Total sales volume per colour, highest first, limited to ten rows.
df_colors = df.groupby("Color")["Sales_Volume"].sum().reset_index()
df_colors = df_colors.sort_values(by="Sales_Volume", ascending=False).head(10)

# The title names colours: this chart was copied from the model chart and
# previously still said "Models".
fig = px.bar(
    df_colors,
    x="Color",
    y="Sales_Volume",
    title="Top 10 Best-Selling BMW Colors",
    text="Sales_Volume",
    color="Color",
)
fig.show()

In [19]:
# WHICH REGION SELLS THE MOST BMWs
# Total sales volume per region, highest first.
df_region = (
    df.groupby("Region")["Sales_Volume"]
    .sum()
    .reset_index()
    .sort_values(by="Sales_Volume", ascending=False)
)

# hole=0.4 turns the pie into a donut.
fig = px.pie(
    df_region,
    names="Region",
    values="Sales_Volume",
    hole=0.4,
    title="BMW Sales Distribution by Region",
)
fig.show()

In [17]:
# Total sales volume per region.
df_region = df.groupby("Region")["Sales_Volume"].sum().reset_index()

# The data only has region-level totals, so each region is drawn on one
# stand-in country. Regions missing from this mapping get a NaN "Country".
region_to_country = {
    "Asia": "India",
    "Europe": "Germany",
    "North America": "United States",
    "South America": "Brazil",
    "Middle East": "United Arab Emirates",
    "Africa": "South Africa",
    "Oceania": "Australia"
}
df_region["Country"] = df_region["Region"].map(region_to_country)

# The hover headline is the region, not the stand-in country: the value shown
# is the whole region's total, so labelling it with one country misattributes it.
fig = px.choropleth(
    df_region,
    locations="Country",
    locationmode='country names',
    color="Sales_Volume",
    hover_name="Region",
    color_continuous_scale=px.colors.sequential.Plasma,
    title="BMW Sales Volume by Region"
)
fig.update_layout(
    geo=dict(showframe=False, showcoastlines=True, projection_type='equirectangular'),
    coloraxis_colorbar=dict(title="Sales Volume")
)
fig.show()

In [18]:
# Average price by fuel type
# Uses the mean, as the heading says. The previous sum grew with the number
# of rows per fuel type, so it did not show a typical price.
df_price = df.groupby("Fuel_Type")["Price_USD"].mean().reset_index()
# drop=True stops the old row labels being added as an extra "index" column.
df_price = df_price.sort_values(by="Price_USD", ascending=False).reset_index(drop=True)
fig = px.bar(
    df_price,
    x="Fuel_Type",
    y="Price_USD",
    color="Price_USD",
    title="Average Price by Fuel Type",
    labels={"Price_USD": "Average Price (USD)", "Fuel_Type": "Fuel Type"},
)
fig.show()