Tugas Modul : Business Intelligence

Dataset : Auto mpg

In [None]:
#Import Library
import pandas as pd
import hvplot.pandas
import panel as pn
import zipfile
import os


In [None]:
# Load dataset: unpack the downloaded archive into a local folder
zip_path = "auto+mpg.zip"
extract_folder = "auto_mpg_data"

# ZipFile opens read-only by default; the context manager closes it afterwards.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_folder)

# Show the extracted file names as the cell output
os.listdir(extract_folder)

['Index', 'auto-mpg.names', 'auto-mpg.data-original', 'auto-mpg.data']

In [None]:
# Column labels for the data file, which is read without a header row of its own
# (they are handed to read_csv through `names`).
columns = (
    'mpg cylinders displacement horsepower weight '
    'acceleration model_year origin car_name'
).split()
# Location of the data file inside the extraction folder
data_path = os.path.join(extract_folder, "auto-mpg.data")

In [None]:
# Read the whitespace-separated data file; '?' entries are parsed as missing values.
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`, which is
# deprecated in recent pandas releases and emits a FutureWarning.
df = pd.read_csv(data_path, sep=r'\s+', names=columns, na_values='?')

  df = pd.read_csv(data_path, delim_whitespace=True, names=columns, na_values='?')


In [None]:
# Print the frame summary: column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    float64
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   car_name      398 non-null    object 
dtypes: float64(5), int64(3), object(1)
memory usage: 28.1+ KB


In [None]:
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,car_name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


# **Pertanyaan Analisis**
1.	Bagaimana distribusi nilai mpg di seluruh mobil?
2.	Apakah ada perbedaan rata-rata mpg berdasarkan jumlah silinder?
3.	Bagaimana hubungan antara berat dan mpg?
4.	Bagaimana tren rata-rata mpg dari tahun ke tahun?


## **Data Preprocessing**


In [None]:
# Drop every row that contains at least one missing value, modifying `df` in place
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Make sure horsepower is stored as float
df['horsepower'] = df['horsepower'].astype(float)

# Replace the numeric origin codes with readable labels
origin_labels = {1: 'USA', 2: 'Europe', 3: 'Japan'}
df['origin'] = df['origin'].replace(origin_labels)

# **Dashboard Visualisasi**

In [None]:
# Interactive widgets
# NOTE(review): none of the plot functions visible in this notebook read these
# widget values, so moving the sliders does not appear to change any plot — confirm.
cylinder_select = pn.widgets.IntSlider(
    name='Jumlah Silinder',
    value=4,
    start=3,
    end=8,
    step=1,
)
year_slider = pn.widgets.IntSlider(
    name='Tahun Model',
    value=75,
    start=70,
    end=82,
    step=1,
)

In [None]:
# Plot 1: mpg distribution
def plot_mpg_distribution():
    """Return a 20-bin histogram of the `mpg` column of the global `df`."""
    return df.hvplot.hist(
        y='mpg',
        title="Distribusi Nilai MPG",
        color='skyblue',
        bins=20,
    )


In [None]:
# Distribution of mpg values, rendered inline at a fixed 600x400 size
df.hvplot.hist(
    y='mpg',
    title="Distribusi Nilai MPG",
    color='skyblue',
    bins=20,
    height=400,
    width=600,
)


In [None]:
# Plot 2: average mpg per cylinder count
def plot_avg_mpg_by_cylinder():
    """Return a bar chart of the mean `mpg` for each `cylinders` value in the global `df`."""
    # as_index=False keeps `cylinders` as a regular column, like mean().reset_index()
    mean_by_cyl = df.groupby('cylinders', as_index=False)['mpg'].mean()
    return mean_by_cyl.hvplot.bar(
        x='cylinders',
        y='mpg',
        color='orange',
        title="Rata-Rata MPG berdasarkan Jumlah Silinder",
    )


In [None]:
# Mean mpg per cylinder count, with `cylinders` restored as a regular column
avg_mpg = (
    df.groupby('cylinders')['mpg']
    .mean()
    .reset_index()
)

# Bar chart of the averages at a fixed 600x400 size
avg_mpg.hvplot.bar(
    x='cylinders', y='mpg',
    color='orange',
    title="Rata-Rata MPG berdasarkan Jumlah Silinder",
    height=400, width=600,
)


In [None]:
# Plot 3: relationship between weight and mpg
def plot_weight_vs_mpg():
    """Return a scatter plot of `mpg` against `weight` from the global `df`.

    Points are coloured by `cylinders` using the viridis colormap, and
    `car_name` is added to the hover columns.
    """
    return df.hvplot.scatter(
        x='weight',
        y='mpg',
        title="Hubungan antara Berat Mobil dan MPG",
        hover_cols=['car_name'],
        color='cylinders',
        cmap='viridis',
    )


In [None]:
# Weight vs. mpg scatter, coloured by cylinder count, at a fixed 600x400 size
df.hvplot.scatter(
    x='weight', y='mpg',
    title="Hubungan antara Berat Mobil dan MPG",
    hover_cols=['car_name'],
    color='cylinders', cmap='viridis',
    height=400, width=600,
)


In [None]:
# Plot 4: trend of average mpg from year to year
def plot_mpg_trend():
    """Return a line chart of the mean `mpg` for each `model_year` in the global `df`."""
    mpg_by_year = df.groupby('model_year')['mpg']
    yearly_mean = mpg_by_year.mean().reset_index()
    return yearly_mean.hvplot.line(
        x='model_year',
        y='mpg',
        color='green',
        title="Tren Rata-Rata MPG per Tahun",
    )


In [None]:
# `avg_year` was previously only a local variable inside plot_mpg_trend(), so this
# cell raised NameError on a fresh kernel. Compute the per-year mean mpg here,
# mirroring how `avg_mpg` is built for the cylinder chart.
avg_year = df.groupby('model_year')['mpg'].mean().reset_index()

# Line chart of the yearly averages at a fixed 600x400 size
avg_year.hvplot.line(
    x='model_year',
    y='mpg',
    title="Tren Rata-Rata MPG per Tahun",
    color='green',
    width=600,
    height=400
)


# **Dashboard Kombinasi**

In [None]:
# Header text shown at the top of the dashboard
header = pn.pane.Markdown("## 🚗 Dashboard Analisis Auto MPG\nMenjawab pertanyaan eksplorasi data dengan visualisasi interaktif.")

# NOTE(review): the sliders are displayed, but none of the plot functions visible in
# this notebook read their values — confirm whether they should filter the plots.
controls = pn.Row(cylinder_select, year_slider)

# One tab per analysis question; each entry is (tab title, plot function).
# The functions are handed over uncalled, exactly as before.
tab_specs = [
    ("Distribusi MPG", plot_mpg_distribution),
    ("Rata-rata per Silinder", plot_avg_mpg_by_cylinder),
    ("Berat vs MPG", plot_weight_vs_mpg),
    ("Tren per Tahun", plot_mpg_trend),
]

dashboard = pn.Column(header, controls, pn.Tabs(*tab_specs))

# Mark the layout as servable; as the last expression it is also the cell output
dashboard.servable()