# 📊 BOKEH - Visualisasi Interaktif Developer Survey 2023 - Visualisasi Data

## 📄 Informasi Kelompok

**Nama Anggota Kelompok:**
- Okky Rangga Pratama - NIM: 1203220011
- Bagus Christiannanta - NIM: 1203220161  
- Dimas Arifil Azizi - NIM: 1203220097

**Fakultas:** Informatika  
**Program Studi:** Informatika  
**Kelas:** IF-02-01  
**Angkatan:** 2022  
  
**Sumber Dataset:** [Stack Overflow Developer Survey](https://survey.stackoverflow.co/)

---

### Impor library, load dan preprocessing data

In [1]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Tabs, TabPanel, NumeralTickFormatter
from collections import Counter

output_notebook()

df = pd.read_csv('survey_results_public_2023.csv')
df = df[df['ConvertedCompYearly'] < 500000]
df = df[df['YearsCodePro'].apply(lambda x: str(x).isdigit())]
df['YearsCodePro'] = df['YearsCodePro'].astype(int)
df = df[['Country', 'YearsCodePro', 'RemoteWork', 'LanguageHaveWorkedWith', 'DevType', 'ConvertedCompYearly']].dropna()

bins = [0, 5, 10, 15, 20, 25, 30, 50]
labels = ['0-5', '6-10', '11-15', '16-20', '21-25', '26-30', '30+']
df['exp_group'] = pd.cut(df['YearsCodePro'], bins=bins, labels=labels, right=True)

top_countries = [
    "United States of America", "Germany", "Canada",
    "France", "United Kingdom of Great Britain and Northern Ireland", "India"
]

default_devtype = "Semua"

def filter_by_devtype(data, devtype):
    if devtype == "Semua":
        return data
    return data[data['DevType'].str.contains(devtype, na=False)]


### Function Visualisasi 1

In [2]:
def make_remote_tabs():
    tabs = []
    for country in top_countries:
        data = filter_by_devtype(df[df["Country"] == country], default_devtype)
        counts = data["RemoteWork"].value_counts()
        src = ColumnDataSource(data=dict(x=counts.index.tolist(), y=counts.values))
        p = figure(height=400, width=700, title=f"Preferensi Kerja di {country}", x_range=counts.index.tolist())
        p.vbar(x='x', top='y', source=src, width=0.5)
        p.add_tools(HoverTool(tooltips=[("Mode", "@x"), ("Jumlah", "@y")]))
        tabs.append(TabPanel(child=p, title=country))
    return Tabs(tabs=tabs)


### Function Visualisasi 2

In [3]:
def make_language_tabs(pref_value):
    tabs = []
    for country in top_countries:
        data = filter_by_devtype(df[(df["Country"] == country) & (df["RemoteWork"] == pref_value)], default_devtype)
        langs = data["LanguageHaveWorkedWith"].str.split(";")
        flat = [l.strip() for sublist in langs.dropna() for l in sublist]
        counts = Counter(flat)

        if not counts:
            p = figure(height=400, width=700, title=f"Tidak ada data untuk {{country}} ({{pref_value}})")
            tabs.append(TabPanel(child=p, title=country))
            continue

        top_langs = dict(sorted(counts.items(), key=lambda item: item[1], reverse=True)[:10])
        src = ColumnDataSource(data=dict(x=list(top_langs.keys()), y=list(top_langs.values())))
        p = figure(height=400, width=700, title=f"10 Bahasa Terpopuler di {{country}} ({{pref_value}})", x_range=list(top_langs.keys()))
        p.vbar(x='x', top='y', source=src, width=0.5)
        p.xaxis.major_label_orientation = 1.2
        p.add_tools(HoverTool(tooltips=[("Bahasa", "@x"), ("Jumlah", "@y")]))
        tabs.append(TabPanel(child=p, title=country))
    return Tabs(tabs=tabs)


### Function Visualisasi 3

In [4]:
def make_scatter_tabs():
    tabs = []
    for country in top_countries:
        data = filter_by_devtype(df[df["Country"] == country], default_devtype)
        sample = data.sample(min(1000, len(data)), random_state=42)
        src = ColumnDataSource(sample[["YearsCodePro", "ConvertedCompYearly"]])
        p = figure(height=400, width=700, title=f"Gaji vs Pengalaman di {country}", 
                   x_axis_label="Tahun Pengalaman", y_axis_label="Gaji")
        p.circle('YearsCodePro', 'ConvertedCompYearly', source=src, size=5, alpha=0.3)
        p.add_tools(HoverTool(tooltips=[("Pengalaman", "@YearsCodePro tahun"), ("Gaji", "@ConvertedCompYearly{$0,0}")]))
        tabs.append(TabPanel(child=p, title=country))
    return Tabs(tabs=tabs)


### Function Visualisasi 4

In [5]:
def make_lineplot_tabs():
    tabs = []
    for country in top_countries:
        df_country = df[df['Country'] == country]
        if df_country.empty:
            continue

        pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
        pivot_pct = pivot.div(pivot.sum(axis=1), axis=0).reset_index()

        data = dict(
            exp_group=pivot_pct['exp_group'].astype(str).tolist(),
            hybrid=pivot_pct.get("Hybrid (some remote, some in-person)", [0]*len(pivot_pct)).tolist(),
            in_person=pivot_pct.get("In-person", [0]*len(pivot_pct)).tolist(),
            remote=pivot_pct.get("Remote", [0]*len(pivot_pct)).tolist()
        )
        source = ColumnDataSource(data=data)

        p = figure(title=f"Preferensi Kerja vs Pengalaman ({country})",
                   x_range=data['exp_group'], height=400, width=800,
                   x_axis_label="Kelompok Tahun Pengalaman", y_axis_label="Proporsi")

        p.line(x='exp_group', y='hybrid', source=source, line_width=2, color='orange', legend_label='Hybrid')
        p.line(x='exp_group', y='in_person', source=source, line_width=2, color='red', legend_label='In-person')
        p.line(x='exp_group', y='remote', source=source, line_width=2, color='green', legend_label='Remote')

        p.circle(x='exp_group', y='hybrid', source=source, size=6, color='orange')
        p.circle(x='exp_group', y='in_person', source=source, size=6, color='red')
        p.circle(x='exp_group', y='remote', source=source, size=6, color='green')

        hover = HoverTool(tooltips=[
            ("Tahun Pengalaman", "@exp_group"),
            ("Remote", "@remote{0.0%}"),
            ("Hybrid", "@hybrid{0.0%}"),
            ("In-person", "@in_person{0.0%}")
        ])
        p.add_tools(hover)
        p.yaxis.formatter = NumeralTickFormatter(format="0%")
        p.legend.location = "top_left"
        p.legend.click_policy = "hide"

        tab = TabPanel(child=p, title=country)
        tabs.append(tab)

    return Tabs(tabs=tabs)

## 📌 Visualisasi 1: Jumlah Programmer Berdasarkan Preferensi Kerja

In [6]:
remote_tabs = make_remote_tabs()
show(remote_tabs)

## 📌 Visualisasi 2: Bahasa Pemrograman Terpopuler Berdasarkan Preferensi Kerja

In [7]:
lang_remote_tabs = make_language_tabs('Remote')
show(lang_remote_tabs)

In [8]:
lang_hybrid_tabs = make_language_tabs('Hybrid (some remote, some in-person)')
show(lang_hybrid_tabs)

In [9]:
lang_inperson_tabs = make_language_tabs('In-person')
show(lang_inperson_tabs)

## 📌 Visualisasi 3: Keterkaitan Gaji Dengan Pengalaman Kerja Programmer

In [10]:
scatter_tabs = make_scatter_tabs()
show(scatter_tabs)



## 📌 Visualisasi 4: Preferensi Kerja Berdasarkan Tahun Pengalaman

In [11]:
lineplot_tabs = make_lineplot_tabs()
show(lineplot_tabs)


  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
  pivot = df_country.pivot_table(index='exp_group', columns='RemoteWork', aggfunc='size', fill_value=0)
