# BÀI TẬP ipywidgets

## Đề Bài

Luyện tập: áp dụng interactive widgets (ipywidgets) để phân tích khám phá dữ liệu (EDA) trên bộ dữ liệu chuyến bay.

## Script

### Imports

In [35]:
import pandas as pd
import ipywidgets as widgets
import IPython.display as disp
import matplotlib.pyplot as plt

### Column Name Constants

In [36]:
# Column names used to index the flights DataFrame loaded from the CSV.
# Keeping them in one place avoids repeating string literals in later cells.

# Date / clock components
CN_MINUTE = 'minute'
CN_HOUR = 'hour'
CN_DAY = 'day'
CN_MONTH = 'month'
CN_YEAR = 'year'
# Departure / arrival times and delays
CN_DEP_TIME = 'dep_time'
CN_DEP_DELAY = 'dep_delay'
CN_ARR_TIME = 'arr_time'
CN_ARR_DELAY = 'arr_delay'
# Flight identification
CN_CARRIER = 'carrier'
CN_TAILNUM = 'tailnum'
CN_FLIGHT = 'flight'
# Route
CN_ORIGIN = 'origin'
CN_DEST = 'dest'
# Flight measurements
CN_AIR_TIME = 'air_time'
CN_DISTANCE = 'distance'

### Helper Functions

In [37]:
def refactor(s):
    """Normalize a string Series: trim surrounding whitespace, then upper-case.

    Missing values are left as missing by the `.str` accessor.
    """
    trimmed = s.str.strip()
    return trimmed.str.upper()

In [38]:
def try_prs_int(s):
    """Parse a Series to nullable integers.

    Values that cannot be parsed as numbers become missing (`errors='coerce'`),
    and the result is cast to the nullable 'Int64' dtype so that missing
    entries can coexist with integers.
    """
    numeric = pd.to_numeric(s, errors='coerce')
    return numeric.astype('Int64')

In [39]:
def fmt_hdr(col_name):
    """Turn a snake_case column name into a title-cased label.

    Example: 'dep_delay' -> 'Dep Delay'.
    """
    spaced = ' '.join(col_name.split('_'))
    return spaced.title()

In [40]:
def fmt_hdrs(cols):
    """Vectorized header formatting for an object exposing the `.str` accessor.

    Underscores become spaces and each word is title-cased.
    """
    spaced = cols.str.replace('_', ' ')
    return spaced.str.title()

In [41]:
def grp_cnt(df, grp_col_name, col_name):
    """Count the non-missing values of `col_name` within each `grp_col_name` group.

    Returns a Series indexed by the group keys.
    """
    grouped = df.groupby(grp_col_name)
    return grouped[col_name].count()

In [42]:
def grp_mean(df, grp_col_name, col_name_1, col_name_2):
    """Average two columns per group and give the result display-ready headers.

    The frame is grouped by `grp_col_name`, the mean of `col_name_1` and
    `col_name_2` is taken per group, and both result columns are renamed
    through `fmt_hdr`.
    """
    value_cols = [col_name_1, col_name_2]
    pretty_names = {name: fmt_hdr(name) for name in value_cols}
    means = df.groupby(grp_col_name)[value_cols].mean()
    return means.rename(columns=pretty_names)

In [43]:
def vld_str_ser(s):
    """Return a boolean mask that is True where `s` holds a usable string.

    An entry is rejected when it is missing or equal to the empty string.
    """
    unusable = s.isna() | s.eq('')
    return ~unusable

### Processing

In [44]:
# Load the flights dataset; the path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv('nycflights.csv')

In [45]:
# Build one cleaned Series per column. Numeric columns go through
# try_prs_int (nullable Int64), text columns through refactor (trim + upper).
# `df` itself is not modified.

# Date / clock components
minute, hour, day, month, year = (
    try_prs_int(df[col])
    for col in (CN_MINUTE, CN_HOUR, CN_DAY, CN_MONTH, CN_YEAR)
)

# Departure / arrival times and delays
dep_time, dep_delay, arr_time, arr_delay = (
    try_prs_int(df[col])
    for col in (CN_DEP_TIME, CN_DEP_DELAY, CN_ARR_TIME, CN_ARR_DELAY)
)

# Remaining numeric columns
flight, air_time, distance = (
    try_prs_int(df[col])
    for col in (CN_FLIGHT, CN_AIR_TIME, CN_DISTANCE)
)

# Text columns
carrier, tailnum, origin, dest = (
    refactor(df[col])
    for col in (CN_CARRIER, CN_TAILNUM, CN_ORIGIN, CN_DEST)
)

In [46]:
# Request 1: number of delayed vs. on-time departures per origin airport.
# Keep only rows with a parseable departure delay and a non-empty origin.
# NOTE(review): the mask is built from the cleaned `dep_delay` / `origin`
# series, while the comparisons and grouping below read the raw columns of
# `df` - confirm the raw columns need no normalization.
df1 = df.loc[(dep_delay.notna() & vld_str_ser(origin))]
deld1 = df1[df1[CN_DEP_DELAY] > 0]
on_time1 = df1[df1[CN_DEP_DELAY] <= 0]
fmt_hdr_origin = fmt_hdr(CN_ORIGIN)

# Radio option -> (rows to count, chart title).
plot_specs1 = {
    'Delayed': (deld1, 'Số chuyến bay trễ của các sân bay'),
    'On-time': (on_time1, 'Số chuyến bay không trễ của các sân bay'),
}

deld_rb1 = widgets.RadioButtons(options=['Delayed', 'On-time'], description='Flight type:', value='Delayed')

op1 = widgets.Output()


def on_rdo_chg1(change):
    """Redraw the per-airport bar chart for the selected flight type.

    `change` is the mapping handed over by `observe`; only its 'new' entry
    (the selected option label) is read. An unknown label clears the output
    and draws nothing.
    """
    with op1:
        disp.clear_output(True)
        spec = plot_specs1.get(change['new'])
        if spec is None:
            return
        flts, title = spec
        cnts = grp_cnt(flts, CN_ORIGIN, CN_DEP_DELAY)
        cnts.index.name = fmt_hdr_origin
        cnts.plot.bar(rot=0)
        plt.ylabel('Số lượng')
        plt.title(title)
        plt.show()


deld_rb1.observe(on_rdo_chg1, names='value')

disp.display(deld_rb1)
disp.display(op1)

# `observe` only fires when the value changes, so render the chart for the
# initial selection explicitly; otherwise the output area stays empty until
# the user toggles the radio buttons.
on_rdo_chg1({'new': deld_rb1.value})

RadioButtons(description='Flight type:', options=('Delayed', 'On-time'), value='Delayed')

Output()

In [47]:
# Request 2: number of delayed vs. on-time departures per carrier.
# Keep only rows with a parseable departure delay and a non-empty carrier.
# NOTE(review): the mask is built from the cleaned `dep_delay` / `carrier`
# series, while the comparisons and grouping below read the raw columns of
# `df` - confirm the raw columns need no normalization.
df2 = df.loc[(dep_delay.notna() & vld_str_ser(carrier))]
deld2 = df2[df2[CN_DEP_DELAY] > 0]
on_time2 = df2[df2[CN_DEP_DELAY] <= 0]
fmt_hdr_cxr = fmt_hdr(CN_CARRIER)

# Radio option -> (rows to count, chart title).
plot_specs2 = {
    'Delayed': (deld2, 'Số chuyến bay trễ của các hãng hàng không'),
    'On-time': (on_time2, 'Số chuyến bay không trễ của các hãng hàng không'),
}

deld_rb2 = widgets.RadioButtons(options=['Delayed', 'On-time'], description='Flight type:', value='Delayed')

op2 = widgets.Output()


def on_rdo_chg2(change):
    """Redraw the per-carrier bar chart for the selected flight type.

    `change` is the mapping handed over by `observe`; only its 'new' entry
    (the selected option label) is read. An unknown label clears the output
    and draws nothing.
    """
    with op2:
        disp.clear_output(True)
        spec = plot_specs2.get(change['new'])
        if spec is None:
            return
        flts, title = spec
        cnts = grp_cnt(flts, CN_CARRIER, CN_DEP_DELAY)
        cnts.index.name = fmt_hdr_cxr
        cnts.plot.bar(rot=0)
        plt.ylabel('Số lượng')
        plt.title(title)
        plt.show()


deld_rb2.observe(on_rdo_chg2, names='value')

disp.display(deld_rb2)
disp.display(op2)

# `observe` only fires when the value changes, so render the chart for the
# initial selection explicitly; otherwise the output area stays empty until
# the user toggles the radio buttons.
on_rdo_chg2({'new': deld_rb2.value})

RadioButtons(description='Flight type:', options=('Delayed', 'On-time'), value='Delayed')

Output()

In [48]:
# Request 3: rows shared by the mean-delay charts (3.1 and 3.2).
# A row qualifies when both delays parsed and both labels are non-empty.
has_both_delays = dep_delay.notna() & arr_delay.notna()
has_both_labels = vld_str_ser(origin) & vld_str_ser(carrier)
df3 = df.loc[has_both_delays & has_both_labels]

In [49]:
# Request 3.1: mean departure / arrival delay per origin airport.
# grp_mean renames the value columns through fmt_hdr, so the result columns
# are 'Dep Delay' and 'Arr Delay' - the same labels the radio buttons offer.
mean_dlys_ori = grp_mean(df3, CN_ORIGIN, CN_DEP_DELAY, CN_ARR_DELAY)
mean_dlys_ori.index.name = fmt_hdr(CN_ORIGIN)

deld_rb31 = widgets.RadioButtons(options=['Dep Delay', 'Arr Delay'], description='Delay type:', value='Dep Delay')

op31 = widgets.Output()


def on_rdo_chg31(change):
    """Redraw the mean-delay chart per airport for the selected delay type.

    `change` is the mapping handed over by `observe`; only its 'new' entry
    is read and used directly as the column label of `mean_dlys_ori`. A label
    that is not a column clears the output and draws nothing.
    """
    with op31:
        disp.clear_output(True)
        dly_type = change['new']
        if dly_type not in mean_dlys_ori.columns:
            return
        # NOTE(review): this is a line plot over a categorical index, while
        # the other requests use bar charts - confirm which is intended.
        mean_dlys_ori[dly_type].plot(rot=0)
        # Include the delay type in the axis label; the title is the same
        # for both options, so the charts were otherwise indistinguishable.
        plt.ylabel(f'{dly_type} (minutes)')
        plt.title('Thời gian trễ trung bình của các sân bay')
        plt.show()


deld_rb31.observe(on_rdo_chg31, names='value')

disp.display(deld_rb31)
disp.display(op31)

# `observe` only fires when the value changes, so render the chart for the
# initial selection explicitly; otherwise the output area stays empty until
# the user toggles the radio buttons.
on_rdo_chg31({'new': deld_rb31.value})

RadioButtons(description='Delay type:', options=('Dep Delay', 'Arr Delay'), value='Dep Delay')

Output()

In [50]:
# Request 3.2: mean departure / arrival delay per carrier.
# grp_mean renames the value columns through fmt_hdr, so the result columns
# are 'Dep Delay' and 'Arr Delay' - the same labels the radio buttons offer.
mean_dlys_cxr = grp_mean(df3, CN_CARRIER, CN_DEP_DELAY, CN_ARR_DELAY)
mean_dlys_cxr.index.name = fmt_hdr(CN_CARRIER)

deld_rb32 = widgets.RadioButtons(options=['Dep Delay', 'Arr Delay'], description='Delay type:', value='Dep Delay')

op32 = widgets.Output()


def on_rdo_chg32(change):
    """Redraw the mean-delay chart per carrier for the selected delay type.

    `change` is the mapping handed over by `observe`; only its 'new' entry
    is read and used directly as the column label of `mean_dlys_cxr`. A label
    that is not a column clears the output and draws nothing.
    """
    with op32:
        disp.clear_output(True)
        dly_type = change['new']
        if dly_type not in mean_dlys_cxr.columns:
            return
        # NOTE(review): this is a line plot over a categorical index, while
        # the other requests use bar charts - confirm which is intended.
        mean_dlys_cxr[dly_type].plot(rot=0)
        # Include the delay type in the axis label; the title is the same
        # for both options, so the charts were otherwise indistinguishable.
        plt.ylabel(f'{dly_type} (minutes)')
        plt.title('Thời gian trễ trung bình của các hãng hàng không')
        plt.show()


deld_rb32.observe(on_rdo_chg32, names='value')

disp.display(deld_rb32)
disp.display(op32)

# `observe` only fires when the value changes, so render the chart for the
# initial selection explicitly; otherwise the output area stays empty until
# the user toggles the radio buttons.
on_rdo_chg32({'new': deld_rb32.value})

RadioButtons(description='Delay type:', options=('Dep Delay', 'Arr Delay'), value='Dep Delay')

Output()

In [51]:
# Request 4: total / mean / median flight distance per carrier.
# The comparison must be parenthesized: `&` binds tighter than `>`, so the
# unparenthesized form is parsed as
# `(distance.notna() & distance) > (0 & vld_str_ser(carrier))`
# instead of combining three independent conditions.
df4 = df.loc[(distance.notna() & (distance > 0) & vld_str_ser(carrier))]

# Group once and reuse the grouped column for all three statistics.
dist_by_cxr = df4.groupby(CN_CARRIER)[CN_DISTANCE]
fmt_hdr_cxr4 = fmt_hdr(CN_CARRIER)

tot_dist = dist_by_cxr.sum()
tot_dist.index.name = fmt_hdr_cxr4

mean_dist = dist_by_cxr.mean()
mean_dist.index.name = fmt_hdr_cxr4

med_dist = dist_by_cxr.median()
med_dist.index.name = fmt_hdr_cxr4

# Radio option -> precomputed statistic.
dist_stats4 = {
    'Sum': tot_dist,
    'Mean': mean_dist,
    'Median': med_dist,
}

deld_rb4 = widgets.RadioButtons(options=['Sum', 'Mean', 'Median'], description='Select:', value='Sum')

op4 = widgets.Output()


def on_rdo_chg4(change):
    """Redraw the per-carrier distance bar chart for the selected statistic.

    `change` is the mapping handed over by `observe`; only its 'new' entry
    (the selected option label) is read. An unknown label clears the output
    and draws nothing.
    """
    with op4:
        disp.clear_output(True)
        stat = change['new']  # not named `type`, to avoid shadowing the builtin
        ser = dist_stats4.get(stat)
        if ser is None:
            return
        ser.plot.bar(rot=0)
        plt.title(f'{stat} quãng đường bay của các hãng hàng không')
        if stat == 'Sum':
            # Show the totals as plain numbers rather than in offset/scientific notation.
            plt.ticklabel_format(style='plain', axis='y')
        plt.show()


deld_rb4.observe(on_rdo_chg4, names='value')

disp.display(deld_rb4)
disp.display(op4)

# `observe` only fires when the value changes, so render the chart for the
# initial selection explicitly; otherwise the output area stays empty until
# the user toggles the radio buttons.
on_rdo_chg4({'new': deld_rb4.value})

RadioButtons(description='Select:', options=('Sum', 'Mean', 'Median'), value='Sum')

Output()