In [None]:
# Mount Google Drive at /content/drive (google.colab API) so that the cells
# below can read from and write to paths under that directory.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Load the two input tables from the top level of the mounted 'My Drive'
# folder: df1 receives the cancelled-flights CSV and df2 receives the
# cancellation-code lookup CSV (order of the tuple matters).
df1, df2 = map(
    pd.read_csv,
    (
        '/content/drive/My Drive/all-cancelled-flights.csv',
        '/content/drive/My Drive/cancellation-codes.csv',
    ),
)


In [19]:
import os

# Join the cancellation-code lookup (df2, key 'Code') onto the flights table
# (df1, key 'CANCELLATION_CODE'). A left join keeps every row of df1 even when
# its code has no match in df2.
# validate='many_to_one' makes pandas raise MergeError if a code occurs more
# than once in df2; without it a duplicated code would silently duplicate
# flight rows and distort any counts computed from the merged table.
merged_data = pd.merge(
    df1,
    df2,
    left_on='CANCELLATION_CODE',
    right_on='Code',
    how='left',
    validate='many_to_one',
)
print(merged_data.head())

# Destination file (Task2 folder on Google Drive).
output_path = '/content/drive/MyDrive/Task2/merged_cancelled_flights.csv'

# Create the destination directory first if it does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Write the merged table to disk without the positional index column.
merged_data.to_csv(output_path, index=False)

print(f"合併後的檔案已成功保存到: {output_path}")

  FLIGHT_DATE       DAY OP_CARRIER               CARRIER_NAME  \
0  2020-10-01  Thursday         BA   AnyCompany Blue Airlines   
1  2020-10-01  Thursday         BA   AnyCompany Blue Airlines   
2  2020-10-01  Thursday         GA  AnyCompany Green Airlines   
3  2020-10-01  Thursday         GA  AnyCompany Green Airlines   
4  2020-10-01  Thursday         GA  AnyCompany Green Airlines   

   OP_CARRIER_FL_NUM ORIGIN DISPLAY_AIRPORT_NAME_ORIGIN DEST  \
0               5029    ATL           Atlanta Municipal  MLB   
1               5069    MLB          Melbourne Regional  ATL   
2                134    DFW  Dallas Fort Worth Regional  OGG   
3               1960    OGG             Kahului Airport  PHX   
4               2242    LAX   Los Angeles International  KOA   

          DISPLAY_AIRPORT_NAME_DEST CANCELLATION_CODE  NUMBER_OF_FLIGHTS  \
0                Melbourne Regional                 A                  1   
1                 Atlanta Municipal                 A                  1

In [None]:
# Percentage share of each value (A, B, C, D) in the CANCELLATION_CODE column;
# value_counts(normalize=True) yields fractions, multiplied by 100 here.
# NOTE(review): this cell originally read from `df`, which no visible cell
# defines, so it raised NameError on a fresh kernel. df1 (the cancelled-flights
# table loaded earlier in this notebook) is assumed to be the intended frame —
# confirm.
cancellation_counts = df1['CANCELLATION_CODE'].value_counts(normalize=True) * 100

# Show the percentages as the cell's output.
cancellation_counts


Unnamed: 0_level_0,proportion
CANCELLATION_CODE,Unnamed: 1_level_1
B,52.713887
A,37.304273
C,8.708309
D,1.273532


In [None]:
# Count how many rows each flight number (OP_CARRIER_FL_NUM) has. Flight
# numbers are counted on their own, without regard to the carrier column.
# NOTE(review): this cell originally read from `df`, which no visible cell
# defines, so it raised NameError on a fresh kernel. df1 (the cancelled-flights
# table loaded earlier in this notebook) is assumed to be the intended frame —
# confirm.
flight_cancellation_counts = df1['OP_CARRIER_FL_NUM'].value_counts()

# Flight number with the highest count, and that count.
most_cancelled_flight = flight_cancellation_counts.idxmax()
most_cancelled_count = flight_cancellation_counts.max()

# Share of that flight number among all counted rows, in percent.
total_cancellations = flight_cancellation_counts.sum()
most_cancelled_percentage = (most_cancelled_count / total_cancellations) * 100

most_cancelled_flight, most_cancelled_count, most_cancelled_percentage


(76, 153, 0.0602692024375544)

In [None]:
# Keep only rows whose ORIGIN is OGG. The explicit .copy() gives an
# independent frame so adding a column below does not trigger
# SettingWithCopyWarning.
# NOTE(review): this cell originally read from `df`, which no visible cell
# defines, so it raised NameError on a fresh kernel. df1 (the cancelled-flights
# table loaded earlier in this notebook) is assumed to be the intended frame —
# confirm.
ogg_flights = df1[df1['ORIGIN'] == 'OGG'].copy()

# Derive the calendar year from FLIGHT_DATE for the per-year breakdown.
ogg_flights['YEAR'] = pd.to_datetime(ogg_flights['FLIGHT_DATE']).dt.year

# Rows = year, columns = cancellation code, values = number of rows;
# year/code combinations that never occur are filled with 0.
yearly_cancellation_distribution = (
    ogg_flights
    .groupby(['YEAR', 'CANCELLATION_CODE'])
    .size()
    .unstack(fill_value=0)
)

# Convert each year's counts to percentages of that year's row total.
yearly_totals = yearly_cancellation_distribution.sum(axis=1)
yearly_cancellation_distribution_percentage = (
    yearly_cancellation_distribution.div(yearly_totals, axis=0) * 100
)

# Show the percentage table as the cell's output.
yearly_cancellation_distribution_percentage



CANCELLATION_CODE,A,B,C,D
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,10.994764,1.04712,0.0,87.958115
2021,91.071429,6.696429,0.446429,1.785714
2022,97.340426,2.12766,0.531915,0.0


In [18]:
# Keep only rows whose ORIGIN is IAD.
iad_flights = df1[df1['ORIGIN'] == 'IAD']

# Carrier code (OP_CARRIER) with the most rows among the IAD rows, and its
# row count. The counts are computed once and reused for both values.
carrier_counts = iad_flights['OP_CARRIER'].value_counts()
most_frequent_carrier = carrier_counts.idxmax()
most_frequent_carrier_count = carrier_counts.max()

# All IAD rows belonging to that carrier.
carrier_flights = iad_flights[iad_flights['OP_CARRIER'] == most_frequent_carrier]

# Of those, the rows that actually carry a cancellation code.
cancelled_carrier_flights = carrier_flights[carrier_flights['CANCELLATION_CODE'].notna()]

# Report the results.
print(f"從IAD起飛次數最多的航班名是: {most_frequent_carrier}，共 {most_frequent_carrier_count} 次")
if not cancelled_carrier_flights.empty:
    # idxmax() raises ValueError on an empty Series, so the most common code
    # is only computed once we know at least one coded row exists.
    cancellation_code_counts = cancelled_carrier_flights['CANCELLATION_CODE'].value_counts()
    most_frequent_cancellation = cancellation_code_counts.idxmax()
    most_frequent_cancellation_count = cancellation_code_counts.max()
    print(f"該航班最常遇到的取消原因是: {most_frequent_cancellation}，出現了 {most_frequent_cancellation_count} 次")
else:
    # Keep both names defined so later cells can still reference them.
    most_frequent_cancellation = None
    most_frequent_cancellation_count = 0
    print(f"該航班從未被取消")

從IAD起飛次數最多的航班名是: AA，共 654 次
該航班最常遇到的取消原因是: B，出現了 289 次
