In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import matplotlib.font_manager as fm
# Configure matplotlib to render text with a font that contains Korean glyphs.
# NOTE(review): the return value of get_fontconfig_fonts() is discarded here, and the
# function is reportedly deprecated in recent matplotlib releases — confirm whether
# this call can simply be removed.
fm.get_fontconfig_fonts()
# font_location = '/usr/share/fonts/truetype/nanum/NanumGothicOTF.ttf'
# Raw string: the backslashes of the Windows path must not be parsed as escape
# sequences ('\W', '\F', '\H' are invalid escapes and trigger a warning).
font_location = r'C:\Windows\Fonts\HANBatangB.ttf' # For Windows
# Resolve the family name stored inside the font file and make it the default family.
font_name = fm.FontProperties(fname=font_location).get_name()
matplotlib.rc('font', family=font_name)

In [2]:
def display_heatmap(df):
    """Show an annotated heatmap of the pairwise correlations of ``df``'s columns.

    Args:
        df: DataFrame whose ``corr()`` matrix is plotted.

    Returns:
        None. A 10x7 inch figure is created and displayed.
    """
    figure = plt.figure(figsize=(10, 7))
    axes = figure.gca()
    correlations = df.corr()
    sns.heatmap(correlations, annot=True, cmap='cubehelix_r', ax=axes)
    plt.show()

In [3]:
def mask_with_in1d(df, column, val):
    """Return the rows of ``df`` whose value in ``column`` equals ``val``.

    Args:
        df: DataFrame to filter.
        column: Name of the column to compare.
        val: Single value to look for.

    Returns:
        A DataFrame containing only the matching rows; the original index
        labels are kept.
    """
    # np.isin is the supported replacement for np.in1d (deprecated in NumPy 2.0);
    # for a 1-D input the resulting boolean mask is the same.
    mask = np.isin(df[column].values, [val])
    return df[mask]

In [4]:
def one_hot(df, cols):
    """Return a new DataFrame in which the columns listed in ``cols`` are one-hot encoded.

    Args:
        df: Source DataFrame (not modified).
        cols: Column names passed to ``pd.get_dummies`` as ``columns``.

    Returns:
        The DataFrame produced by ``pd.get_dummies``.
    """
    return pd.get_dummies(df, columns=cols)

In [5]:
def fix_flight(df, i):
    """Drop unused columns from a flight DataFrame, in place.

    Always removes "Unnamed: 0", "편명" (flight number), "예상" (expected) and
    "구분" (category). In addition:

    * ``i == 0``: removes "목적지" (destination)
    * otherwise:  removes "출발지" (origin)

    Args:
        df: DataFrame to modify in place.
        i: Selector for which route column is dropped (see above).

    Returns:
        None.
    """
    # Only the route column differs between the two variants.
    route_column = "목적지" if i == 0 else "출발지"
    unwanted = ["Unnamed: 0", "편명", route_column, "예상", "구분"]
    df.drop(columns=unwanted, inplace=True)

In [6]:
def fix_time(df, file_name):
    """Add hour ('time') and date+hour ('TM') columns to ``df`` and save it as CSV.

    'time' is the text before the first ':' of the '계획' (planned time) column.
    'TM' is ``str(날짜)`` followed by that hour; a single-character hour is
    left-padded with "0" so that e.g. date 20181001 and "9:05" give
    "2018100109".

    Args:
        df: DataFrame with a string column '계획' and a date column '날짜';
            modified in place.
        file_name: File name written inside "dataset/FlightCSV/". The
            directory must already exist.

    Returns:
        None.
    """
    hour = df['계획'].str.split(':').str[0]
    df['time'] = hour

    # Vectorized replacement for the former row loop: chained assignment
    # (df['TM'][i] = ...) is unreliable and does nothing under pandas
    # Copy-on-Write, and positional label lookups required a 0..n-1 index.
    padded_hour = hour.where(hour.str.len() != 1, "0" + hour)
    df['TM'] = df['날짜'].astype(str) + padded_hour

    df.to_csv("dataset/FlightCSV/"+file_name, encoding='utf-8-sig')

In [7]:
def fix_weather(df):
    """Split the 'TM' column into 'Date' and 'Time' text columns, in place.

    Each 'TM' value is converted with ``str``; its first eight characters
    become 'Date' and everything after them becomes 'Time'.

    Args:
        df: DataFrame with a 'TM' column; modified in place.

    Returns:
        None.
    """
    stamp_text = df['TM'].map(str)
    df['Date'] = stamp_text.map(lambda text: text[:8])
    df['Time'] = stamp_text.map(lambda text: text[8:])

In [8]:
def month_slice(df, day, last):
    """Collect the rows for ``last`` consecutive '날짜' values starting at ``day``.

    Rows are selected with ``mask_with_in1d`` for ``day``, ``day + 1``, ...,
    ``day + last - 1`` and stacked in that order.

    Args:
        df: DataFrame with a '날짜' (date) column.
        day: First '날짜' value to select.
        last: Number of consecutive values to select.

    Returns:
        A new DataFrame with a fresh 0..n-1 index; an empty DataFrame when
        ``last`` is 0 or negative.
    """
    # Gather every slice first and concatenate once: growing a frame with
    # pd.concat inside the loop copies all accumulated rows on each pass.
    frames = [mask_with_in1d(df, "날짜", day + offset) for offset in range(last)]
    if not frames:
        # pd.concat rejects an empty list; match the previous empty result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [9]:
# Compute delay time
def _clock_to_minutes(value):
    """Convert an 'H:MM'-style value to minutes; NaN when the hour or minute part is missing."""
    parts = str(value).split(':')
    if len(parts) < 2 or parts[0] == '' or parts[1] == '':
        return float('nan')
    return int(parts[0]) * 60 + int(parts[1])


def delay_time(df,e):
    """Add a 'delayTime' column: actual time minus planned time, in minutes.

    The planned time is read from '계획'. The actual time is read from '출발'
    (departure) when ``e == 0`` and from '도착' (arrival) otherwise. Values
    are split on ':' and converted to ``hour * 60 + minute``.

    Rows whose time has no hour or minute part (for example ":" or a missing
    value) get NaN in 'delayTime'.

    NOTE(review): times are compared as plain minutes of the day, so a flight
    planned before midnight and operated after it yields a large negative
    delay — confirm whether that case occurs in the data.

    Args:
        df: DataFrame with '계획' and '출발' / '도착' columns; modified in place.
        e: 0 to use '출발', anything else to use '도착'.

    Returns:
        None.

    Raises:
        ValueError: If an hour or minute part is present but not an integer.
    """
    actual = df['출발'] if e == 0 else df['도착']

    planned_minutes = df['계획'].apply(_clock_to_minutes)
    actual_minutes = actual.apply(_clock_to_minutes)

    # Index-aligned subtraction: works for any index and propagates NaN for
    # rows without a usable time, without temporary helper columns.
    df['delayTime'] = actual_minutes - planned_minutes

In [10]:
# Build a dictionary mapping every date from 20181001 to 20191001 (inclusive),
# written as a YYYYMMDD integer, to its weekday label.
# The calendar is taken from pandas instead of hand-maintained month lengths
# and a day counter, so the labels stay correct if the range is ever changed.
WEEKDAY_LABELS = ("MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN")

weekend = {
    # Timestamp.dayofweek is 0 for Monday ... 6 for Sunday.
    int(stamp.strftime("%Y%m%d")): WEEKDAY_LABELS[stamp.dayofweek]
    for stamp in pd.date_range("2018-10-01", "2019-10-01", freq="D")
}

In [11]:
def set_weekend(df):
    """Add a 'weekend' column with the weekday label of each row's '날짜' value.

    Labels are looked up in the module-level ``weekend`` dictionary; a date
    that is not a key of that dictionary raises ``KeyError``.

    Args:
        df: DataFrame with a '날짜' column; modified in place.

    Returns:
        None.
    """
    label_for = weekend.__getitem__
    df['weekend'] = df['날짜'].map(label_for)

In [12]:
# Bin edges for 'delayTime': irregular edges up to 20, then steps of 10 up to
# 60, steps of 30 up to 390, and a final edge at 400.
delay_bins = (
    [-550, -10, 0, 10, 15, 20]
    + list(range(30, 61, 10))
    + list(range(90, 391, 30))
    + [400]
)
# One label per interval between consecutive edges: -1, 0, 1, ..., 19.
delay_labels = list(range(-1, 20))


def delayBinning(df):
    """Add a 'DelayRate' column by binning 'delayTime' with ``pd.cut``.

    Intervals are right-closed (the ``pd.cut`` default) and labelled with
    ``delay_labels``; values outside (-550, 400] become NaN.

    Args:
        df: DataFrame with a numeric 'delayTime' column; modified in place.

    Returns:
        None.
    """
    rate = pd.cut(df['delayTime'], bins=delay_bins, labels=delay_labels)
    df['DelayRate'] = rate

In [13]:
def preprocessing(df):
    """Drop, fill and one-hot encode columns of a flight DataFrame.

    Steps performed on ``df``:
      1. Drop the '계획' and 'delayTime' columns (in place).
      2. Replace every missing value in the frame with 19 (in place).
      3. One-hot encode '항공사', '현황' and 'weekend' via ``pd.get_dummies``.
    """
    df.drop(['계획','delayTime'], axis=1, inplace=True)
    # Applies to every column, not only one; 19 is presumably the highest
    # DelayRate label — TODO confirm.
    df.fillna(19, inplace=True)
    # NOTE(review): get_dummies returns a new frame that is bound only to the
    # local name ``df`` here; the caller's frame does not gain the encoded
    # columns unless this result is returned — confirm the function returns it.
    df = pd.get_dummies(df, columns=['항공사', '현황', 'weekend'])