## 참고: https://hleecaster.com/ml-linear-regression-example/

## 라이브러리 설치, 호출

!pip3 install -U scikit-learn<br>
!pip3 install pandas<br>
!pip3 install numpy<br>
!pip3 install matplotlib<br>
!pip3 install statsmodels<br>

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau

Alaska Airlines AS / ASA  
American Airlines AA/AAL  
Air Canada AC/ACA  
Aeromexico AM / AMX  
Continental Airlines CO / COA  
Delta Airlines DL / DAL  
FedEx FX / FDX  
Hawaiian Airlines HA / HAL  
Northwest Airlines NW / NWA  
Polar Air Cargo PO / PAC  
Southwest Airlines SW / SWA  
United Airlines UA / UAL  
United Parcel (UPS) 5X / UPS  
Virgin Atlantic VS / VIR  
VivaAerobús VB / VIV  
WestJet WS / WJ  

ATL - Hartsfield-Jackson Atlanta International Airport - Georgia  
AUS - Austin-Bergstrom International Airport - Texas  
BNA - Nashville International Airport - Tennessee  
BOS - Boston Logan International Airport - Massachusetts  
BWI - Baltimore-Washington International Thurgood Marshall Airport - Washington  
CLT - Charlotte Douglas International Airport - North Carolina  
DAL - Dallas Love Field - Texas  
DCA - Ronald Reagan Washington National Airport - Arlington, Virginia  
DEN - Denver International Airport - Colorado  
DFW - Dallas/Fort Worth International Airport - Texas  
DTW - Detroit Metropolitan Airport - Michigan  
EWR - Newark Liberty International Airport - New Jersey  
FLL - Fort Lauderdale–Hollywood International Airport - Florida  
HNL - Daniel K. Inouye International Airport - Honolulu, Hawaii  
HOU - William P. Hobby Airport - Houston, Texas  
IAD - Dulles International Airport - Virginia  
IAH - George Bush Intercontinental Airport - Houston, Texas  
JFK - John F. Kennedy International Airport - Queens, New York  
LAS - McCarran International Airport - Las Vegas, Nevada  
LAX - Los Angeles International Airport - California  
LGA - LaGuardia Airport - Queens, New York  
MCO - Orlando International Airport - Florida  
MDW - Chicago Midway International Airport - Illinois  
MIA - Miami International Airport - Florida  
MSP - Minneapolis–Saint Paul International Airport - Minnesota  
MSY - Louis Armstrong New Orleans International Airport - Louisiana  
OAK - Oakland International Airport - California  
ORD - O'Hare International Airport - Chicago, Illinois  
PDX - Portland International Airport - Oregon  
PHL - Philadelphia International Airport - Pennsylvania  
PHX - Phoenix Sky Harbor International Airport - Arizona  
RDU - Raleigh-Durham International Airport - North Carolina  
SAN - San Diego International Airport - California  
SEA - Seattle–Tacoma International Airport - Washington  
SFO - San Francisco International Airport - California  
SJC - Norman Y. Mineta San Jose International Airport - California  
SLC - Salt Lake City International Airport - Utah  
SMF - Sacramento International Airport - California  
STL - St. Louis Lambert International Airport - Missouri  
TPA - Tampa International Airport - Florida  

## 데이터 다운로드 (airline)

In [2]:
import pandas as pd
df = pd.read_csv('Airlines.csv')
print(df.head())
print(df.shape)

   id Airline  Flight AirportFrom AirportTo  DayOfWeek  Time  Length  Delay
0   1      CO     269         SFO       IAH          3    15     205      1
1   2      US    1558         PHX       CLT          3    15     222      1
2   3      AA    2400         LAX       DFW          3    20     165      1
3   4      AA    2466         SFO       DFW          3    20     195      1
4   5      AS     108         ANC       SEA          3    30     202      0
(539383, 9)


In [3]:
df = df.iloc[:,1:]

In [5]:
df.tail()

Unnamed: 0,Airline,Flight,AirportFrom,AirportTo,DayOfWeek,Time,Length,Delay
539378,CO,178,OGG,SNA,5,1439,326,0
539379,FL,398,SEA,ATL,5,1439,305,0
539380,FL,609,SFO,MKE,5,1439,255,0
539381,UA,78,HNL,SFO,5,1439,313,1
539382,US,1442,LAX,PHL,5,1439,301,1


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539383 entries, 0 to 539382
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Airline      539383 non-null  object
 1   Flight       539383 non-null  int64 
 2   AirportFrom  539383 non-null  object
 3   AirportTo    539383 non-null  object
 4   DayOfWeek    539383 non-null  int64 
 5   Time         539383 non-null  int64 
 6   Length       539383 non-null  int64 
 7   Delay        539383 non-null  int64 
dtypes: int64(5), object(3)
memory usage: 32.9+ MB


In [50]:
# df[['Flight','DayOfWeek','Time','Length','Delay']] = df[['Flight','DayOfWeek','Time','Length','Delay']].astype(np.float32)

In [20]:
airline_mapping = {'ASA':1,'AAL':2,'ACA':3, 'AMX':4, 
                   'COA':5 ,'DAL':6, 'FDX':7, 'HAL':8, 
                   'NWA':9, 'PAC':10, 'SWA':11, 'UAL':12, 
                   'UPS':13, 'VIR':14, 'VIV':15, 'WJ':16}



airport_mapping = {'ATL' : 1,'AUS' : 2,'BNA' : 3,'BOS' : 4,'BWI' : 5,'CLT' : 6,
                   'DAL' : 7,'DCA' : 8,'DEN' : 9,'DFW' : 10,'DTW' : 11,'EWR' : 12,
                   'FLL' : 13,'HNL' : 14,'HOU' : 15,'IAD' : 16,'IAH' : 17,'JFK' : 18,
                   'LAS' : 19,'LAX' : 20,'LGA' : 21,'MCO' : 22,'MDW' : 23,'MIA' : 24,
                   'MSP' : 25, 'MSY' : 26,'OAK' : 27,'ORD' : 28,'PDX' : 29,'PHL' : 30,
                   'PHX' : 31,'RDU' : 32,'SAN' : 33,'SEA' : 34,'SFO' : 35,'SJC' : 36,
                   'SLC' : 37,'SMF' : 38,'STL' : 39,'TPA' : 40}

Unnamed: 0,Airline,Flight,AirportFrom,AirportTo,DayOfWeek,Time,Length,Delay
539378,CO,178,OGG,SNA,5,1439,326,0
539379,FL,398,SEA,ATL,5,1439,305,0
539380,FL,609,SFO,MKE,5,1439,255,0
539381,UA,78,HNL,SFO,5,1439,313,1
539382,US,1442,LAX,PHL,5,1439,301,1


## Input, Feature , train, test 설정

In [7]:
Label = df['Delay']
InputFeature = df[['Flight', 'DayOfWeek', 'Time', 'Length']]

In [8]:
from sklearn import model_selection

In [9]:
train_data, test_data, train_label, test_label = model_selection.train_test_split(InputFeature, Label,
                                                                                 test_size=0.3,
                                                                                 random_state=0)
print(train_data.shape)
print(test_data.shape)
print(train_label.shape)
print(test_label.shape)

(377568, 4)
(161815, 4)
(377568,)
(161815,)


## Keras Logit 모델 fitting

In [10]:
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(4,)))
model.add(Dense(5, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])

## Keras 모델 살펴보기

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                50        
                                                                 
 dense_1 (Dense)             (None, 5)                 55        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 111
Trainable params: 111
Non-trainable params: 0
_________________________________________________________________


## Call-back 함수
## 모델 학습

In [55]:
# Call-back 함수
# CheckPoint: Epoch 마다 validation 성능을 검증하여, best performance 일 경우 저장
CP = ModelCheckpoint(filepath='-{epoch:03d}-{loss:.4f}-{accuracy:.4f}.hdf5',
            monitor='loss', verbose=1, save_best_only=True, mode='min')

# Learning Rate 줄여나가기
LR = ReduceLROnPlateau(monitor='loss',factor=0.8,patience=3, verbose=1, min_lr=1e-8)

CALLBACK = [CP, LR]

In [12]:
model.fit(x=train_data, y=train_label, epochs=50, shuffle=True, batch_size=512) #callbacks=CALLBACK)

Epoch 1/50
  1/738 [..............................] - ETA: 1:43 - loss: 100.6807 - accuracy: 0.5488

2022-07-08 16:33:59.834469: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x285cb2a60>

In [13]:
model.evaluate(test_data,test_label)



[0.6869062185287476, 0.5558322668075562]

In [19]:
model.predict(test_data)

array([[0.44286036],
       [0.44286036],
       [0.44286036],
       ...,
       [0.44286036],
       [0.44286036],
       [0.44286036]], dtype=float32)

# Model Load 하기 전, hdf5 파일 이름 꼭 확인하기

In [8]:
model.load_weights("-098-0.5495-0.7619.hdf5")

## FLASK 셋팅하기

In [9]:
from flask import Flask
from flask import render_template
from flask import request

In [10]:
app = Flask(__name__)

# FLASK API 구현부분

In [11]:
@app.route('/')
@app.route('/SpecialSale')
def PredictionSpecialSale():
    busy_day = request.args.get('busy_day')
    high_temperature = request.args.get('high_temperature')
    
    print(busy_day, high_temperature)
    
    if busy_day == None or high_temperature == None:
        return render_template('SpecialSale.html', Output = '')
    
    Input = pd.DataFrame({
        'busy_day':[int(busy_day)],
        'high_temperature':[float(high_temperature)]
    })
    ModelOutput = model.predict(Input)[0][0]

    return render_template('SpecialSale.html', Output = ModelOutput)

# Flask, port 5000으로 실행

In [None]:
app.run(host='0.0.0.0', port=5000)

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.2.16:5000 (Press CTRL+C to quit)
127.0.0.1 - - [07/Jul/2022 09:59:57] "GET / HTTP/1.1" 200 -


None None
1 38


127.0.0.1 - - [07/Jul/2022 10:00:01] "GET /SpecialSale?busy_day=1&high_temperature=38 HTTP/1.1" 200 -
127.0.0.1 - - [07/Jul/2022 10:10:42] "GET / HTTP/1.1" 200 -


None None
0 18


127.0.0.1 - - [07/Jul/2022 10:11:00] "GET /SpecialSale?busy_day=0&high_temperature=18 HTTP/1.1" 200 -


1 50


127.0.0.1 - - [07/Jul/2022 10:11:14] "GET /SpecialSale?busy_day=1&high_temperature=50 HTTP/1.1" 200 -


In [None]:
# http://127.0.0.1:5000/SpecialSale/1/38