In [143]:
import json
from datetime import datetime

from scheduler.weather_data_fetcher import _request
import pandas as pd
from joblib import load

from redis_manager.redis import redis

# strftime pattern for a calendar day (YYYYMMDD); used to build the flights cache key.
_DATE_FORMAT = "%Y%m%d"
# strftime pattern with minute resolution (YYYYMMDDHHMM).
_DATE_FORMAT_MINUTE = "%Y%m%d%H%M"

# Flight whose record is looked up in the cached flights data.
flight_code = "KC910"

# Point the project's redis wrapper at a local server.
# NOTE(review): host and port are hard-coded; adjust when running elsewhere.
redis.set_connection('localhost', 6379)


### 필요한 input 데이터 (순서 중요)

- airline: 영문 2자리 
- cause: ''이면 '기타'로 치환
- destination: 영문 3자리
- flight_code:
- temperature:
- term: 시기, normalize해서 0과 10 사이의 실수로 표현
- wind_speed_10m_avg_kt: WS10(0.1 m/s 단위로 추정)을 kt로 변환하기 위해 0.194384를 곱하기


In [144]:
# Load the persisted model and the fitted encoder from paths relative to the working directory.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary code —
# only load these artifacts from a trusted source.
model = load(filename="lasso_cv_model.joblib")
encoder = load(filename="one_hot_encoder.pkl")


In [145]:
# Flights: the cached entry is keyed by today's date (YYYYMMDD) followed by 'D'.
redis.select(redis.FLIGHTS_API)
flights_key = datetime.today().strftime(_DATE_FORMAT) + 'D'
flights_data = json.loads(redis.get(flights_key))
flight_data = flights_data[flight_code]

# Weather: switch to the weather database, but fetch the observations through
# _request for a fixed timestamp instead of reading the cached snapshot.
redis.select(redis.WEATHERS_API)
# Cached-snapshot alternative, kept for reference:
# last_received_time = get_last_received_time()
# last_received_time = "202404031045"
# weather_data = json.loads(redis.get(last_received_time))

_, weather_data_array = _request(datetime(2024, 4, 3, 10, 55))
# Use the last element of the returned observations.
weather_data = weather_data_array[-1]

# Show both records as the cell output.
flight_data, weather_data

[36m2024-04-03 13:53:17 - DEBUG - Weather Data Delta Time: 3 minutes[0m


({'departure_date': '20240403',
  'airline': '에어 아스타나',
  'flight_code': 'KC910',
  'destination': 'ALA(알마티)',
  'departure_time_plan': '10:55',
  'departure_time_expected': '10:55',
  'departure_time_real': '11:24',
  'division': '여객',
  'flight_status': '출발',
  'cause': ''},
 {'S': 113,
  'TM': 202404031055,
  'L_VIS': 10000,
  'R_VIS': -99999,
  'L_RVR': 2000,
  'R_RVR': -99999,
  'CH_MIN': 3900,
  'TA': 161,
  'TD': 41,
  'HM': 45,
  'PS': 10074,
  'PA': 10065,
  'RN': 0,
  '예비1': -99999,
  '예비2': -99999,
  'WD02': 90,
  'WD02_MAX': 100,
  'WD02_MIN': 80,
  'WS02': 31,
  'WS02_MAX': 37,
  'WS02_MIN': 24,
  'WD10': 70,
  'WD10_MAX': 90,
  'WD10_MIN': 30,
  'WS10': 30,
  'WS10_MAX': 44,
  'WS10_MIN': 21})

airline
flight_code
destination
cause
delay_minute
temperature
wind_speed_10m_avg_kt
term

- airline: 영문 2자리 
- flight_code:
- destination: 영문 3자리
- cause: ''이면 '기타'로 치환
- temperature:
- wind_speed_10m_avg_kt: WS10(0.1 m/s 단위로 추정)을 kt로 변환하기 위해 0.194384를 곱하기
- term: 시기, normalize해서 0과 10 사이의 실수로 표현

In [146]:
def normalize_date(now):
    """Map a calendar date onto a 0-10 scale describing its position in the year.

    January 1 maps to 0.0 and December 31 maps to 10.0 in both common and leap
    years; the days in between are spaced linearly by day-of-year.

    Args:
        now: A ``datetime.date``, ``datetime.datetime`` or ``pandas.Timestamp``.
            Only the calendar date is used; any time-of-day part is ignored.

    Returns:
        float: ``(day_of_year - 1) / (days_in_year - 1) * 10``.
    """
    # Local import keeps this cell runnable on its own.
    import calendar

    # calendar.isleap works for any object exposing ``.year``; the previous
    # ``now.is_leap_year`` attribute only exists on pandas.Timestamp.
    days_in_year = 366 if calendar.isleap(now.year) else 365
    day_of_year = now.timetuple().tm_yday  # 1 for January 1

    # Shift to a zero-based day index so Jan 1 -> 0 and Dec 31 -> 1 before scaling.
    normalized = (day_of_year - 1) / (days_in_year - 1)
    return normalized * 10

In [147]:
# Column layout of the single-row frame handed to the encoder.
COLUMNS = [
    'airline', 'flight_code', 'destination', 'cause',
    'delay_minute', 'temperature', 'wind_speed_10m_avg_kt', 'term',
]

df = pd.DataFrame(columns=COLUMNS)

# Assemble the row field by field, in the same order as COLUMNS.
data = {}
data['airline'] = flight_data['flight_code'][:2]      # first two characters of the flight code
data['flight_code'] = flight_data['flight_code']
data['destination'] = flight_data['destination'][:3]  # first three characters, e.g. 'ALA'
data['cause'] = flight_data['cause'] if flight_data['cause'] != '' else '기타'  # empty cause -> '기타'
data['delay_minute'] = None                           # left empty; dropped before prediction
data['temperature'] = weather_data['TA'] / 10         # presumably TA is in 0.1 degC — TODO confirm
# 0.194384 = 1.94384 kt per m/s divided by 10; presumably WS10 is in 0.1 m/s — TODO confirm
data['wind_speed_10m_avg_kt'] = weather_data['WS10'] * 0.194384
data['term'] = normalize_date(
    pd.to_datetime(
        flight_data['departure_date'] + flight_data['departure_time_plan'],
        format="%Y%m%d%H:%M",
    )
)

df.loc[0] = data

df.head()

Unnamed: 0,airline,flight_code,destination,cause,delay_minute,temperature,wind_speed_10m_avg_kt,term
0,KC,KC910,ALA,기타,,16.1,5.83152,2.547945


In [148]:
# Columns handled by the 'encoder' step; every other column of df is treated as numeric.
categorical_features = ['airline', 'flight_code', 'destination', 'cause']
numeric_features = [name for name in df.columns if name not in categorical_features]

# Transform the single-row frame; the result is converted to a dense array below.
encoded_data = encoder.transform(df)

# Label the output: encoded categorical columns first, then the remaining columns.
new_column_names = encoder.named_transformers_['encoder'].get_feature_names_out(
    input_features=categorical_features)
all_column_names = [*new_column_names, *numeric_features]

encoded_df = pd.DataFrame(encoded_data.toarray(), columns=all_column_names)

# 'delay_minute' is not passed to the model, so remove it from the frame.
encoded_df = encoded_df.drop(columns='delay_minute')
# encoded_df.head()

In [149]:
# Run the loaded model on the single encoded row.
y_pred = model.predict(encoded_df)

# Display the prediction (presumably the delay in minutes, given the dropped
# 'delay_minute' column — TODO confirm).
y_pred

array([76.28631931])