# Import Library

In [None]:
from google.cloud import bigquery
import os
from google.cloud import bigquery
from google.oauth2 import service_account
import datetime

import requests
import json
import pandas as pd

# Retrieve Data from BigQuery

In [None]:
# Build service-account credentials from the local JSON key file and use them
# to create the BigQuery client for the project.
key_file = r'D:\JOB\APSD\Big Data Platform\Data\big-data-xxx-xxx.json'
credentials = service_account.Credentials.from_service_account_file(key_file)

project_id = 'big-data-xxx'
client = bigquery.Client(project=project_id, credentials=credentials)

In [None]:
# Preview query: pull 100 rows of the daily-data table into a DataFrame and
# display the first rows.
sql = """
   SELECT * FROM `big-data-xxxx.test_dataset01.ex-daily_data` LIMIT 100"""
query_job = client.query(sql)
cloud_data = query_job.to_dataframe()
cloud_data.head()

In [None]:
# Load the whole 2023 flight table, ordered by its `index` column, into a
# DataFrame and display the first rows.
sql = """
   SELECT *
   FROM `big-data-xxxx.test_dataset01.test_flight_data_2023`
   ORDER BY index"""
query_job = client.query(sql)
cloud_data = query_job.to_dataframe()
cloud_data.head()

In [None]:
# Promote the table's 'index' column to the DataFrame index, then display it.
cloud_data = cloud_data.set_index(keys='index')
cloud_data

# Collect Flight Data from AP2 API

In [None]:
def collect_data(start, end):
    """Fetch flight records from the AP2 API for every day from start to end.

    Logs in once to obtain an access token, requests the flight list of each
    calendar day in the inclusive range, and returns all rows in a single
    DataFrame.

    Args:
        start: First day to fetch; any value ``pd.date_range`` accepts.
        end: Last day to fetch (inclusive); any value ``pd.date_range``
            accepts.

    Returns:
        A DataFrame with one row per flight record and the 17 columns listed
        in ``columns`` below. ACTUAL_TIME, ESTIMATED_TIME and SCHEDULED_TIME
        are parsed to datetimes (unparseable values become NaT); DATE is
        re-formatted as 'YYYY-MM-DD' strings. When the range contains no
        days, an empty DataFrame with the same columns is returned and no
        request is sent.

    Raises:
        KeyError: If the login response has no 'token', a flight response has
            no 'data', or a record lacks one of the expected columns.
        json.JSONDecodeError: If a response body is not valid JSON.
    """
    # Output columns, in the order they appear in the resulting DataFrame.
    columns = [
        'ACTUAL_TIME',
        'AIRCRAFT_REG_NO',
        'CAPACITY',
        'CATEGORY_CODE',
        'CHECK_IN_COUNTER',
        'CONVEYOR_BELT',
        'DATE',
        'DESCRIPTION',
        'ESTIMATED_TIME',
        'FLIGHT_NO',
        'FROM_STATION',
        'GATE_NUMBER',
        'LEG',
        'REMARK',
        'SCHEDULED_TIME',
        'TERMINAL',
        'TO_STATION',
    ]
    datetime_columns = ('ACTUAL_TIME', 'ESTIMATED_TIME', 'SCHEDULED_TIME')

    ## Generate one calendar date per day in the inclusive range
    days = [stamp.date() for stamp in pd.date_range(start, end)]

    ## One value list per column, filled while looping over the days
    column_values = {name: [] for name in columns}

    # Only talk to the API when there is something to fetch; an empty range
    # (e.g. BigQuery is already up to date) falls through to an empty frame.
    if days:
        ## Login
        url_login = 'https://xxx.angkasapura2.co.id/xxxx'
        myobj_login = {"username": "username",
            "password": "password"}

        login = requests.post(url_login, json=myobj_login)
        login_response = json.loads(login.text)

        ## Get Token
        token = login_response['token']

        ## Looping Get Data
        url_flight = 'https://xxxx.angkasapura2.co.id/xxxx'
        for day in days:
            myobj_flight = {"date": str(day), "branch": "XXX"}

            flight = requests.get(url_flight, headers={'x-access-token': token}, json=myobj_flight)
            flight_response = json.loads(flight.text)

            ## Store each field of each record in its own column list
            for record in flight_response['data']:
                for name in columns:
                    column_values[name].append(record[name])

    ## Convert the column lists to a DataFrame once, after all days are read
    frame_data = {}
    for name in columns:
        values = column_values[name]
        if name in datetime_columns:
            values = pd.to_datetime(values, errors='coerce')
        elif name == 'DATE':
            values = pd.to_datetime(values, errors='coerce').strftime('%Y-%m-%d')
        frame_data[name] = values

    Dataset = pd.DataFrame(frame_data)
    return Dataset

In [None]:
# First day still missing from BigQuery: the day after the DATE of the last
# row of cloud_data, formatted as 'YYYY-MM-DD'.
# NOTE(review): assumes the last row carries the most recent DATE — confirm.
last_date = cloud_data['DATE'].values[-1] + datetime.timedelta(days=1)
last_date = last_date.strftime('%Y-%m-%d')

# Despite its name, `today` holds yesterday's datetime: the last day to fetch.
today = datetime.datetime.today() - datetime.timedelta(days=1)
# The formatted value was previously computed and discarded; use it as the
# end of the range so both bounds are plain 'YYYY-MM-DD' strings.
end_date = today.strftime('%Y-%m-%d')

# Collect data from the first missing date through yesterday (inclusive)
new_flight_2023_incomplete = collect_data(last_date, end_date)

In [None]:
# Print the newly collected rows in full (no row/column truncation).
incomplete_text = new_flight_2023_incomplete.to_string()
print(incomplete_text)

# Concat Data from BigQuery and Newest Data

In [None]:
# Stack the rows already in BigQuery on top of the newly collected rows and
# renumber the combined frame from 0, then display it.
frames = [cloud_data, new_flight_2023_incomplete]
new_flight_2023 = pd.concat(frames).reset_index(drop=True)
new_flight_2023

In [None]:
# Print the combined dataset in full (no row/column truncation).
combined_text = new_flight_2023.to_string()
print(combined_text)

In [None]:
# Write the combined dataset to a CSV file in the current working directory.
# NOTE(review): this is a relative path; confirm it resolves to the same file
# that the upload step later opens.
csv_name = 'flight_data_2023-incomplete.csv'
new_flight_2023.to_csv(csv_name)

# Upload

In [None]:
# Point the Google client libraries at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"D:\JOB\APSD\Big Data Platform\Data\big-data-xxxxxx-xxxxxxxxx.json"

# Construct a BigQuery client object (credentials come from the environment).
client = bigquery.Client()

table_id = "big-data-xxxxxx.test_dataset01.test_flight_data_2023"
file_path = r"D:\JOB\APSD\Big Data Platform\Data\flight_data_2023-incomplete.csv"

# CSV load with one header row skipped and schema auto-detection.
# WRITE_TRUNCATE replaces the table contents instead of appending to them.
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

with open(file_path, "rb") as source_file:
    job = client.load_table_from_file(source_file, table_id, job_config=job_config)

# Block until the load job completes.
job.result()

# Fetch the table metadata (an API request) and report its size.
table = client.get_table(table_id)
summary = "Loaded {} rows and {} columns to {}".format(
    table.num_rows, len(table.schema), table_id
)
print(summary)