In [1]:
import requests
import pandas as pd
import time
from tqdm import tqdm
from datetime import datetime, timedelta
import sys
from IPython.display import clear_output

In [12]:
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before the lookups below.
load_dotenv()

# Environment-variable names of each (key, secret) credential pair, in rotation order.
_CREDENTIAL_ENV_NAMES = (
    ('API_KEY_1', 'API_SECRET_1'),
    ('API_KEY_2', 'API_SECRET_2'),
    ('API_KEY_3', 'API_SECRET_3'),
)

# One dict per credential pair; os.getenv yields None for any variable that is unset.
API_CREDENTIALS = [
    {'API_KEY': os.getenv(key_name), 'API_SECRET': os.getenv(secret_name)}
    for key_name, secret_name in _CREDENTIAL_ENV_NAMES
]

# Position in API_CREDENTIALS of the pair currently in use.
current_api_index = 0


In [13]:
def get_access_token(timeout=30):
    """Request an OAuth2 access token using the currently selected credentials.

    Reads the pair at ``API_CREDENTIALS[current_api_index]`` and posts a
    ``client_credentials`` grant to the Amadeus test token endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an error status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    # Only read here, so no ``global`` declaration is required.
    credentials = API_CREDENTIALS[current_api_index]
    url = 'https://test.api.amadeus.com/v1/security/oauth2/token'
    data = {
        'grant_type': 'client_credentials',
        'client_id': credentials['API_KEY'],
        'client_secret': credentials['API_SECRET']
    }
    response = requests.post(url, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']

def switch_to_next_api():
    """Advance the module-level credential index by one, wrapping around the
    length of ``API_CREDENTIALS``, and print the newly selected index."""
    global current_api_index
    pool_size = len(API_CREDENTIALS)
    following_index = current_api_index + 1
    current_api_index = following_index % pool_size
    print(f"Chuyển sang cặp API credentials tiếp theo (index: {current_api_index})")

In [14]:
def get_flight_offers(origin, destination, departure_date, access_token, timeout=30):
    """Query the Amadeus test flight-offers endpoint for one route and date.

    Args:
        origin: Value sent as ``originLocationCode``.
        destination: Value sent as ``destinationLocationCode``.
        departure_date: Value sent as ``departureDate`` (callers in this
            notebook pass a ``YYYY-MM-DD`` string).
        access_token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up, so a single
            stalled request cannot hang a long scraping loop.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an error status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    url = 'https://test.api.amadeus.com/v2/shopping/flight-offers'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {
        'originLocationCode': origin,
        'destinationLocationCode': destination,
        'departureDate': departure_date,
        'adults': '1',
        'nonStop': 'false',
        'currencyCode': 'VND',
        'max': '250'
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


In [15]:
def process_data(data):
    """Flatten a flight-offers response into one row per distinct segment.

    Every segment of every itinerary of every offer is visited. A segment is
    identified by (departure airport, departure time, arrival airport,
    arrival time, carrier code, flight number); only its first occurrence is
    kept.

    Args:
        data: Decoded response dict; offers are read from its ``'data'`` key.
            Missing or ``None`` values for ``'data'``, ``'price'``,
            ``'itineraries'``, ``'segments'``, ``'departure'`` and
            ``'arrival'`` are treated as empty.

    Returns:
        A DataFrame with the columns ``Departure Airport``, ``Departure Time``,
        ``Arrival Airport``, ``Arrival Time``, ``Carrier Code``,
        ``Flight Number``, ``Price (VND)`` and ``Duration``. When no segment
        is found the frame is empty but still carries these columns, so
        downstream column-based code does not raise ``KeyError``.
    """
    columns = [
        'Departure Airport', 'Departure Time', 'Arrival Airport', 'Arrival Time',
        'Carrier Code', 'Flight Number', 'Price (VND)', 'Duration',
    ]
    # ``or`` guards against keys that are present but explicitly null.
    flight_offers = data.get('data') or []
    records = {}
    for offer in flight_offers:
        # NOTE(review): this is the offer's total price; for itineraries with
        # several segments it is still stored on each individual segment.
        price = (offer.get('price') or {}).get('total')
        for itinerary in offer.get('itineraries') or []:
            for segment in itinerary.get('segments') or []:
                departure = segment.get('departure') or {}
                arrival = segment.get('arrival') or {}
                carrierCode = segment.get('carrierCode')
                flightNumber = segment.get('number')
                duration = segment.get('duration')
                unique_id = (
                    departure.get('iataCode'),
                    departure.get('at'),
                    arrival.get('iataCode'),
                    arrival.get('at'),
                    carrierCode,
                    flightNumber
                )
                # Only add the flight if it has not been recorded yet.
                if unique_id not in records:
                    records[unique_id] = {
                        'Departure Airport': departure.get('iataCode'),
                        'Departure Time': departure.get('at'),
                        'Arrival Airport': arrival.get('iataCode'),
                        'Arrival Time': arrival.get('at'),
                        'Carrier Code': carrierCode,
                        'Flight Number': flightNumber,
                        'Price (VND)': price,
                        'Duration': duration,
                    }
    if not records:
        # from_dict({}) would yield a frame with no columns at all.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame.from_dict(records, orient='index')


In [18]:
# Obtain an access token for the currently selected credential pair.
access_token = get_access_token()

# Airports to query; every ordered pair of distinct airports is requested.
airports = ['SGN', 'HAN', 'DAD', 'HUI', 'CXR', 'VCA', 'HPH', 'VII', 'BMV', 'DLI']

# Every day of November 2024, inclusive of both ends.
start_date = datetime(2024, 11, 1)
end_date = datetime(2024, 11, 30)
date_list = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]

df = pd.DataFrame()

total_pairs = len(airports) * (len(airports) - 1) * len(date_list)

# Per-request frames are collected here and concatenated once; concatenating
# inside the loop copies the whole accumulated frame on every request.
flight_frames = []

# A 401 triggers a retry with the next credential pair; give up on a route
# once every pair has been tried so a bad configuration cannot loop forever.
max_attempts = max(1, len(API_CREDENTIALS))

try:
    # Fetch data for every route/date with a detailed progress bar.
    with tqdm(total=total_pairs, desc='Đang lấy dữ liệu') as pbar:
        for departure_date in date_list:
            date_str = departure_date.strftime('%Y-%m-%d')
            for origin in airports:
                for destination in airports:
                    if origin == destination:
                        continue

                    # Overwrite the previous status line with the current route.
                    sys.stdout.write(f"\rLấy chuyến bay từ {origin} đến {destination} ngày {date_str}")
                    sys.stdout.flush()

                    for _attempt in range(max_attempts):
                        try:
                            data = get_flight_offers(origin, destination, date_str, access_token)
                            flight_frames.append(process_data(data))
                            time.sleep(1)  # Pause one second between requests.
                            break
                        except requests.exceptions.HTTPError as http_err:
                            status = getattr(http_err.response, 'status_code', None)
                            if status == 401:  # Unauthorized: rotate credentials and retry this route.
                                print(f"API credentials hết hạn. Đang chuyển sang cặp tiếp theo.")
                                switch_to_next_api()
                                access_token = get_access_token()
                                continue
                            sys.stdout.write(f"\rLỗi HTTP từ {origin} đến {destination} ngày {date_str}: {http_err}\n")
                            break
                        except Exception as e:
                            sys.stdout.write(f"\rLỗi từ {origin} đến {destination} ngày {date_str}: {e}\n")
                            break
                    else:
                        # Reached only when every attempt ended in a 401.
                        sys.stdout.write(f"\rBỏ qua {origin} đến {destination} ngày {date_str}: tất cả API credentials đều bị từ chối (401)\n")

                    # Advance exactly once per route, whether it succeeded or not.
                    pbar.update(1)
finally:
    # Runs on interruption too, so whatever was fetched so far ends up in df.
    if flight_frames:
        df = pd.concat(flight_frames, ignore_index=True)

sys.stdout.write("\nHoàn thành!\n")

Đang lấy dữ liệu:   0%|          | 0/2700 [00:15<?, ?it/s]


KeyboardInterrupt: 

In [7]:
# Columns that together identify one flight segment.
flight_key_columns = [
    'Departure Airport', 'Departure Time', 'Arrival Airport', 'Arrival Time',
    'Carrier Code', 'Flight Number']

# Drop duplicate segments, parse both time columns as datetimes, then
# renumber the rows from zero.
df = (
    df.drop_duplicates(subset=flight_key_columns)
      .assign(**{
          'Departure Time': lambda frame: pd.to_datetime(frame['Departure Time']),
          'Arrival Time': lambda frame: pd.to_datetime(frame['Arrival Time']),
      })
      .reset_index(drop=True)
)

print(df.shape)
df.head()


(5212, 8)


Unnamed: 0,Departure Airport,Departure Time,Arrival Airport,Arrival Time,Carrier Code,Flight Number,Price (VND),Duration
0,SGN,2024-11-01 05:00:00,HAN,2024-11-01 07:10:00,VJ,194,1546000.0,PT2H10M
1,SGN,2024-11-01 05:20:00,HAN,2024-11-01 07:30:00,VJ,198,1546000.0,PT2H10M
2,SGN,2024-11-01 06:00:00,HAN,2024-11-01 08:10:00,VJ,120,1546000.0,PT2H10M
3,SGN,2024-11-01 06:30:00,HAN,2024-11-01 08:40:00,VJ,122,1546000.0,PT2H10M
4,SGN,2024-11-01 07:30:00,HAN,2024-11-01 09:40:00,VJ,126,1546000.0,PT2H10M


In [9]:
# `departure_date` is the value left over from the fetch loop (a datetime).
# Interpolating it directly yields "YYYY-MM-DD 00:00:00" in the file name,
# which contains spaces and colons (invalid on Windows), so format it as a
# plain date. Also make sure the output directory exists before writing.
os.makedirs('data', exist_ok=True)
export_date = pd.Timestamp(departure_date).strftime('%Y-%m-%d')
df.to_csv(f'data/{export_date}_full_data.csv', index=False)