In [40]:
import requests
import pandas as pd
import time
from tqdm import tqdm
from datetime import datetime, timedelta
import sys
from IPython.display import clear_output

In [1]:
import os
from dotenv import load_dotenv

# Populate the process environment via python-dotenv before reading the
# credentials, so the secrets never have to be written into the notebook.
load_dotenv()

# Credentials passed as client_id / client_secret when requesting a token.
# Each is None when the corresponding environment variable is not set.
API_KEY = os.environ.get('API_KEY')
API_SECRET = os.environ.get('API_SECRET')


In [None]:
def get_access_token(timeout=30):
    """Request an OAuth2 access token from the Amadeus test API.

    Uses the client-credentials grant with the module-level ``API_KEY`` and
    ``API_SECRET`` values.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        ValueError: If ``API_KEY`` or ``API_SECRET`` is missing or empty.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    # Fail early with a clear message instead of sending empty credentials
    # and getting an opaque HTTP error back from the token endpoint.
    if not API_KEY or not API_SECRET:
        raise ValueError(
            'API_KEY and API_SECRET must be set (e.g. in a .env file) '
            'before requesting an access token.'
        )

    url = 'https://test.api.amadeus.com/v1/security/oauth2/token'
    data = {
        'grant_type': 'client_credentials',
        'client_id': API_KEY,
        'client_secret': API_SECRET
    }
    response = requests.post(url, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']


In [None]:
def get_flight_offers(origin, destination, departure_date, access_token, timeout=30):
    """Fetch one-way flight offers for a route and date from the Amadeus test API.

    Args:
        origin: Value sent as ``originLocationCode`` (e.g. ``'SGN'``).
        destination: Value sent as ``destinationLocationCode``.
        departure_date: Value sent as ``departureDate``; the notebook passes
            a ``YYYY-MM-DD`` string.
        access_token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    url = 'https://test.api.amadeus.com/v2/shopping/flight-offers'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {
        'originLocationCode': origin,
        'destinationLocationCode': destination,
        'departureDate': departure_date,
        'adults': '1',
        'nonStop': 'false',   # connecting itineraries are requested too
        'currencyCode': 'VND',
        'max': '250'
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


In [None]:
def process_data(data):
    flight_offers = data.get('data', [])
    records = {}
    for offer in flight_offers:
        price = offer.get('price', {}).get('total')
        itineraries = offer.get('itineraries', [])
        for itinerary in itineraries:
            segments = itinerary.get('segments', [])
            for segment in segments:
                departure = segment.get('departure', {})
                arrival = segment.get('arrival', {})
                carrierCode = segment.get('carrierCode')
                flightNumber = segment.get('number')
                duration = segment.get('duration')
                unique_id = (
                    departure.get('iataCode'),
                    departure.get('at'),
                    arrival.get('iataCode'),
                    arrival.get('at'),
                    carrierCode,
                    flightNumber
                )
                # Chỉ thêm chuyến bay nếu chưa tồn tại
                if unique_id not in records:
                    records[unique_id] = {
                        'Departure Airport': departure.get('iataCode'),
                        'Departure Time': departure.get('at'),
                        'Arrival Airport': arrival.get('iataCode'),
                        'Arrival Time': arrival.get('at'),
                        'Carrier Code': carrierCode,
                        'Flight Number': flightNumber,
                        'Price (VND)': price,
                        'Duration': duration,
                    }
    return pd.DataFrame.from_dict(records, orient='index')


In [44]:
access_token = get_access_token()

# Airports to query (IATA codes)
airports = ['SGN', 'HAN', 'DAD', 'HUI', 'CXR', 'VCA', 'HPH', 'VII', 'BMV', 'DLI']

# Search for flights departing tomorrow, based on this machine's local clock.
tomorrow = datetime.now() + timedelta(days=1)
departure_date = tomorrow.strftime('%Y-%m-%d')

# Every ordered (origin, destination) pair of distinct airports.
route_pairs = [
    (origin, destination)
    for origin in airports
    for destination in airports
    if origin != destination
]
total_pairs = len(route_pairs)

route_frames = []   # one DataFrame per route that returned at least one row
failed_routes = []  # (origin, destination, error) for routes that raised

with tqdm(total=total_pairs, desc='Đang lấy dữ liệu') as pbar:
    for origin, destination in route_pairs:
        # Show the current route next to the bar instead of writing raw
        # carriage-return lines that fight with tqdm's own redraws.
        pbar.set_postfix_str(f"{origin} → {destination}")

        try:
            data = get_flight_offers(origin, destination, departure_date, access_token)
            flights_df = process_data(data)
            if not flights_df.empty:
                route_frames.append(flights_df)
        except requests.exceptions.HTTPError as http_err:
            failed_routes.append((origin, destination, http_err))
            # tqdm.write prints above the bar, so the message stays visible.
            tqdm.write(f"Lỗi HTTP từ {origin} đến {destination}: {http_err}")
        except Exception as e:
            # Deliberately broad: one bad route should not abort the whole run.
            failed_routes.append((origin, destination, e))
            tqdm.write(f"Lỗi từ {origin} đến {destination}: {e}")

        # Pause 1 second between requests, after failures as well, so an
        # erroring endpoint is not hit back-to-back.
        time.sleep(1)
        pbar.update(1)

# Concatenate once at the end; growing a DataFrame inside the loop re-copies
# all previously collected rows on every iteration.
df = pd.concat(route_frames, ignore_index=True) if route_frames else pd.DataFrame()

# print() instead of a bare sys.stdout.write() as the last statement, which
# would leak its return value (the character count) as the cell's output.
print("\nHoàn thành!")
if failed_routes:
    print(f"Số tuyến bị lỗi: {len(failed_routes)}/{total_pairs}")




Đang lấy dữ liệu: 100%|██████████| 90/90 [07:08<00:00,  4.76s/it]


Hoàn thành!





13

In [45]:
# Columns that together identify one flight segment.
key_columns = [
    'Departure Airport', 'Departure Time', 'Arrival Airport', 'Arrival Time',
    'Carrier Code', 'Flight Number',
]

# Drop repeated segments (the first occurrence is kept) and renumber the
# rows from 0.
df = df.drop_duplicates(subset=key_columns).reset_index(drop=True)

# Parse the timestamp columns into datetime values.
for time_column in ('Departure Time', 'Arrival Time'):
    df[time_column] = pd.to_datetime(df[time_column])

print(df.shape)
df.head()


(1162, 8)


Unnamed: 0,Departure Airport,Departure Time,Arrival Airport,Arrival Time,Carrier Code,Flight Number,Price (VND),Duration
0,SGN,2024-10-04 22:20:00,HAN,2024-10-05 00:10:00,VJ,176,1538000.0,PT1H50M
1,SGN,2024-10-04 05:00:00,HAN,2024-10-04 07:10:00,VJ,182,1538000.0,PT2H10M
2,SGN,2024-10-04 05:25:00,HAN,2024-10-04 07:35:00,VJ,198,1538000.0,PT2H10M
3,SGN,2024-10-04 06:00:00,HAN,2024-10-04 08:10:00,VJ,120,1538000.0,PT2H10M
4,SGN,2024-10-04 06:40:00,HAN,2024-10-04 08:50:00,VJ,168,1538000.0,PT2H10M


In [None]:
# Create the output folder first; to_csv does not create missing directories
# and would fail with OSError on a fresh checkout.
os.makedirs('data', exist_ok=True)
df.to_csv(f'data/{departure_date}_full_data.csv', index=False)