In [None]:
from dotenv import load_dotenv

# Load settings from a .env file into the process environment.
# NOTE(review): the DB_NAME / DB_USER / DB_PASSWORD / DB_HOST / DB_PORT values read
# from os.environ by the Postgres migration cell are presumably supplied this way — confirm.
load_dotenv()

In [None]:
import requests

# Public copy of the sample travel database (SQLite file).
url = "https://storage.googleapis.com/benchmarks-artifacts/travel-db/travel2.sqlite"

# Seconds to wait for the server before giving up. Without a timeout,
# requests waits indefinitely and a stalled connection hangs the whole cell.
DOWNLOAD_TIMEOUT_SECONDS = 60

response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)

# Only a 200 response carries the database; anything else is reported, not saved.
if response.status_code == 200:
    with open("travel.db", "wb") as file:
        file.write(response.content)
    print("File downloaded and saved as travel.db")
else:
    print(f"Failed to download the file. Status code: {response.status_code}")

In [None]:

import sqlite3
import pandas as pd

def update_dates(file):
    """Shift the timestamps stored in a SQLite travel database so they end at "now".

    Every table is loaded into a DataFrame. The offset between the current time
    and the latest ``flights.actual_departure`` is added to ``bookings.book_date``
    and to the four departure/arrival columns of ``flights``. All tables are then
    written back with ``if_exists="replace"``, so each table is dropped and
    recreated from its DataFrame.

    Args:
        file: Path of the SQLite database file; it is modified in place.

    Returns:
        The same ``file`` value that was passed in.

    Raises:
        KeyError: If the database lacks the ``flights`` or ``bookings`` table,
            or one of the expected timestamp columns.

    NOTE(review): "now" is obtained with ``pd.to_datetime("now")`` and then
    labelled with the data's timezone via ``tz_localize``; whether that yields
    the intended instant depends on the pandas version and on the data's
    timezone — confirm before relying on exact times.
    """
    conn = sqlite3.connect(file)
    try:
        tables = pd.read_sql(
            "SELECT name FROM sqlite_master WHERE type='table';", conn
        ).name.tolist()
        # Double-quote the identifier so table names that need quoting still load.
        tdf = {t: pd.read_sql(f'SELECT * from "{t}"', conn) for t in tables}

        # The source marks missing timestamps with the literal string "\N".
        example_time = pd.to_datetime(
            tdf["flights"]["actual_departure"].replace("\\N", pd.NaT)
        ).max()
        current_time = pd.to_datetime("now").tz_localize(example_time.tz)
        time_diff = current_time - example_time

        tdf["bookings"]["book_date"] = (
            pd.to_datetime(
                tdf["bookings"]["book_date"].replace("\\N", pd.NaT), utc=True
            )
            + time_diff
        )

        datetime_columns = [
            "scheduled_departure",
            "scheduled_arrival",
            "actual_departure",
            "actual_arrival",
        ]
        for column in datetime_columns:
            tdf["flights"][column] = (
                pd.to_datetime(tdf["flights"][column].replace("\\N", pd.NaT))
                + time_diff
            )

        for table_name, df in tdf.items():
            df.to_sql(table_name, conn, if_exists="replace", index=False)
        conn.commit()
    finally:
        # Always release the file handle, even when a step above raises.
        conn.close()

    return file

# Same path the download step writes the SQLite file to.
local_file = 'travel.db'

# update_dates rewrites the file in place and returns the path it was given.
db = update_dates(local_file)

In [1]:
import sqlite3
import pandas as pd

conn = sqlite3.connect("travel.db")
cursor = conn.cursor()

# sqlite_master lists every table; each fetched row is a 1-tuple holding the name.
tables = cursor.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
).fetchall()

for table in tables:
    table_name = table[0]
    print(f"\nTable: {table_name}")

    # Preview query is capped at 10 rows; the full size comes from COUNT(*).
    query = f"SELECT * FROM {table_name} LIMIT 10;"
    row_count = cursor.execute(f"SELECT COUNT(*) FROM {table_name};").fetchone()[0]
    print(f"Table: {table_name} | Row Count: {row_count}")

    df = pd.read_sql(query, conn)

    # Render the preview as a markdown table, or say so when there is nothing to show.
    print("No data available." if df.empty else df.to_markdown(index=False))
conn.close()



Table: aircrafts_data
Table: aircrafts_data | Row Count: 9
| aircraft_code   | model               |   range |
|:----------------|:--------------------|--------:|
| 773             | Boeing 777-300      |   11100 |
| 763             | Boeing 767-300      |    7900 |
| SU9             | Sukhoi Superjet-100 |    3000 |
| 320             | Airbus A320-200     |    5700 |
| 321             | Airbus A321-200     |    5600 |
| 319             | Airbus A319-100     |    6700 |
| 733             | Boeing 737-300      |    4200 |
| CN1             | Cessna 208 Caravan  |    1200 |
| CR2             | Bombardier CRJ-200  |    2700 |

Table: airports_data
Table: airports_data | Row Count: 115
| airport_code   | airport_name                                     | city        | coordinates          | timezone            |
|:---------------|:-------------------------------------------------|:------------|:---------------------|:--------------------|
| ATL            | Hartsfield-Jackson Atlanta Inte

In [None]:
import os
import sqlite3
import psycopg2
from psycopg2.extras import execute_batch

# Copy every table of the local SQLite file into the Postgres database described
# by the DB_* environment variables.
#
# NOTE: tables are created with IF NOT EXISTS and never emptied, so running this
# cell again appends the same rows a second time.

# Rows pulled from SQLite per round trip; execute_batch pages them further below.
BATCH_SIZE = 5000

sqlite_conn = sqlite3.connect("travel.db")
try:
    pg_conn = psycopg2.connect(
        dbname=os.environ["DB_NAME"],
        user=os.environ["DB_USER"],
        password=os.environ["DB_PASSWORD"],
        host=os.environ["DB_HOST"],
        port=os.environ["DB_PORT"]
    )
    try:
        sqlite_cursor = sqlite_conn.cursor()
        pg_cursor = pg_conn.cursor()

        sqlite_cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [table[0] for table in sqlite_cursor.fetchall()]

        for table_name in tables:
            print(f"🔄 Migrating table: {table_name}")

            # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk).
            sqlite_cursor.execute(f"PRAGMA table_info({table_name});")
            columns = sqlite_cursor.fetchall()
            column_names = [column[1] for column in columns]

            # Reuse the SQLite declared types, renaming the two that Postgres lacks.
            column_defs = ", ".join(
                f"{column[1]} {column[2].replace('NVARCHAR', 'VARCHAR').replace('DATETIME', 'TIMESTAMP')}"
                for column in columns
            )
            pg_cursor.execute(
                f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs});"
            )
            # Commit the DDL now so an empty table is still created in Postgres.
            pg_conn.commit()

            sqlite_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            total_rows = sqlite_cursor.fetchone()[0]
            print(f"📊 Total rows in {table_name}: {total_rows}")

            # The INSERT text is the same for every batch of this table.
            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES ({placeholders})"

            # Stream one result set in batches instead of re-running LIMIT/OFFSET
            # queries, which have no guaranteed order and re-scan skipped rows.
            sqlite_cursor.execute(f"SELECT * FROM {table_name};")
            inserted_rows = 0
            while True:
                rows = sqlite_cursor.fetchmany(BATCH_SIZE)
                if not rows:
                    break

                execute_batch(pg_cursor, insert_query, rows, page_size=1000)
                pg_conn.commit()

                # Report the real count; a running offset overshoots on the last batch.
                inserted_rows += len(rows)
                print(f"✅ Inserted {inserted_rows}/{total_rows} rows into {table_name}")

            print(f"✅ Migration complete for table: {table_name}")

        pg_cursor.close()
        sqlite_cursor.close()
    finally:
        # Release the Postgres connection even when a table fails to migrate.
        pg_conn.close()
finally:
    sqlite_conn.close()

print("🎉 Migration Completed Successfully!")
