##  MySQL_Analysis_Tmp

In [3]:
from sqlalchemy import create_engine
import pandas as pd

# 连接信息
username = ''
password = ''
host = ''
port = ''
database = ''

# 连接到 mysql 系统库
engine = create_engine(
    f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}",
    connect_args={"charset": "utf8mb4"}
)

### 1
* 哪些机场是最繁忙的？（出发 + 到达 总航线数量）
* 商业意义：找出最重要的hub机场，可以决定哪里优先增加运力、广告、合作。

In [4]:
query = """
SELECT
    a.name AS airport_name,
    a.city,
    a.country,
    (COALESCE(departures.route_count, 0) + COALESCE(arrivals.route_count, 0)) AS total_routes
FROM airports a
LEFT JOIN (
    SELECT source_airport_id, COUNT(*) AS route_count
    FROM routes
    GROUP BY source_airport_id
) departures ON a.airport_id = departures.source_airport_id
LEFT JOIN (
    SELECT destination_airport_id, COUNT(*) AS route_count
    FROM routes
    GROUP BY destination_airport_id
) arrivals ON a.airport_id = arrivals.destination_airport_id
ORDER BY total_routes DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

Unnamed: 0,airport_name,city,country,total_routes
0,Ericachester International Airport,Pageville,Venezuela,7
1,Lake Thomasmouth International Airport,Josephport,Chile,6
2,Port Laura International Airport,North Michele,Germany,4
3,Carolinemouth International Airport,North Kennethburgh,Tokelau,3
4,West Anne International Airport,Griffinstad,French Southern Territories,3
5,Josephstad International Airport,Johnview,Uzbekistan,3
6,New Carol International Airport,East Robin,Guernsey,3
7,West Charles International Airport,Bakerchester,Philippines,3
8,East James International Airport,North Cherylborough,Australia,2
9,Ericksonfurt International Airport,Myersburgh,Bosnia and Herzegovina,2


### 2
* 哪家航空公司承运的航班最多？（按照航班数量统计）
* 商业意义：了解市场份额最大的航空公司，决定资源投放、市场合作重点。

In [5]:
query = """
SELECT
    al.name AS airline_name,
    COUNT(f.flight_id) AS flight_count
FROM flights f
JOIN routes r ON f.route_id = r.route_id
JOIN airlines al ON r.airline_id = al.airline_id
GROUP BY al.airline_id
ORDER BY flight_count DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

Unnamed: 0,airline_name,flight_count
0,Robinson and Sons,11
1,"Rose, Ellis and Little",9
2,Ross and Sons,7
3,"Olson, Rogers and Harrison",6
4,Perez Inc,6
5,Guerrero Group,4
6,Roberson LLC,3
7,Newman and Sons,3
8,"Flowers, Hill and Smith",3
9,Nelson Ltd,3


### 3
* 哪种飞机型号（Aircraft）被使用得最多？（按航班数量统计）
* 商业意义：了解资产利用率，未来采购什么飞机型号。

In [7]:
query = """
SELECT
    ac.model,
    ac.manufacturer,
    COUNT(f.flight_id) AS usage_count
FROM flights f
JOIN aircrafts ac ON f.aircraft_id = ac.aircraft_id
GROUP BY ac.aircraft_id
ORDER BY usage_count DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

Unnamed: 0,model,manufacturer,usage_count
0,Next A350,Gray-Foley,4
1,Movement A320,"Carpenter, Schmidt and Jones",4
2,Able 737,Poole Group,4
3,Family A350,"Watson, Rodriguez and Huffman",4
4,Mission A350,Blair PLC,4
5,Exist 737,Mccoy LLC,4
6,Parent 737,Peterson LLC,4
7,Edge 737,Barrett Inc,3
8,Like 737,"Brown, Thompson and Fry",3
9,Treat A350,Thomas-Ballard,2


### 4
* 哪条航线（Airport->Airport）乘客量最大？（按bookings数量统计）
* 商业意义：高需求航线可以考虑增加班次、提价、增加商务舱座位等策略。

In [6]:
query = """
SELECT
    sa.name AS source_airport,
    da.name AS destination_airport,
    COUNT(b.booking_id) AS passenger_count
FROM bookings b
JOIN flights f ON b.flight_id = f.flight_id
JOIN routes r ON f.route_id = r.route_id
JOIN airports sa ON r.source_airport_id = sa.airport_id
JOIN airports da ON r.destination_airport_id = da.airport_id
GROUP BY r.source_airport_id, r.destination_airport_id
ORDER BY passenger_count DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

Unnamed: 0,source_airport,destination_airport,passenger_count
0,Josephstad International Airport,West Anne International Airport,13
1,New Brooke International Airport,Ericksonfurt International Airport,11
2,Ericksonfurt International Airport,Melissabury International Airport,10
3,Castroland International Airport,Lake Thomasmouth International Airport,10
4,Matthewbury International Airport,Carolinemouth International Airport,9
5,East James International Airport,West Anne International Airport,8
6,New Carol International Airport,Lake Thomasmouth International Airport,8
7,New Brooke International Airport,Reginafurt International Airport,7
8,Stevenside International Airport,Ericachester International Airport,6
9,Armstrongbury International Airport,Port Laura International Airport,5


### 5
* 哪些国家（乘客国籍）是我们的主要客户来源？（乘客 nationality）
* 商业意义：帮助制定国际市场营销策略，比如针对某国打广告。

In [None]:
query = """
SELECT
    nationality,
    COUNT(*) AS passenger_count
FROM passengers
GROUP BY nationality
ORDER BY passenger_count DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

### 6
* 哪些航班经常延误？（status = ‘Delayed’ 的统计）
* 商业意义：识别需要改进准点率的航线，减少赔偿和客户流失。

In [None]:
query = """
SELECT
    f.flight_number,
    COUNT(*) AS delay_count
FROM flights f
WHERE f.status = 'Delayed'
GROUP BY f.flight_number
ORDER BY delay_count DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

### 7
* 各航班的平均票价是多少？（可以结合航班盈利能力）
* 商业意义：评估不同航班的定价策略是否合理。

In [8]:
query = """
SELECT
    f.flight_number,
    AVG(b.ticket_price) AS avg_ticket_price
FROM bookings b
JOIN flights f ON b.flight_id = f.flight_id
GROUP BY f.flight_id
ORDER BY avg_ticket_price DESC
LIMIT 10;
"""
df = pd.read_sql(query, con=engine)
df.head(100)

Unnamed: 0,flight_number,avg_ticket_price
0,XZ086,983.39
1,HZ270,890.92
2,VU683,887.56
3,IC643,834.255
4,WR405,772.443333
5,EQ383,769.065
6,YY076,738.046667
7,JG091,737.575
8,KF867,733.84
9,LR422,707.21


In [None]:
query = """

"""
df = pd.read_sql(query, con=engine)
df.head(100)

In [None]:
query = """

"""
df = pd.read_sql(query, con=engine)
df.head(100)