In [24]:
# Import required basic libraries
import sqlite3
import pandas as pd

# 1. Open the SQLite database. One connection serves every task below; the
#    try/finally guarantees it is closed even if the download or a query fails.
con = sqlite3.connect("my_data1.db")
try:
    # 2. Read the CSV data and (re)create the SPACEXTBL table from it.
    df = pd.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/labs/module_2/data/Spacex.csv")
    # method="multi" binds rows * columns parameters in a single INSERT.
    # SQLite builds older than 3.32 accept at most 999 bound parameters per
    # statement, so the batch size is capped to stay below that limit.
    rows_per_insert = max(1, 999 // max(1, len(df.columns)))
    df.to_sql(
        "SPACEXTBL",
        con,
        if_exists='replace',
        index=False,
        method="multi",
        chunksize=rows_per_insert,
    )

    # 3. One cursor object is reused for every SQL statement.
    cur = con.cursor()

    # ========== Task 1: Display names of unique launch sites ==========
    print("=== Task 1: Names of Unique Launch Sites ===")
    sql1 = 'SELECT DISTINCT "Launch_Site" FROM SPACEXTBL WHERE "Launch_Site" IS NOT NULL;'
    cur.execute(sql1)
    for (site,) in cur.fetchall():
        print(f"- {site}")

    # ========== Task 2: Display 5 records where launch sites begin with 'CCA' ==========
    print("\n=== Task 2: 5 Records with Launch Sites Starting with 'CCA' ===")
    sql2 = 'SELECT * FROM SPACEXTBL WHERE "Launch_Site" LIKE \'CCA%\' LIMIT 5;'
    cur.execute(sql2)
    # cursor.description holds one entry per result column; element 0 is its name.
    col_names = [desc[0] for desc in cur.description]
    print("Column Names: ", " | ".join(col_names))
    print("-" * 120)
    for record in cur.fetchall():
        print(" | ".join(str(item) for item in record))

    # ========== Task 3: Display total payload mass carried by boosters launched by NASA (CRS) ==========
    print("\n=== Task 3: Total Payload Mass by NASA (CRS) Boosters ===")
    sql3 = 'SELECT SUM("PAYLOAD_MASS__KG_") AS Total_Payload_Mass FROM SPACEXTBL WHERE "Customer" = \'NASA (CRS)\';'
    cur.execute(sql3)
    result = cur.fetchone()
    print(f"Total Payload Mass (KG): {result[0]}")

    # ========== Task 4: Display average payload mass carried by booster version F9 v1.1 ==========
    print("\n=== Task 4: Average Payload Mass of Booster Version F9 v1.1 ===")
    sql4 = 'SELECT AVG("PAYLOAD_MASS__KG_") AS Avg_Payload_Mass FROM SPACEXTBL WHERE "Booster_Version" = \'F9 v1.1\';'
    cur.execute(sql4)
    result = cur.fetchone()
    # AVG() yields NULL (None in Python) when no row matches, and
    # round(None, 2) would raise TypeError, so that case is reported explicitly.
    if result[0] is None:
        print("Average Payload Mass (KG): N/A")
    else:
        print(f"Average Payload Mass (KG): {round(result[0], 2)}")  # Keep 2 decimal places

    # ========== Task 5: List the date when the first successful landing outcome in ground pad was achieved ==========
    print("\n=== Task 5: First Successful Ground Pad Landing Date ===")
    sql5 = 'SELECT MIN("Date") AS First_Successful_Ground_Pad_Landing FROM SPACEXTBL WHERE "Landing_Outcome" = \'Success (ground pad)\';'
    cur.execute(sql5)
    result = cur.fetchone()
    print(f"First Successful Ground Pad Landing Date: {result[0]}")

    # ========== Task 6: List booster names with success in drone ship and payload mass 4000-6000 KG ==========
    print("\n=== Task 6: Boosters with Successful Drone Ship Landing (4000 < Payload < 6000 KG) ===")
    sql6 = 'SELECT DISTINCT "Booster_Version" FROM SPACEXTBL WHERE "Landing_Outcome" = \'Success (drone ship)\' AND "PAYLOAD_MASS__KG_" > 4000 AND "PAYLOAD_MASS__KG_" < 6000;'
    cur.execute(sql6)
    for (booster,) in cur.fetchall():
        print(f"- {booster}")

    # ========== Task 7: List total number of successful and failure mission outcomes ==========
    print("\n=== Task 7: Total Count of Successful/Failed Mission Outcomes ===")
    sql7 = 'SELECT "Mission_Outcome", COUNT(*) AS Count FROM SPACEXTBL GROUP BY "Mission_Outcome";'
    cur.execute(sql7)
    for outcome, count in cur.fetchall():
        print(f"{outcome}: {count} times")

    # ========== Task 8: List booster versions with maximum payload mass (using subquery) ==========
    print("\n=== Task 8: Booster Versions with Maximum Payload Mass ===")
    sql8 = 'SELECT DISTINCT "Booster_Version" FROM SPACEXTBL WHERE "PAYLOAD_MASS__KG_" = (SELECT MAX("PAYLOAD_MASS__KG_") FROM SPACEXTBL);'
    cur.execute(sql8)
    for (booster,) in cur.fetchall():
        print(f"- {booster}")

    # ========== Task 9: 2015 monthly records - failure landing outcomes in drone ship ==========
    print("\n=== Task 9: 2015 Monthly Failed Drone Ship Landing Records ===")
    # The query treats "Date" as text with the year in characters 1-4 and the
    # month in characters 6-7. substr() is 1-based, so the year is taken with
    # (1, 4) instead of relying on how SQLite handles a start index of 0.
    sql9 = '''
SELECT substr("Date", 6, 2) AS Month, "Booster_Version", "Launch_Site"
FROM SPACEXTBL
WHERE substr("Date", 1, 4) = '2015' AND "Landing_Outcome" = 'Failure (drone ship)';
'''
    cur.execute(sql9)
    # Print header and results
    print("Month | Booster Version | Launch Site")
    print("-" * 80)
    for month, booster, site in cur.fetchall():
        print(f"{month}   | {booster} | {site}")

    # ========== Task 10: Rank landing outcome counts (2010-06-04 to 2017-03-20) in descending order ==========
    print("\n=== Task 10: Ranked Landing Outcome Counts (2010-06-04 to 2017-03-20) ===")
    sql10 = '''
SELECT "Landing_Outcome", COUNT(*) AS Count
FROM SPACEXTBL
WHERE "Date" BETWEEN '2010-06-04' AND '2017-03-20'
GROUP BY "Landing_Outcome"
ORDER BY Count DESC;
'''
    cur.execute(sql10)
    print("Landing Outcome | Count")
    print("-" * 40)
    for outcome, count in cur.fetchall():
        print(f"{outcome} | {count}")
finally:
    # Release the database connection whether or not every task succeeded.
    con.close()

=== Task 1: Names of Unique Launch Sites ===
- CCAFS LC-40
- VAFB SLC-4E
- KSC LC-39A
- CCAFS SLC-40

=== Task 2: 5 Records with Launch Sites Starting with 'CCA' ===
Column Names:  Date | Time (UTC) | Booster_Version | Launch_Site | Payload | PAYLOAD_MASS__KG_ | Orbit | Customer | Mission_Outcome | Landing_Outcome
------------------------------------------------------------------------------------------------------------------------
2010-06-04 | 18:45:00 | F9 v1.0  B0003 | CCAFS LC-40 | Dragon Spacecraft Qualification Unit | 0 | LEO | SpaceX | Success | Failure (parachute)
2010-12-08 | 15:43:00 | F9 v1.0  B0004 | CCAFS LC-40 | Dragon demo flight C1, two CubeSats, barrel of Brouere cheese | 0 | LEO (ISS) | NASA (COTS) NRO | Success | Failure (parachute)
2012-05-22 | 7:44:00 | F9 v1.0  B0005 | CCAFS LC-40 | Dragon demo flight C2 | 525 | LEO (ISS) | NASA (COTS) | Success | No attempt
2012-10-08 | 0:35:00 | F9 v1.0  B0006 | CCAFS LC-40 | SpaceX CRS-1 | 500 | LEO (ISS) | NASA (CRS) | Succes

In [38]:
# 导入基础库（新手只需要这几个）
import folium
import pandas as pd
from folium.features import DivIcon  # 只导入需要的，不多加

# ========== 1. Load the data (read directly from the URL with pandas) ==========
URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/spacex_launch_geo.csv'
spacex_df = pd.read_csv(URL)

# Keep only the columns used below: launch site, latitude, longitude and `class`.
spacex_df = spacex_df[['Launch Site', 'Lat', 'Long', 'class']]

# One row per launch site: group by site and keep the first record of each group.
launch_sites_df = spacex_df.groupby(['Launch Site'], as_index=False).first()
# Only the site name and its coordinates are needed from here on.
launch_sites_df = launch_sites_df[['Launch Site', 'Lat', 'Long']]

# ========== 2. Create the base map, centred on nasa_coordinate ==========
nasa_coordinate = [29.559684888503615, -95.0830971930759]
site_map = folium.Map(location=nasa_coordinate, zoom_start=5)

# ========== 3. Add a circle and a text label for every launch site ==========
# Iterate over the rows themselves rather than over range(len(...)) with
# .loc[i, ...], which only works while the index happens to be 0..n-1.
# launch_sites_df holds exactly (Launch Site, Lat, Long), in that order.
for site_name, lat, lon in launch_sites_df.itertuples(index=False, name=None):
    coordinate = [lat, lon]  # [latitude, longitude]

    # 3a. Filled circle with a popup showing the full site name.
    circle = folium.Circle(
        coordinate,
        radius=1000,  # circle radius (metres, per the original note)
        color='#d35400',
        fill=True,
        fill_color='#d35400'
    ).add_child(folium.Popup(site_name))
    site_map.add_child(circle)

    # 3b. Text label using the part of the name before the first space
    #     (e.g. "CCAFS LC-40" -> "CCAFS").
    short_name = site_name.split(' ')[0]
    # NOTE(review): `font-size: 12` carries no CSS unit; browsers may ignore the
    # declaration. Confirm whether `12px` was intended before changing it.
    marker = folium.map.Marker(
        coordinate,
        icon=DivIcon(
            icon_size=(20,20),
            icon_anchor=(0,0),
            html=f'<div style="font-size: 12; color:#d35400;"><b>{short_name}</b></div>'
        )
    )
    site_map.add_child(marker)

# ========== 4. Display the map (rendered inline by Jupyter) ==========
site_map

# Optional: save the map as an HTML file and open it in a browser.
# site_map.save("launch_sites_map_with_circles.html")

In [40]:
# 先导入需要的插件（只加这一个，不多导）
from folium.plugins import MarkerCluster

# ========== Step 1: colour used for a launch marker ==========
# The `class` value 1 is shown in green; every other value is shown in red.
def get_marker_color(class_value):
    """Return 'green' when ``class_value`` equals 1, otherwise 'red'."""
    return 'green' if class_value == 1 else 'red'

# Add the marker colour as a new column, derived from `class`.
spacex_df['marker_color'] = spacex_df['class'].apply(get_marker_color)

# ========== Step 2: create the MarkerCluster that will hold all launch markers ==========
marker_cluster = MarkerCluster()

# ========== Step 3: one marker per launch record, added to the cluster ==========
# Walk the needed columns in lockstep instead of materialising each row.
launch_columns = zip(
    spacex_df['Lat'],
    spacex_df['Long'],
    spacex_df['class'],
    spacex_df['marker_color'],
    spacex_df['Launch Site'],
)
for lat, lon, launch_class, color, site_name in launch_columns:
    # Text shown in the popup for the launch result.
    outcome = 'Success' if launch_class == 1 else 'Failed'

    # White marker whose glyph carries the success/failure colour.
    outcome_icon = folium.Icon(color='white', icon_color=color)
    marker = folium.Marker(
        location=[lat, lon],
        icon=outcome_icon,
        popup=f"{site_name} - Launch {outcome}"
    )
    marker_cluster.add_child(marker)

# ========== Step 4: attach the cluster to the existing site_map ==========
site_map.add_child(marker_cluster)

# Display the final map.
site_map

# Optional: save the map.
# site_map.save("launch_sites_with_outcomes_map.html")

In [44]:
import folium
import pandas as pd
from folium.plugins import MarkerCluster

# 1. Load the launch records, keeping only the columns used below.
URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/spacex_launch_geo.csv'
df = pd.read_csv(URL)[['Launch Site', 'Lat', 'Long', 'class']]

# 2. Colour per record: class 1 -> green, class 0 -> red.
df['color'] = df['class'].map({1: 'green', 0: 'red'})

# 3. Create the map and attach a marker cluster to it.
#    (Per the original note, the view is centred on the CCAFS launch area.)
map2 = folium.Map(location=[28.56, -80.58], zoom_start=14)
cluster = MarkerCluster().add_to(map2)

# 4. Add one marker per launch, restricted to the 'CCAFS LC-40' site.
ccafs_launches = df[df['Launch Site'] == 'CCAFS LC-40']
for _, row in ccafs_launches.iterrows():
    result_text = '成功' if row['class'] == 1 else '失败'
    # NOTE(review): only icon_color is set, so the pin itself presumably keeps
    # folium's default marker colour and just the glyph is tinted. Confirm
    # that this is the intended look.
    launch_marker = folium.Marker(
        location=[row['Lat'], row['Long']],
        icon=folium.Icon(icon_color=row['color']),
        popup=f"结果：{result_text}"
    )
    launch_marker.add_to(cluster)

# Display the map.
map2

In [50]:
# 严格按照题目要求导入所有必要库
import folium
import pandas as pd
from folium.plugins import MousePosition
from folium.features import DivIcon
from math import sin, cos, sqrt, atan2, radians

# ===================== Step 0: rebuild the prerequisites so that site_map exists =====================
# 1. Read spacex_launch_geo.csv and keep only the columns used below.
URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/spacex_launch_geo.csv'
spacex_df = pd.read_csv(URL)[['Launch Site', 'Lat', 'Long', 'class']]

# One row per launch site (first record of each group), reduced to name + coordinates.
launch_sites_df = (
    spacex_df
    .groupby(['Launch Site'], as_index=False)
    .first()[['Launch Site', 'Lat', 'Long']]
)

# 2. Create the initial map, centred on nasa_coordinate
#    (NASA Johnson Space Center, per the original note).
nasa_coordinate = [29.559684888503615, -95.0830971930759]
site_map = folium.Map(location=nasa_coordinate, zoom_start=5)

# ===================== TASK 3 =====================
# Step 1: add a MousePosition control so that the (Lat, Long) under the mouse
# pointer is shown on the map.
# JavaScript snippet handed to the control to format each coordinate value.
formatter = "function(num) {return L.Util.formatNum(num, 5);};"
mouse_position_options = {
    'position': 'topright',
    'separator': ' Long: ',
    'empty_string': 'NaN',
    'lng_first': False,
    'num_digits': 20,
    'prefix': 'Lat:',
    'lat_formatter': formatter,
    'lng_formatter': formatter,
}
mouse_position = MousePosition(**mouse_position_options)
# Attach the control to the map.
mouse_position.add_to(site_map)

# Step 2: distance helper based on the haversine formula.
from math import sin, cos, sqrt, atan2, radians
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.

    Returns:
        The haversine distance in km, using an Earth radius of 6373.0 km.
    """
    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return distance

# Steps 3-6: label the closest coastline / highway / railway / city points with
# their distance to the launch site and connect each of them to the site.
def _add_distance_overlay(target_map, origin, destination, color):
    """Add a distance label at ``destination`` and a line from ``origin`` to it.

    Args:
        target_map: folium map that receives the marker and the line.
        origin: [lat, lon] of the launch site.
        destination: [lat, lon] of the point of interest.
        color: CSS colour used for the label text.

    Returns:
        Tuple ``(distance_km, marker, line)`` so callers can keep references.
    """
    distance = calculate_distance(origin[0], origin[1], destination[0], destination[1])
    marker = folium.Marker(
        destination,
        icon=DivIcon(
            icon_size=(20,20),
            icon_anchor=(0,0),
            html='<div style="font-size: 12; color:%s;"><b>%s</b></div>' % (color, "{:10.2f} KM".format(distance)),
            )
        )
    line = folium.PolyLine(locations=[origin, destination], weight=1)
    # Same order as before the refactor: the label first, then the connecting line.
    target_map.add_child(marker)
    target_map.add_child(line)
    return distance, marker, line


# Launch site used as the origin of every measurement.
launch_site_name = 'CCAFS LC-40'
# Filter once and read both coordinates from the matching row(s).
launch_site_rows = launch_sites_df[launch_sites_df['Launch Site'] == launch_site_name]
launch_site_lat = launch_site_rows['Lat'].values[0]
launch_site_lon = launch_site_rows['Long'].values[0]
launch_site_coordinate = [launch_site_lat, launch_site_lon]

# ---------------------- Closest coastline ----------------------
# Example coastline coordinate (per the original note, taken from the assignment).
coastline_lat = 28.56367
coastline_lon = -80.57163
coastline_coordinate = [coastline_lat, coastline_lon]
distance_coastline, distance_marker, lines = _add_distance_overlay(
    site_map, launch_site_coordinate, coastline_coordinate, '#d35400'
)

# ---------------------- 1. Closest highway ----------------------
# Coordinates recorded by the author (presumably read off the map with MousePosition).
highway_lat = 28.56281
highway_lon = -80.57452
highway_coordinate = [highway_lat, highway_lon]
distance_highway, distance_marker_highway, lines_highway = _add_distance_overlay(
    site_map, launch_site_coordinate, highway_coordinate, '#2ecc71'
)

# ---------------------- 2. Closest railway ----------------------
# Coordinates recorded by the author for the nearest railway.
railway_lat = 28.56510
railway_lon = -80.58020
railway_coordinate = [railway_lat, railway_lon]
distance_railway, distance_marker_railway, lines_railway = _add_distance_overlay(
    site_map, launch_site_coordinate, railway_coordinate, '#e74c3c'
)

# ---------------------- 3. Closest city ----------------------
# Coordinates recorded by the author for the nearest city
# (Cape Canaveral, per the original note).
city_lat = 28.39680
city_lon = -80.56890
city_coordinate = [city_lat, city_lon]
distance_city, distance_marker_city, lines_city = _add_distance_overlay(
    site_map, launch_site_coordinate, city_coordinate, '#3498db'
)

# Display the final map.
site_map