#   Insights STARS WARS

In [2]:
import os
import shutil
import duckdb

duckdb_conn = duckdb.connect(database=":memory:", read_only=False)

def mover_arquivos_parquet():
    if not os.path.exists('parquet'):
        os.makedirs('parquet')
    [shutil.move(f, 'parquet') for f in os.listdir('.') if f.endswith('.parquet')]

mover_arquivos_parquet()

df_films = duckdb_conn.from_parquet("./parquet/films_data.parquet").to_df()
df_people = duckdb_conn.from_parquet("./parquet/people_data.parquet").to_df()
df_films = duckdb_conn.from_parquet("./parquet/films_data.parquet").to_df()
df_starships = duckdb_conn.from_parquet("./parquet/starships_data.parquet").to_df()
df_vehicles = duckdb_conn.from_parquet("./parquet/vehicles_data.parquet").to_df()
df_planets = duckdb_conn.from_parquet("./parquet/planets_data.parquet").to_df()
df_species = duckdb_conn.from_parquet("./parquet/species_data.parquet").to_df()

## Número total de naves de Star Wars

In [6]:
query = """
SELECT COUNT(*) AS Total_StarShips
FROM df_starships;
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Total_StarShips
0,49


## Fabricantes mais comuns de naves

In [15]:

query = """
SELECT manufacturer as Fabricante, COUNT(*) AS Naves
FROM df_starships
WHERE Fabricante <> ('None')
GROUP BY manufacturer
ORDER BY Naves DESC;

"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Fabricante,Naves
0,Corellian Engineering Corporation,3
1,Kuat Systems Engineering,2
2,Kuat Drive Yards,2
3,Theed Palace Space Vessel Engineering Corps,2
4,Sienar Fleet Systems,2
5,"Theed Palace Space Vessel Engineering Corps, N...",2
6,"Sienar Fleet Systems, Cyngus Spaceworks",1
7,"Kuat Drive Yards, Fondor Shipyards",1
8,"Gallofree Yards, Inc.",1
9,Mon Calamari shipyards,1


## Comprimento médio das naves por classe

In [28]:
query = """
SELECT starship_class as Classe, AVG(CAST(REPLACE(length, ',', '.') AS DOUBLE)) AS Comprimento_Medio
FROM df_starships
WHERE starship_class <> ('None')
GROUP BY starship_class;
"""

result = duckdb_conn.execute(query)
result.df()


Unnamed: 0,Classe,Comprimento_Medio
0,Assault Starfighter,16.9
1,Space cruiser,115.0
2,Deep Space Mobile Battlestation,120000.0
3,Starfighter,10.06
4,Space Transport,26.5
5,assault starfighter,14.0
6,Medium transport,90.0
7,Armed government transport,20.0
8,Escort ship,300.0
9,Star Cruiser,1200.0


## Naves com maior capacidade de carga

In [40]:
query = """
SELECT name as Nome, CAST (cargo_capacity AS int64) as Capacidade
FROM df_starships
WHERE cargo_capacity not in ('None', 'unknown')
ORDER BY cargo_capacity DESC
LIMIT 10;
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome,Capacidade
0,Imperial shuttle,80000
1,Slave 1,70000
2,Naboo fighter,65
3,EF76 Nebulon-B escort frigate,6000000
4,Jedi starfighter,60
5,B-wing,45
6,Droid control ship,4000000000
7,A-wing,40
8,Star Destroyer,36000000
9,CR90 corvette,3000000


## Filmes em que cada nave aparece

In [46]:
query = """
SELECT s.name AS Nome_Nave, f.title AS Nome_Filme
FROM df_starships s
JOIN df_films f 
ON ARRAY_CONTAINS(f.starships, s.url)
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Nave,Nome_Filme
0,CR90 corvette,A New Hope
1,Star Destroyer,A New Hope
2,Sentinel-class landing craft,A New Hope
3,Death Star,A New Hope
4,Millennium Falcon,A New Hope
5,Y-wing,A New Hope
6,X-wing,A New Hope
7,TIE Advanced x1,A New Hope
8,Star Destroyer,The Empire Strikes Back
9,Millennium Falcon,The Empire Strikes Back


## Número de naves por piloto

In [44]:
query = """
SELECT p.name AS Nome_Piloto, COUNT(*) AS Qtd_Nave
FROM df_starships s
JOIN df_people p ON p.url = ANY(s.pilots)
GROUP BY p.name
ORDER BY Qtd_Nave DESC;

"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Piloto,Qtd_Nave
0,Luke Skywalker,2
1,Padmé Amidala,2
2,Han Solo,2
3,Chewbacca,2
4,Boba Fett,1
5,Lando Calrissian,1
6,Plo Koon,1
7,Arvel Crynyd,1
8,Obi-Wan Kenobi,1
9,Anakin Skywalker,1


## Naves que podem transportar mais passageiros do que a tripulação

In [54]:
query = """
SELECT name as Nome_Nave, passengers as Qtd_Passageiros, crew as Tripulacao
FROM df_starships
WHERE passengers > crew and passengers not in ('None', 'None', 'unknown');
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Nave,Qtd_Passageiros,Tripulacao
0,CR90 corvette,600.0,30-165
1,Star Destroyer,,47060
2,Sentinel-class landing craft,75.0,5
3,Death Star,843342.0,342953
4,Millennium Falcon,6.0,4
5,Executor,38000.0,279144
6,Rebel transport,90.0,6
7,Slave 1,6.0,1
8,Scimitar,6.0,1
