#   Insights STARS WARS

In [1]:
import os
import shutil
import duckdb

duckdb_conn = duckdb.connect(database=":memory:", read_only=False)

def mover_arquivos_parquet():
    if not os.path.exists('parquet'):
        os.makedirs('parquet')
    [shutil.move(f, 'parquet') for f in os.listdir('.') if f.endswith('.parquet')]

mover_arquivos_parquet()

df_films = duckdb_conn.from_parquet("./parquet/films_data.parquet").to_df()
df_people = duckdb_conn.from_parquet("./parquet/people_data.parquet").to_df()
df_films = duckdb_conn.from_parquet("./parquet/films_data.parquet").to_df()
df_starships = duckdb_conn.from_parquet("./parquet/starships_data.parquet").to_df()
df_vehicles = duckdb_conn.from_parquet("./parquet/vehicles_data.parquet").to_df()
df_planets = duckdb_conn.from_parquet("./parquet/planets_data.parquet").to_df()
df_species = duckdb_conn.from_parquet("./parquet/species_data.parquet").to_df()

## Número total de veículos de Star Wars

In [5]:
query = """
SELECT COUNT(*) AS Total_Veiculos
FROM df_vehicles;

"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Total_Veiculos
0,79


## Fabricantes mais comuns de veículos

In [13]:

query = """
SELECT manufacturer as Fabricante, COUNT(*) AS Qtd_Veiculos
FROM df_vehicles
WHERE manufacturer not in ('None', 'unknown')
GROUP BY manufacturer
ORDER BY Qtd_Veiculos DESC;
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Fabricante,Qtd_Veiculos
0,,40
1,Sienar Fleet Systems,3
2,Baktoid Armor Workshop,3
3,Rothana Heavy Engineering,3
4,Appazanna Engineering Works,2
5,Huppla Pasa Tisc Shipwrights Collective,2
6,"Kuat Drive Yards, Imperial Department of Milit...",2
7,Kuat Drive Yards,2
8,Haor Chall Engineering,2
9,Corellia Mining Corporation,1


## Classes de veículos mais comuns

In [15]:
query = """
SELECT vehicle_class as Classe_Veiculo, COUNT(*) AS Qtd_Veiculo
FROM df_vehicles
WHERE vehicle_class not in ('None', 'unknown')
GROUP BY vehicle_class
ORDER BY Qtd_Veiculo DESC;
"""
result = duckdb_conn.execute(query)
result.df()


Unnamed: 0,Classe_Veiculo,Qtd_Veiculo
0,repulsorcraft,7
1,airspeeder,5
2,walker,4
3,starfighter,4
4,speeder,3
5,gunship,2
6,wheeled walker,2
7,assault walker,1
8,repulsorcraft cargo skiff,1
9,wheeled,1


## Veículos com o maior número de pilotos

In [31]:
query = """
SELECT v.name AS Nome_Veiculo, 
       COUNT(p.url) AS Numero_Pilotos
FROM df_vehicles as v
JOIN df_people as p 
ON ARRAY_CONTAINS(p.vehicles, v.url)
WHERE v.vehicle_class NOT IN ('None', 'unknown')
GROUP BY v.name
ORDER BY Numero_Pilotos DESC;
 
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Veiculo,Numero_Pilotos
0,Imperial Speeder Bike,2
1,Tribubble bongo,2
2,Snowspeeder,2
3,Sith speeder,1
4,AT-ST,1
5,Zephyr-G swoop bike,1
6,Flitknot speeder,1
7,Koro-2 Exodrive airspeeder,1
8,XJ-6 airspeeder,1
9,Tsmeu-6 personal wheel bike,1


## Veículos com a maior capacidade de carga

In [26]:
query = """
SELECT name AS Nome_Veiculo, 
       CASE 
           WHEN TRY_CAST(cargo_capacity AS int64) IS NULL THEN NULL 
           ELSE CAST(cargo_capacity AS int64) 
       END AS Capacidade_Carga
FROM df_vehicles
WHERE cargo_capacity NOT IN ('None', 'unknown')


"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Veiculo,Capacidade_Carga
0,Sand Crawler,50000.0
1,T-16 skyhopper,50.0
2,X-34 landspeeder,5.0
3,TIE/LN starfighter,65.0
4,Snowspeeder,10.0
5,TIE bomber,
6,AT-AT,1000.0
7,AT-ST,200.0
8,Storm IV Twin-Pod cloud car,10.0
9,Sail barge,2000000.0


## Veículos que aparecem em mais filmes

In [34]:
query = """
SELECT v.name AS Nome_Veiculo, COUNT(f.url) AS Numero_Filmes
FROM df_vehicles v
JOIN df_films f 
ON ARRAY_CONTAINS(f.vehicles, v.url)
GROUP BY v.name
ORDER BY Numero_Filmes DESC;
"""
result = duckdb_conn.execute(query)
result.df()

Unnamed: 0,Nome_Veiculo,Numero_Filmes
0,TIE/LN starfighter,3
1,Vulture Droid,2
2,Neimoidian shuttle,2
3,Sand Crawler,2
4,AT-TE,2
5,AT-AT,2
6,AT-ST,2
7,TIE bomber,2
8,LAAT/i,2
9,SPHA,1
