## 0. Understanding Data


In [7]:
# import required modules
import sqlite3
import pandas as pd

In [8]:
# create a connection 
conn = sqlite3.connect('starwars.db')

In [9]:
# take a look at the stored tables
tables = ['people', 'films', 'starships', 'vehicles', 'species', 'planets']

for table in tables:
    display(table)
    display(pd.read_sql(f'SELECT * FROM {table} LIMIT 1', conn))

'people'

Unnamed: 0,id,name,birth_year,eye_color,gender,hair_color,height,mass,skin_color
0,1,Luke Skywalker,19BBY,blue,male,blond,172,77,fair


'films'

Unnamed: 0,id,title,episode_id,opening_crawl,director,producer,release_date
0,1,A New Hope,4,It is a period of civil war.\r\nRebel spaceshi...,George Lucas,"Gary Kurtz, Rick McCallum",1977-05-25


'starships'

Unnamed: 0,id,name,model,starship_class,manufacturer,cost_in_credits,length,crew,passengers,max_atmosphering_speed,hyperdrive_rating,MGLT,cargo_capacity,consumables
0,2,CR90 corvette,CR90 corvette,corvette,Corellian Engineering Corporation,3500000,150,30-165,600,950,2.0,60,3000000,1 year


'vehicles'

Unnamed: 0,id,name,model,vehicle_class,manufacturer,length,cost_in_credits,crew,passengers,max_atmosphering_speed,cargo_capacity,consumables
0,4,Sand Crawler,Digger Crawler,wheeled,Corellia Mining Corporation,36.8,150000,46,30,30,50000,2 months


'species'

Unnamed: 0,id,name,average_height,average_lifespan,classification,designation,eye_colors,hair_colors,homeworld,language,skin_colors
0,1,Human,180,120,mammal,sentient,"brown, blue, green, hazel, grey, amber","blonde, brown, black, red",https://swapi.dev/api/planets/9/,Galactic Basic,"caucasian, black, asian, hispanic"


'planets'

Unnamed: 0,id,name,diameter,rotation_period,orbital_period,gravity,population,climate,terrain,surface_water
0,1,Tatooine,10465,23,304,1 standard,200000,arid,desert,1


In [13]:
connection_table = ["people_films", "people_species","people_starships","people_vehicles", "people_planets"]
for item in connection_table:
    display(item)
    display(pd.read_sql(f'SELECT * FROM {item} LIMIT 1', conn))
    

'people_films'

Unnamed: 0,person_id,film_id
0,1,1


'people_species'

Unnamed: 0,person_id,specie_id
0,1,1


'people_starships'

Unnamed: 0,person_id,starship_id
0,1,12


'people_vehicles'

Unnamed: 0,person_id,vehicle_id
0,1,14


'people_planets'

Unnamed: 0,person_id,planet_id
0,1,1


## 1. Required SQL analysis

### 1.1 Who is the most famous characters in Star Wars films.
- Requirment: `1 request with INNER`
- Explanation: By using INNER JOIN, we return characters' occurance in films. As each character must appear in one film, it makes sense to use inner join here.

In [38]:
pd.read_sql(
    '''
    with people_films_table as (
        select 
            p.name,
            count(p.id) as count
        from 
            people p
        inner join
            people_films pf on p.id = pf.person_id
        inner join
            films f on pf.film_id = f.id
        group by
            p.id
    )
    
    select 
        * 
    from 
        people_films_table
    order by
        count desc
    limit 10


    ''', 
    conn
)

DatabaseError: Execution failed on sql '
    with people_films_table as (
        select 
            p.name
            p.id,
            count(p.id) as count
        from 
            people p
        inner join
            people_films pf on p.id = pf.person_id
        inner join
            films f on pf.film_id = f.id
        group by
            p.id
    )
    
    select 
        * 
    from 
        people_films_table
    order by
        count desc
    limit 10


    ': near ".": syntax error

### 1.2 Who has the higest occurence in terms of pilots
- Requirment: `1 request with LEFT`
- Explanation: By using LEFT JOIN, we return people regardless of whether they are the pilots of vehicles or starships.

In [35]:
pd.read_sql(
    '''
    with people_starships_table as (
        select 
            p.id,
            p.name,
            count(p.name) as starships_occurence,
            group_concat(s.name, ", ") as starship_names
        from 
            people p
        left join 
            people_starships ps on p.id = ps.person_id
        left join
            starships s on ps.starship_id = s.id
        group by
            p.id,
            p.name
    ),

    people_vehicles_table as (
        select 
            p.id,
            p.name,
            count(p.name) as vehicle_occurence,
            group_concat(v.name, ", ") as vehicle_names
        from 
            people p
        left join 
            people_vehicles pv on p.id = pv.person_id
        left join
            vehicles v on pv.vehicle_id = v.id
        group by
            p.id,
            p.name
    )

    select 
        p.name as name,
        (ifnull(ps.starships_occurence, 0) + ifnull(pv.vehicle_occurence, 0)) as total_occurence,
        ps.starships_occurence,
        ps.starship_names,
        pv.vehicle_occurence,
        pv.vehicle_names
    from 
        people p
    left join
        people_starships_table ps on p.id = ps.id
    left join 
        people_vehicles_table pv on p.id = pv.id
    order by
        total_occurence desc
    limit 10


    ''', 
    conn
)

Unnamed: 0,name,total_occurence,starships_occurence,starship_names,vehicle_occurence,vehicle_names
0,Palpatine,10,5,"CR90 corvette, Star Destroyer, Sentinel-class ...",5,"Sand Crawler, T-16 skyhopper"
1,Owen Lars,6,3,Sentinel-class landing craft,3,T-16 skyhopper
2,Beru Whitesun lars,6,3,Sentinel-class landing craft,3,T-16 skyhopper
3,Obi-Wan Kenobi,6,5,"Jedi starfighter, Trade Federation cruiser, Na...",1,Tribubble bongo
4,Padmé Amidala,6,3,"Naboo fighter, H-type Nubian yacht, Naboo star...",3,"Sand Crawler, T-16 skyhopper"
5,Mace Windu,6,3,Sentinel-class landing craft,3,"Sand Crawler, T-16 skyhopper"
6,Darth Vader,5,1,TIE Advanced x1,4,T-16 skyhopper
7,Anakin Skywalker,5,3,"Naboo fighter, Trade Federation cruiser, Jedi ...",2,"Zephyr-G swoop bike, XJ-6 airspeeder"
8,Han Solo,5,2,"Millennium Falcon, Imperial shuttle",3,
9,Luke Skywalker,4,2,"X-wing, Imperial shuttle",2,"Snowspeeder, Imperial Speeder Bike"


### 1.3 What are the average costs, passengers, length for each vehicle class.
- Requirment: `1 request with GROUP BY`
- Explanation: By using GROUP BY, we could get the relevant values divided by vehicle class.

In [68]:
pd.read_sql(
    '''

    ''', 
    conn
)

Unnamed: 0,vehicle_class,model,average_cost,average_passengers,average_length,model_count
0,sail barge,Modified Luxury Sail Barge,285000.0,500.0,30.0,1
1,landing craft,C-9979 landing craft,200000.0,284.0,210.0,1
2,wheeled walker,"Tsmeu-6 personal wheel bike,HAVw A6 Juggernaut",182500.0,151.0,26.45,2
3,wheeled,Digger Crawler,150000.0,30.0,36.8,1
4,droid tank,NR-N99 Persuader-class droid enforcer,49000.0,4.0,10.96,1
5,repulsorcraft,"T-16 skyhopper,X-34 landspeeder,Storm IV Twin-...",35185.71,17.0,9.6,7
6,droid starfighter,tri-fighter,20000.0,0.0,5.4,1
7,air speeder,Raddaugh Gnasp fluttercraft,14750.0,0.0,7.0,1
8,airspeeder,"t-47 airspeeder,Koro-2 Exodrive airspeeder,XJ-...",14425.0,1.0,8.95,5
9,walker,"All Terrain Scout Transport,All Terrain Tactic...",10000.0,17.0,39.6,4


### 1.4 Find out manufactures of starships who could design a passenger capacity over 50
- Requirment: `1 request with HAVING`
- Explanation: By grouping the data by manufacturer, we could use HAVING to filter out manufacutres that has the capability to desgin starships with the average passenger capcity over 50.

In [86]:
pd.read_sql(
    '''

            
    ''', 
    conn
)

Unnamed: 0,manufacturer,average_passengers
0,"Hoersch-Kessel Drive, Inc.",139000
1,"Rendili StarDrive, Free Dac Volunteers Enginee...",48247
2,"Kuat Drive Yards, Fondor Shipyards",38000
3,Botajef Shipyards,30000
4,Rothana Heavy Engineering,16000
5,"Kuat Drive Yards, Allanteen Six shipyards",2000
6,Mon Calamari shipyards,1200
7,"Imperial Department of Military Research, Sien...",843
8,Corellian Engineering Corporation,207
9,"Gallofree Yards, Inc.",90


### 1.5 Find out the oldest person in the Star Wars
- Requirment: `1 request with CTE`
- Explaination: 
    - By creating a temporary table, we conver the string `19BBY`, 19 years before the battle of yavin, to 19. And we write another query to return the oldest person's name and age.
    - Before implementing queries, we have checked no one is born ABY.

In [106]:
pd.read_sql(
    '''

    ''', 
    conn
)

Unnamed: 0,aby_count
0,0


In [109]:
pd.read_sql(
    '''

    ''', 
    conn
)

Unnamed: 0,name,birth_year
0,Yoda,896
