## 0. Understanding Data


In [1]:
# import required modules
import sqlite3
import pandas as pd

In [2]:
# create a connection 
conn = sqlite3.connect('star_wars.db')

In [3]:
# take a look at the stored tables
tables = ['people', 'films', 'starships', 'vehicles', 'species', 'planets']

for table in tables:
    display(table)
    display(pd.read_sql(f'SELECT * FROM {table} LIMIT 1', conn))

'people'

Unnamed: 0,name,birth_year,eye_color,gender,hair_color,height,mass,skin_color,homeworld,films,species,starships,vehicles,url,created,edited,unique_id
0,Luke Skywalker,19BBY,blue,male,blond,172,77,fair,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],"[""https://swapi.dev/api/starships/12/"", ""https...","[""https://swapi.dev/api/vehicles/14/"", ""https:...",https://swapi.dev/api/people/1/,2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,1


'films'

Unnamed: 0,title,episode_id,opening_crawl,director,producer,release_date,species,starships,vehicles,characters,planets,url,created,edited,unique_id
0,A New Hope,4,It is a period of civil war.\r\nRebel spaceshi...,George Lucas,"Gary Kurtz, Rick McCallum",1977-05-25,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/2/"", ""https:...","[""https://swapi.dev/api/vehicles/4/"", ""https:/...","[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/1/,2014-12-10T14:23:31.880000Z,2014-12-20T19:49:45.256000Z,1


'starships'

Unnamed: 0,name,model,starship_class,manufacturer,cost_in_credits,length,crew,passengers,max_atmosphering_speed,hyperdrive_rating,MGLT,cargo_capacity,consumables,films,pilots,url,created,edited,unique_id
0,CR90 corvette,CR90 corvette,corvette,Corellian Engineering Corporation,3500000,150,30-165,600,950,2.0,60,3000000,1 year,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/starships/2/,2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,2


'vehicles'

Unnamed: 0,name,model,vehicle_class,manufacturer,length,cost_in_credits,crew,passengers,max_atmosphering_speed,cargo_capacity,consumables,films,pilots,url,created,edited,unique_id
0,Sand Crawler,Digger Crawler,wheeled,Corellia Mining Corporation,36.8,150000,46,30,30,50000,2 months,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/vehicles/4/,2014-12-10T15:36:25.724000Z,2014-12-20T21:30:21.661000Z,4


'species'

Unnamed: 0,name,average_height,average_lifespan,classification,designation,eye_colors,hair_colors,homeworld,language,skin_colors,people,films,url,created,edited,unique_id
0,Human,180,120,mammal,sentient,"brown, blue, green, hazel, grey, amber","blonde, brown, black, red",9.0,Galactic Basic,"caucasian, black, asian, hispanic","[""https://swapi.dev/api/people/66/"", ""https://...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/species/1/,2014-12-10T13:52:11.567000Z,2014-12-20T21:36:42.136000Z,1


'planets'

Unnamed: 0,name,diameter,rotation_period,orbital_period,gravity,population,climate,terrain,surface_water,residents,films,url,created,edited,unique_id
0,Tatooine,10465,23,304,1 standard,200000,arid,desert,1,"[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/planets/1/,2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,1


1. Findings:
    - To join these tables, we need to parse the links in the relevant column.
    - For example, for the table `people`, a single person might appear in several movies, which is an array: `"https://swapi.dev/api/films/1/", "https://sw...`
2. Temporary Solution:
    - Add a column named `unique_id` could be extracted from `url` column.
    - As `homeworld` column only contains a single link, we could get the planet_id in `homework` column to join tables.
3. Solution:
    - For sqlite3, the data type does not support lists, so we could shift to PostgreSQL to do the future data analysis.

In [64]:
# temporary solution to update unique_id and homework column
# tried a sql implementation and found out pandas is more intuitive
def extract_id(url):
    if pd.isnull(url) or not isinstance(url, str):
        return url
    return int(url.strip('/').split('/')[-1])


with sqlite3.connect('star_wars.db') as conn:
    for table in tables:
        df = pd.read_sql(f'select * from {table}', conn)
        
        #create unique_id column for each table
        df['unique_id'] = df['url'].apply(extract_id)

        # update homeworld column from url to id
        if 'homeworld' in df.columns:
            df['homeworld'] = df['homeworld'].apply(extract_id)
        
        df.to_sql(table, conn, if_exists='replace', index=False)

In [65]:
# take a look at the updated tables
for table in tables:
    display(table)
    display(pd.read_sql(f'SELECT * FROM {table} LIMIT 10', conn))

'people'

Unnamed: 0,name,birth_year,eye_color,gender,hair_color,height,mass,skin_color,homeworld,films,species,starships,vehicles,url,created,edited,unique_id
0,Luke Skywalker,19BBY,blue,male,blond,172,77,fair,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],"[""https://swapi.dev/api/starships/12/"", ""https...","[""https://swapi.dev/api/vehicles/14/"", ""https:...",https://swapi.dev/api/people/1/,2014-12-09T13:50:51.644000Z,2014-12-20T21:17:56.891000Z,1
1,C-3PO,112BBY,yellow,,,167,75,gold,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...","[""https://swapi.dev/api/species/2/""]",[],[],https://swapi.dev/api/people/2/,2014-12-10T15:10:51.357000Z,2014-12-20T21:17:50.309000Z,2
2,R2-D2,33BBY,red,,,96,32,"white, blue",8,"[""https://swapi.dev/api/films/1/"", ""https://sw...","[""https://swapi.dev/api/species/2/""]",[],[],https://swapi.dev/api/people/3/,2014-12-10T15:11:50.376000Z,2014-12-20T21:17:50.311000Z,3
3,Darth Vader,41.9BBY,yellow,male,none,202,136,white,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],"[""https://swapi.dev/api/starships/13/""]",[],https://swapi.dev/api/people/4/,2014-12-10T15:18:20.704000Z,2014-12-20T21:17:50.313000Z,4
4,Leia Organa,19BBY,brown,female,brown,150,49,light,2,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],[],"[""https://swapi.dev/api/vehicles/30/""]",https://swapi.dev/api/people/5/,2014-12-10T15:20:09.791000Z,2014-12-20T21:17:50.315000Z,5
5,Owen Lars,52BBY,blue,male,"brown, grey",178,120,light,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],[],[],https://swapi.dev/api/people/6/,2014-12-10T15:52:14.024000Z,2014-12-20T21:17:50.317000Z,6
6,Beru Whitesun lars,47BBY,blue,female,brown,165,75,light,1,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],[],[],https://swapi.dev/api/people/7/,2014-12-10T15:53:41.121000Z,2014-12-20T21:17:50.319000Z,7
7,R5-D4,unknown,red,,,97,32,"white, red",1,"[""https://swapi.dev/api/films/1/""]","[""https://swapi.dev/api/species/2/""]",[],[],https://swapi.dev/api/people/8/,2014-12-10T15:57:50.959000Z,2014-12-20T21:17:50.321000Z,8
8,Biggs Darklighter,24BBY,brown,male,black,183,84,light,1,"[""https://swapi.dev/api/films/1/""]",[],"[""https://swapi.dev/api/starships/12/""]",[],https://swapi.dev/api/people/9/,2014-12-10T15:59:50.509000Z,2014-12-20T21:17:50.323000Z,9
9,Obi-Wan Kenobi,57BBY,blue-gray,male,"auburn, white",182,77,fair,20,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],"[""https://swapi.dev/api/starships/48/"", ""https...","[""https://swapi.dev/api/vehicles/38/""]",https://swapi.dev/api/people/10/,2014-12-10T16:16:29.192000Z,2014-12-20T21:17:50.325000Z,10


'films'

Unnamed: 0,title,episode_id,opening_crawl,director,producer,release_date,species,starships,vehicles,characters,planets,url,created,edited,unique_id
0,A New Hope,4,It is a period of civil war.\r\nRebel spaceshi...,George Lucas,"Gary Kurtz, Rick McCallum",1977-05-25,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/2/"", ""https:...","[""https://swapi.dev/api/vehicles/4/"", ""https:/...","[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/1/,2014-12-10T14:23:31.880000Z,2014-12-20T19:49:45.256000Z,1
1,The Empire Strikes Back,5,It is a dark time for the\r\nRebellion. Althou...,Irvin Kershner,"Gary Kurtz, Rick McCallum",1980-05-17,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/3/"", ""https:...","[""https://swapi.dev/api/vehicles/8/"", ""https:/...","[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/planets/4/"", ""https://...",https://swapi.dev/api/films/2/,2014-12-12T11:26:24.656000Z,2014-12-15T13:07:53.386000Z,2
2,Return of the Jedi,6,Luke Skywalker has returned to\r\nhis home pla...,Richard Marquand,"Howard G. Kazanjian, George Lucas, Rick McCallum",1983-05-25,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/2/"", ""https:...","[""https://swapi.dev/api/vehicles/8/"", ""https:/...","[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/3/,2014-12-18T10:39:33.255000Z,2014-12-20T09:48:37.462000Z,3
3,The Phantom Menace,1,Turmoil has engulfed the\r\nGalactic Republic....,George Lucas,Rick McCallum,1999-05-19,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/31/"", ""https...","[""https://swapi.dev/api/vehicles/33/"", ""https:...","[""https://swapi.dev/api/people/2/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/4/,2014-12-19T16:52:55.740000Z,2014-12-20T10:54:07.216000Z,4
4,Attack of the Clones,2,There is unrest in the Galactic\r\nSenate. Sev...,George Lucas,Rick McCallum,2002-05-16,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/21/"", ""https...","[""https://swapi.dev/api/vehicles/4/"", ""https:/...","[""https://swapi.dev/api/people/2/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/5/,2014-12-20T10:57:57.886000Z,2014-12-20T20:18:48.516000Z,5
5,Revenge of the Sith,3,War! The Republic is crumbling\r\nunder attack...,George Lucas,Rick McCallum,2005-05-19,"[""https://swapi.dev/api/species/1/"", ""https://...","[""https://swapi.dev/api/starships/2/"", ""https:...","[""https://swapi.dev/api/vehicles/33/"", ""https:...","[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/planets/1/"", ""https://...",https://swapi.dev/api/films/6/,2014-12-20T18:49:38.403000Z,2014-12-20T20:47:52.073000Z,6


'starships'

Unnamed: 0,name,model,starship_class,manufacturer,cost_in_credits,length,crew,passengers,max_atmosphering_speed,hyperdrive_rating,MGLT,cargo_capacity,consumables,films,pilots,url,created,edited,unique_id
0,CR90 corvette,CR90 corvette,corvette,Corellian Engineering Corporation,3500000,150.0,30-165,600.0,950,2.0,60,3000000,1 year,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/starships/2/,2014-12-10T14:20:33.369000Z,2014-12-20T21:23:49.867000Z,2
1,Star Destroyer,Imperial I-class Star Destroyer,Star Destroyer,Kuat Drive Yards,150000000,1600.0,47060,,975,2.0,60,36000000,2 years,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/starships/3/,2014-12-10T15:08:19.848000Z,2014-12-20T21:23:49.870000Z,3
2,Sentinel-class landing craft,Sentinel-class landing craft,landing craft,"Sienar Fleet Systems, Cyngus Spaceworks",240000,38.0,5,75.0,1000,1.0,70,180000,1 month,"[""https://swapi.dev/api/films/1/""]",[],https://swapi.dev/api/starships/5/,2014-12-10T15:48:00.586000Z,2014-12-20T21:23:49.873000Z,5
3,Death Star,DS-1 Orbital Battle Station,Deep Space Mobile Battlestation,"Imperial Department of Military Research, Sien...",1000000000000,120000.0,342953,843342.0,,4.0,10,1000000000000,3 years,"[""https://swapi.dev/api/films/1/""]",[],https://swapi.dev/api/starships/9/,2014-12-10T16:36:50.509000Z,2014-12-20T21:26:24.783000Z,9
4,Millennium Falcon,YT-1300 light freighter,Light freighter,Corellian Engineering Corporation,100000,34.37,4,6.0,1050,0.5,75,100000,2 months,"[""https://swapi.dev/api/films/1/"", ""https://sw...","[""https://swapi.dev/api/people/13/"", ""https://...",https://swapi.dev/api/starships/10/,2014-12-10T16:59:45.094000Z,2014-12-20T21:23:49.880000Z,10
5,Y-wing,BTL Y-wing,assault starfighter,Koensayr Manufacturing,134999,14.0,2,0.0,1000km,1.0,80,110,1 week,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/starships/11/,2014-12-12T11:00:39.817000Z,2014-12-20T21:23:49.883000Z,11
6,X-wing,T-65 X-wing,Starfighter,Incom Corporation,149999,12.5,1,0.0,1050,1.0,100,110,1 week,"[""https://swapi.dev/api/films/1/"", ""https://sw...","[""https://swapi.dev/api/people/1/"", ""https://s...",https://swapi.dev/api/starships/12/,2014-12-12T11:19:05.340000Z,2014-12-20T21:23:49.886000Z,12
7,TIE Advanced x1,Twin Ion Engine Advanced x1,Starfighter,Sienar Fleet Systems,unknown,9.2,1,0.0,1200,1.0,105,150,5 days,"[""https://swapi.dev/api/films/1/""]","[""https://swapi.dev/api/people/4/""]",https://swapi.dev/api/starships/13/,2014-12-12T11:21:32.991000Z,2014-12-20T21:23:49.889000Z,13
8,Executor,Executor-class star dreadnought,Star dreadnought,"Kuat Drive Yards, Fondor Shipyards",1143350000,19000.0,279144,38000.0,,2.0,40,250000000,6 years,"[""https://swapi.dev/api/films/2/"", ""https://sw...",[],https://swapi.dev/api/starships/15/,2014-12-15T12:31:42.547000Z,2014-12-20T21:23:49.893000Z,15
9,Rebel transport,GR-75 medium transport,Medium transport,"Gallofree Yards, Inc.",unknown,90.0,6,90.0,650,4.0,20,19000000,6 months,"[""https://swapi.dev/api/films/2/"", ""https://sw...",[],https://swapi.dev/api/starships/17/,2014-12-15T12:34:52.264000Z,2014-12-20T21:23:49.895000Z,17


'vehicles'

Unnamed: 0,name,model,vehicle_class,manufacturer,length,cost_in_credits,crew,passengers,max_atmosphering_speed,cargo_capacity,consumables,films,pilots,url,created,edited,unique_id
0,Sand Crawler,Digger Crawler,wheeled,Corellia Mining Corporation,36.8,150000,46,30,30,50000,2 months,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/vehicles/4/,2014-12-10T15:36:25.724000Z,2014-12-20T21:30:21.661000Z,4
1,T-16 skyhopper,T-16 skyhopper,repulsorcraft,Incom Corporation,10.4,14500,1,1,1200,50,0,"[""https://swapi.dev/api/films/1/""]",[],https://swapi.dev/api/vehicles/6/,2014-12-10T16:01:52.434000Z,2014-12-20T21:30:21.665000Z,6
2,X-34 landspeeder,X-34 landspeeder,repulsorcraft,SoroSuub Corporation,3.4,10550,1,1,250,5,unknown,"[""https://swapi.dev/api/films/1/""]",[],https://swapi.dev/api/vehicles/7/,2014-12-10T16:13:52.586000Z,2014-12-20T21:30:21.668000Z,7
3,TIE/LN starfighter,Twin Ion Engine/Ln Starfighter,starfighter,Sienar Fleet Systems,6.4,unknown,1,0,1200,65,2 days,"[""https://swapi.dev/api/films/1/"", ""https://sw...",[],https://swapi.dev/api/vehicles/8/,2014-12-10T16:33:52.860000Z,2014-12-20T21:30:21.670000Z,8
4,Snowspeeder,t-47 airspeeder,airspeeder,Incom corporation,4.5,unknown,2,0,650,10,none,"[""https://swapi.dev/api/films/2/""]","[""https://swapi.dev/api/people/1/"", ""https://s...",https://swapi.dev/api/vehicles/14/,2014-12-15T12:22:12Z,2014-12-20T21:30:21.672000Z,14
5,TIE bomber,TIE/sa bomber,space/planetary bomber,Sienar Fleet Systems,7.8,unknown,1,0,850,none,2 days,"[""https://swapi.dev/api/films/2/"", ""https://sw...",[],https://swapi.dev/api/vehicles/16/,2014-12-15T12:33:15.838000Z,2014-12-20T21:30:21.675000Z,16
6,AT-AT,All Terrain Armored Transport,assault walker,"Kuat Drive Yards, Imperial Department of Milit...",20.0,unknown,5,40,60,1000,unknown,"[""https://swapi.dev/api/films/2/"", ""https://sw...",[],https://swapi.dev/api/vehicles/18/,2014-12-15T12:38:25.937000Z,2014-12-20T21:30:21.677000Z,18
7,AT-ST,All Terrain Scout Transport,walker,"Kuat Drive Yards, Imperial Department of Milit...",2.0,unknown,2,0,90,200,none,"[""https://swapi.dev/api/films/2/"", ""https://sw...","[""https://swapi.dev/api/people/13/""]",https://swapi.dev/api/vehicles/19/,2014-12-15T12:46:42.384000Z,2014-12-20T21:30:21.679000Z,19
8,Storm IV Twin-Pod cloud car,Storm IV Twin-Pod,repulsorcraft,Bespin Motors,7.0,75000,2,0,1500,10,1 day,"[""https://swapi.dev/api/films/2/""]",[],https://swapi.dev/api/vehicles/20/,2014-12-15T12:58:50.530000Z,2014-12-20T21:30:21.681000Z,20
9,Sail barge,Modified Luxury Sail Barge,sail barge,Ubrikkian Industries Custom Vehicle Division,30.0,285000,26,500,100,2000000,Live food tanks,"[""https://swapi.dev/api/films/3/""]",[],https://swapi.dev/api/vehicles/24/,2014-12-18T10:44:14.217000Z,2014-12-20T21:30:21.684000Z,24


'species'

Unnamed: 0,name,average_height,average_lifespan,classification,designation,eye_colors,hair_colors,homeworld,language,skin_colors,people,films,url,created,edited,unique_id
0,Human,180.0,120,mammal,sentient,"brown, blue, green, hazel, grey, amber","blonde, brown, black, red",9.0,Galactic Basic,"caucasian, black, asian, hispanic","[""https://swapi.dev/api/people/66/"", ""https://...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/species/1/,2014-12-10T13:52:11.567000Z,2014-12-20T21:36:42.136000Z,1
1,Droid,,indefinite,artificial,sentient,,,,,,"[""https://swapi.dev/api/people/2/"", ""https://s...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/species/2/,2014-12-10T15:16:16.259000Z,2014-12-20T21:36:42.139000Z,2
2,Wookie,210.0,400,mammal,sentient,"blue, green, yellow, brown, golden, red","black, brown",14.0,Shyriiwook,gray,"[""https://swapi.dev/api/people/13/"", ""https://...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/species/3/,2014-12-10T16:44:31.486000Z,2014-12-20T21:36:42.142000Z,3
3,Rodian,170.0,unknown,sentient,reptilian,black,,23.0,Galatic Basic,"green, blue","[""https://swapi.dev/api/people/15/""]","[""https://swapi.dev/api/films/1/""]",https://swapi.dev/api/species/4/,2014-12-10T17:05:26.471000Z,2014-12-20T21:36:42.144000Z,4
4,Hutt,300.0,1000,gastropod,sentient,"yellow, red",,24.0,Huttese,"green, brown, tan","[""https://swapi.dev/api/people/16/""]","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/species/5/,2014-12-10T17:12:50.410000Z,2014-12-20T21:36:42.146000Z,5
5,Yoda's species,66.0,900,mammal,sentient,"brown, green, yellow","brown, white",28.0,Galactic basic,"green, yellow","[""https://swapi.dev/api/people/20/""]","[""https://swapi.dev/api/films/2/"", ""https://sw...",https://swapi.dev/api/species/6/,2014-12-15T12:27:22.877000Z,2014-12-20T21:36:42.148000Z,6
6,Trandoshan,200.0,unknown,reptile,sentient,"yellow, orange",none,29.0,Dosh,"brown, green","[""https://swapi.dev/api/people/24/""]","[""https://swapi.dev/api/films/2/""]",https://swapi.dev/api/species/7/,2014-12-15T13:07:47.704000Z,2014-12-20T21:36:42.151000Z,7
7,Mon Calamari,160.0,unknown,amphibian,sentient,yellow,none,31.0,Mon Calamarian,"red, blue, brown, magenta","[""https://swapi.dev/api/people/27/""]","[""https://swapi.dev/api/films/3/""]",https://swapi.dev/api/species/8/,2014-12-18T11:09:52.263000Z,2014-12-20T21:36:42.153000Z,8
8,Ewok,100.0,unknown,mammal,sentient,"orange, brown","white, brown, black",7.0,Ewokese,brown,"[""https://swapi.dev/api/people/30/""]","[""https://swapi.dev/api/films/3/""]",https://swapi.dev/api/species/9/,2014-12-18T11:22:00.285000Z,2014-12-20T21:36:42.155000Z,9
9,Sullustan,180.0,unknown,mammal,sentient,black,none,33.0,Sullutese,pale,"[""https://swapi.dev/api/people/31/""]","[""https://swapi.dev/api/films/3/""]",https://swapi.dev/api/species/10/,2014-12-18T11:26:20.103000Z,2014-12-20T21:36:42.157000Z,10


'planets'

Unnamed: 0,name,diameter,rotation_period,orbital_period,gravity,population,climate,terrain,surface_water,residents,films,url,created,edited,unique_id
0,Tatooine,10465,23,304,1 standard,200000,arid,desert,1,"[""https://swapi.dev/api/people/1/"", ""https://s...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/planets/1/,2014-12-09T13:50:49.641000Z,2014-12-20T20:58:18.411000Z,1
1,Alderaan,12500,24,364,1 standard,2000000000,temperate,"grasslands, mountains",40,"[""https://swapi.dev/api/people/5/"", ""https://s...","[""https://swapi.dev/api/films/1/"", ""https://sw...",https://swapi.dev/api/planets/2/,2014-12-10T11:35:48.479000Z,2014-12-20T20:58:18.420000Z,2
2,Yavin IV,10200,24,4818,1 standard,1000,"temperate, tropical","jungle, rainforests",8,[],"[""https://swapi.dev/api/films/1/""]",https://swapi.dev/api/planets/3/,2014-12-10T11:37:19.144000Z,2014-12-20T20:58:18.421000Z,3
3,Hoth,7200,23,549,1.1 standard,unknown,frozen,"tundra, ice caves, mountain ranges",100,[],"[""https://swapi.dev/api/films/2/""]",https://swapi.dev/api/planets/4/,2014-12-10T11:39:13.934000Z,2014-12-20T20:58:18.423000Z,4
4,Dagobah,8900,23,341,,unknown,murky,"swamp, jungles",8,[],"[""https://swapi.dev/api/films/2/"", ""https://sw...",https://swapi.dev/api/planets/5/,2014-12-10T11:42:22.590000Z,2014-12-20T20:58:18.425000Z,5
5,Bespin,118000,12,5110,"1.5 (surface), 1 standard (Cloud City)",6000000,temperate,gas giant,0,"[""https://swapi.dev/api/people/26/""]","[""https://swapi.dev/api/films/2/""]",https://swapi.dev/api/planets/6/,2014-12-10T11:43:55.240000Z,2014-12-20T20:58:18.427000Z,6
6,Endor,4900,18,402,0.85 standard,30000000,temperate,"forests, mountains, lakes",8,"[""https://swapi.dev/api/people/30/""]","[""https://swapi.dev/api/films/3/""]",https://swapi.dev/api/planets/7/,2014-12-10T11:50:29.349000Z,2014-12-20T20:58:18.429000Z,7
7,Naboo,12120,26,312,1 standard,4500000000,temperate,"grassy hills, swamps, forests, mountains",12,"[""https://swapi.dev/api/people/3/"", ""https://s...","[""https://swapi.dev/api/films/3/"", ""https://sw...",https://swapi.dev/api/planets/8/,2014-12-10T11:52:31.066000Z,2014-12-20T20:58:18.430000Z,8
8,Coruscant,12240,24,368,1 standard,1000000000000,temperate,"cityscape, mountains",unknown,"[""https://swapi.dev/api/people/34/"", ""https://...","[""https://swapi.dev/api/films/3/"", ""https://sw...",https://swapi.dev/api/planets/9/,2014-12-10T11:54:13.921000Z,2014-12-20T20:58:18.432000Z,9
9,Kamino,19720,27,463,1 standard,1000000000,temperate,ocean,100,"[""https://swapi.dev/api/people/22/"", ""https://...","[""https://swapi.dev/api/films/5/""]",https://swapi.dev/api/planets/10/,2014-12-10T12:45:06.577000Z,2014-12-20T20:58:18.434000Z,10


## 1. Required SQL analysis

### 1.1 What's the distribution of citizens across different planets
- Requirement: `1 request with INNER`
- Explanation: By using INNER JOIN, we only return people that have a known homeworld.

In [66]:
pd.read_sql(
    '''
    select 
        p2.name as planet_name,
        count(p1.name) as people_count
    from people p1
    inner join 
        planets p2
    on p1.homeworld = p2.unique_id
    group by
        p2.name
    order by 
        people_count desc
    ''', 
    conn
)

Unnamed: 0,planet_name,people_count
0,Naboo,11
1,Tatooine,10
2,unknown,5
3,Kamino,3
4,Coruscant,3
5,Alderaan,3
6,Ryloth,2
7,Mirial,2
8,Kashyyyk,2
9,Corellia,2


### 1.2 What's the distribution of species across different planets
- Requirement: `1 request with LEFT`
- Explanation: By using LEFT JOIN, we return species regardless of whether their homeworld is known.

In [67]:
pd.read_sql(
    '''
    select 
        p2.name as planet_name,
        count(s.name) as species_count
    from species s
    left join 
        planets p2
    on s.homeworld = p2.unique_id
    group by
        p2.name
    order by 
        species_count desc
    ''', 
    conn
)

Unnamed: 0,planet_name,species_count
0,unknown,1
1,Zolan,1
2,Vulpter,1
3,Utapau,1
4,Tund,1
5,Troiken,1
6,Trandosha,1
7,Toydaria,1
8,Tholoth,1
9,Sullust,1


### 1.3 What are the average costs, passengers, length for each vehicle class.
- Requirement: `1 request with GROUP BY`
- Explanation: By using GROUP BY, we could get the relevant values divided by vehicle class.

In [68]:
pd.read_sql(
    '''
        select 
            vehicle_class,
            group_concat(model) as model,
            round(avg(cost_in_credits),2) as average_cost,
            round(avg(passengers)) as average_passengers,
            round(avg(length), 2) as average_length,
            count(model) as model_count
        from 
            vehicles
        group by
            vehicle_class
        order by 
            average_cost desc
    ''', 
    conn
)

Unnamed: 0,vehicle_class,model,average_cost,average_passengers,average_length,model_count
0,sail barge,Modified Luxury Sail Barge,285000.0,500.0,30.0,1
1,landing craft,C-9979 landing craft,200000.0,284.0,210.0,1
2,wheeled walker,"Tsmeu-6 personal wheel bike,HAVw A6 Juggernaut",182500.0,151.0,26.45,2
3,wheeled,Digger Crawler,150000.0,30.0,36.8,1
4,droid tank,NR-N99 Persuader-class droid enforcer,49000.0,4.0,10.96,1
5,repulsorcraft,"T-16 skyhopper,X-34 landspeeder,Storm IV Twin-...",35185.71,17.0,9.6,7
6,droid starfighter,tri-fighter,20000.0,0.0,5.4,1
7,air speeder,Raddaugh Gnasp fluttercraft,14750.0,0.0,7.0,1
8,airspeeder,"t-47 airspeeder,Koro-2 Exodrive airspeeder,XJ-...",14425.0,1.0,8.95,5
9,walker,"All Terrain Scout Transport,All Terrain Tactic...",10000.0,17.0,39.6,4


### 1.4 Find out manufactures of starships who could design a passenger capacity over 50
- Requirement: `1 request with HAVING`
- Explanation: By grouping the data by manufacturer, we could use HAVING to filter out manufacutres that has the capability to desgin starships with the average passenger capcity over 50.

In [86]:
pd.read_sql(
    '''
        select 
            manufacturer,
            cast(avg(passengers) as int) as average_passengers
        from 
            starships
        group by 
            manufacturer
        having 
            average_passengers>50
        order by 
            average_passengers desc
            
    ''', 
    conn
)

Unnamed: 0,manufacturer,average_passengers
0,"Hoersch-Kessel Drive, Inc.",139000
1,"Rendili StarDrive, Free Dac Volunteers Enginee...",48247
2,"Kuat Drive Yards, Fondor Shipyards",38000
3,Botajef Shipyards,30000
4,Rothana Heavy Engineering,16000
5,"Kuat Drive Yards, Allanteen Six shipyards",2000
6,Mon Calamari shipyards,1200
7,"Imperial Department of Military Research, Sien...",843
8,Corellian Engineering Corporation,207
9,"Gallofree Yards, Inc.",90


### 1.5 Find out the oldest person in the Star Wars
- Requirement: `1 request with CTE`
- Explaination: 
    - By creating a temporary table, we conver the string `19BBY`, 19 years before the battle of yavin, to 19. And we write another query to return the oldest person's name and age.
    - Before implementing queries, we have checked no one is born ABY.

In [106]:
pd.read_sql(
    '''
        select
            count(*) as aby_count
        from
            people 
        where birth_year like '%ABY';
    ''', 
    conn
)

Unnamed: 0,aby_count
0,0


In [109]:
pd.read_sql(
    '''
        with cleaned_birthyear as (
            select 
                name,
                cast(replace(birth_year, 'BBY', '') as integer) as birth_year_bby
            from 
                people
            where 
                birth_year != "unknown"
        )

        select 
            name,
            max(birth_year_bby) as birth_year
        from
            cleaned_birthyear
    ''', 
    conn
)

Unnamed: 0,name,birth_year
0,Yoda,896


Unnamed: 0,COUNT(*)
0,0
