## 0. Read the Documentation to have a better understanding of the data.

1. There are six data resources: `films`, `people`, `planets`, `species`, `starships` and `vehicles`. We can store each resource in its own table.
2. Responses are in JSON format, from which we can read the attributes of `films`, `people`, etc.

In [None]:
# install the required modules
! pip install requests pandas

In [None]:
# import the required modules
import sqlite3
import requests
import pandas as pd
import json

## 1. Define the data schema for tables.

In [None]:
# define the data schema and write relevant SQL statements according to the documentation
# one CREATE TABLE statement per resource; the first column of each table is its
# PRIMARY KEY (title for films, name for the others), and every column is declared
# TEXT except films.episode_id (INTEGER) and films.release_date (DATE)
create_tables = ['''
CREATE TABLE IF NOT EXISTS people (
    name TEXT PRIMARY KEY,
    birth_year TEXT,
    eye_color TEXT,
    gender TEXT,
    hair_color TEXT,
    height TEXT,
    mass TEXT,
    skin_color TEXT,
    homeworld TEXT,
    films TEXT, 
    species TEXT,
    starships TEXT,
    vehicles TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS films (
    title TEXT PRIMARY KEY,
    episode_id INTEGER,
    opening_crawl TEXT,
    director TEXT,
    producer TEXT,
    release_date DATE,
    species TEXT,
    starships TEXT,
    vehicles TEXT,
    characters TEXT, 
    planets TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS starships (
    name TEXT PRIMARY KEY,
    model TEXT,
    starship_class TEXT,
    manufacturer TEXT,
    cost_in_credits TEXT,
    length TEXT,
    crew TEXT,
    passengers TEXT,
    max_atmosphering_speed TEXT,
    hyperdrive_rating TEXT,
    MGLT TEXT,
    cargo_capacity TEXT,
    consumables TEXT,
    films TEXT,
    pilots TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS vehicles (
    name TEXT PRIMARY KEY,
    model TEXT,
    vehicle_class TEXT,
    manufacturer TEXT,
    length TEXT,
    cost_in_credits TEXT,
    crew TEXT,
    passengers TEXT,
    max_atmosphering_speed TEXT,
    cargo_capacity TEXT,
    consumables TEXT,
    films TEXT,
    pilots TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS species (
    name TEXT PRIMARY KEY,
    average_height TEXT,
    average_lifespan TEXT,
    classification TEXT,
    designation TEXT,
    eye_colors TEXT,
    hair_colors TEXT,
    homeworld TEXT,
    language TEXT,
    skin_colors TEXT,
    people TEXT,
    films TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS planets (
    name TEXT PRIMARY KEY,
    diameter TEXT,
    rotation_period TEXT,
    orbital_period TEXT,
    gravity TEXT,
    population TEXT,
    climate TEXT,
    terrain TEXT,
    surface_water TEXT,
    residents TEXT,
    films TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
'''
]

In [None]:
# connect to the database
conn = sqlite3.connect('star_wars.db')
try:
    c = conn.cursor()

    # create relevant tables
    for sql_statement in create_tables:
        c.execute(sql_statement)

    conn.commit()
finally:
    # this handle is not used again below (later cells open their own
    # connections), so release it instead of leaving it open
    conn.close()

## 2. Get the data from the Star Wars API and store it in a local database.

### 2.1 Using People as the First Example

In [None]:
# check the requests output
# a timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page
people_response = requests.get('https://swapi.dev/api/people/', timeout=30)
people_response.raise_for_status()
people_data = people_response.json()

# there might be several pages for a single data resource
all_people_data = []
all_people_data.extend(people_data['results'])

# keep following the "next" link until the current page has none
while people_data["next"]:
    page_response = requests.get(people_data["next"], timeout=30)
    page_response.raise_for_status()
    people_data = page_response.json()
    all_people_data.extend(people_data['results'])

print(all_people_data)

The result is a list of JSON objects (one per person); we could sort them and store them one by one.

```
{'name': 'Luke Skywalker', 'height': '172', 'mass': '77', 'hair_color': 'blond', 'skin_color': 'fair', 'eye_color': 'blue', 'birth_year': '19BBY', 'gender': 'male', 'homeworld': 'https://swapi.dev/api/planets/1/', 'films': ['https://swapi.dev/api/films/1/', 'https://swapi.dev/api/films/2/', 'https://swapi.dev/api/films/3/', 'https://swapi.dev/api/films/6/'], 'species': [], 'vehicles': ['https://swapi.dev/api/vehicles/14/', 'https://swapi.dev/api/vehicles/30/'], 'starships': ['https://swapi.dev/api/starships/12/', 'https://swapi.dev/api/starships/22/'], 'created': '2014-12-09T13:50:51.644000Z', 'edited': '2014-12-20T21:17:56.891000Z', 'url': 'https://swapi.dev/api/people/1/'}, 
```

In [None]:
# sanity check: the number of collected records should equal the total reported by the API
print(people_data['count'] == len(all_people_data))

In [None]:
def save_to_person(person):
    '''
    Insert individual person data to the people table.

    person is a dict for one person; only the scalar fields listed in
    `columns` below are stored. If a row with the same name already exists,
    nothing is inserted.
    '''
    columns = ('name', 'height', 'mass', 'hair_color', 'skin_color', 'eye_color',
               'birth_year', 'gender', 'homeworld', 'url', 'created', 'edited')
    insert_sql = "INSERT INTO people ({}) VALUES ({})".format(
        ', '.join(columns), ', '.join('?' for _ in columns))

    # `with conn:` only commits (or rolls back on error); it does not close the
    # connection, so close it explicitly to avoid leaking one handle per call
    conn = sqlite3.connect('star_wars.db')
    try:
        with conn:
            c = conn.cursor()
            # avoid duplicates
            c.execute("SELECT 1 FROM people WHERE name = ?", (person['name'],))
            if c.fetchone() is None:
                c.execute(insert_sql, tuple(person[col] for col in columns))
    finally:
        conn.close()


In [None]:
# iterate through the list of person dicts collected above and store each one in the people table
for person in all_people_data:
    save_to_person(person)

In [None]:
# check that the data was stored correctly by counting the rows in the table
conn = sqlite3.connect('star_wars.db')
try:
    c = conn.cursor()
    check_data = c.execute("SELECT count(*) FROM people").fetchone()[0]
finally:
    # read-only check: nothing to commit, just release the handle
    conn.close()
print(check_data)


### 2.2 Extract the Responses and Store the Data to Other Tables.

Follow the "Don't Repeat Yourself" principle to write functions that could be reused for other tables.

In [None]:
# save parsing data to tables
# dynamic SQL queries are required.
def _quote_identifier(identifier):
    '''Return identifier wrapped in double quotes, with embedded double quotes doubled, for use as an SQL name.'''
    return '"' + str(identifier).replace('"', '""') + '"'


def save_to_table(table_name, item):
    '''
    Insert individual data to the table.

    Item is a dict. Sample: {'title': 'A New Hope', 'episode_id': 4, 'opening_crawl': "It is a period of civil war. ..."}

    The first key of item is used as the primary key: if a row with the same
    value for that column already exists, nothing is inserted. List values
    are stored as JSON strings.
    '''
    # when inserting a list of values, errors occur
    # use serialization to convert lists to strings
    serialized_item = {k: json.dumps(v) if isinstance(v, list) else v for k, v in item.items()}

    # generalize the Insert into and avoid duplicates SQL statement
    ### Insert: INSERT INTO people (name, ...) VALUES (?,...)
    ### Duplicate: SELECT ... FROM people WHERE name = ?
    # table and column names cannot be bound as "?" parameters, so they are
    # quoted before being placed into the SQL text; values are always bound
    quoted_table = _quote_identifier(table_name)
    column_name = ', '.join(_quote_identifier(k) for k in serialized_item)
    place_holder = ', '.join(['?'] * len(serialized_item))
    primary_key, primary_key_value = list(serialized_item.items())[0]

    insert_sql = f"INSERT INTO {quoted_table} ({column_name}) VALUES ({place_holder})"
    check_sql = f"SELECT 1 FROM {quoted_table} WHERE {_quote_identifier(primary_key)} = ?"

    # `with conn:` only commits (or rolls back on error); it does not close the
    # connection, so close it explicitly to avoid leaking one handle per call
    conn = sqlite3.connect('star_wars.db')
    try:
        with conn:
            c = conn.cursor()
            c.execute(check_sql, (primary_key_value,))
            if c.fetchone() is None:
                c.execute(insert_sql, tuple(serialized_item.values()))
    finally:
        conn.close()

    

In [None]:
# parse the data, store them into the table by calling the function
tables = ['people', 'films', 'starships', 'vehicles', 'species', 'planets']
for table_name in tables:
    next_url = f'https://swapi.dev/api/{table_name}'

    # follow the "next" links until the API reports no further page
    while next_url is not None:
        # a timeout keeps the cell from hanging on a stalled connection
        response = requests.get(next_url, timeout=30)
        # a Response is falsy on an HTTP error, so `while response:` would quietly
        # stop and leave the table incomplete; fail loudly instead
        response.raise_for_status()
        response_data = response.json()

        for item in response_data["results"]:
            # each item is a dict
            save_to_table(table_name, item)

        next_url = response_data["next"]


In [None]:
# check the count of table data matches the API count or not
def check_table_rows(table_name):
    '''
    Return the number of rows currently stored in table_name.
    '''
    # a table name cannot be bound as a "?" parameter, so quote it
    # (doubling any embedded double quotes) before placing it in the SQL text
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    # `with sqlite3.connect(...)` would only commit or roll back, not close,
    # so the connection is closed explicitly here
    conn = sqlite3.connect('star_wars.db')
    try:
        rows_count = conn.execute(f"SELECT count(*) FROM {quoted_table}").fetchone()[0]
    finally:
        conn.close()
    return rows_count

def check_api_count(table_name):
    '''
    Compare the "count" reported by the API for table_name with the number of
    rows stored in the local table, and print whether they match.
    '''
    # a timeout keeps the call from hanging on a stalled connection;
    # raise_for_status() stops on an HTTP error before the body is parsed
    response = requests.get(f'https://swapi.dev/api/{table_name}', timeout=30)
    response.raise_for_status()

    api_count = response.json()["count"]
    rows_count = check_table_rows(table_name)

    if api_count != rows_count:
        print(f"Check {table_name}, there are rows mismatches.")
    else:
        print(f"{table_name} pass the validatation.")

# run the API-count vs. row-count comparison for every table in the list
for table_name in tables:
    check_api_count(table_name)
