## 1. Request Data from Star Wars API
This part is adapted from `request.py` in the Udacity Data Engineer project.

In [3]:
import  requests
import json
import sqlite3

In [2]:
class RequestStarWars:
    """Download complete resource collections from the Star Wars API (swapi.dev)."""

    def __init__(self):
        # First-page URL of every collection this client can download.
        self._base_urls = {
            "films": "https://swapi.dev/api/films/",
            "people": "https://swapi.dev/api/people/",
            "planets": "https://swapi.dev/api/planets/",
            "species": "https://swapi.dev/api/species/",
            "starships": "https://swapi.dev/api/starships/",
            "vehicles": "https://swapi.dev/api/vehicles/"
        }

    @staticmethod
    def _extract_id(url):
        """Return the numeric id that ends a resource URL.

        Works with or without a trailing slash, e.g. ``.../people/12/`` -> 12.
        """
        return int(url.rstrip('/').rsplit('/', 1)[-1])

    def get_content(self, type, timeout=10):
        """Fetch every page of one collection and return its records.

        Args:
            type: Collection name; must be a key of ``self._base_urls``.
                (The name shadows the builtin but is kept so existing
                keyword callers keep working.)
            timeout: Seconds to wait for each HTTP request before
                ``requests`` raises instead of blocking indefinitely.

        Returns:
            A list of record dicts. Each record gets an extra integer ``id``
            key parsed from its ``url``. If a page answers with a status
            other than 200, the error is printed and the records collected
            so far are returned.

        Raises:
            KeyError: If ``type`` is not a known collection name.
        """
        url = self._base_urls[type]
        all_data = []

        # Follow the "next" links until the API reports no further page.
        while url:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Request completed with Error. Response Code : {response.status_code}")
                break

            response_data = response.json()
            all_data.extend(response_data['results'])
            url = response_data["next"]

        # The API exposes a record's id only as the last segment of its URL.
        for item in all_data:
            item['id'] = self._extract_id(item['url'])

        return all_data

In [20]:
# Collections to download, in processing order.
table_names = [
    'people',
    'films',
    'starships',
    'vehicles',
    'species',
    'planets',
]

In [39]:
import os

# The JSON dumps live in ./data; create the folder on first run.
os.makedirs('data', exist_ok=True)

request_api = RequestStarWars()

# Download each collection and store it as pretty-printed JSON.
for table_name in table_names:
    data = request_api.get_content(table_name)
    with open(f'data/{table_name}.json', 'w') as file:
        file.write(json.dumps(data, indent=4))

In [4]:
# Re-download only the films collection and overwrite its JSON dump.
request_api = RequestStarWars()
data = request_api.get_content('films')

with open('data/films.json', 'w') as file:
    file.write(json.dumps(data, indent=4))

In [6]:
# Sanity check: load the people dump and print the type of each record.
with open("data/people.json", 'r') as file:
    result = json.load(file)

for record in result:
    print(type(record))

<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'dict'>
<class 'di

## 2. Create Schemas and Build Connections

In [5]:
# Define the data schema: one CREATE TABLE statement per API collection,
# with column names matching the record keys described in the API documentation.
# The main change from the raw records is the extra integer `id` column.
# List-valued fields (films, species, ...) are stored as TEXT.
create_tables = ['''
CREATE TABLE IF NOT EXISTS people (
    id INTEGER PRIMARY KEY,
    name TEXT,
    birth_year TEXT,
    eye_color TEXT,
    gender TEXT,
    hair_color TEXT,
    height TEXT,
    mass TEXT,
    skin_color TEXT,
    homeworld TEXT,
    films TEXT,
    species TEXT,
    starships TEXT,
    vehicles TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS films (
    id INTEGER PRIMARY KEY,
    title TEXT,
    episode_id INTEGER,
    opening_crawl TEXT,
    director TEXT,
    producer TEXT,
    release_date DATE,
    species TEXT,
    starships TEXT,
    vehicles TEXT,
    characters TEXT,
    planets TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS starships (
    id INTEGER PRIMARY KEY,
    name TEXT,
    model TEXT,
    starship_class TEXT,
    manufacturer TEXT,
    cost_in_credits TEXT,
    length TEXT,
    crew TEXT,
    passengers TEXT,
    max_atmosphering_speed TEXT,
    hyperdrive_rating TEXT,
    MGLT TEXT,
    cargo_capacity TEXT,
    consumables TEXT,
    films TEXT,
    pilots TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS vehicles (
    id INTEGER PRIMARY KEY,
    name TEXT,
    model TEXT,
    vehicle_class TEXT,
    manufacturer TEXT,
    length TEXT,
    cost_in_credits TEXT,
    crew TEXT,
    passengers TEXT,
    max_atmosphering_speed TEXT,
    cargo_capacity TEXT,
    consumables TEXT,
    films TEXT,
    pilots TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS species (
    id INTEGER PRIMARY KEY,
    name TEXT,
    average_height TEXT,
    average_lifespan TEXT,
    classification TEXT,
    designation TEXT,
    eye_colors TEXT,
    hair_colors TEXT,
    homeworld TEXT,
    language TEXT,
    skin_colors TEXT,
    people TEXT,
    films TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
''',
'''
CREATE TABLE IF NOT EXISTS planets (
    id INTEGER PRIMARY KEY,
    name TEXT,
    diameter TEXT,
    rotation_period TEXT,
    orbital_period TEXT,
    gravity TEXT,
    population TEXT,
    climate TEXT,
    terrain TEXT,
    surface_water TEXT,
    residents TEXT,
    films TEXT,
    url TEXT,
    created TEXT,
    edited TEXT
);
'''
]

In [6]:
# Junction tables for the person-centred relationships of interest.
# Each pairs a person id with one related row id; the pair is the primary key.
_connection_ddl = {
    'people_films': '''
    -- people_films
    CREATE TABLE IF NOT EXISTS people_films (
    person_id INTEGER,
    film_id INTEGER,
    PRIMARY KEY (person_id, film_id),
    FOREIGN KEY (person_id) REFERENCES people(id),
    FOREIGN KEY (film_id) REFERENCES films(id));
    ''',
    'people_species': '''
    --people_species
    CREATE TABLE IF NOT EXISTS people_species (
    person_id INTEGER,
    species_id INTEGER,
    PRIMARY KEY (person_id, species_id),
    FOREIGN KEY (person_id) REFERENCES people(id),
    FOREIGN KEY (species_id) REFERENCES species(id)
    );
    ''',
    'people_starships': ''' 
    --people_starships
    CREATE TABLE IF NOT EXISTS people_starships (
    person_id INTEGER,
    starship_id INTEGER,
    PRIMARY KEY (person_id, starship_id),
    FOREIGN KEY (person_id) REFERENCES people(id),
    FOREIGN KEY (starship_id) REFERENCES starships(id)
    );
    ''',
    'people_vehicles': '''
    -- people_vehicles
    CREATE TABLE IF NOT EXISTS people_vehicles (
    person_id INTEGER,
    vehicle_id INTEGER,
    PRIMARY KEY (person_id, vehicle_id),
    FOREIGN KEY (person_id) REFERENCES people(id),
    FOREIGN KEY (vehicle_id) REFERENCES vehicles(id)
    );
    ''',
    'people_planets': '''
    CREATE TABLE IF NOT EXISTS people_planets (
    person_id INTEGER,
    planet_id INTEGER,
    PRIMARY KEY (person_id, planet_id),
    FOREIGN KEY (person_id) REFERENCES people(id),
    FOREIGN KEY (planet_id) REFERENCES planets(id)
    );
    ''',
}

# Statements in creation order (dicts preserve insertion order).
create_connections = list(_connection_ddl.values())

## 3. Insert values into the tables

In [8]:
# For every connection: {person id: [ids of the related rows]}.
people_dicts = {
    'films': {},
    'species': {},
    'starships': {},
    'vehicles': {},
    'planets': {}
}

# Key of the person record that feeds each connection. Person records have
# no 'planets' key; their planet is the single URL stored under 'homeworld'.
_people_source_fields = {
    'films': 'films',
    'species': 'species',
    'starships': 'starships',
    'vehicles': 'vehicles',
    'planets': 'homeworld',
}

with open('data/people.json', 'r') as file:
    people_data = json.load(file)

for person in people_data:
    person_id = person['id']
    for field, source_field in _people_source_fields.items():
        value = person.get(source_field)
        if not value:
            # Missing key, null or empty list: this person has no such link.
            continue
        # 'homeworld' is one URL string; the other fields are lists of URLs.
        urls = [value] if isinstance(value, str) else value
        # The related row's id is the last path segment of its URL.
        people_dicts[field][person_id] = [
            int(url.rstrip('/').rsplit('/', 1)[-1]) for url in urls
        ]

print(people_dicts)


{'films': {1: [1, 2, 3, 6], 2: [1, 2, 3, 4, 5, 6], 3: [1, 2, 3, 4, 5, 6], 4: [1, 2, 3, 6], 5: [1, 2, 3, 6], 6: [1, 5, 6], 7: [1, 5, 6], 8: [1], 9: [1], 10: [1, 2, 3, 4, 5, 6], 11: [4, 5, 6], 12: [1, 6], 13: [1, 2, 3, 6], 14: [1, 2, 3], 15: [1], 16: [1, 3, 4], 18: [1, 2, 3], 19: [1], 20: [2, 3, 4, 5, 6], 21: [2, 3, 4, 5, 6], 22: [2, 3, 5], 23: [2], 24: [2], 25: [2, 3], 26: [2], 27: [3], 28: [3], 29: [3], 30: [3], 31: [3], 32: [4], 33: [4, 5, 6], 34: [4], 35: [4, 5, 6], 36: [4, 5], 37: [4], 38: [4], 39: [4], 40: [4, 5], 41: [4], 42: [4], 43: [4, 5], 44: [4], 45: [3], 46: [4, 5, 6], 47: [4], 48: [4], 49: [4], 50: [4], 51: [4, 5, 6], 52: [4, 5, 6], 53: [4, 5, 6], 54: [4, 6], 55: [4, 6], 56: [4, 6], 57: [4], 58: [4, 5, 6], 59: [4, 5], 60: [5], 61: [5], 62: [5], 63: [5, 6], 64: [5, 6], 65: [5], 66: [5], 67: [5, 6], 68: [5, 6], 69: [5], 70: [5], 71: [5], 72: [5], 73: [5], 74: [5], 75: [5, 6], 76: [5], 77: [5], 78: [5, 6], 79: [6], 80: [6], 81: [1, 6], 82: [5, 6], 83: [6]}, 'species': {2: [2],

In [1]:
class DatabaseDriver:
    """Small wrapper around a SQLite connection for loading the Star Wars data.

    Can be used as a context manager; the connection is closed on exit.
    """

    # Column holding the related row's id in each ``people_<table>`` junction
    # table, keyed by the related table's name.
    _CONNECTION_COLUMNS = {
        'films': 'film_id',
        'species': 'species_id',
        'starships': 'starship_id',
        'vehicles': 'vehicle_id',
        'planets': 'planet_id',
    }

    def __init__(self, db_name):
        """Open (or create) the SQLite database at ``db_name``."""
        self._conn = sqlite3.connect(db_name)
        self._cur = self._conn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow the caller's exception

    @staticmethod
    def _checked_identifier(name):
        """Return ``name`` if it is safe to interpolate into SQL as an identifier.

        Table and column names cannot be bound as ``?`` parameters, so they
        are validated instead of being trusted.

        Raises:
            ValueError: If ``name`` is not a plain identifier.
        """
        if not isinstance(name, str) or not name.isidentifier():
            raise ValueError(f"Invalid SQL identifier: {name!r}")
        return name

    def execute_query(self, query, params=None):
        """Execute one SQL statement (optionally parameterised) and commit."""
        self._cur.execute(query, params or ())
        self._conn.commit()

    def save_data_to_table(self, table_name, item, primary_key_field):
        '''
        Insert one record into ``table_name`` unless its primary key already exists.

        Args:
            table_name: Target table.
            item: Dict mapping column name -> value. List and dict values are
                stored as JSON text.
            primary_key_field: Column used to detect an already-stored record.

        Raises:
            ValueError: If the table or a column name is not a plain identifier.
        '''
        table = self._checked_identifier(table_name)
        key_column = self._checked_identifier(primary_key_field)

        # SQLite cannot bind Python containers, so serialize them to JSON text.
        serialized_item = {
            k: json.dumps(v) if isinstance(v, (list, dict)) else v
            for k, v in item.items()
        }

        column_names = ', '.join(self._checked_identifier(k) for k in serialized_item)
        place_holders = ', '.join(['?'] * len(serialized_item))

        insert_sql = f"INSERT INTO {table} ({column_names}) VALUES ({place_holders})"
        check_sql = f"SELECT 1 FROM {table} WHERE {key_column} = ? LIMIT 1"

        # Skip the insert when a row with this key is already stored.
        self._cur.execute(check_sql, (serialized_item.get(primary_key_field),))
        if self._cur.fetchone() is None:
            self._cur.execute(insert_sql, tuple(serialized_item.values()))
        self._conn.commit()

    def save_data_connections(self, connection_name, item):
        """Insert person-to-row links into one ``people_<table>`` junction table.

        Args:
            connection_name: Junction table name such as ``'people_films'``;
                the bare related table name (``'films'``) is accepted too.
            item: Dict mapping person id -> iterable of related row ids,
                i.e. the shape of one entry of ``people_dicts``.

        Raises:
            ValueError: If ``connection_name`` is not a known connection.
        """
        related = connection_name
        if related.startswith('people_'):
            related = related[len('people_'):]
        try:
            related_column = self._CONNECTION_COLUMNS[related]
        except KeyError:
            raise ValueError(f"Unknown connection: {connection_name!r}") from None

        rows = [
            (person_id, related_id)
            for person_id, related_ids in item.items()
            for related_id in related_ids
        ]
        # OR IGNORE makes re-running the load a no-op for existing pairs.
        insert_sql = (
            f"INSERT OR IGNORE INTO people_{related} "
            f"(person_id, {related_column}) VALUES (?, ?)"
        )
        self._cur.executemany(insert_sql, rows)
        self._conn.commit()

    def close(self):
        """Close the cursor and the underlying connection."""
        self._cur.close()
        self._conn.close()