# ETL - Geodata
This notebook extracts and transforms the data from the CSV file saved in `/Datasets` (generated as output by `dataExplorationJJyotsna - Q1&Geojson`) and loads it into the SQLite database.
The file lists, for each country, the city that the largest number of actors came from. It also has the latitude and longitude coordinates fetched from `geoapify` through API calls in the `dataExplorationJJyotsna - Q1&Geojson` module.


In [1]:
# Dependencies
import pandas as pd
from pathlib import Path

In [2]:
# Import SQL Alchemy
from sqlalchemy import create_engine

# Import and establish Base for which classes will be constructed 
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import desc

# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float, Boolean

## Import CSVs
#### Country coordinates CSV
- Source: `Datasets/country_coordinates.csv`
- Table: `city` in `Server/movies_db.sqlite`

In [3]:
# Read the country/city coordinates CSV produced by the exploration notebook
country_csv = Path('../Datasets/country_coordinates.csv')
country_df = pd.read_csv(filepath_or_buffer=country_csv)

# Display the dtype pandas inferred for each column
country_df.dtypes


Unnamed: 0         int64
birth_country     object
birth_city        object
Lat              float64
Lon              float64
dtype: object

In [4]:
# Declarative base class that the ORM model classes in this notebook inherit
# from; its `metadata` is what `create_all()` is later called on.
Base = declarative_base()

In [5]:
# Create the City ORM class mapped to the `city` table
class City(Base):
    """ORM model for one row of the `city` table.

    Each row holds a country, a city in that country, and a latitude/longitude
    pair, matching the columns of the country coordinates CSV.
    """
    __tablename__ = 'city'
    id = Column(Integer, primary_key=True)  # primary key; the load loop fills it from the DataFrame index
    birth_country = Column(String)
    birth_city = Column(String)
    Lat  = Column(Float)  # latitude
    Lon  = Column(Float)  # longitude

In [6]:
# Engine pointing at the SQLite file `movies_db.sqlite` (path is relative to
# the notebook's working directory)
engine = create_engine('sqlite:///movies_db.sqlite')

# Create the tables for every model declared on Base
Base.metadata.create_all(bind=engine)

# Open an ORM session bound to the engine
session = Session(engine)

In [7]:
# Stage one City record per row of the country DataFrame.
# The DataFrame index is used as the primary key, as before.
city_columns = ['birth_country', 'birth_city', 'Lat', 'Lon']

# itertuples() hands back each row's values directly, so no per-row .loc
# lookups are needed and the builtin `id` is no longer shadowed.
for record in country_df[city_columns].itertuples(index=True):
    # merge() instead of add(): if a row with this primary key already exists
    # in the database (e.g. the notebook is re-run against the same SQLite
    # file), it is updated rather than causing a UNIQUE constraint failure
    # at commit time.
    session.merge(City(
        id=record.Index,
        birth_country=record.birth_country,
        birth_city=record.birth_city,
        Lat=record.Lat,
        Lon=record.Lon,
    ))
print(f"{len(country_df)} rows ready for commit.")

50 rows ready for commit.


In [8]:
# Commit the staged rows. If the commit fails, roll the transaction back so
# the session is not left in a failed state, then re-raise so the error is
# still visible in the notebook. The session is closed in every case.
try:
    session.commit()
except Exception:
    session.rollback()
    raise
finally:
    # Close session
    session.close()