# ETL - Financial Information
This notebook extracts and transforms the financial data (gross revenue and budget) from the CSV files in `../Datasets` and loads it into the SQLite database.

In [1]:
# Dependencies
import pandas as pd
from pathlib import Path

# Import SQL Alchemy
from sqlalchemy import create_engine

# Import and establish Base for which classes will be constructed 
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import desc

# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float, Boolean

## Extract movies datasets

In [8]:
# Locations of the two raw CSV inputs, relative to this notebook
Movies = Path("../Datasets/movies.csv")
Omdb = Path("../Datasets/omdb.csv")

# Read each CSV into its own DataFrame
movies_df = pd.read_csv(Movies)
omdb_df = pd.read_csv(Omdb)

In [9]:
# Display the column labels of the movies DataFrame
movies_df.columns

Index(['movieid', 'title', 'mpaa_rating', 'budget', 'gross', 'release_date',
       'genre', 'runtime', 'rating', 'rating_count', 'summary'],
      dtype='object')

## Transform

In [10]:
# Select the columns of interest into NEW variables instead of overwriting
# movies_df / omdb_df. Overwriting made this cell fail on a second run,
# because the renamed frame no longer had a 'title' column to select.
movies_subset = (
    movies_df[['movieid', 'title', 'mpaa_rating', 'genre', 'budget', 'gross', 'rating']]
    # Rename so both frames share the same join key label
    .rename(columns={"title": "Title"})
)
omdb_subset = omdb_df[['Title', 'Genre', 'Country', 'Awards', 'DVD']]

# Inner join: keep only movies whose Title appears in both datasets
FirstQuestMerged = pd.merge(omdb_subset, movies_subset, on="Title", how="inner")

# ROI = (gross - budget) / budget.
# A zero budget would produce +/-inf, so those budgets are masked to NaN and
# the resulting ROI is NaN (undefined) rather than infinite.
nonzero_budget = FirstQuestMerged["budget"].where(FirstQuestMerged["budget"] != 0)
FirstQuestMerged["ROI"] = (FirstQuestMerged["gross"] - FirstQuestMerged["budget"]) / nonzero_budget

In [11]:
# Narrow the merged frame to the columns that will be loaded, then preview it
financial_columns = ['movieid', 'Title', 'budget', 'gross', 'ROI', 'rating']
FirstQuestMerged = FirstQuestMerged.loc[:, financial_columns]
FirstQuestMerged.head()

Unnamed: 0,movieid,Title,budget,gross,ROI,rating
0,1,Look Who's Talking,7500000,296000000,38.466667,5.9
1,2,Driving Miss Daisy,7500000,145793296,18.439106,7.4
2,3,Turner & Hooch,13000000,71079915,4.467686,7.2
3,4,Born on the Fourth of July,14000000,161001698,10.500121,7.2
4,5,Field of Dreams,15000000,84431625,4.628775,7.5


In [12]:
# Display the dtype of each column in the merged frame before loading it
FirstQuestMerged.dtypes

movieid      int64
Title       object
budget       int64
gross        int64
ROI        float64
rating     float64
dtype: object

## Load

In [40]:
# Declarative base class: ORM models subclass it, and their tables are
# collected on Base.metadata (used below to create the tables).
Base = declarative_base()

In [41]:
# Create Financials class
class Financials(Base):
    """ORM model mapped to the ``financials`` table.

    Each row stores one movie's title together with its budget, gross,
    rating and ROI values.
    """
    __tablename__ = 'financials'
    # Primary key of the table
    movieid = Column(Integer, primary_key=True)
    # Movie title (the column the two source datasets are joined on)
    Title = Column(String)
    # Budget and gross are stored as floating-point values
    budget = Column(Float)
    gross = Column(Float)
    rating = Column(Float)
    # Return on investment, computed in the Transform step as
    # (gross - budget) / budget
    ROI = Column(Float)

In [42]:
# Create an engine for the SQLite database file at ../Server/movies_db.sqlite
# (path is relative to the notebook's working directory)
engine = create_engine('sqlite:///../Server/movies_db.sqlite')

# Create the tables for every model registered on Base.
# NOTE(review): create_all skips tables that already exist by default, so a
# change to the Financials columns is not applied to an existing database file.
Base.metadata.create_all(engine)

# Start a session bound to the engine; it is used by the following cells to
# stage, commit and close the load
session = Session(bind=engine)

In [44]:
# Stage one Financials record per row of the merged DataFrame.
#
# - movieid is written explicitly so the database primary key matches the
#   dataset's movie id instead of an unrelated auto-generated number.
# - session.merge() inserts a new row or updates the existing row with the
#   same primary key, so re-running this cell does not pile up duplicates
#   and a movieid repeated by the Title join does not violate the key.
# - Values are converted to built-in Python types so the database driver is
#   never handed NumPy scalars taken from the DataFrame.
for row in FirstQuestMerged.itertuples(index=False):
    session.merge(Financials(
        movieid=int(row.movieid),
        Title=row.Title,
        budget=float(row.budget),
        gross=float(row.gross),
        rating=float(row.rating),
        ROI=float(row.ROI),
    ))

print(f"{len(FirstQuestMerged)} rows ready for commit.")


621 rows ready for commit.


In [45]:
# Commit the staged rows to the database.
# If the commit fails, roll the transaction back so the long-lived notebook
# session can be used again when the cell is re-run, then re-raise the error
# so the failure stays visible.
try:
    session.commit()
except Exception:
    session.rollback()
    raise

In [46]:
# Close the session now that the load has been committed
session.close()