In [None]:
!pip install ipython-sql

# The code install the ipython-sql package using the pip package manager.
# pip is a tool used to install and manage Python packages. 
# The ! symbol at the beginning of the line is used in Jupyter notebooks to run shell commands.

In [2]:
import pandas as pd
import sqlite3

In [3]:
%load_ext sql

# This is a Jupyter notebook magic command that loads an SQL extension.
# This allows user to execute SQL queries directly in the Jupyter notebook.
# We can connect to any database which is supported by SQLAlchemy, in this project we will use SQLite database.

In [4]:
%sql sqlite:///MovieNow.db
# %sql is the magic command to connect to a SQLite database.
# The command sets the connection string to the default SQLite database.

##### Creating a database

In [None]:
# Create blank tables inside SQLite database
%sql --file ./MovieNow-sqlite.sql

In [4]:
# Load Actors CSV file into a DataFrame
actors = 'actors.csv'
actsin = 'actsin.csv'
customers = 'customers.csv'
movies = 'movies.csv'
renting = 'renting.csv'

df_at = pd.read_csv(actors)
df_as = pd.read_csv(actsin)
df_c = pd.read_csv(customers)
df_m = pd.read_csv(movies)
df_r = pd.read_csv(renting)

# Connect to SQLite database
conn = sqlite3.connect('MovieNow.db')

# Write DataFrame to SQLite table
df_at.to_sql('actors', conn, index=False, if_exists='replace')
df_as.to_sql('actsin', conn, index=False, if_exists='replace')
df_c.to_sql('customers', conn, index=False, if_exists='replace')
df_m.to_sql('movies', conn, index=False, if_exists='replace')
df_r.to_sql('renting', conn, index=False, if_exists='replace')

# Close the connection
conn.close()

##### Showing the tables

In [5]:
%%sql
SELECT *
FROM actors
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


actor_id,name,year_of_birth,nationality,gender
1,Abbie Cornish,1982.0,Australia,female
2,Adam Sandler,1966.0,USA,male
3,Al Pacino,1940.0,USA,male
4,Amy Adams,1974.0,USA,female
5,Andrea Riseborough,1981.0,British,female


In [7]:
%%sql
SELECT *
FROM actsin
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


actsin_id,movie_id,actor_id
1,37,1
2,56,2
3,10,3
4,14,3
5,29,3


In [10]:
%%sql
SELECT *
FROM customers
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


id,name,nationality,gender,dob,membersince
1,Robert Bohm,Austria,male,1980-07-30,2018-09-02
2,Wolfgang Ackermann,Austria,male,1971-11-17,2018-10-15
3,Daniela Herzog,Austria,female,1974-08-07,2019-02-14
4,Julia Jung,Austria,female,1991-01-04,2017-11-22
5,Juliane Kirsch,Austria,female,1977-03-01,2018-12-16


In [11]:
%%sql
SELECT *
FROM movies
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


movie_id,title,genre,runtime,year_of_releas,renting_price
1,One Night at McCool's,Comedy,93,2001,2.09
2,Swordfish,Drama,99,2001,2.19
3,What Women Want,Comedy,127,2001,2.59
4,Training Day,Drama,122,2001,1.79
5,The Fellowship of the Ring,Science Fiction & Fantasy,178,2001,2.59


In [12]:
%%sql
SELECT *
FROM renting
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


renting_id,customer_id,movie_id,rating,date_renting
1,41,8,,2018-10-09
2,10,29,10.0,2017-03-01
3,108,45,4.0,2018-06-08
4,39,66,8.0,2018-10-22
5,104,15,7.0,2019-03-18


### Basic Information

#### What its the total number of the members?

In [20]:
%%sql
SELECT COUNT(DISTINCT name) AS number_of_members
FROM customers;

 * sqlite:///MovieNow.db
Done.


number_of_members
123


#### What is the total number of the movies?

In [38]:
%%sql
SELECT COUNT(DISTINCT title) AS number_of_movies
FROM movies;

 * sqlite:///MovieNow.db
Done.


number_of_movies
71


#### What is the income per year?

In [15]:
%%sql
SELECT strftime('%Y', date_renting) AS year,
       ROUND(SUM(renting_price),2) AS yearly_income
FROM renting AS r
LEFT JOIN movies AS m
ON r.movie_id = m.movie_id
GROUP BY strftime('%Y', date_renting)
ORDER BY year DESC;

 * sqlite:///MovieNow.db
Done.


year,yearly_income
2019,354.51
2018,658.02
2017,263.19


Movie rent per year:

In [16]:
%%sql
SELECT strftime('%Y', date_renting) AS year,
       COUNT(renting_id) AS n_renting
FROM renting
GROUP BY strftime('%Y', date_renting);

 * sqlite:///MovieNow.db
Done.


year,n_renting
2017,121
2018,298
2019,159


Highly rented movie in 2018:

In [21]:
%%sql
SELECT title, COUNT(renting_id) AS n_renting
FROM renting AS r
LEFT JOIN movies AS m
ON r.movie_id = m.movie_id
WHERE date_renting BETWEEN '2018-01-01' AND '2018-12-31'
GROUP BY r.movie_id
ORDER BY n_renting DESC
LIMIT 10;

 * sqlite:///MovieNow.db
Done.


title,n_renting
Bridget Jones - The Edge of Reason,9
Training Day,9
Harry Potter and the Prisoner of Azkaban,8
Fair Game,7
Monster,7
25th Hour,7
One Night at McCool's,7
The Hunger Games,6
Harry Potter and the Deathly Hallows – Part 2,6
Harry Potter and the Deathly Hallows – Part 1,6


### Movie details

#### What are the top 5 rented movies?

In [5]:
%%sql
SELECT title, COUNT(renting_id) AS n_rented, COUNT(rating) AS n_rating
FROM renting
LEFT JOIN movies
ON renting.movie_id = movies.movie_id
GROUP BY renting.movie_id
ORDER BY n_rented DESC
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


title,n_rented,n_rating
The Kingdom,15,10
Training Day,14,9
Harry Potter and the Half-Blood Prince,13,11
World Trade Center,13,8
Monster,13,7


In [16]:
%%sql
SELECT title, COUNT(rating) AS n_rated
FROM renting
LEFT JOIN movies
ON renting.movie_id = movies.movie_id
GROUP BY renting.movie_id
ORDER BY n_rated DESC
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


title,n_rated
Harry Potter and the Half-Blood Prince,11
The Kingdom,10
Training Day,9
Winter's Bone,8
World Trade Center,8


#### What is the top 5 movies that have the highest average rating?
Note: only choose movie that has more than the average number of rating

In [7]:
%%sql
WITH count_rating AS (
    SELECT COUNT(rating) AS n_rating
    FROM renting
    GROUP BY movie_id
)

SELECT title,
       COUNT(rating) AS number_rating,
       ROUND(AVG(rating),2) AS avg_rating
FROM movies AS m
LEFT JOIN renting AS r
ON m.movie_id = r.movie_id
GROUP BY r.movie_id
HAVING number_rating > (
    SELECT ROUND(AVG(n_rating),2)
    FROM count_rating
)
ORDER BY avg_rating DESC, number_rating DESC
LIMIT 5;

 * sqlite:///MovieNow.db
Done.


title,number_rating,avg_rating
No Country for Old Men,5,9.6
Django Unchained,6,9.33
Secondhand Lions,5,9.0
Ghost Rider: Spirit of Vengeance,5,9.0
Young Adult,6,8.83


#### What are movies that have rating above average?

In [17]:
%%sql
SELECT m.title, AVG(r.rating) AS avg_movie_rating, COUNT(rating) AS n_rating
FROM renting AS r
JOIN movies AS m ON r.movie_id = m.movie_id
GROUP BY m.title
HAVING AVG(r.rating) > (
    SELECT AVG(rating)
    FROM renting
)
ORDER BY avg_movie_rating DESC;


 * sqlite:///MovieNow.db
Done.


title,avg_movie_rating,n_rating
Astro Boy,10.0,2
The Fellowship of the Ring,9.75,4
No Country for Old Men,9.6,5
Django Unchained,9.333333333333334,6
What Women Want,9.0,2
Secondhand Lions,9.0,5
Ghost Rider: Spirit of Vengeance,9.0,5
Young Adult,8.833333333333334,6
Harry Potter and the Philosopher's Stone,8.8,5
Fool's Gold,8.75,4


#### How much income did each movie generate? 

In [70]:
%%sql
SELECT rm.title,
       ROUND(SUM(rm.renting_price),2) AS income_movie
FROM
       (SELECT m.title,  
               m.renting_price
       FROM renting AS r
       LEFT JOIN movies AS m
       ON r.movie_id=m.movie_id) AS rm
GROUP BY rm.title
ORDER BY income_movie DESC
LIMIT 5; 

 * sqlite:///MovieNow.db
Done.


title,income_movie
Bridget Jones - The Edge of Reason,37.57
Fair Game,34.68
The Kingdom,31.35
Two for the Money,30.69
Simone,29.59


#### What is the popular genre among customers?

In [9]:
%%sql
SELECT genre, COUNT(renting_id) AS n_rented
FROM renting AS r
LEFT JOIN movies AS m
ON r.movie_id = m.movie_id
GROUP BY genre
ORDER BY n_rented DESC;

 * sqlite:///MovieNow.db
Done.


genre,n_rented
Drama,319
Science Fiction & Fantasy,95
Comedy,69
Mystery & Suspense,39
Other,20
Action & Adventure,16
Animation,12
Art House & International,8


In [12]:
%%sql
SELECT genre, COUNT(movie_id) AS n_movie
FROM movies
GROUP BY genre
ORDER BY n_movie DESC;

 * sqlite:///MovieNow.db
Done.


genre,n_movie
Drama,36
Science Fiction & Fantasy,11
Comedy,10
Mystery & Suspense,6
Action & Adventure,3
Other,2
Animation,2
Art House & International,1


### Customer details

Grouping based on age:<br>
1970 - 1979<br>
1980 - 1989<br>
1990 - 1999<br>

In [18]:
%%sql
WITH date_ranges AS (
    SELECT
        CASE
            WHEN date_of_birth BETWEEN '1970-01-01' AND '1979-12-31' THEN '1970s'
            WHEN date_of_birth BETWEEN '1980-01-01' AND '1989-12-31' THEN '1980s'
            WHEN date_of_birth BETWEEN '1990-01-01' AND '1999-12-31' THEN '1990s'
            ELSE 'Other'
        END AS birth_decade,
        customer_id
    FROM customers
)

SELECT
    birth_decade,
    COUNT(*) AS people_count
FROM date_ranges
GROUP BY birth_decade;

 * sqlite:///MovieNow.db
Done.


birth_decade,people_count
1970s,38
1980s,34
1990s,51


In [17]:
%%sql
WITH date_ranges AS (
    SELECT
        CASE
            WHEN date_of_birth BETWEEN '1970-01-01' AND '1979-12-31' THEN '1970s'
            WHEN date_of_birth BETWEEN '1980-01-01' AND '1989-12-31' THEN '1980s'
            WHEN date_of_birth BETWEEN '1990-01-01' AND '1999-12-31' THEN '1990s'
            ELSE 'Other'
        END AS birth_decade,
        customer_id
    FROM customers
)

SELECT 

 * sqlite:///MovieNow.db
Done.


birth_decade,customer_id
1980s,1
1970s,2
1970s,3
1990s,4
1970s,5
1990s,6
1990s,7
1990s,8
1980s,9
1980s,10


### Actors