In [1]:
import configparser
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
config = configparser.ConfigParser()

In [3]:
config.read('clusterdvd.config')

['clusterdvd.config']

In [4]:
config['POSTGRES']['PG_HOST']

'172.17.0.2'

In [5]:
db = config['POSTGRES']['PG_DB']
user = config['POSTGRES']['PG_UNAME']
passwd = config['POSTGRES']['PG_PASS']
port = config['POSTGRES']['PG_PORT']
host = config['POSTGRES']['PG_HOST']

Using the pandas internal read_sql method to get the data from the DB

In [7]:
credentials = "postgresql://{}:{}@{}:{}/{}".format(user,passwd,host,port,db)

In [8]:
credentials

'postgresql://postgres:1234@172.17.0.2:5432/dvdrental'

In [9]:
filmTable = pd.read_sql("""SELECT * FROM film""",con=credentials)

In [10]:
filmTable.head(2)

Unnamed: 0,film_id,title,description,release_year,language_id,rental_duration,rental_rate,length,replacement_cost,rating,last_update,special_features,fulltext
0,133,Chamber Italian,A Fateful Reflection of a Moose And a Husband ...,2006,1,7,4.99,117,14.99,NC-17,2013-05-26 14:50:58.951,[Trailers],'chamber':1 'fate':4 'husband':11 'italian':2 ...
1,384,Grosse Wonderful,A Epic Drama of a Cat And a Explorer who must ...,2006,1,5,4.99,49,19.99,R,2013-05-26 14:50:58.951,[Behind the Scenes],'australia':18 'cat':8 'drama':5 'epic':4 'exp...


Getting the schema from the existing tables

In [24]:
def getSchema(tableName, credentials):
    schema = pd.read_sql("""SELECT * FROM information_schema.columns where table_name='{}'""".format(tableName),con=credentials)
    return schema

In [26]:
getSchema('inventory',credentials)

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,dvdrental,public,inventory,inventory_id,1,nextval('inventory_inventory_id_seq'::regclass),NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,dvdrental,public,inventory,film_id,2,,NO,smallint,,,...,NO,,,,,,NO,NEVER,,YES
2,dvdrental,public,inventory,store_id,3,,NO,smallint,,,...,NO,,,,,,NO,NEVER,,YES
3,dvdrental,public,inventory,last_update,4,now(),NO,timestamp without time zone,,,...,NO,,,,,,NO,NEVER,,YES


In [30]:
def makeTable(query, credentials):
    try:
        pd.read_sql(query, credentials)
        return
    except Exception as e:
        print(e)

In [32]:
createDim1 = """CREATE TABLE dimDate(
                date_key integer NOT NULL PRIMARY KEY,
                date date NOT NULL
            )"""

In [33]:
makeTable(createDim1, credentials)

This result object does not return rows. It has been closed automatically.


In [54]:
# getting to the revenue that the films have fetched
query1 = f"""SELECT p.payment_id, p.rental_id, p.amount FROM payment as p"""

Writing the function that takes the query, credentials and returns the dataframe

In [12]:
def queryBase(query,credentials):
    requiredTable = pd.read_sql(query,con=credentials)
    return requiredTable

Querying the single table

In [53]:
query1tab= queryBase(query1,credentials)
query1tab.head(3)

Unnamed: 0,payment_id,rental_id,amount
0,17503,1520,7.99
1,17504,1778,1.99
2,17505,1849,7.99


In [55]:
queryRental = f"""SELECT * FROM rental"""
queryBase(queryRental,credentials).head(2)

Unnamed: 0,rental_id,rental_date,inventory_id,customer_id,return_date,staff_id,last_update
0,2,2005-05-24 22:54:33,1525,459,2005-05-28 19:40:33,1,2006-02-16 02:30:53
1,3,2005-05-24 23:03:39,1711,408,2005-06-01 22:12:39,1,2006-02-16 02:30:53


Joining the rental table 

In [57]:
query2 = f"""SELECT p.payment_id,p.rental_id,p.amount,r.inventory_id from payment as p
JOIN rental r on p.rental_id=r.rental_id"""

queryBase(query2,credentials).head(2)

Unnamed: 0,payment_id,rental_id,amount,inventory_id
0,17503,1520,7.99,3419
1,17504,1778,1.99,2512


Referring the tables that we are planning to join, here referring inventory table

In [65]:
queryInventory = f"""SELECT * FROM inventory"""
queryBase(queryInventory,credentials).head(2)

Unnamed: 0,inventory_id,film_id,store_id,last_update
0,1,1,1,2006-02-15 10:09:17
1,2,1,1,2006-02-15 10:09:17


joining the inventory table

In [67]:
query3 = f"""SELECT p.payment_id, p.rental_id, p.amount, r.inventory_id,
            i.film_id FROM payment as p
            JOIN rental as r on p.rental_id = r.rental_id
            JOIN inventory as i on r.inventory_id = i.inventory_id"""

In [68]:
queryBase(query3,credentials)

Unnamed: 0,payment_id,rental_id,amount,inventory_id,film_id
0,17503,1520,7.99,3419,749
1,17504,1778,1.99,2512,552
2,17505,1849,7.99,2507,551
3,17506,2829,2.99,2047,445
4,17507,3130,7.99,2569,563
...,...,...,...,...,...
14591,32094,12682,2.99,1148,254
14592,32095,14107,0.99,3998,870
14593,32096,13756,4.99,1146,253
14594,32097,15293,0.99,1219,270


Reviewing the film table

In [75]:
queryFilm = f"""SELECT * FROM film"""
queryBase(queryFilm,credentials).head(2)

Unnamed: 0,film_id,title,description,release_year,language_id,rental_duration,rental_rate,length,replacement_cost,rating,last_update,special_features,fulltext
0,133,Chamber Italian,A Fateful Reflection of a Moose And a Husband ...,2006,1,7,4.99,117,14.99,NC-17,2013-05-26 14:50:58.951,[Trailers],'chamber':1 'fate':4 'husband':11 'italian':2 ...
1,384,Grosse Wonderful,A Epic Drama of a Cat And a Explorer who must ...,2006,1,5,4.99,49,19.99,R,2013-05-26 14:50:58.951,[Behind the Scenes],'australia':18 'cat':8 'drama':5 'epic':4 'exp...


Joining the film table

In [76]:
query4 = f"""SELECT p.payment_id, p.rental_id, p.amount, i.inventory_id, 
            f.film_id, f.title, f.rental_rate
            FROM payment as p
            JOIN rental as r on p.rental_id  = r.rental_id
            JOIN inventory as i on r.inventory_id = i.inventory_id
            JOIN film as f on i.film_id = f.film_id"""

In [87]:
queryBase(query4,credentials)

Unnamed: 0,payment_id,rental_id,amount,inventory_id,film_id,title,rental_rate
0,17503,1520,7.99,3419,749,Rules Human,4.99
1,17504,1778,1.99,2512,552,Majestic Floats,0.99
2,17505,1849,7.99,2507,551,Maiden Home,4.99
3,17506,2829,2.99,2047,445,Hyde Doctor,2.99
4,17507,3130,7.99,2569,563,Massacre Usual,4.99
...,...,...,...,...,...,...,...
14591,32094,12682,2.99,1148,254,Driver Annie,2.99
14592,32095,14107,0.99,3998,870,Swarm Gold,0.99
14593,32096,13756,4.99,1146,253,Drifter Commandments,4.99
14594,32097,15293,0.99,1219,270,Earth Vision,0.99


In [14]:
query5 = f"""SELECT f.title, p.amount, f.film_id  
            FROM payment as p
            JOIN rental as r ON p.rental_id=r.rental_id
            JOIN inventory as i ON r.inventory_id=i.inventory_id
            JOIN film as f ON i.film_id=f.film_id"""

queryBase(query5,credentials).head(2)

Unnamed: 0,title,amount,film_id
0,Rules Human,7.99,749
1,Majestic Floats,1.99,552


Group by on the film title and sorting with Descending order

In [21]:
query6 = f"""SELECT f.title, count(*) as count, sum(p.amount) as reve FROM payment as p
            JOIN rental as r ON p.rental_id = r.rental_id
            JOIN inventory as i ON r.rental_id= i.inventory_id
            JOIN film as f ON i.film_id = f.film_id
            GROUP BY f.title
            ORDER BY SUM(p.amount) DESC"""

In [22]:
queryBase(query6,credentials)

Unnamed: 0,title,count,reve
0,Saturday Lambs,8,46.92
1,Swarm Gold,8,43.92
2,Sweethearts Suspects,8,42.92
3,Timberland Sky,7,41.93
4,Trip Newton,8,41.92
...,...,...,...
713,Matrix Snowman,2,1.98
714,Newsies Story,1,0.99
715,Holy Tadpole,1,0.99
716,Dwarfs Alter,1,0.99
