## Load ipython-sql and connect to database

In [1]:
%load_ext sql

In [2]:
import os, re
from IPython.display import display_html

CONNECTION_STRING = os.getenv('AWSGPDBCONN')

cs = re.match('^postgresql:\/\/(\S+):(\S+)@(\S+):(\S+)\/(\S+)$', CONNECTION_STRING)

DB_USER   = cs.group(1)
DB_PWD    = cs.group(2)
DB_SERVER = cs.group(3)
DB_PORT   = cs.group(4)
DB_NAME   = cs.group(5)

%reload_ext sql
%sql $CONNECTION_STRING

'Connected: gpadmin@gpadmin'

In [3]:
%sql SELECT VERSION();

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
1 rows affected.


version
"PostgreSQL 9.4.24 (Greenplum Database 6.3.0 build commit:77aa1b6e4486adbaede9f5f2864a04fc3a512e93) on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 6.4.0, 64-bit compiled on Jan 9 2020 23:10:47"


In [4]:
%sql SET search_path=pricing, public;

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.


[]

# Pricing Optimization

### 1_Run_Linear_Regression

In [5]:
%%sql
DROP TABLE IF EXISTS pricing.model_results CASCADE;
DROP TABLE IF EXISTS pricing.model_results_summary CASCADE;

-- You can choose to analyze the pricing.flight_history table if you want.

SELECT madlib.linregr_train(
    'pricing.flight_history',
    'pricing.model_results',
    'Sales',
    'ARRAY[
    -- intercept
    1,
    --prices
    Price,
    Price_Comp1,
    Price_Comp2,
    Price_Comp3, 
    Price_Comp4, 
    --seasonality 
    Flight_Month, 
    CASE WHEN Flight_Weekday=2 THEN 1 ELSE 0 END, 
    CASE WHEN Flight_Weekday=3 THEN 1 ELSE 0 END, 
    CASE WHEN Flight_Weekday=4 THEN 1 ELSE 0 END, 
    CASE WHEN Flight_Weekday=5 THEN 1 ELSE 0 END, 
    CASE WHEN Flight_Weekday=6 THEN 1 ELSE 0 END, 
    CASE WHEN Flight_Weekday=7 THEN 1 ELSE 0 END, 
    Holiday_Indicator, 
    --trend
    CURRENT_DATE-flight_date]',
    'routeid, origin, destination, class, days_to_flight'
);


 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.
Done.
1 rows affected.


linregr_train


### 2_Score_Linear_Regression_Model

In [6]:
%%sql
ANALYZE pricing.to_be_priced_flights;
ANALYZE pricing.model_results;

DROP TABLE IF EXISTS pricing.scoring CASCADE;
CREATE TABLE pricing.scoring AS (
    SELECT
        t.RouteID, t.Route_Origin, t.Route_Destination, t.Class, t.Flight_Date, t.Days_To_Flight,
        LEAST(coef[2], -0.0001) as price_coef,
        madlib.array_dot (
        ARRAY[
        1,
        0, -- Own price is zeroed out
        t.Price_Comp1,
        t.Price_Comp2,
        t.Price_Comp3,
        t.Price_Comp4,
        t.Flight_Month,
        CASE WHEN t.Flight_Weekday = 2 THEN 1 ELSE 0 END,
        CASE WHEN t.Flight_Weekday = 3 THEN 1 ELSE 0 END,
        CASE WHEN t.Flight_Weekday = 4 THEN 1 ELSE 0 END,
        CASE WHEN t.Flight_Weekday = 5 THEN 1 ELSE 0 END,
        CASE WHEN t.Flight_Weekday = 6 THEN 1 ELSE 0 END,
        CASE WHEN t.Flight_Weekday = 7 THEN 1 ELSE 0 END,
        Holiday_Indicator,
        CURRENT_DATE - flight_date
        ]::FLOAT8[], model.coef) as predicted_sales
    FROM
        pricing.to_be_priced_flights t, pricing.model_results model
    WHERE
        t.RouteID = model.RouteID
        AND t.Class = model.Class
        AND t.Days_To_Flight = model.Days_To_Flight AND model.coef is NOT NULL
  )
DISTRIBUTED BY(RouteID);

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.
Done.
Done.
480666144 rows affected.


[]

### 3_Input_For_QP

In [7]:
%%sql
ANALYZE pricing.scoring;

DROP TABLE IF EXISTS pricing.input_for_QP CASCADE;

CREATE TABLE pricing.input_for_QP AS
    SELECT routeID
        , Route_Origin
        , Route_Destination
        , class
        , flight_Date
        , array_agg(price_coef order by days_to_flight) price_coef_array
        , array_agg(predicted_sales order by days_to_flight) predicted_sales_array
    FROM pricing.scoring
    GROUP BY routeID
        , Route_Origin
        , Route_Destination
        , class
        , flight_Date
DISTRIBUTED BY (routeID);

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.
Done.
34333296 rows affected.


[]

### 4_Optimize_Daily_Prices

In [8]:
%%sql
CREATE OR REPLACE FUNCTION pricing.r_max (integer, integer) RETURNS integer AS $$
if (arg1 > arg2) 
    return(arg1) 
else 
    return(arg2)
$$ LANGUAGE plr STRICT;

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.


[]

In [9]:
%%sql

CREATE OR REPLACE FUNCTION r_solve_QP(a float8[], b float8[], c integer)
RETURNS float8[] AS 
$$
    library(quadprog)
    
    Dmat<- matrix(0,14,14)
    diag(Dmat) <- -2*a 
    dvec <- b 
    
    Amat <- matrix(0,14,15)
    Amat[,1] <- -a
    
    diag(Amat[,2:15]) <- a
    bvec  <- c(-c+sum(b),-b)

    qp<-solve.QP(Dmat,dvec,Amat,bvec=bvec)
    
    return(qp$solution)
$$ 
LANGUAGE 'plr';

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.


[]

In [10]:
%%sql
ANALYZE pricing.input_for_qp;

DROP TABLE IF EXISTS pricing.optimal_prices CASCADE;

CREATE TABLE pricing.optimal_prices
AS
SELECT
  routeid, Route_Origin, Route_Destination,
  "class",
  flight_date,
  CASE
    WHEN class='Economy' THEN pricing.r_solve_QP(price_coef_array, predicted_sales_array, 200)
    WHEN class='Business' THEN pricing.r_solve_QP(price_coef_array, predicted_sales_array, 15)
    ELSE pricing.r_solve_QP(price_coef_array, predicted_sales_array, 10) END as optimal_prices
FROM pricing.input_for_qp
DISTRIBUTED BY (routeid, "class", flight_date);

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
Done.
Done.
34333296 rows affected.


[]

### 5_Query_Optimized_Prices

In [11]:
%%sql
SELECT Route_Origin, Route_Destination, class, flight_date, unnest(array[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) as days_to_flight,
unnest(optimal_prices) as optimal_pricing
from pricing.optimal_prices
where routeid IN (169, 184)
AND flight_date BETWEEN '2020-03-01' AND '2020-04-01'
;

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
26880 rows affected.


route_origin,route_destination,class,flight_date,days_to_flight,optimal_pricing
LAX,ANC,Business,2020-03-06,1,1714.7494407582226
LAX,ANC,Business,2020-03-06,2,2141.9689869422564
LAX,ANC,Business,2020-03-06,3,2527.972799186876
LAX,ANC,Business,2020-03-06,4,1768.5791871665172
LAX,ANC,Business,2020-03-06,5,2329.388807514492
LAX,ANC,Business,2020-03-06,6,1737.14418719449
LAX,ANC,Business,2020-03-06,7,2024.9880975426909
LAX,ANC,Business,2020-03-06,8,2180.4161035432426
LAX,ANC,Business,2020-03-06,9,1824.6804019061487
LAX,ANC,Business,2020-03-06,10,1310.0631026160163


In [12]:
%%sql
SELECT Route_Origin, Route_Destination, class, flight_date, unnest(array[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) as days_to_flight,
unnest(optimal_prices) as optimal_pricing
FROM pricing.optimal_prices
WHERE flight_date='2020-03-10' and class='Business'
LIMIT 10;

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
10 rows affected.


route_origin,route_destination,class,flight_date,days_to_flight,optimal_pricing
AUS,DFW,Business,2020-03-10,1,1966.5969581738584
AUS,DFW,Business,2020-03-10,2,1621.0865737188146
AUS,DFW,Business,2020-03-10,3,2179.3851247840903
AUS,DFW,Business,2020-03-10,4,2708.772531190958
AUS,DFW,Business,2020-03-10,5,2018.6583420738
AUS,DFW,Business,2020-03-10,6,1802.4298954724304
AUS,DFW,Business,2020-03-10,7,1621.6224004518176
AUS,DFW,Business,2020-03-10,8,3021.879883124924
AUS,DFW,Business,2020-03-10,9,1598.0346873252845
AUS,DFW,Business,2020-03-10,10,1643.044303855217


In [13]:
%%sql
SELECT Route_Origin, Route_Destination, class, flight_date, unnest(array[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) as days_to_flight,
unnest(optimal_prices) as optimal_pricing
FROM pricing.optimal_prices
WHERE flight_date='2020-01-17' and routeid=2544 and class='First-Class' 

 * postgresql://gpadmin:***@ec2-3-10-169-185.eu-west-2.compute.amazonaws.com:5432/gpadmin
140 rows affected.


route_origin,route_destination,class,flight_date,days_to_flight,optimal_pricing
ABQ,OMA,First-Class,2020-01-17,1,5007.548461885527
ABQ,OMA,First-Class,2020-01-17,2,19147.187798572384
ABQ,OMA,First-Class,2020-01-17,3,10798.318792096885
ABQ,OMA,First-Class,2020-01-17,4,4025.7838080515394
ABQ,OMA,First-Class,2020-01-17,5,20252.836050252576
ABQ,OMA,First-Class,2020-01-17,6,10601.661335022482
ABQ,OMA,First-Class,2020-01-17,7,24585.687094282755
ABQ,OMA,First-Class,2020-01-17,8,6850.653276235269
ABQ,OMA,First-Class,2020-01-17,9,30496.265962830013
ABQ,OMA,First-Class,2020-01-17,10,2530.2371154963
