In [None]:
!pip install SQLAlchemy==1.4.49
!pip install ipython-sql==0.4.1

In [None]:
# Load the SQL extension that provides the %sql and %%sql magics used by the
# remaining cells of this notebook.
%load_ext sql

In [None]:
# Open the Redshift connection used by the following %%sql cells.
# [ID], [PW] and [Redshift endpoint] are redacted placeholders: fill them in
# locally and do not commit real credentials with the notebook.
# NOTE(review): presumably an admin account, since later cells run GRANT and
# ALTER USER - confirm.
%sql postgresql://[ID]:[PW]@[Redshift endpoint]

### analytics 테스트 테이블 만들기

In [None]:
# Disabled cell: every line, including the %%sql magic, is commented out.
# %%sql

# ALTER TABLE raw_data.user_session_channel RENAME COLUMN session TO sessionid;
# The sessionid column had been created under the name `session`, so
# ALTER TABLE ... RENAME COLUMN was used to rename it.

In [None]:
%%sql

-- Build analytics.mau_summary with one row per calendar month (YYYY-MM)
-- holding the number of distinct users that had at least one session
-- timestamp in that month. Sessions are matched to users through sessionid.
CREATE TABLE analytics.mau_summary AS
SELECT
  TO_CHAR(st.ts, 'YYYY-MM') AS month,
  COUNT(DISTINCT usc.userid) AS mau
FROM raw_data.user_session_channel usc
INNER JOIN raw_data.session_timestamp st
  ON usc.sessionid = st.sessionid
GROUP BY 1
ORDER BY 1 DESC;

### 사용자 그룹 권한 설정하기

#### analytics_authors

In [None]:
%%sql

-- analytics_authors get every privilege on the analytics schema and its tables.
-- NOTE(review): ON ALL TABLES presumably applies only to tables that already
-- exist when the grant runs, so it may need re-running after new tables are
-- created. Confirm against the Redshift GRANT documentation.
GRANT ALL ON SCHEMA analytics TO GROUP analytics_authors;
GRANT ALL ON ALL TABLES IN SCHEMA analytics TO GROUP analytics_authors;

-- Same full access on the adhoc schema.
GRANT ALL ON SCHEMA adhoc TO GROUP analytics_authors;
GRANT ALL ON ALL TABLES IN SCHEMA adhoc TO GROUP analytics_authors;

-- raw_data is read-only for this group (schema USAGE plus SELECT on its tables).
GRANT USAGE ON SCHEMA raw_data TO GROUP analytics_authors;
GRANT SELECT ON ALL TABLES IN SCHEMA raw_data TO GROUP analytics_authors;

#### analytics_users

In [None]:
%%sql

-- analytics_users get read-only access to the analytics schema
-- (schema USAGE plus SELECT on its tables).
GRANT USAGE ON SCHEMA analytics TO GROUP analytics_users;
GRANT SELECT ON ALL TABLES IN SCHEMA analytics TO GROUP analytics_users;

-- Every privilege on the adhoc schema and its tables.
GRANT ALL ON SCHEMA adhoc TO GROUP analytics_users;
GRANT ALL ON ALL TABLES IN SCHEMA adhoc TO GROUP analytics_users;

-- raw_data is read-only for this group as well.
GRANT USAGE ON SCHEMA raw_data TO GROUP analytics_users;
GRANT SELECT ON ALL TABLES IN SCHEMA raw_data TO GROUP analytics_users;

#### pii_users

In [None]:
%%sql

-- pii_users get read-only access to the pii schema
-- (schema USAGE plus SELECT on its tables).
GRANT USAGE ON SCHEMA pii TO GROUP pii_users;
GRANT SELECT ON ALL TABLES IN SCHEMA pii TO GROUP pii_users;

#### yeojun으로 로그인해서 raw_data 테이블을 수정하려 시도해보기

In [None]:
%%sql

-- Set the login password of user yeojun.
-- [password] is a redacted placeholder. NOTE(review): presumably it has to be
-- replaced by a quoted string literal before running - confirm against the
-- ALTER USER syntax, and never commit the real value.
ALTER USER yeojun PASSWORD [password]

In [None]:
# Reconnect for the permission test described in the section heading, i.e. log
# in as user yeojun so that the next cell runs with that user's privileges.
# [ID], [PW] and [Redshift endpoint] are redacted placeholders.
%sql postgresql://[ID]:[PW]@[Redshift endpoint]

In [None]:
%%sql

-- Permission check, run while connected as yeojun.
-- This DELETE is expected to fail with
-- "permission denied for relation user_session_channel",
-- showing that this user is not allowed to modify raw_data tables.
-- The note is a leading -- comment because # is not a comment marker in
-- Redshift SQL and a comment after the final statement may be sent on its own.
DELETE FROM raw_data.user_session_channel;

### Redshift Spectrum 테스트

In [None]:
%%sql

-- Register external_schema as an external schema backed by the data catalog
-- database myspectrum_db, creating that external database if it is missing.
-- [IAM ARN] is a redacted placeholder for the IAM role used by the cluster.
-- AWSGlueConsoleFullAccess (presumably the policy that role needs, confirm)
CREATE EXTERNAL SCHEMA external_schema
FROM data catalog
database 'myspectrum_db'
iam_role [IAM ARN]
create external database if not exists;

In [None]:
%%sql

-- Create raw_data.user_property with one row per distinct userid found in
-- raw_data.user_session_channel, filled with a random gender and age.
-- Casting a float to int rounds to the nearest integer, so the previous
-- CAST(random() * 2 AS int) produced 0 only about a quarter of the time and
-- made roughly three quarters of the users female. FLOOR truncates instead,
-- which gives an even male and female split and ages uniformly spread over
-- 18 to 67. The age is cast back to int so the column type is unchanged.
CREATE TABLE raw_data.user_property AS
SELECT
  userid,
  CASE WHEN FLOOR(random() * 2) = 0 THEN 'male' ELSE 'female' END gender,
  (CAST(FLOOR(random() * 50) AS int) + 18) age
FROM(
  SELECT DISTINCT userid
  FROM raw_data.user_session_channel
);

In [None]:
%%sql

-- Define external_schema.user_session_channel as an external table over
-- comma-delimited text files stored under the S3 location given below.
CREATE EXTERNAL TABLE external_schema.user_session_channel(
  userid integer,
  sessionid varchar(32),
  channel varchar(32)
)
row format delimited
fields terminated by ','
stored as textfile
-- The location has to be the folder. Pasting the full file path
-- 's3://yeojun-test-bucket/usc/user_session_channel.csv' instead of the
-- folder caused an error.
location 's3://yeojun-test-bucket/usc/';

In [None]:
%%sql

-- Join the external session table to the local user_property table on userid
-- and count the joined rows for each gender.
SELECT props.gender, COUNT(1)
FROM raw_data.user_property props
INNER JOIN external_schema.user_session_channel sessions
  ON props.userid = sessions.userid
GROUP BY props.gender;

### Redshift SageMaker

In [None]:
%%sql

-- Table that receives the telecom customer data loaded by the COPY cell.
-- churn is the label column and purpose tags each row (the later cells filter
-- on the values Train and Test).
-- NOTE(review): the decimal columns are declared without precision and scale.
-- Presumably Redshift then defaults to scale 0, which would drop the
-- fractional part of minutes and charges. Confirm whether an explicit
-- decimal(p, s) or a float type is intended.
CREATE TABLE raw_data.orange_telecom_customers (
    state varchar,
    account_length integer,
    area_code integer, 
    international_plan varchar,
    voice_mail_plan varchar, 
    number_vmail_messages integer,
    total_day_minutes decimal, 
    total_day_calls integer,
    total_day_charge decimal, 
    total_eve_minutes decimal,
    total_eve_calls integer, 
    total_eve_charge decimal,
    total_night_minutes decimal, 
    total_night_calls integer,
    total_night_charge decimal, 
    total_intl_minutes decimal,
    total_intl_calls integer, 
    total_intl_charge decimal,
    customer_service_calls integer, 
    churn varchar,
    purpose varchar
);


In [None]:
%%sql

-- Bulk-load train.csv from S3 into raw_data.orange_telecom_customers.
-- Fields are comma separated, the first line is skipped as a header
-- (IGNOREHEADER 1) and surrounding quotes are stripped (removequotes).
-- [aws_iam_role] is a redacted placeholder for the role used to read the file.
COPY raw_data.orange_telecom_customers
FROM 's3://yeojun-test-bucket/redshift_ml/train.csv'
credentials 'aws_iam_role=[aws_iam_role]'
delimiter ',' dateformat 'auto' timeformat 'auto' IGNOREHEADER 1
removequotes;

In [None]:
%%sql

-- Create the model orange_telecom_customers_model from the rows whose purpose
-- is Train. churn is the target column and the remaining selected columns are
-- the inputs. Predictions are exposed through the SQL function
-- ml_fn_orange_telecom_customers.
-- [aws_iam_arn] is a redacted placeholder for the IAM role.
-- NOTE(review): S3_BUCKET is presumably where intermediate training artifacts
-- are written. Confirm against the CREATE MODEL documentation.
CREATE MODEL orange_telecom_customers_model
FROM(
    SELECT
        state, account_length, area_code, international_plan, voice_mail_plan, 
        number_vmail_messages, total_day_minutes, total_day_calls, 
        total_day_charge, total_eve_minutes, total_eve_calls, total_eve_charge, 
        total_night_minutes, total_night_calls, total_night_charge, 
        total_intl_minutes, total_intl_calls, total_intl_charge, 
        customer_service_calls, churn
    FROM raw_data.orange_telecom_customers
    WHERE purpose ='Train'   
)
TARGET churn
FUNCTION ml_fn_orange_telecom_customers
IAM_ROLE 'arn:[aws_iam_arn]'
SETTINGS(
    S3_BUCKET 'yeojun-test-bucket'
);

In [None]:
%%sql

-- Display the information Redshift holds about the model created above.
-- NOTE(review): presumably used to check whether training has finished
-- before the prediction function is called. Confirm.
SHOW MODEL orange_telecom_customers_model;

In [None]:
%%sql

-- Score the rows whose purpose is Test. Each output row pairs the recorded
-- churn label with the value returned by the model function, which receives
-- the same input columns, in the same order, as the training query.
SELECT
    churn,
    ml_fn_orange_telecom_customers(
        state,
        account_length,
        area_code,
        international_plan,
        voice_mail_plan,
        number_vmail_messages,
        total_day_minutes,
        total_day_calls,
        total_day_charge,
        total_eve_minutes,
        total_eve_calls,
        total_eve_charge,
        total_night_minutes,
        total_night_calls,
        total_night_charge,
        total_intl_minutes,
        total_intl_calls,
        total_intl_charge,
        customer_service_calls
    ) AS "prediction"
FROM raw_data.orange_telecom_customers
WHERE purpose = 'Test';

In [None]:
%%sql

-- Clean-up step that removes the model created earlier in this section.
-- NOTE(review): presumably this also removes the associated prediction
-- function. Confirm against the DROP MODEL documentation.
Drop Model orange_telecom_customers_model;

### Redshift 관련 유지보수

In [38]:
%%sql

-- Maintenance step. VACUUM FULL is issued without a table name.
-- NOTE(review): presumably this re-sorts rows and reclaims space for every
-- table in the current database and cannot run inside a transaction block.
-- Confirm against the Redshift VACUUM documentation.
vacuum full