### teradataml 확인

In [103]:
import teradataml

# 17.20.00.04 이상 권장
print(teradataml.__version__)

17.20.00.06


### Database Connection 생성
* create_context()를 이용하여 DB 접속
* 내부적으로 SQLAlchemy를 보안 처리 및 커넥션 관리, Native Teradata DB Driver 연동등을 감싸서 처리. 

In [104]:
%run -i ../startup.ipynb

from teradataml import create_context

eng = create_context(host = 'host.docker.internal', username='demo_user', password = password)

... Logon successful
Connected as: teradatasql://demo_user:xxxxx@host.docker.internal/dbc




#### 주피터 파이썬 커널에서 바로 SQL 수행하기
* create_context() 로 반환된 Engine 객체에서 SQL 쿼리를 직접 수행하고 결과 반환. 
* engine의 connection 객체를 받은 뒤 execute(sql)메소드를 이용하여 DB Cursor를 반환받고, cursor 객체의 fetchall()을 이용
* pandas의 read_sql()함수에 인자로 sql과 Engine을 입력하여 결과를 Pandas DataFrame으로 반환 받는 방법이 있음.
* SQLAlchemy 2.x 이후 1.X 대비 주요 API가 많은 변경


In [49]:
qry = '''
SELECT top 5 tablename, tablekind, creatorname, createtimestamp
from dbc.tablesv
where databasename='demo_user'
'''

# 쿼리 수행 후 파이썬 리스트 캑체로 결과 반환
with eng.connect() as conn:
    cursor = conn.execute(text(qry))
    result = cursor.fetchall()
    print(result)
    
# with eng.connect() as conn:
#     result = conn.execute(text(qry)).fetchall()
#     print(result)

[('space_report', 'P ', 'DBC', datetime.datetime(2024, 3, 2, 0, 37, 18)), ('remove_data', 'P ', 'DBC', datetime.datetime(2024, 3, 2, 0, 37, 18)), ('get_data', 'P ', 'DBC', datetime.datetime(2024, 3, 2, 0, 37, 15)), ('titanic', 'T ', 'demo_user', datetime.datetime(2024, 3, 18, 20, 3, 42))]


In [50]:
import pandas as pd
from sqlalchemy.sql import text 

qry = '''
SELECT top 5 tablename, tablekind, creatorname, createtimestamp
from dbc.tablesv
where databasename='demo_user'
'''

# 쿼리 수행 후 pandas DataFrame으로 결과 반환. con 인자로 engine또는 Connection 객체 입력. 
pd.read_sql(text(qry), eng)

Unnamed: 0,TableName,TableKind,CreatorName,CreateTimeStamp
0,space_report,P,DBC,2024-03-02 00:37:18
1,get_data,P,DBC,2024-03-02 00:37:15
2,remove_data,P,DBC,2024-03-02 00:37:18
3,titanic,T,demo_user,2024-03-18 20:03:42


In [107]:
%%sql

# sql magic의 경우 현재 패스워드 암호화 처리 이슈 
SELECT top 5 tablename, tablekind, creatorname, createtimestamp
from dbc.tablesv
where databasename='demo_user'

Environment variable $DATABASE_URL not set, and no connect string given.
Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys([])


### CSV 파일을 tml DataFrame으로 로딩하기
* tml DataFrame은 DB Table을 기반으로 함. 
* csv 파일을 DB Table로 생성하거나 csv-> pandas DataFrame -> DB Table로 변환 필요
* teradataml은 pandas DataFrame을 DB Table로 생성하는 copy_to_sql() 함수 제공. 
* copy_to_sql() 함수의 주요 인자는 
   * df: table로 변환될 DataFrame 객체
   * table_name은 DataFrame이 저장될 Table 명
   * if_exists는 기존 Table이 이미 존재하고 있을 경우
       * replace: 해당 테이블을 삭제하고 다시 만듬
       * fail: 오류 발생 후 종료
       * append: 해당 테이블에 insert 수행
   

In [51]:
# titatnic dataset download
!wget https://raw.githubusercontent.com/chulminkw/PerfectGuide/master/1%EC%9E%A5/titanic/train.csv -O titanic.csv
!ls -lia

--2024-03-19 04:12:21--  https://raw.githubusercontent.com/chulminkw/PerfectGuide/master/1%EC%9E%A5/titanic/train.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 61194 (60K) [text/plain]
Saving to: ‘titanic.csv’


2024-03-19 04:12:22 (5.01 MB/s) - ‘titanic.csv’ saved [61194/61194]

total 416
5429818 drwxr-xr-x  3 jovyan users   4096 Mar 19 04:11 .
5419308 drwx------ 73 jovyan  1000   4096 Mar 18 23:14 ..
5429820 -rw-r--r--  1 jovyan users  85497 Mar 19 04:11 dataframe_operation.ipynb
5429822 -rw-r--r--  1 jovyan users 180341 Mar 18 23:21 encoding_scaling.ipynb
5429823 drwxr-xr-x  2 jovyan users   4096 Mar 19 00:31 .ipynb_checkpoints
5429821 -rw-r--r--  1 jovyan users  79546 Mar 18 23:21 preprocessing.ipynb
5429825 -rw-r--r--  1 jovyan users  61194 Mar 19

In [52]:
import pandas as pd
from teradataml import copy_to_sql

# csv 파일을 pandas DataFrame으로 로딩
pd_titanic_df = pd.read_csv('titanic.csv')
pd_titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [53]:
pd_titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [54]:
# pandas DataFrame을 Table로 생성. 
# schema를 설정하지 않으면 default schema로
# 만약 if_exists='replace' 를 설정하지 않고 append로 설정할 경우 
# Nan이 있는 컬럼값의 경우 append가 실패하여 오류가 발생할 수 있음. 예를 들어 float NaN값을 Varchar로 인식하여 오류 발생.  
#copy_to_sql(df=pd_titanic_df, table_name='titanic', if_exists='replace')
copy_to_sql(df=pd_titanic_df, table_name='titanic', if_exists='replace', primary_index='PassengerId')

In [55]:
import pandas as pd
from sqlalchemy.sql import text 

qry = '''
SELECT top 5 tablename, tablekind, creatorname, createtimestamp
from dbc.tablesv
where databasename='demo_user' order by createtimestamp desc
'''

# titanic 테이블이 DB에 생성되었음을 확인. 
pd.read_sql(text(qry), eng)

Unnamed: 0,TableName,TableKind,CreatorName,CreateTimeStamp
0,titanic,T,demo_user,2024-03-19 00:11:53
1,remove_data,P,DBC,2024-03-02 00:37:18
2,space_report,P,DBC,2024-03-02 00:37:18
3,get_data,P,DBC,2024-03-02 00:37:15


In [56]:
import pandas as pd
from sqlalchemy.sql import text

pd.set_option('display.max_colwidth', None)

# demo_user의 테이블 리스트 조사
def show_current_tables():
    qry = '''
    SELECT databasename, tablename, tablekind, creatorname, requesttext
    from dbc.tablesv
    where databasename='demo_user' order by createtimestamp desc
    '''

    # titanic 테이블이 DB에 생성되었음을 확인. 
    return pd.read_sql(text(qry), eng)

show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
1,demo_user,space_report,P,DBC,
2,demo_user,remove_data,P,DBC,
3,demo_user,get_data,P,DBC,


### Teradataml DataFrame 생성. 
* Table을 기반으로 DataFrame 생성. 
* Pandas DataFrame과 유사한 API 제공. 
* Teradataml DataFrame은 Teradata DB내의 View로 생성됨. 하지만 DataFrame 객체 생성시에 만들어지지 않고 DataFrame객체.head()와 같이 실제 사용이 될 경우 만들어짐. 

In [57]:
from teradataml import DataFrame, in_schema

titanic_df = DataFrame('titanic')
#titanic_df = DataFrame(in_schema(schema_name='demo_user', table_name='titanic'))
#titanic_df = DataFrame.from_table('titanic')
titanic_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [58]:
print(type(titanic_df))

<class 'teradataml.dataframe.dataframe.DataFrame'>


In [60]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
2,demo_user,space_report,P,DBC,
3,demo_user,remove_data,P,DBC,
4,demo_user,get_data,P,DBC,


In [61]:
titanic_df = DataFrame.from_table(table_name='titanic')
print(type(titanic_df))

<class 'teradataml.dataframe.dataframe.DataFrame'>


In [62]:
# DataFrame 객체를 생성하였지만, View는 만들어지지 않음. 
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
2,demo_user,space_report,P,DBC,
3,demo_user,remove_data,P,DBC,
4,demo_user,get_data,P,DBC,


In [63]:
# head()를 이용하여 실제 값을 Return 받아야 할 경우 DB에 새로운 View가 생성됨. 
titanic_df.head(5)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [40]:
# DB에 새로운 View가 생성됨을 확인 
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710811710090265,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710811710090265"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"
1,demo_user,ml__select__1710809000093754,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710809000093754"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
2,demo_user,ml__select__1710813372224578,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710813372224578"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
3,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
4,demo_user,space_report,P,DBC,
5,demo_user,remove_data,P,DBC,
6,demo_user,get_data,P,DBC,


In [41]:
titanic_df = DataFrame.from_table(table_name='titanic')
print(type(titanic_df))

<class 'teradataml.dataframe.dataframe.DataFrame'>


In [44]:
#SQL 기반으로 생성
titanic_df = DataFrame.from_query("select PassengerId, Pclass from demo_user.titanic")
print(type(titanic_df))
titanic_df.head(5)

<class 'teradataml.dataframe.dataframe.DataFrame'>


PassengerId,Pclass
3,3
5,3
4,1
2,1
1,3


In [45]:
from teradataml import DataFrame, in_schema

titanic_df = DataFrame(in_schema(schema_name='demo_user', table_name='titanic'))
print(type(titanic_df))
titanic_df.head(5)

<class 'teradataml.dataframe.dataframe.DataFrame'>


PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


### DataFrame 메타 정보
* pandas DataFrame과 유사하게, info(), describe() 및 shape, dtypes, sizes, columns, shape 제공
* tdTypes 속성은 DB 테이블/뷰의 컬럼 타입을 나타냄. 

In [64]:
titanic_df.info()

<class 'teradataml.dataframe.dataframe.DataFrame'>
Data columns (total 12 columns):
PassengerId      int
Survived         int
Pclass           int
Name             str
Sex              str
Age            float
SibSp            int
Parch            int
Ticket           str
Fare           float
Cabin            str
Embarked         str
dtypes: int(5), str(5), float(2)


In [65]:
titanic_df.describe()

func,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.454
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.384,2.309,29.699,0.523,0.382,32.204
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.329
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.91
std,257.354,0.487,0.836,14.526,1.103,0.806,49.693


In [66]:
# 컬럼의 파이썬 타입
titanic_df.dtypes

COLUMN NAME,TYPE
PassengerId,int
Survived,int
Pclass,int
Name,str
Sex,str
Age,float
SibSp,int
Parch,int
Ticket,str
Fare,float


In [67]:
# DB table/view 컬럼 타입
titanic_df.tdtypes

COLUMN NAME,TYPE
PassengerId,BIGINT()
Survived,BIGINT()
Pclass,BIGINT()
Name,"VARCHAR(length=1024, charset='UNICODE')"
Sex,"VARCHAR(length=1024, charset='UNICODE')"
Age,FLOAT()
SibSp,BIGINT()
Parch,BIGINT()
Ticket,"VARCHAR(length=1024, charset='UNICODE')"
Fare,FLOAT()


In [68]:
# 컬럼명을 파이썬 리스트로 반환
titanic_df.columns

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [69]:
titanic_df.shape

(891, 12)

### DataFrame Index
* teradataml DataFrame 생성 시 DataFrame Index는 무조건적으로 생성됨(Pandas DataFrame도 마찬가지)
* pandas DataFrame Index와 다르게 teradataml DataFrame의 Index는 물리적으로 생성되는 것이 아닌 DB View내에서 row_number() over (order by index 컬럼)으로 지정됨
* DataFrame의 Index와 DB Table의 Index는 차이가 있음. 

In [70]:
# Pandas DataFrame을 DB Table로 생성. Primary key가 없는 Table 생성.
copy_to_sql(df=pd_titanic_df, table_name='titanic_nopk', if_exists='replace')

In [71]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,titanic_nopk,O,demo_user,"\rCREATE multiset TABLE titanic_nopk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rNO PRIMARY INDEX"
1,demo_user,ml___frmqry_v_1710824136076046,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710824136076046"" AS SELECT \r\t \r\tcast('count' as varchar(6)) as ""func"", \r\tCAST(count(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(count(""Survived"") AS NUMBER) AS ""Survived"", CAST(count(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(count(""Age"") AS NUMBER) AS ""Age"", CAST(count(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(count(""Parch"") AS NUMBER) AS ""Parch"", CAST(count(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('mean' as varchar(6)) as ""func"", \r\tCAST(avg(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(avg(""Survived"") AS NUMBER) AS ""Survived"", CAST(avg(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(avg(""Age"") AS NUMBER) AS ""Age"", CAST(avg(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(avg(""Parch"") AS NUMBER) AS ""Parch"", CAST(avg(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('std' as varchar(6)) as ""func"", \r\tCAST(stddev_samp(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(stddev_samp(""Survived"") AS NUMBER) AS ""Survived"", CAST(stddev_samp(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(stddev_samp(""Age"") AS NUMBER) AS ""Age"", CAST(stddev_samp(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(stddev_samp(""Parch"") AS NUMBER) AS ""Parch"", CAST(stddev_samp(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('min' as varchar(6)) as ""func"", \r\tCAST(min(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(min(""Survived"") AS NUMBER) AS ""Survived"", CAST(min(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(min(""Age"") AS NUMBER) AS ""Age"", CAST(min(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(min(""Parch"") AS NUMBER) AS ""Parch"", CAST(min(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('25%' as varchar(6)) as ""func"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('50%' as varchar(6)) as ""func"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('75%' as varchar(6)) as ""func"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('max' as varchar(6)) as ""func"", \r\tCAST(max(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(max(""Survived"") AS NUMBER) AS ""Survived"", CAST(max(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(max(""Age"") AS NUMBER) AS ""Age"", CAST(max(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(max(""Parch"") AS NUMBER) AS ""Parch"", CAST(max(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"""
2,demo_user,ml__select__1710828109166316,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710828109166316"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"
3,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
4,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
5,demo_user,space_report,P,DBC,
6,demo_user,remove_data,P,DBC,
7,demo_user,get_data,P,DBC,


In [72]:
# PK가 없는 테이블일 경우 첫번째 컬럼으로 row_number() over (order by 첫번째 컬럼) 으로 index 생성
titanic_nopk_df = DataFrame(table_name='titanic_nopk', index_label='PassengerId')
titanic_nopk_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [73]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710826588406894,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710826588406894"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_nopk"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,titanic_nopk,O,demo_user,"\rCREATE multiset TABLE titanic_nopk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rNO PRIMARY INDEX"
2,demo_user,ml___frmqry_v_1710824136076046,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710824136076046"" AS SELECT \r\t \r\tcast('count' as varchar(6)) as ""func"", \r\tCAST(count(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(count(""Survived"") AS NUMBER) AS ""Survived"", CAST(count(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(count(""Age"") AS NUMBER) AS ""Age"", CAST(count(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(count(""Parch"") AS NUMBER) AS ""Parch"", CAST(count(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('mean' as varchar(6)) as ""func"", \r\tCAST(avg(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(avg(""Survived"") AS NUMBER) AS ""Survived"", CAST(avg(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(avg(""Age"") AS NUMBER) AS ""Age"", CAST(avg(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(avg(""Parch"") AS NUMBER) AS ""Parch"", CAST(avg(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('std' as varchar(6)) as ""func"", \r\tCAST(stddev_samp(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(stddev_samp(""Survived"") AS NUMBER) AS ""Survived"", CAST(stddev_samp(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(stddev_samp(""Age"") AS NUMBER) AS ""Age"", CAST(stddev_samp(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(stddev_samp(""Parch"") AS NUMBER) AS ""Parch"", CAST(stddev_samp(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('min' as varchar(6)) as ""func"", \r\tCAST(min(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(min(""Survived"") AS NUMBER) AS ""Survived"", CAST(min(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(min(""Age"") AS NUMBER) AS ""Age"", CAST(min(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(min(""Parch"") AS NUMBER) AS ""Parch"", CAST(min(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('25%' as varchar(6)) as ""func"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('50%' as varchar(6)) as ""func"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('75%' as varchar(6)) as ""func"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('max' as varchar(6)) as ""func"", \r\tCAST(max(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(max(""Survived"") AS NUMBER) AS ""Survived"", CAST(max(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(max(""Age"") AS NUMBER) AS ""Age"", CAST(max(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(max(""Parch"") AS NUMBER) AS ""Parch"", CAST(max(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"""
3,demo_user,ml__select__1710828109166316,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710828109166316"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"
4,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
5,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
6,demo_user,space_report,P,DBC,
7,demo_user,remove_data,P,DBC,
8,demo_user,get_data,P,DBC,


In [75]:
# Primary Key가 있는 Table 생성
copy_to_sql(df=pd_titanic_df, table_name='titanic_pk', if_exists='replace', primary_index='PassengerId')

In [76]:
titanic_pk_df = DataFrame(table_name='titanic_pk')
titanic_pk_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [77]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710827932360474,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827932360474"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_pk"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,titanic_pk,T,demo_user,"\rCREATE multiset TABLE titanic_pk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
2,demo_user,ml__select__1710826588406894,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710826588406894"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_nopk"") as temp_table where tdml_row_num < 3) as temp_table"
3,demo_user,titanic_nopk,O,demo_user,"\rCREATE multiset TABLE titanic_nopk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rNO PRIMARY INDEX"
4,demo_user,ml___frmqry_v_1710824136076046,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710824136076046"" AS SELECT \r\t \r\tcast('count' as varchar(6)) as ""func"", \r\tCAST(count(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(count(""Survived"") AS NUMBER) AS ""Survived"", CAST(count(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(count(""Age"") AS NUMBER) AS ""Age"", CAST(count(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(count(""Parch"") AS NUMBER) AS ""Parch"", CAST(count(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('mean' as varchar(6)) as ""func"", \r\tCAST(avg(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(avg(""Survived"") AS NUMBER) AS ""Survived"", CAST(avg(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(avg(""Age"") AS NUMBER) AS ""Age"", CAST(avg(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(avg(""Parch"") AS NUMBER) AS ""Parch"", CAST(avg(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('std' as varchar(6)) as ""func"", \r\tCAST(stddev_samp(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(stddev_samp(""Survived"") AS NUMBER) AS ""Survived"", CAST(stddev_samp(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(stddev_samp(""Age"") AS NUMBER) AS ""Age"", CAST(stddev_samp(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(stddev_samp(""Parch"") AS NUMBER) AS ""Parch"", CAST(stddev_samp(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('min' as varchar(6)) as ""func"", \r\tCAST(min(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(min(""Survived"") AS NUMBER) AS ""Survived"", CAST(min(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(min(""Age"") AS NUMBER) AS ""Age"", CAST(min(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(min(""Parch"") AS NUMBER) AS ""Parch"", CAST(min(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('25%' as varchar(6)) as ""func"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('50%' as varchar(6)) as ""func"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('75%' as varchar(6)) as ""func"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('max' as varchar(6)) as ""func"", \r\tCAST(max(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(max(""Survived"") AS NUMBER) AS ""Survived"", CAST(max(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(max(""Age"") AS NUMBER) AS ""Age"", CAST(max(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(max(""Parch"") AS NUMBER) AS ""Parch"", CAST(max(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"""
5,demo_user,ml__select__1710828109166316,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710828109166316"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"
6,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
7,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
8,demo_user,space_report,P,DBC,
9,demo_user,remove_data,P,DBC,


In [78]:
# from_query()로 DataFrame을 생성할 경우, SQL이 크게 복잡하지 않다면 PK 컬럼으로 Index를 구성할 수 있음. 
query = "select pclass, passengerId from demo_user.titanic_pk";

titanic_sql_df = DataFrame.from_query(query)
titanic_sql_df.head(3)

Pclass,PassengerId
3,3
1,2
3,1


In [79]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710827799408526,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827799408526"" AS select ""Pclass"",""PassengerId"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""Pclass"",""PassengerId"" from ""DEMO_USER"".""ml___frmqry_v_1710829512172197"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,ml___frmqry_v_1710829512172197,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710829512172197"" AS select pclass, passengerId from demo_user.titanic_pk"
2,demo_user,ml__select__1710827932360474,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827932360474"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_pk"") as temp_table where tdml_row_num < 3) as temp_table"
3,demo_user,titanic_pk,T,demo_user,"\rCREATE multiset TABLE titanic_pk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
4,demo_user,ml__select__1710826588406894,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710826588406894"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_nopk"") as temp_table where tdml_row_num < 3) as temp_table"
5,demo_user,titanic_nopk,O,demo_user,"\rCREATE multiset TABLE titanic_nopk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rNO PRIMARY INDEX"
6,demo_user,ml___frmqry_v_1710824136076046,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710824136076046"" AS SELECT \r\t \r\tcast('count' as varchar(6)) as ""func"", \r\tCAST(count(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(count(""Survived"") AS NUMBER) AS ""Survived"", CAST(count(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(count(""Age"") AS NUMBER) AS ""Age"", CAST(count(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(count(""Parch"") AS NUMBER) AS ""Parch"", CAST(count(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('mean' as varchar(6)) as ""func"", \r\tCAST(avg(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(avg(""Survived"") AS NUMBER) AS ""Survived"", CAST(avg(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(avg(""Age"") AS NUMBER) AS ""Age"", CAST(avg(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(avg(""Parch"") AS NUMBER) AS ""Parch"", CAST(avg(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('std' as varchar(6)) as ""func"", \r\tCAST(stddev_samp(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(stddev_samp(""Survived"") AS NUMBER) AS ""Survived"", CAST(stddev_samp(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(stddev_samp(""Age"") AS NUMBER) AS ""Age"", CAST(stddev_samp(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(stddev_samp(""Parch"") AS NUMBER) AS ""Parch"", CAST(stddev_samp(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('min' as varchar(6)) as ""func"", \r\tCAST(min(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(min(""Survived"") AS NUMBER) AS ""Survived"", CAST(min(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(min(""Age"") AS NUMBER) AS ""Age"", CAST(min(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(min(""Parch"") AS NUMBER) AS ""Parch"", CAST(min(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('25%' as varchar(6)) as ""func"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('50%' as varchar(6)) as ""func"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('75%' as varchar(6)) as ""func"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('max' as varchar(6)) as ""func"", \r\tCAST(max(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(max(""Survived"") AS NUMBER) AS ""Survived"", CAST(max(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(max(""Age"") AS NUMBER) AS ""Age"", CAST(max(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(max(""Parch"") AS NUMBER) AS ""Parch"", CAST(max(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"""
7,demo_user,ml__select__1710828109166316,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710828109166316"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"
8,demo_user,ml__select__1710823601648766,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710823601648766"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 3) as temp_table"
9,demo_user,titanic,T,demo_user,"\rCREATE multiset TABLE titanic (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"


In [80]:
# from_query()로 DataFrame을 생성할 때 PK가 없는 컬럼들로 이뤄진다면 단순 SQL이라도 무조건 첫번째 컬럼으로 row_number() over (order by 첫번째 컬럼)으로 Index를 생성할 수 있으므로 주의 필요.
query = "select pclass, passengerId from demo_user.titanic_pk";
titanic_sql_df = DataFrame.from_query(query)
titanic_sql_df.head(3)

Pclass,PassengerId
3,3
1,2
3,1


In [81]:
show_current_tables()

Unnamed: 0,DataBaseName,TableName,TableKind,CreatorName,RequestText
0,demo_user,ml__select__1710827994429129,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827994429129"" AS select ""Pclass"",""PassengerId"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""Pclass"",""PassengerId"" from ""DEMO_USER"".""ml___frmqry_v_1710829395671638"") as temp_table where tdml_row_num < 3) as temp_table"
1,demo_user,ml___frmqry_v_1710829395671638,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710829395671638"" AS select pclass, passengerId from demo_user.titanic_pk"
2,demo_user,ml__select__1710827799408526,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827799408526"" AS select ""Pclass"",""PassengerId"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""Pclass"",""PassengerId"" from ""DEMO_USER"".""ml___frmqry_v_1710829512172197"") as temp_table where tdml_row_num < 3) as temp_table"
3,demo_user,ml___frmqry_v_1710829512172197,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710829512172197"" AS select pclass, passengerId from demo_user.titanic_pk"
4,demo_user,ml__select__1710827932360474,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710827932360474"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_pk"") as temp_table where tdml_row_num < 3) as temp_table"
5,demo_user,titanic_pk,T,demo_user,"\rCREATE multiset TABLE titanic_pk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rprimary index( ""PassengerId"" )"
6,demo_user,ml__select__1710826588406894,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710826588406894"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic_nopk"") as temp_table where tdml_row_num < 3) as temp_table"
7,demo_user,titanic_nopk,O,demo_user,"\rCREATE multiset TABLE titanic_nopk (\r\t""PassengerId"" BIGINT, \r\t""Survived"" BIGINT, \r\t""Pclass"" BIGINT, \r\t""Name"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Sex"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Age"" FLOAT, \r\t""SibSp"" BIGINT, \r\t""Parch"" BIGINT, \r\t""Ticket"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Fare"" FLOAT, \r\t""Cabin"" VARCHAR(1024) CHAR SET UNICODE, \r\t""Embarked"" VARCHAR(1024) CHAR SET UNICODE\r)\rNO PRIMARY INDEX"
8,demo_user,ml___frmqry_v_1710824136076046,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml___frmqry_v_1710824136076046"" AS SELECT \r\t \r\tcast('count' as varchar(6)) as ""func"", \r\tCAST(count(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(count(""Survived"") AS NUMBER) AS ""Survived"", CAST(count(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(count(""Age"") AS NUMBER) AS ""Age"", CAST(count(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(count(""Parch"") AS NUMBER) AS ""Parch"", CAST(count(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('mean' as varchar(6)) as ""func"", \r\tCAST(avg(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(avg(""Survived"") AS NUMBER) AS ""Survived"", CAST(avg(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(avg(""Age"") AS NUMBER) AS ""Age"", CAST(avg(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(avg(""Parch"") AS NUMBER) AS ""Parch"", CAST(avg(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('std' as varchar(6)) as ""func"", \r\tCAST(stddev_samp(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(stddev_samp(""Survived"") AS NUMBER) AS ""Survived"", CAST(stddev_samp(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(stddev_samp(""Age"") AS NUMBER) AS ""Age"", CAST(stddev_samp(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(stddev_samp(""Parch"") AS NUMBER) AS ""Parch"", CAST(stddev_samp(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('min' as varchar(6)) as ""func"", \r\tCAST(min(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(min(""Survived"") AS NUMBER) AS ""Survived"", CAST(min(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(min(""Age"") AS NUMBER) AS ""Age"", CAST(min(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(min(""Parch"") AS NUMBER) AS ""Parch"", CAST(min(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"" \runion all\r SELECT \r\t \r\tcast('25%' as varchar(6)) as ""func"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.25) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('50%' as varchar(6)) as ""func"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('75%' as varchar(6)) as ""func"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""PassengerId"" AS NUMBER)) AS NUMBER) AS ""PassengerId"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Survived"" AS NUMBER)) AS NUMBER) AS ""Survived"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Pclass"" AS NUMBER)) AS NUMBER) AS ""Pclass"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Age"" AS NUMBER)) AS NUMBER) AS ""Age"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""SibSp"" AS NUMBER)) AS NUMBER) AS ""SibSp"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Parch"" AS NUMBER)) AS NUMBER) AS ""Parch"", CAST(percentile_cont(0.75) WITHIN GROUP (ORDER BY CAST(""Fare"" AS NUMBER)) AS NUMBER) AS ""Fare"" from ""titanic"" \runion all\r SELECT \r\t \r\tcast('max' as varchar(6)) as ""func"", \r\tCAST(max(""PassengerId"") AS NUMBER) AS ""PassengerId"", CAST(max(""Survived"") AS NUMBER) AS ""Survived"", CAST(max(""Pclass"") AS NUMBER) AS ""Pclass"", CAST(max(""Age"") AS NUMBER) AS ""Age"", CAST(max(""SibSp"") AS NUMBER) AS ""SibSp"", CAST(max(""Parch"") AS NUMBER) AS ""Parch"", CAST(max(""Fare"") AS NUMBER) AS ""Fare"" \rfrom \r\t""titanic"""
9,demo_user,ml__select__1710828109166316,V,demo_user,"CREATE VIEW ""DEMO_USER"".""ml__select__1710828109166316"" AS select ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from (select * from (select row_number() over (order by ""PassengerId"" asc) - 1 as tdml_row_num, ""PassengerId"",""Survived"",""Pclass"",""Name"",""Sex"",""Age"",""SibSp"",""Parch"",""Ticket"",""Fare"",""Cabin"",""Embarked"" from ""titanic"") as temp_table where tdml_row_num < 5) as temp_table"


### DataFrame 액세스
* DataFrame을 컬럼 단위로 액세스 하기 위해서는 [ ]를 사용하며 인자로 파이썬 리스트 [ ]를 입력 받음(단일 컬럼의 경우에도 리스트로 감싸 줘야 함)
* teradataml은 Pandas와는 다르게 1차원 Series를 제공하지 않으며, DataFrame 액세스의 결과로 DataFrame이 반환됨. 
* DataFrame은 row를 액세스 하기 위해 loc[], iloc[] 연산자 및 Boolean Indexing을 함께 제공. 

#### 컬럼레벨 DataFrame 액세스

In [82]:
# 단일 컬럼 액세스를 위해서라도 컬럼명을 리스트로 감싸야 함. 
# DataFrame[]내에 단일 컬럼명을 문자열 값으로 입력하면 SQColumnExpression이 반환됨. 
titanic_df['Pclass']

<teradataml.dataframe.sql._SQLColumnExpression at 0x7fd8fb9b6280>

In [83]:
titanic_df[['Pclass']].head(3)

Pclass
1
1
1


In [84]:
pclass_df = titanic_df[['Pclass']]
print('returned DataFrame type:', type(pclass_df))

returned DataFrame type: <class 'teradataml.dataframe.dataframe.DataFrame'>


In [85]:
pclass_df.head(3)

Pclass
1
1
1


In [86]:
titanic_df[['Pclass', 'Survived']].head(3)

Pclass,Survived
1,1
1,0
1,0


#### DataFrame select() 메소드를 이용한 컬럼레벨 액세스
* select() 메소드의 경우 [] 와 유사한 기능 제공. 다만 단일 컬럼명일 경우 리스트로 감쌀 필요가 없음. 

In [87]:
titanic_df.select('Pclass').head(3)

Pclass
1
1
1


In [88]:
titanic_df.select(['Pclass', 'Survived']).head(3)

Pclass,Survived
1,0
1,1
1,1


#### DataFrame iloc[], loc[]을 이용한 로우 레벨 액세스

In [89]:
titanic_df.iloc[0]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [90]:
titanic_df.iloc[0:2]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [91]:
titanic_df.loc[1, 'PassengerId']

PassengerId
1


#### DataFrame Boolean Indexing을 이용한 액세스
* SQLColumnExpression을 이용
* SQLColumnExpression은 and/or 등의 복합 조건으로도 활용할 수 있음

In [92]:
# Boolean Indexing 수행. titanic_df[['Age']] > 60 이 아닌, 
# titanic_df['Age'] > 60 으로 SQLColumnExpression 기반의 Boolean Indexing 조건 입력 
titanic_df[titanic_df['Age'] > 60]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S
276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S
556,0,1,"Wright, Mr. George",male,62.0,0,0,113807,26.55,,S
97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S
117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S


In [93]:
print(type(titanic_df['Age']))

<class 'teradataml.dataframe.sql._SQLColumnExpression'>


In [None]:
# 아래는 오류가 발생합니다. 
titanic_df[titanic_df[['Age']] > 60]

In [94]:
# 아래는 복합조건으로 Boolean Indxing을 수행합니다. 
titanic_df[titanic_df['Survived'] == 1 and titanic_df['Age'] > 60 ]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
556,0,1,"Wright, Mr. George",male,62.0,0,0,113807,26.55,,S
852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S
253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S
97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S
117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S


In [95]:
# SQLColumnExpression은 변수값으로도 할당할 수 있습니다. 
# 아래는 복합조건으로 Boolean Indexing을 수행합니다. 
cond1 = titanic_df['Survived'] == 1
cond2 = titanic_df['Age'] > 60

titanic_df[cond1 and cond2]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
556,0,1,"Wright, Mr. George",male,62.0,0,0,113807,26.55,,S
97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S
276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S
457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S


In [96]:
# SQLColumnExpression은 아래와 같은 형태로도 가능
titanic_df[titanic_df.Survived == 1]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0,A23,S
652,1,2,"Doling, Miss. Elsie",female,18.0,0,1,231919,23.0,,S
242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q
570,1,3,"Jonsson, Mr. Carl",male,32.0,0,0,350417,7.8542,,S
692,1,3,"Karun, Miss. Manca",female,4.0,0,1,349256,13.4167,,C
856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18.0,0,1,392091,9.35,,S
448,1,1,"Seward, Mr. Frederic Kimber",male,34.0,0,0,113794,26.55,,S
713,1,1,"Taylor, Mr. Elmer Zebley",male,48.0,1,0,19996,52.0,C126,S
427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28.0,1,0,2003,26.0,,S
326,1,1,"Young, Miss. Marie Grice",female,36.0,0,0,PC 17760,135.6333,C32,C


In [97]:
# 아래와 같이 SQL을 이용할 수도 있습니다. 
stmt = '''
select top 3 * from demo_user.titanic
where Age > 60 and Survived = 1 order by PassengerId
'''

pd.read_sql(text(stmt), eng)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
1,484,1,3,"Turkula, Mrs. (Hedwig)",female,63.0,0,0,4134,9.5875,,S
2,571,1,2,"Harris, Mr. George",male,62.0,0,0,S.W./PP 752,10.5,,S


### DataFrame 다루기

#### assign() 메소드를 이용하여 새로운 컬럼을 추가하기
* pandas DataFrame과는 다르게 dataframe[새로운 컬럼명] = dataframe 컬럼 가공 등으로 새로운 컬럼명을 할당할 수 없으며 assign()메소드를 활용해야 함. 
* DataFrame의 컬럼 추가는 DB Table에 반영되지 않으며 View에만 반영됨. 

In [None]:
# 아래는 오류 발생. 
titanic_df['new_age'] = titanic_df[['Age']] + 10
# 아래 역시 오류 발생
#titanic_df['new_age'] = titanic_df['Age'] + 10

In [98]:
# assign()의 인자로는 titanic_df.Age와 같이 SQLColumnExpression이 입력되어야 함. 컬럼명만 부여하면 오류 발생. 
# 신규컬럼명 = 기존컬럼의 SQLColumnExression + 가공 연산과 같은 형태로 assign()인자가 입력되어야 함. 
titanic_addcol_df = titanic_df.assign(New_Age = titanic_df.Age + 10)
# assign() 수행 결과로 신규 DataFrame을 반홤함. 기존 DataFrame을 변경하지 않음. 
# 만약 기존 DataFrame에 assign() 수행 결과를 적용하고자 한다면 반환 변수를 기존 DataFrame 객체 변수로 설정하면 됨. 
#titanic_df = titanic_df.assign(New_Age = titanic_df.Age + 10)
titanic_addcol_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,New_Age
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,36.0
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C,48.0
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,32.0


In [99]:
# 아래와 같이 여러개의 컬럼들도 assign()으로 추가할 수 있음. 
titanic_addcol_df = titanic_df.assign(New_Age_01 = titanic_df.Age + 20, 
                                      New_Fare_01 = titanic_df.Fare * titanic_df.SibSp)
titanic_addcol_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,New_Age_01,New_Fare_01
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,46.0,0.0
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C,58.0,71.2833
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,42.0,7.25


In [100]:
titanic_addcol_df = titanic_df.assign(concat_str="Passenger Name: " + titanic_df.Name)
titanic_addcol_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,concat_str
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,"Passenger Name: Heikkinen, Miss. Laina"
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C,"Passenger Name: Cumings, Mrs. John Bradley (Florence Briggs Thayer)"
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,"Passenger Name: Braund, Mr. Owen Harris"


In [101]:
query = '''
select A.*, Age + 20 as New_Age, Fare * SibSp as New_Fare_01
from
(
    select * 
    from demo_user.titanic
) A
'''

titanic_addcol_df = DataFrame.from_query(query, index=True, index_label='PassengerId')
titanic_addcol_df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,New_Age,New_Fare_01
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,46.0,0.0
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C,58.0,71.2833
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,42.0,7.25


In [102]:
# 아래와 같이 SQL을 이용할 수도 있습니다. 
query = '''
select top 3 A.*, Age + 20 as New_Age, Fare * SibSp as New_Fare_01
from
(
    select * 
    from demo_user.titanic
) A
'''

pd.read_sql(query, eng)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,New_Age,New_Fare_01
0,469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q,,0.0
1,734,0,2,"Berriman, Mr. William John",male,23.0,0,0,28425,13.0,,S,43.0,0.0
2,265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q,,0.0


#### DataFrame 컬럼 Drop
* Pandas DataFrame과 유사하게 drop() 메소드를 제공함. 다른 점은 inplace=True를 제공하지 않음. 
* terdrop() 메소드 수행 결과가 삭제된 컬럼을 반영한 DataFrame으로 반환됨. 기존 DataFrame은 변경하지 않음. 
* 만약 기존 DataFrame에 변경 적용하려면 반환 변수를 기존 DataFrame 객체 변수로 설정하면 됨. 
* drop() 메소드의 인자로 labels는 삭제될 컬럼명, axis는 0의 경우 index 레벨로, 1의 경우 column 레벨로 삭제. columns는 labels와 axis를 명시하지 않아도 무조건 column 단위로 삭제됨. 

In [None]:
# labels 인자값으로 drop() 수행. 
drop_titanic_df_01 = titanic_df.drop(labels=['Pclass', 'Survived'], axis=1)
drop_titanic_df_01.head(3)

In [None]:
# drop()은 컬럼 삭제 결과가 별도의 DataFrame으로 반환되므로 기존 titanic_df는 영향 받지 않음. 
titanic_df.head(3)

In [None]:
# columns 인자값으로 drop() 수행. 
drop_titanic_df_02 = titanic_df.drop(columns=['Pclass', 'Survived'])
drop_titanic_df_02.head(3)

In [None]:
# 만일 drop() 결과가 기존 DataFrame에 반영되고자 한다면 반환 변수를 기존 DataFrame 객체 변수로 설정하면 됨. 
titanic_df = titanic_df.drop(columns=['Pclass', 'Survived'])
titanic_df.head(3)

In [None]:
# 테스트를 위해서 다시 titanic_df DataFrame을 생성. 
titanic_df = DataFrame('titanic')
titanic_df.head(3)

### Null 처리
* Pandas에서 Missing value는 isna() 메소드를 이용하여 추출 가능
* Pandas의 Missing Value는 NA이며 NA는 None, NaN(Not a Number, Inf와 overflow등)임.
* Pandas DataFrame의 NaN은 teradataml DataFrame으로 변환 할 대 None으로 입력됨(Pandas None은 당연히 teradataml None으로 입력됨)
* teradataml의 DataFrame은 None이 곧 NA임(Teradata DB내에서는 None이 Null로 됨)
* pandas와 다르게 teradataml의 isna()는 DataFrame의 메소드가 아니라 SQLColumnExpression의 메소드임. 
* Pandas DataFrame의 isna()메소드는 전체 컬럼에서 Missing value(NA)를 추출할 수 있지만
* teradataml의 DataFrame은 특정 컬럼을 지정해서 isna()를 호출해야 함(SQL 기반이므로 where is null을 모든 컬럼들에 or 조건으로 연결하기에 부담) 
* teradataml의 isna()는 isnull()과 동일. notna()는 notnull()과 동일

In [None]:
# pandas DataFrame에서 isna()의 사용. 
pd_train_df.isna()

In [None]:
# 특정 column값이 Na 인 경우의 데이터 세트를 Boolean indexing으로 추출
pd_train_df[pd_train_df['Cabin'].isna()]

In [None]:
# terdataml DataFrame은 isna()메소드가 없으며 SQLColumnExpression의 isna()로 호출되어야 함. 
# 아래는 오류 발생 
titanic_df.isna()

In [None]:
# 아래와 같이 SQLColumnExpression에 isna()를 Boolean indexing 방식으로 적용해야함. 
# Cabin 컬럼은 DataFrame 레벨에서는 None 이지만, DB 레벨에서는 Null 인 레코드만 where cabin is null 조건으로 걸러냄
titanic_df[titanic_df.Cabin.isna() == True]

### DataFrame Sort
* 특정 컬럼 순으로 DataFrame을 정렬하기 위해서는 sort() 메소드를 활용
* sort() 는 원본 DataFrame을 변경하지 않으며 정렬된 DataFrame을 반환
* ascending=True이면 올림차순, False이면 내림차순

In [None]:
titanic_df.sort(columns="Name", ascending=True)

In [None]:
titanic_df.sort(columns=['Survived', 'Name'], ascending=True)

### DataFrame Group by
* pandas와 유사하게 groupby() 메소드를 지원
* groupby(groupby컬럼)을 수행하면 DataFrameGroupBy 객체를 반환
* DataFrame group by 보다는 SQL group by가 보다 편리
* pandas와는 조금 다른 groupby(). DataGrameGroupBy 객체에서 컬럼 선택시 반드시 group by 컬럼을 포함해야 함. 

In [None]:
titanic_df.count()

In [None]:
grp_df = titanic_df.groupby('Survived')
type(grp_df)

In [None]:
grp_df

In [None]:
# 모든 컬럼에 대해서 group by 컬럼값 레벨로 aggregation 함수인 count() 수행. 
grp_df.count()

In [None]:
# 모든 컬럼이 아닌 특정 컬럼들에 대해서만(group by 컬럼을 포함하지 않을 경우) aggregation 함수 적용이 안됨.
# 아래는 group by 컬럼인 Survived가 포함되지 않아 오류 발생
grp_df[['PassengerId', 'Name']].count()

In [None]:
# 반드시 group by 컬럼을 포함한 DataFrameGroupby 객체에 aggregation 함수를 적용해야 함. 
grp_df[['Survived', 'PassengerId', 'Name']].count()

In [None]:
# DataFrameGroupBy 객체의 모든 컬럼에 count()를 적용한 결과 DataFrame에서 원하는 컬럼만 추출. 
grp_df.count()[['Survived', 'count_PassengerId']]

In [None]:
# DataFrameGroupby에 agg()를 호출하여 여러개의 컬럼에 서로 다른 aggregation 함수 적용 가능
# agg()를 적용할 경우에는 반드시 group by 컬럼을 사용하지 않아도 됨. 
agg_format={'PassengerId':'count', 'Age':'max', 'SibSp':'sum', 'Fare':'mean'}
titanic_df.groupby('Survived').agg(agg_format)

In [None]:
# 아직 Pandas DataFrameGroupby와 같이 aggregation 적용 결과 컬럼명을 임의로 변경할 수는 없음. 
titanic_df.groupby(['Survived']).agg(age_max=('Age', 'max'), 
                                 age_mean=('Age', 'mean'), 
                                 fare_mean=('Fare', 'mean'))

### teradataml DataFrame을 CSV 파일 또는 Pandas DataFrame으로 변환
* csv 파일 저장은 to_csv() 메소드를, Pandas DataFrame 변환은 to_pandas() 메소드 호출

In [5]:
titanic_df.to_csv('titanic_unload.csv')


Data is successfully exported into titanic_unload.csv


In [6]:
# 반드시 all_rows를 True로 할당해야 전체 데이터가 저장됨. 그렇지 않으면 num_rows=99999 이하로 저장됨
titanic_load_pdf = titanic_df.to_pandas(all_rows=True)

In [7]:
import pandas as pd

# 대용량 데이터의 경우 pandas 전환 시 메모리 최적화 어려움. 
# 이 경우 csv로 unload 후 pandas의 read_csv()를 이용하되, 인자로 engine='c'를 부여
titanic_df.to_csv('titanic_unload.csv')
titanic_pdf_by_enginec = pd.read_csv('titanic_unload.csv', engine='c')


Data is successfully exported into titanic_unload.csv
