## 함수리스트 생성

In [1]:
import pandas as pd
import inspect
from inspect import signature

In [2]:
df = pd.read_csv("./data/titanic.csv")

In [3]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [7]:
class UserDefinedFunction():
    def add(one, two:int=2):
        """두개의 인자 값을 더해주는 함수!
            꼭 2개의 인자를 넣어주세요."""
        return one+two

    def substr(one, num):
        """문자를 자르는 함수
            꼭 1개의 인자를 넣어주세요."""
        return one.str[:-num]

In [12]:
def get_functions(function):
    function_classes = function
    result_dict = {}
    for function_class in function_classes:
        #result_dict[eval(function_class + ".__module__")] = []
        method_list = [method for method in dir(function_class) if not method.startswith('__')]
        method_dict = {}

        for method in method_list:
            attr_dict = {}
            #print(method)
            #print(type(method))
            method = eval(function_class.__name__ + "." + method)
            print(method)
            print(type(method))
            
            sig = signature(method)
            
            attr_dict["parameters"] = []
            attr_dict["defaults"] = []
            #attr_dict["kind"] = []
            attr_dict["annotations"] = []
            
            for param in sig.parameters.values():
                attr_dict["parameters"].append(param.name)
                if 'empty' in str(param.default):
                    attr_dict["defaults"].append("empty")
                else:
                    attr_dict["defaults"].append(str(param.default))
                    
                if 'empty' in str(param.annotation):
                    attr_dict["annotations"].append("empty")
                else:
                    attr_dict["annotations"].append(str(param.annotation).split()[1][1:-2])
                #attr_dict["kind"].append(param.kind)
                    
            attr_dict["comment"] = inspect.getdoc(method)
            attr_dict["len"] = method.__code__.co_argcount
            #attr_dict["source"] = inspect.getsource(method)

            value_list = []
#             for value in list(method.__annotations__.values()):
#                 value_list.append(str(value).split()[1][1:-2])
#             attr_dict["annotations"] = value_list

            method_dict[method.__name__] = attr_dict

            result_dict[function_class.__name__] = method_dict
    return result_dict

In [13]:
get_functions([UserDefinedFunction])

<function UserDefinedFunction.add at 0x7f9106ee30d0>
<class 'function'>
<function UserDefinedFunction.substr at 0x7f9106ee3160>
<class 'function'>


{'UserDefinedFunction': {'add': {'parameters': ['one', 'two'],
   'defaults': ['empty', '2'],
   'annotations': ['empty', 'int'],
   'comment': '두개의 인자 값을 더해주는 함수!\n꼭 2개의 인자를 넣어주세요.',
   'len': 2},
  'substr': {'parameters': ['one', 'num'],
   'defaults': ['empty', 'empty'],
   'annotations': ['empty', 'empty'],
   'comment': '문자를 자르는 함수\n꼭 1개의 인자를 넣어주세요.',
   'len': 2}}}

In [684]:
def param_info(func):
    sig = signature(func)
    for param in sig.parameters.values():
        print(param.name)
        print(' -', param.default)
        print(' -', param.kind)
        print(' -', param.annotation)

In [679]:
param_info(UserDefinedFunction.add)

one
 - 1
 - POSITIONAL_OR_KEYWORD
 - <class 'str'>
two
 - 2
 - POSITIONAL_OR_KEYWORD
 - <class 'int'>


## .py 파일 불러오기 -> 추후 파일 안의 클래스 읽기 필요함

In [7]:
import os
 
path_dir = './function'
 
file_list = os.listdir(path_dir)

In [8]:
file_list = [file[:-3] for file in file_list if file.endswith("py")]

In [9]:
file_list

['read_function', 'user_defined_function', 'write_function']

## 고려 사항

In [None]:
df.dropna(subset=["Sex", "Name"])

input column
- 타입구분(int, float, boolean, str(object), datetime) 
- 복수(dropna) / 단수 구분 

input data
- 타입구분(int, float, boolean, str(object), datetime) -> 모두 str로 적용되므로 함수내에서 수정 필요
- 라디오 버튼, 직접입력, 콤보박스

output type
- 단일 컬럼(단순 연산, 치환, ...) -> 기존컬럼 or new
- 복수 컬럼(one-hot-encoding, concat 필요) -> new
- 컬럼이 없는 경우(dropna, fillna) -> no

- preview 버튼 생성 - o
- 모든 컬럼은 새롭게 추가되는 형식으로 
---
- parmeters에 df의 column 명만 들어갈 수 있음(숫자 사용 x)
- 복수의 컬럼에 적용하기 어려움
- 컬렴의 타입을 유추하기 어려움
---

- 컬럼 : 복수형이면서 int타입일 경우
- target column이 하나/new/없는 경우

---
- 데이터 미리보기 후 n개의 컬럼을 불러 올 때

## run(numpy 함수) 적용

In [12]:
import pandas as pd

In [13]:
df = pd.read_csv("./data/titanic.csv")

In [404]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [9]:
class UserDefinedRunFunction():
    def plus(ser1, ser2):
        """두개의 인자 값을 더해주는 함수!
            꼭 2개의 인자를 넣어주세요."""
        return ser1 + ser2

    def substr(ser):
        """문자를 자르는 함수
            꼭 1개의 인자를 넣어주세요."""
        return ser.str[:-2]
    
    # 파라미터들은 컬럼으로만 받기 때문에 num이라는 특정 숫자를 받지 못함
    def num_round(ser, num):
        return ser.round(num)
    
    def one_hot_encoding(ser):
        return pd.get_dummies(ser)
    
    # 복수 컬럼은 받기 어려운 구조
    def drop_na(ser):
        return 

In [10]:
get_functions(UserDefinedRunFunction)

{'UserDefinedRunFunction': {'drop_na': {'parameters': ['ser'],
   'defaults': ['empty'],
   'annotations': ['empty'],
   'comment': None,
   'len': 1},
  'num_round': {'parameters': ['ser', 'num'],
   'defaults': ['empty', 'empty'],
   'annotations': ['empty', 'empty'],
   'comment': None,
   'len': 2},
  'one_hot_encoding': {'parameters': ['ser'],
   'defaults': ['empty'],
   'annotations': ['empty'],
   'comment': None,
   'len': 1},
  'plus': {'parameters': ['ser1', 'ser2'],
   'defaults': ['empty', 'empty'],
   'annotations': ['empty', 'empty'],
   'comment': '두개의 인자 값을 더해주는 함수!\n꼭 2개의 인자를 넣어주세요.',
   'len': 2},
  'substr': {'parameters': ['ser'],
   'defaults': ['empty'],
   'annotations': ['empty'],
   'comment': '문자를 자르는 함수\n꼭 1개의 인자를 넣어주세요.',
   'len': 1}}}

In [14]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [15]:
path = "./data/titanic.csv"
function_name = "plus"
#apply_column = ["Age"]
parameters = ["PassengerId", "Survived"]
target_column = "Pclass"
new_column_name = ""

In [16]:
x = ""
for param in parameters:
            x += "df['{}'],".format(param)
x = x[:-1]
print(x)

df['PassengerId'],df['Survived']


In [17]:
if target_column == "new":
    user_function = "UserDefinedRunFunction." + function_name + "(" + x + ")"
    print(user_function)
    new_column = eval(user_function)
    df = pd.concat([df, new_column], axis=1)
else:
    user_function = "UserDefinedRunFunction." + function_name + "(" + x + ")"
    df[target_column] = eval(user_function)

In [18]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,1,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,3,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,4,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,5,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,5,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## apply 적용

In [132]:
df = pd.read_csv("./data/titanic.csv")

In [133]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [230]:
class UserDefinedApplyFunction():
    def changeSex2Num(col):
        if col == "male":
            return 1
        else:
            return 0
        
    # 파라미터들은 컬럼으로만 받기 때문에 num이라는 특정 숫자르 받지 못함
    def num_round(col, num):
        return col.round(num)
    
    def one_hot_encoding(col):
        return pd.get_dummies(col)

In [231]:
get_functions(UserDefinedApplyFunction)

{'UserDefinedApplyFunction': {'changeSex2Num': {'parameters': ['col'],
   'comment': None,
   'len': 1,
   'defaults': [],
   'annotations': []},
  'num_round': {'parameters': ['col', 'num'],
   'comment': None,
   'len': 2,
   'defaults': [],
   'annotations': []},
  'one_hot_encoding': {'parameters': ['col'],
   'comment': None,
   'len': 1,
   'defaults': [],
   'annotations': []}}}

In [232]:
path = "./data/titanic.csv"
function_name = "changeSex2Num"
#apply_column = ["Age"]
parameters = ["Sex"]
target_column = "Sex"
new_column_name = ""

In [233]:
user_function = eval("UserDefinedApplyFunction."+function_name)

In [234]:
df[target_column] = df[target_column].apply(user_function)

In [235]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,,S


In [219]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
female           uint8
male             uint8
female           uint8
male             uint8
female           uint8
male             uint8
female           uint8
male             uint8
dtype: object

In [218]:
df["Ticket"].dtypes

dtype('O')

In [220]:
str(df["Ticket"].dtypes)

'object'

## apply 적용2

In [470]:
from pandas.core.series import Series
from pandas.core.frame import DataFrame

In [412]:
df = pd.read_csv("./data/titanic.csv")

In [413]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### 숫자 parameter 테스트

In [460]:
type(df["Name"])

pandas.core.series.Series

In [511]:
class UserDefinedApplyFunction2():
    def plus(df, col1:Series, col2:Series):
        """두개의 인자 값을 더해주는 함수!
            꼭 2개의 인자를 넣어주세요."""
        return df[col1] + df[col2]

    # columns을 던질때 복수형이면서 int형은 어떻게 처리할 것 인지
    def substr(df, col:Series=[], num:int=int):
        """문자를 자르는 함수
            꼭 1개의 인자를 넣어주세요."""
        return df[col][:-int(num)]
    
#     def one_hot_encoding(df, col):
#         return pd.get_dummies(df[col])

In [510]:
get_functions(UserDefinedApplyFunction2)

{'UserDefinedApplyFunction2': {'plus': {'parameters': ['df', 'col1', 'col2'],
   'comment': '두개의 인자 값을 더해주는 함수!\n            꼭 2개의 인자를 넣어주세요.',
   'len': 3,
   'defaults': [],
   'annotations': ['pandas.core.series.Series', 'pandas.core.series.Series']},
  'substr': {'parameters': ['df', 'col', 'num'],
   'comment': '문자를 자르는 함수\n            꼭 1개의 인자를 넣어주세요.',
   'len': 3,
   'defaults': [[], int],
   'annotations': ['pandas.core.series.Series', 'int']}}}

In [489]:
path = "./data/titanic.csv"
function_name = "substr"
#apply_column = ["Age"]
parameters = ["Sex", "2"]
target_column = "Pclass"
new_column_name = ""

In [490]:
x = ""
for param in parameters:
            x += "'{}',".format(param)
#x = x[:-1]
print(x)

'Sex','2',


In [491]:
#df.apply(UserDefinedApplyFunction.plus, args=("PassengerId", "Survived", ), axis=1)
user_function =  "df.apply(UserDefinedApplyFunction2." + function_name + ", args=(" + x +"), axis=1)"
user_function

"df.apply(UserDefinedApplyFunction2.substr, args=('Sex','2',), axis=1)"

In [492]:
eval(user_function)

0        ma
1      fema
2      fema
3      fema
4        ma
       ... 
886      ma
887    fema
888    fema
889      ma
890      ma
Length: 891, dtype: object

In [None]:
if target_column == "new":
    user_function = "UserDefinedRunFunction." + function_name + "(" + x + ")"
    print(user_function)
    new_column = eval(user_function)
    df = pd.concat([df, new_column], axis=1)
else:
    user_function = "UserDefinedRunFunction." + function_name + "(" + x + ")"
    df[target_column] = eval(user_function)

## run 적용2

In [584]:
df = pd.read_csv("./data/titanic.csv")

In [585]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [591]:
class UserDefinedRunFunction2():
    
    def one_hot_encoding(df, col):
        print(id(df))
        return pd.get_dummies(df[col])
    
    def dropna(df, cols):
        cols.split
        return df.dropna(subset=cols)
    

### one-hot-encoding

In [571]:
path = "./data/titanic.csv"
function_name = "one_hot_encoding"
#apply_column = ["Age"]
parameters = ["Sex"]
target_column = "new"
new_column_name = ""

In [572]:
x = ""
for param in parameters:
            x += "'{}',".format(param)
x = "df, " + x[:-1]
print(x)

df, 'Sex'


In [573]:
user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"

In [574]:
user_function

"UserDefinedRunFunction2.one_hot_encoding(df, 'Sex')"

In [575]:
eval(user_function)

1954142546096


Unnamed: 0,female,male
0,0,1
1,1,0
2,1,0
3,1,0
4,0,1
...,...,...
886,0,1
887,1,0
888,1,0
889,0,1


output의 타입을 받아오는 곳이 없음. new/target/no

In [588]:
if target_column == "new":
    user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"
    print(user_function)
    new_column = eval(user_function)
    df = pd.concat([df, new_column], axis=1)
else:
    user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"
    df[target_column] = eval(user_function)

UserDefinedRunFunction2.one_hot_encoding(df, 'Sex')
1954142546000


### dropna

In [19]:
class UserDefinedRunFunction2():
    
    def one_hot_encoding(df, col):
        print(id(df))
        return pd.get_dummies(df[col])
    
    def dropna(df, cols):
        # df.dropna(subset = ["Sex", "Cabin"], thresh=2400, how='any')
        return df.dropna(subset=cols, axis=1)

In [20]:
path = "./data/titanic.csv"
function_name = "dropna"
#apply_column = ["Age"]
parameters = [["Sex", "Cabin"] ]
target_column = "null"
new_column_name = ""

In [22]:
x = ""
for param in parameters:
            x += "'{}',".format(param)
x = "df, " + x[:-1]
print(x)

df, '['Sex', 'Cabin']'


list가 string으로 변환되어 실행이 되지 않음

In [24]:
user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"
user_function

"UserDefinedRunFunction2.dropna(df, '['Sex', 'Cabin']')"

In [25]:
UserDefinedRunFunction2.dropna(df, ['Sex', 'Cabin'])

KeyError: ['Sex', 'Cabin']

In [598]:
if target_column == "new":
    user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"
    print(user_function)
    new_column = eval(user_function)
    df = pd.concat([df, new_column], axis=1)
elif target_column == "null":
    user_function "UserDefinedRunFunction2." + function_name + "(" + x + ")"
else:
    user_function = "UserDefinedRunFunction2." + function_name + "(" + x + ")"
    df[target_column] = eval(user_function)

IndentationError: expected an indented block (<ipython-input-598-d934b7d08df5>, line 7)

In [601]:
a = 1

In [602]:
b = a

In [603]:
id(a) == id(b)

True

### Scaler

In [608]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [631]:
class UserDefinedRunFunction2():
    
    def one_hot_encoding(df, col):
        print(id(df))
        return pd.get_dummies(df[col])
    
    def dropna(df, cols):
        # df.dropna(subset = ["Sex", "Cabin"], thresh=2400, how='any')
        return df.dropna(subset=cols, axis=1)
    
    def scaling(df, how:list = ["minmax", "standard"]):
        if how == "minmax":
            scaler = MinMaxScaler
            return scaler.fit_transform(df)
        elif how == "standard":
            scaler = StandardScaler
            return scaler.fit_transform(df)

In [632]:
get_functions(UserDefinedRunFunction2)

{'UserDefinedRunFunction2': {'dropna': {'parameters': ['df', 'cols'],
   'comment': None,
   'len': 2,
   'defaults': [],
   'annotations': []},
  'one_hot_encoding': {'parameters': ['df', 'col'],
   'comment': None,
   'len': 2,
   'defaults': [],
   'annotations': []},
  'scaling': {'parameters': ['df', 'how', 'scaler'],
   'comment': None,
   'len': 2,
   'defaults': [['minmax', 'standard']],
   'annotations': ['list']}}}