In [144]:
from attributes import *
import polars as pl
import altair as alt

# Load the survey responses that `get` / `basic_analysis` read through the
# notebook-level name `df`. Note that a later cell rebinds `df` to a different
# CSV, so cells that use this file must be run before that one.
df = pl.read_csv("data/2_research_ethic_recognition.csv")

In [145]:
def basic_analysis(name):
    """Summarise the distribution of one column of the notebook-level ``df``.

    Args:
        name: Name of the column of ``df`` to summarise.

    Returns:
        A DataFrame sorted by ``name`` with one row per distinct value and
        these columns:

        * ``len``  - number of rows holding that value,
        * ``pct``  - share of all rows, in percent,
        * ``mean`` - mean of the whole column, repeated on every row,
        * ``std``  - standard deviation of the whole column (polars' default
          ``std()``), repeated on every row.
    """
    count = df.group_by(name).len().sort(name)
    # The intermediate results used to be computed and then discarded, so the
    # function always returned None; return them as a single frame instead.
    return count.with_columns(
        pct=pl.col("len") / pl.col("len").sum() * 100,
        mean=pl.lit(df[name].mean()),
        std=pl.lit(df[name].std()),
    )

In [146]:
def draw_bar_plot(df, x, y, title, MAP, rotate=True, legend=True):
    """Draw a bar chart of ``y`` per ``x`` with the value printed on top of each bar.

    Args:
        df: DataFrame whose ``.plot.bar()`` yields an Altair chart (presumably
            a polars DataFrame). This parameter shadows the notebook-level ``df``.
        x: Encoding shorthand for the category field; used for the x position
            and for the bar colour.
        y: Encoding shorthand for the bar height; also used as the text label.
        title: Chart title, also set as the title of the (hidden) x axis.
        MAP: Optional mapping whose values, in iteration order, define the
            category order of the x axis and of the colour scale. ``None``
            keeps Altair's default ordering.
        rotate: Unused; kept so that existing callers keep working.
        legend: If true, show a horizontal colour legend below the chart;
            otherwise hide the legend.

    Returns:
        The layered chart ``bar + text``.
    """
    # The sort order ends up in the chart's JSON spec, so pass a real list
    # rather than a `dict_values` view, and build it only once.
    order = None if MAP is None else list(MAP.values())

    bar_x = alt.X(x, title=title, axis=None)
    txt_x = alt.X(x)
    lgd_x = alt.Legend(orient='bottom', direction='horizontal') if legend else None
    color = alt.Color(x, title=None, legend=lgd_x)

    if order is not None:
        bar_x = bar_x.sort(order)
        txt_x = txt_x.sort(order)
        color = color.sort(order)

    bar = df.plot.bar().encode(
        x=bar_x,
        y=alt.Y(y, title=None),
        color=color,
    ).properties(
        title=title,
        width=400,
    )

    # Text layer derived from the bar layer: same data, value drawn in black
    # with its baseline sitting on the top of the bar.
    text = bar.mark_text(
        align="center",
        baseline="bottom",
    ).encode(
        x=txt_x,
        y=y,
        text=y,
        color=alt.value("black"),
    )
    return bar + text

In [147]:
def get(title, _2022, _2023, importance, data=None):
    """Combine the 2022/2023 count tables with 2024 counts taken from the raw responses.

    Args:
        title: Name of the response column; also the join key of ``_2022`` / ``_2023``.
        _2022: DataFrame with the columns ``title`` and ``"2022"`` (count per level).
        _2023: DataFrame with the columns ``title`` and ``"2023"`` (count per level).
        importance: Response levels expected in the column. Every level that
            does not occur in the 2024 responses is added with a count of 0.
        data: DataFrame holding the 2024 responses. Defaults to the
            notebook-level ``df`` (the previous behaviour).

    Returns:
        ``_2022`` left-joined with ``_2023`` and the 2024 table, with the added
        columns ``2024``, ``pct2024``, ``mean2024``, ``std2024``, ``total``,
        ``pctAll``, ``meanAll`` and ``stdAll``. Means and standard deviations
        are count-weighted over the levels and repeated on every row; the
        standard deviations divide by the number of responses (population form).
    """
    source = df if data is None else data
    _2024 = source.group_by(title).len()

    # Pad every expected level that nobody chose. The earlier version always
    # appended level 1 and only a single row, which produced a duplicate row
    # (and a null after the join) whenever a different level was missing.
    observed = set(_2024[title].to_list())
    missing = [level for level in importance if level not in observed]
    if missing:
        padding = pl.DataFrame(
            {title: missing, "len": [0] * len(missing)},
            schema=_2024.schema,  # same dtypes as the counted frame so it can be stacked
        )
        _2024 = pl.concat([_2024, padding])

    _2024 = _2024.rename({"len": "2024"}).sort(title)
    n_2024 = pl.col("2024").sum()
    _2024 = _2024.with_columns(
        pct2024=pl.col("2024") / n_2024 * 100,
        mean2024=(pl.col(title) * pl.col("2024")).sum() / n_2024,
    )
    _2024 = _2024.with_columns(
        std2024=(((pl.col(title) - pl.col("mean2024")) ** 2 * pl.col("2024")).sum() / n_2024).sqrt(),
    )

    # `combined` instead of `all`, which shadowed the builtin.
    combined = _2022.join(_2023, on=title, how="left").join(_2024, on=title, how="left")
    combined = combined.with_columns(
        total=pl.col("2022") + pl.col("2023") + pl.col("2024"),
    )

    n_all = pl.col("total").sum()
    combined = combined.with_columns(
        pctAll=pl.col("total") / n_all * 100,
        meanAll=(pl.col(title) * pl.col("total")).sum() / n_all,
    )
    combined = combined.with_columns(
        stdAll=(((pl.col(title) - pl.col("meanAll")) ** 2 * pl.col("total")).sum() / n_all).sqrt(),
    )
    return combined

In [148]:
#
# A. Researchers' perception of the importance of research-ethics awareness and practice
#
title = "연구윤리 인식 및 실천의 중요성 인식"
importance = [1, 2, 3, 4, 5]  # level codes used as the Q1 key column

# 1) counting: per-level counts for the "2022" and "2023" columns
_2022 = pl.DataFrame({"Q1": importance, "2022": [9, 0, 4, 47, 103]})
_2023 = pl.DataFrame({"Q1": importance, "2023": [0, 0, 2, 56, 112]})

# basic_analysis("Q1")
get("Q1", _2022, _2023, importance)

Q1,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,9,0,0,0.0,4.646707,0.536989,9,1.8,4.58,0.709648
2,0,0,1,0.598802,4.646707,0.536989,1,0.2,4.58,0.709648
3,4,2,2,1.197605,4.646707,0.536989,8,1.6,4.58,0.709648
4,47,56,52,31.137725,4.646707,0.536989,155,31.0,4.58,0.709648
5,103,112,112,67.065868,4.646707,0.536989,327,65.4,4.58,0.709648


In [149]:
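# # 
# # B. Researchers' level of research-ethics compliance
# # 
# NOTE(review): this cell is disabled. Before re-enabling it, re-check the
# counts below (they are identical to the Q3_2 counts used later in this
# notebook); the call now passes `importance`, which get() requires.

# # 1) counting
# _2022 = pl.DataFrame({
#     "Q2": importance,
#     "2022": [1, 5, 42, 89, 26],
# })

# _2023 = pl.DataFrame({
#     "Q2": importance,
#     "2023": [0, 6, 55, 75, 34],
# })

# get("Q2", _2022, _2023, importance)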

In [150]:
#
# C. Factors influencing researchers' research-ethics compliance
#
print("연구윤리교육")  # label: research-ethics education

title = "Q3_1"
importance = [1, 2, 3, 4, 5]

# 1) counting: per-level counts for the "2022" and "2023" columns
_2022 = pl.DataFrame({title: importance, "2022": [1, 2, 33, 88, 39]})
_2023 = pl.DataFrame({title: importance, "2023": [2, 4, 38, 90, 36]})

get(title, _2022, _2023, importance)

연구윤리교육


Q3_1,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,1,2,2,1.197605,4.08982,0.795394,5,1.0,3.996,0.779733
2,2,4,4,2.39521,4.08982,0.795394,10,2.0,3.996,0.779733
3,33,38,22,13.173653,4.08982,0.795394,93,18.6,3.996,0.779733
4,88,90,88,52.694611,4.08982,0.795394,266,53.2,3.996,0.779733
5,39,36,51,30.538922,4.08982,0.795394,126,25.2,3.996,0.779733


In [151]:
#
# C. Factors influencing researchers' research-ethics compliance
#
print("연구윤리 관련 보도")  # label: media coverage of research ethics

title = "Q3_2"
importance = [1, 2, 3, 4, 5]

# 1) counting: per-level counts for the "2022" and "2023" columns
_2022 = pl.DataFrame({title: importance, "2022": [1, 5, 42, 89, 26]})
_2023 = pl.DataFrame({title: importance, "2023": [0, 6, 55, 75, 34]})

get(title, _2022, _2023, importance)

연구윤리 관련 보도


Q3_2,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,1,0,5,2.994012,3.868263,0.892758,6,1.2,3.832,0.814724
2,5,6,5,2.994012,3.868263,0.892758,16,3.2,3.832,0.814724
3,42,55,34,20.359281,3.868263,0.892758,131,26.2,3.832,0.814724
4,89,75,86,51.497006,3.868263,0.892758,250,50.0,3.832,0.814724
5,26,34,37,22.155689,3.868263,0.892758,97,19.4,3.832,0.814724


In [152]:
# 
# 다. 연구자의 연구윤리 준수 영향 요인별
# 
print("연구자 간 멘토링")
importance = [1, 2, 3, 4, 5]

title ="Q3_3"

# 1) counting
_2022 = pl.DataFrame({
    title: importance,
    "2022": [1, 1, 13, 81, 67],
})

_2023 = pl.DataFrame({
    title: importance,
    "2023": [0, 3, 18, 8, 65],
})

_2024 = df.group_by(title).len().sort(title)

get(title, _2022, _2023, importance)

연구자 간 멘토링


Q3_3,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,1,0,0,0.0,4.377246,0.662212,1,0.235849,4.360849,0.736178
2,1,3,2,1.197605,4.377246,0.662212,6,1.415094,4.360849,0.736178
3,13,18,11,6.586826,4.377246,0.662212,42,9.90566,4.360849,0.736178
4,81,8,76,45.508982,4.377246,0.662212,165,38.915094,4.360849,0.736178
5,67,65,78,46.706587,4.377246,0.662212,210,49.528302,4.360849,0.736178


In [153]:
# 
# C. Factors influencing researchers' research-ethics compliance
# 
print("관련 규정 및 지침")  # label: related regulations and guidelines

title ="Q3_4"
# Defined here, as in the sibling cells, so that the cell no longer depends on
# whichever cell last assigned `importance` (a later cell sets it to [1, 2]).
importance = [1, 2, 3, 4, 5]

# 1) counting
_2022 = pl.DataFrame({
    title: importance,
    "2022": [1, 2, 27, 90, 43],
})

_2023 = pl.DataFrame({
    title: importance,
    "2023": [0, 1, 24, 86, 59],
})

# `get` recomputes the 2024 counts itself; this binding is kept only because
# it is part of the notebook-level state.
_2024 = df.group_by(title).len().sort(title)

get(title, _2022, _2023, importance)

관련 규정 및 지침


Q3_4,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,1,0,0,0.0,4.281437,0.741307,1,0.2,4.178,0.725476
2,2,1,5,2.994012,4.281437,0.741307,8,1.6,4.178,0.725476
3,27,24,14,8.383234,4.281437,0.741307,65,13.0,4.178,0.725476
4,90,86,77,46.107784,4.281437,0.741307,253,50.6,4.178,0.725476
5,43,59,71,42.51497,4.281437,0.741307,173,34.6,4.178,0.725476


In [154]:
# 
# C. Factors influencing researchers' research-ethics compliance
# 
print("부정행위 검증 및 제재")  # label: misconduct verification and sanctions

title ="Q3_5"
# Defined here, as in the sibling cells, so that the cell no longer depends on
# whichever cell last assigned `importance` (a later cell sets it to [1, 2]).
importance = [1, 2, 3, 4, 5]

# 1) counting
_2022 = pl.DataFrame({
    title: importance,
    "2022": [1, 1, 28, 71, 62],
})

_2023 = pl.DataFrame({
    title: importance,
    "2023": [0, 2, 24, 77, 67],
})

# `get` recomputes the 2024 counts itself; this binding is kept only because
# it is part of the notebook-level state.
_2024 = df.group_by(title).len().sort(title)

get(title, _2022, _2023, importance)

부정행위 검증 및 제재


Q3_5,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,1,0,0,0.0,4.257485,0.628315,1,0.2,4.222,0.713243
2,1,2,1,0.598802,4.257485,0.628315,4,0.8,4.222,0.713243
3,28,24,14,8.383234,4.257485,0.628315,66,13.2,4.222,0.713243
4,71,77,93,55.688623,4.257485,0.628315,241,48.2,4.222,0.713243
5,62,67,59,35.329341,4.257485,0.628315,188,37.6,4.222,0.713243


In [155]:
#
# D. Perception of KIOST
#
print("KIOST의 연구부정행위 제보 방법 인식 여부")

# Rebinds the notebook-level `df`: from here on `get` counts responses from
# this file instead of the one loaded at the top of the notebook.
df = pl.read_csv("data/4_KIOST_research_ethics_system.csv")

title = "Q26"
importance = [1, 2]  # the two answer codes listed for Q26

# 1) counting: per-code counts for the "2022" and "2023" columns
_2022 = pl.DataFrame({title: importance, "2022": [74, 89]})
_2023 = pl.DataFrame({title: importance, "2023": [80, 90]})
_2024 = df.group_by(title).len().sort(title)

get(title, _2022, _2023, importance)


KIOST의 연구부정행위 제보 방법 인식 여부


Q26,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,74,80,87,52.095808,1.479042,0.499561,241,48.2,1.518,0.499676
2,89,90,80,47.904192,1.479042,0.499561,259,51.8,1.518,0.499676


In [156]:
# 
# D. Perception of KIOST
# 
# 2) Awareness of KIOST's procedure for receiving and handling research-misconduct reports
# The printed label used to repeat the previous cell's (Q26) text; it now
# matches the section this cell reports on.
print("KIOST의 연구부정행위 제보 접수 및 처리 절차 인지")

title ="Q28"
importance = [1, 2, 3, 4, 5]

# 1) counting
_2022 = pl.DataFrame({
    title: importance,
    "2022": [4, 14, 88, 44, 13],
})

_2023 = pl.DataFrame({
    title: importance,
    "2023": [7, 19, 78, 51, 15],
})

# `get` recomputes the 2024 counts itself; this binding is kept only because
# it is part of the notebook-level state.
_2024 = df.group_by(title).len().sort(title)

get(title, _2022, _2023, importance)


KIOST의 연구부정행위 제보 방법 인식 여부


Q28,2022,2023,2024,pct2024,mean2024,std2024,total,pctAll,meanAll,stdAll
i64,i64,i64,u32,f64,f64,f64,i64,f64,f64,f64
1,4,7,0,0.0,3.54491,0.859652,11,2.2,3.374,0.879843
2,14,19,15,8.982036,3.54491,0.859652,48,9.6,3.374,0.879843
3,88,78,72,43.113772,3.54491,0.859652,238,47.6,3.374,0.879843
4,44,51,54,32.335329,3.54491,0.859652,149,29.8,3.374,0.879843
5,13,15,26,15.568862,3.54491,0.859652,54,10.8,3.374,0.879843
