In [6]:
import polars as pl
import altair as alt       

research_ethics_system = pl.read_csv("data/4_KIOST_research_ethics_system.csv")

In [7]:
def count_y(name):
    return research_ethics_system.group_by(pl.col(name).alias("Y")).len().rename({"len":name})   

def count_list_y(columns):
    df = pl.DataFrame({"Y":[1, 2]})
    for col in columns:
        count = research_ethics_system.group_by(pl.col(col).alias("Y")).len().rename({"len":col})
        df = df.join(count, on="Y", how="left")
    return df

def make_chart(columns):
    charts = []
    for col in columns:
        chart = alt.Chart(research_ethics_system).mark_bar().encode(
            x=f"{col}:O",
            y=f"count(ID):N",
            color=f"{col}:O"
        ).properties(
            title=f"Distribution of {col}"
        )
        charts.append(chart)
    return alt.hconcat(*charts).resolve_scale(
        y='shared'  # Synchronize the y-axis scale across all charts
    )

In [13]:

# 가. KIOST의 연구윤리 관련 규정에 대한 인지 여부

# Loop through each column and create a bar chart
columns = ["Q21_1", "Q21_2", "Q21_3", "Q21_4", "Q21_5"]
system_recognition = count_list_y(columns)
system_recognition.write_csv("figure/4/A_system_recognition.csv")
chart = make_chart(columns).save("figure/4/A_system_recognition.png")

In [14]:
# 
# 나. KIOST의 연구윤리 규정의 내용 파악 정도
# 
columns = ["Q22_1", "Q22_2", "Q22_3", "Q22_4", "Q22_5"]
regulation_understanding = count_list_y(columns)
regulation_understanding.write_csv("figure/4/B_regulation_understanding.csv")
chart = make_chart(columns)
chart.save("figure/4/B_regulation_understanding.png")

In [15]:
# 
# 다. 국가연구개발혁신법 인지 여부
# 

columns = ["Q23"]

law_recognition = count_y("Q23")
law_recognition.write_csv("figure/4/C_law_recognition.csv")
chart = make_chart(columns)
chart.save("figure/4/C_law_recognition.png")

In [16]:
# 
# 라. 국가연구개발혁신법 파악 정도
# 

# Loop through each column and create a bar chart
columns = ["Q24_1", "Q24_2", "Q24_3", "Q24_4", "Q24_5"]
law_understanding = count_list_y(columns)
law_understanding.write_csv("figure/4/D_law_understanding.csv")
charts = make_chart(columns)
charts.save("figure/4/D_law_understanding.png")

In [18]:
# 
# 마. 연구부정행위 위반 인지
# 

columns = ["Q25_1", "Q25_2", "Q25_3", "Q25_4", "Q25_5", "Q25_6", "Q25_7", "Q25_8", "Q25_9", "Q25_10"]
violation_recognition = count_list_y(columns)
violation_recognition.write_csv("figure/4/E_violation_recognition.csv")

chart = make_chart(columns)
chart.save("figure/4/E_violation_recognition.png")

In [21]:
# 
# 바. 연구부정행위 위반 내용 보고 방법
# 

# 26. 귀하는 연구부정행위를 인지하게 되었을 때 KIOST 또는 외부의 관련 기관에 제보하는 방법을 알고 있습니까?
# 27. 귀하는 연구부정행위에 대한 제보가 접수된 후 어떻게 검증되는지 그 절차에 대해 알고 있습니까?
columns = ["Q26", "Q27"]
violation_understanding = count_list_y(columns)
violation_understanding.write_csv("figure/4/F_violation_understanding.csv")

charts = make_chart(columns)
charts.save("figure/4/F_violation_understanding.png")

# 28. 귀하는 KIOST가 연구부정행위에 대한 제보 접수 및 처리를 규정에 따라 잘하고 있다고 생각하십니까?
columns = ["Q28"]
process = count_list_y(columns)
process.write_csv("figure/4/G_process.csv")

charts = make_chart(columns)
charts.save("figure/4/G_process.png")

# 귀하는 KIOST가 연구부정행위에 대한 검증을 객관적이고 공정하게 하고 있다고 생각하십니까?
columns = ["Q29"]
fairness = count_list_y(columns)
fairness.write_csv("figure/4/H_fairness.csv")

charts = make_chart(columns)
charts.save("figure/4/H_fairness.png")

In [78]:
from attributes import *
from scipy import stats

#
# Chi-square test
#
personal_info = pl.read_csv("data/6_personal_info.csv")
recognition_stats = research_ethics_system.join(personal_info, on="ID")

def run_chi2_test(target_name):
    chi_analysis = {}
    for key, value in MAP_PERSON.items():
        target = recognition_stats.pivot(target_name, index=value, values=target_name, aggregate_function='count', sort_columns=True).fill_null(0).drop(value)
        t, p, dof, _ = stats.chi2_contingency(target.to_numpy())
        chi_analysis[key] = {f"{key}_t":t, f"{key}_p":p}
    return chi_analysis

q21 = ["Q21_2", "Q21_3", "Q21_4", "Q21_5"]

q21_1 = pl.from_dict(run_chi2_test("Q21_1")) \
                .unnest("sex") \
                .unnest("age") \
                .unnest("role") \
                .unnest("degree") \
                .unnest("experience") \
                .unnest("field")

for q in q21:
    new_df = pl.from_dict(run_chi2_test(q)) \
                .unnest("sex") \
                .unnest("age") \
                .unnest("role") \
                .unnest("degree") \
                .unnest("experience") \
                .unnest("field")
    q21_1 = q21_1.vstack(new_df)

q21_1.write_csv("figure/4/A_chi2.csv")

  target = recognition_stats.pivot(target_name, index=value, values=target_name, aggregate_function='count', sort_columns=True).fill_null(0).drop(value)
