In [29]:
import polars as pl
import altair as alt

# Load the raw responses from a path relative to the working directory.
# Later cells read the Q* answer columns (Q1, Q2, Q3_1..Q3_6, Q5) from this frame.
df = pl.read_csv("data/2_research_ethic_recognition.csv")

In [48]:
#
# A. Researchers' recognition of the importance of research-ethics awareness and practice
# Research ethic recognition
#
# Tally how often each Q1 answer occurs (sorted by count), then flatten the
# resulting struct column into plain "Q1" and "count" columns for plotting.
rer_value_counts = df.select(
    pl.col("Q1").value_counts(sort=True)
)
ResearchEthicRecognition = rer_value_counts.unnest("Q1")

# Bar chart of the answer frequencies; left as the cell's last expression so it renders.
alt.Chart(
    ResearchEthicRecognition,
    title="Research ethic recognition",
).mark_bar().encode(
    x=alt.X("Q1"),
    y=alt.Y("count"),
)

In [51]:
#
# B. Researchers' level of research-ethics compliance
# Research ethic compliance
#
# Tally how often each Q2 answer occurs (sorted by count), then flatten the
# resulting struct column into plain "Q2" and "count" columns for plotting.
rec_value_counts = df.select(
    pl.col("Q2").value_counts(sort=True)
)
ResearchEthicCompliance = rec_value_counts.unnest("Q2")

# Bar chart of the answer frequencies; left as the cell's last expression so it renders.
alt.Chart(
    ResearchEthicCompliance,
    title="Research ethic compliance",
).mark_bar().encode(
    x=alt.X("Q2"),
    y=alt.Y("count"),
)

In [112]:
#
# C. Factors influencing researchers' research-ethics compliance
# Research Ethic Compliance Influencing Factors
# 1) raw data processing
#
# Build one answer-frequency table per Q3 sub-question. Each table has the
# question's own column (e.g. "Q3_1") plus a "count" column.
(
    research_ethic_education,
    research_ethic_broadcast,
    communication_mentoring,
    regulation_guideline,
    fraud_verification_sanctions,
    paper_conference_material,
) = (
    df.select(pl.col(question).value_counts()).unnest(question)
    for question in ("Q3_1", "Q3_2", "Q3_3", "Q3_4", "Q3_5", "Q3_6")
)

def check_zero_and_concat(dataframe, column_name, basis=5):
    """Pad a value-counts table so every answer level 1..basis has a row.

    A level nobody selected is absent from ``value_counts`` output, which would
    make later inner joins on the answer value drop that level entirely. This
    helper appends a zero-count row for each absent level.

    The previous version always appended a single row for level ``1`` whenever
    the table was short, which duplicated level 1 (and left the real gap open)
    if a different level was the missing one.

    Args:
        dataframe: Two-column polars frame with ``column_name`` (answer level)
            and ``"count"``. Assumed to use Int64 / UInt32 dtypes, as the
            appended rows are cast to those types.
        column_name: Name of the answer-level column in ``dataframe``.
        basis: Number of answer levels; levels are assumed to be coded
            ``1..basis`` (default 5).

    Returns:
        ``dataframe`` itself. When rows are appended, this happens in place via
        ``DataFrame.extend``; new rows are added at the end, so sort afterwards
        if order matters.
    """
    observed = set(dataframe.get_column(column_name).to_list())
    absent = [level for level in range(1, basis + 1) if level not in observed]
    if not absent:
        return dataframe

    filler = pl.DataFrame({column_name: absent, "count": [0] * len(absent)})
    # Match the dtypes produced by value_counts so extend() accepts the rows.
    filler = filler.with_columns(
        pl.col(column_name).cast(pl.Int64),
        pl.col("count").cast(pl.UInt32),
    )
    return dataframe.extend(filler)

# Make sure every factor table has a row for each answer level before joining.
# The joins below are inner joins, so a level missing from any single table
# would otherwise be dropped for all factors. Q3_6 is padded too, since it goes
# through the same join chain as the other five.
research_ethic_education = check_zero_and_concat(research_ethic_education, "Q3_1")
research_ethic_broadcast = check_zero_and_concat(research_ethic_broadcast, "Q3_2")
communication_mentoring = check_zero_and_concat(communication_mentoring, "Q3_3")
regulation_guideline = check_zero_and_concat(regulation_guideline, "Q3_4")
fraud_verification_sanctions = check_zero_and_concat(fraud_verification_sanctions, "Q3_5")
paper_conference_material = check_zero_and_concat(paper_conference_material, "Q3_6")


# Rename columns to avoid conflicts during join: every table gets a shared
# "value" key and a factor-specific count column.
research_ethic_education = research_ethic_education.rename({"Q3_1": "value", "count": "education_count"})
research_ethic_broadcast = research_ethic_broadcast.rename({"Q3_2": "value", "count": "broadcast_count"})
communication_mentoring = communication_mentoring.rename({"Q3_3": "value", "count": "mentoring_count"})
regulation_guideline = regulation_guideline.rename({"Q3_4": "value", "count": "guideline_count"})
fraud_verification_sanctions = fraud_verification_sanctions.rename({"Q3_5": "value", "count": "sanctions_count"})
paper_conference_material = paper_conference_material.rename({"Q3_6": "value", "count": "conference_count"})

# Perform join operations: one row per answer value, one count column per factor.
joined_df = research_ethic_education.join(research_ethic_broadcast, on="value", how="inner")
joined_df = joined_df.join(communication_mentoring, on="value", how="inner")
joined_df = joined_df.join(regulation_guideline, on="value", how="inner")
joined_df = joined_df.join(fraud_verification_sanctions, on="value", how="inner")
joined_df = joined_df.join(paper_conference_material, on="value", how="inner")
joined_df = joined_df.sort(by="value")
# Answer values become column names after the transpose, so they must be strings.
joined_df = joined_df.with_columns(pl.col("value").cast(pl.String))

# Flip to one row per factor with one column per answer value; the former
# column names are kept in a header column.
ResearchEthicComplianceInfluencingFactors = joined_df.transpose(include_header=True, column_names="value")

In [123]:
#
# C. Factors influencing researchers' research-ethics compliance
# Research Ethic Compliance Influencing Factors
# 2) get factors
#

# `summation` is created in its own with_columns call. Expressions inside a
# single call are all evaluated against the input frame, so `average` cannot
# see a `summation` column that is being created alongside it. On a fresh
# kernel the one-call version raised ColumnNotFoundError and only worked when
# the cell was re-run after a previous run had already added the column.
ResearchEthicComplianceInfluencingFactors = ResearchEthicComplianceInfluencingFactors.with_columns(
    # Total number of responses per factor.
    summation = pl.col("1") + pl.col("2") + pl.col("3") + pl.col("4") + pl.col("5"),
).with_columns(
    # Count-weighted mean of the answer values 1..5 per factor.
    average = (pl.col("1")*1 + pl.col("2")*2 + pl.col("3")*3 + pl.col("4")*4 + pl.col("5")*5)/pl.col("summation"),
)

print(ResearchEthicComplianceInfluencingFactors)

shape: (6, 8)
┌──────────────────┬─────┬─────┬─────┬─────┬─────┬───────────┬──────────┐
│ column           ┆ 1   ┆ 2   ┆ 3   ┆ 4   ┆ 5   ┆ summation ┆ average  │
│ ---              ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---       ┆ ---      │
│ str              ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32       ┆ f64      │
╞══════════════════╪═════╪═════╪═════╪═════╪═════╪═══════════╪══════════╡
│ education_count  ┆ 2   ┆ 4   ┆ 22  ┆ 88  ┆ 51  ┆ 167       ┆ 4.08982  │
│ broadcast_count  ┆ 5   ┆ 5   ┆ 34  ┆ 86  ┆ 37  ┆ 167       ┆ 3.868263 │
│ mentoring_count  ┆ 0   ┆ 2   ┆ 11  ┆ 76  ┆ 78  ┆ 167       ┆ 4.377246 │
│ guideline_count  ┆ 0   ┆ 5   ┆ 14  ┆ 77  ┆ 71  ┆ 167       ┆ 4.281437 │
│ sanctions_count  ┆ 0   ┆ 1   ┆ 14  ┆ 93  ┆ 59  ┆ 167       ┆ 4.257485 │
│ conference_count ┆ 2   ┆ 4   ┆ 30  ┆ 92  ┆ 39  ┆ 167       ┆ 3.97006  │
└──────────────────┴─────┴─────┴─────┴─────┴─────┴───────────┴──────────┘


In [126]:
#
# D. Degree of KIOST's own efforts to establish research ethics
# Effort for KIOST Research Ethic Establishment
# TODO: do chi-square test
#
# Answer frequencies for Q5 as plain "Q5" / "count" columns, ordered by answer value.
effort_value_counts = (
    df.select(pl.col("Q5").value_counts())
    .unnest("Q5")
    .sort("Q5")
)

shape: (5, 2)
┌─────┬───────┐
│ Q5  ┆ count │
│ --- ┆ ---   │
│ i64 ┆ u32   │
╞═════╪═══════╡
│ 1   ┆ 2     │
│ 2   ┆ 2     │
│ 3   ┆ 57    │
│ 4   ┆ 78    │
│ 5   ┆ 28    │
└─────┴───────┘
