In [1]:
import pandas as pd
from rich import print

import os

# Output locations used by the rest of the notebook: exported figures,
# interactive HTML charts, and the free-text suggestion CSVs.
img_folder = "images"
html_folder = "htmls"
suggestion_folder = "suggestions"

# exist_ok=True keeps the cell idempotent on re-runs and removes the
# check-then-create race of the separate exists()/mkdir() calls.
for _folder in (img_folder, html_folder, suggestion_folder):
    os.makedirs(_folder, exist_ok=True)

In [2]:
df_en = pd.read_excel("./survey_1.xlsx", header=0)

In [3]:
df_en = df_en.drop(['Timestamp'], axis=1)
column_names_en = list(df_en.columns.values)

In [4]:
df_zh = pd.read_excel("./survey_2.xlsx", header=0)

In [5]:
df_zh = df_zh.drop(['序号', '提交答卷时间', '所用时间', '来源', '来源详情','来自IP'], axis=1)
column_names_zh = list(df_zh.columns.values)

In [6]:
# Insert a 'Q14: Knowledge Graph' column at position 13 of the Mandarin frame:
# split off the columns from index 13 onward, append the new column, then
# re-attach the split-off columns behind it.
tail_df_zh = df_zh.iloc[:, 13:]
df_zh = df_zh.drop(column_names_zh[13:], axis=1)
half = int(len(df_zh.index) / 2)
# NOTE(review): the values below are generated, not read from survey_2.xlsx --
# the first half of the rows get 'Yes' and the remainder 'No'. Any Q14 chart
# therefore includes these synthetic answers; confirm this is intended.
df_zh['Q14: Knowledge Graph'] = pd.Series(['Yes'] * half + ['No'] * (len(df_zh.index) - half), index=df_zh.index)
df_zh = pd.concat([df_zh, tail_df_zh], axis=1)
# Refresh the cached column-name list so positional lookups see the new column.
column_names_zh = list(df_zh.columns.values)

In [7]:
# En
print(column_names_en)
# Zh
print(column_names_zh)

In [8]:
# Unified dataframe
df = pd.DataFrame()

## Question 1 - Interviewee role distribution

In [9]:
def get_col_df(idx: int):
    """Return the idx-th column of both surveys as an (English, Mandarin) pair.

    The column is looked up by position through ``column_names_en`` /
    ``column_names_zh`` in the notebook-level frames ``df_en`` / ``df_zh``.
    """
    en_column = df_en[column_names_en[idx]]
    zh_column = df_zh[column_names_zh[idx]]
    return en_column, zh_column

In [10]:
def naive_value_counts(idx: int):
    """Print and return the answer frequencies of one single-choice question.

    Args:
        idx: zero-based column position of the question in both surveys.

    Returns:
        A pair ``(english_counts, mandarin_counts)`` of plain dicts mapping
        each answer to its count, in ``value_counts()`` order.
    """
    headers = ("==== English Survey ====", "==== Mandarin Survey ====")
    tallies = []
    for header, column in zip(headers, get_col_df(idx)):
        print(header)
        tally = column.value_counts()
        print(tally)
        tallies.append(tally.to_dict())

    return tallies[0], tallies[1]

In [11]:
question_no = 1
role_options_en, role_options_zh = naive_value_counts(idx=question_no - 1)
q1_df_en, q1_df_zh = get_col_df(idx=question_no - 1)

In [12]:
from pandas import DataFrame

def unify_values_in_en(local_df: DataFrame, info_dict: dict, new_role_options: list[str]):
    """Relabel raw survey answers with unified values.

    The i-th key of ``info_dict`` (in iteration order) is paired with the i-th
    entry of ``new_role_options``; every element of ``local_df`` is then
    replaced through that pairing.

    Args:
        local_df: the answers to relabel. Only ``.map`` is called on it; the
            callers in this notebook pass a single column.
        info_dict: dict whose keys are the original answer values. Its key
            order defines the alignment with ``new_role_options``.
        new_role_options: replacement values, positionally aligned with the
            keys of ``info_dict``. Surplus entries are ignored.

    Returns:
        The result of ``local_df.map`` with each answer replaced.

    Raises:
        KeyError: if an answer in ``local_df`` has no replacement, e.g. when
            fewer options than keys were supplied.
    """
    # zip pairs keys and labels positionally in one pass and stops at the
    # shorter input, so a surplus label no longer raises IndexError and the key
    # list is not rebuilt for every label.
    role_matcher = dict(zip(info_dict, new_role_options))
    # Deliberately a lookup rather than map(dict): an unmapped answer should
    # fail loudly instead of turning into NaN.
    output_series = local_df.map(lambda answer: role_matcher[answer])

    return output_series

In [13]:
# Labels are matched by position to the keys of role_options_en, i.e. to the
# order in which value_counts() reported the English Q1 answers.
# NOTE(review): this order is hand-aligned to the current data; re-check it
# whenever survey_1.xlsx changes, because a different frequency ranking would
# silently relabel the answers.
new_role_options_en = ["undergraduate", "postgraduate", "other", "researcher"]
df_en_q1 = unify_values_in_en(
    local_df=q1_df_en,
    info_dict=role_options_en,
    new_role_options=new_role_options_en
)

In [14]:
# Labels are matched by position to the keys of role_options_zh, i.e. to the
# order in which value_counts() reported the Mandarin Q1 answers.
# NOTE(review): this order is hand-aligned to the current data; re-check it
# whenever survey_2.xlsx changes, because a different frequency ranking would
# silently relabel the answers.
new_role_options_zh = ["postgraduate", "undergraduate", "researcher", "other"]
df_zh_q1 = unify_values_in_en(
    local_df=q1_df_zh,
    info_dict=role_options_zh,
    new_role_options=new_role_options_zh
)

In [15]:
df['q1'] = pd.concat([df_en_q1, df_zh_q1], axis=0)
df.head()

Unnamed: 0,q1
0,postgraduate
1,undergraduate
2,postgraduate
3,other
4,undergraduate


In [16]:
import plotly.graph_objects as go

def form_pie_chart(labels: list[str], values: list, title: str):
    """Draw a titled pie chart, display it, and export it as PDF and HTML.

    Args:
        labels: slice labels.
        values: slice sizes, aligned with ``labels``.
        title: chart title.

    The export file names are built from the notebook-level ``question_no`` as
    it stands at call time, inside ``img_folder`` and ``html_folder``.
    """
    fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
    fig.update(layout_title_text=title)
    fig.show()

    export_stem = f"q{question_no}"
    fig.write_image(f"{img_folder}/{export_stem}.pdf")
    fig.write_html(f"{html_folder}/{export_stem}.html")

In [17]:
q1_text = f"Q{question_no}: Which role best describes you?"
q1_data = df.q1.value_counts().to_dict()
form_pie_chart(
    labels=list(q1_data.keys()), 
    values=list(q1_data.values()),
    title=q1_text
)

## Question 2 - Interest in academic news

In [18]:
question_no = 2
options_q2_en, options_q2_zh = naive_value_counts(idx=question_no - 1)
df_q2_en, df_q2_zh = get_col_df(idx=question_no - 1)

In [19]:
new_q2_options_en = [True, False]
unified_q2_en = unify_values_in_en(
    local_df=df_q2_en,
    info_dict=options_q2_en,
    new_role_options=new_q2_options_en
)

In [20]:
new_q2_options_zh = [True, False]
unified_q2_zh = unify_values_in_en(
    local_df=df_q2_zh,
    info_dict=options_q2_zh,
    new_role_options=new_q2_options_zh
)

In [21]:
df['q2'] = pd.concat([unified_q2_en, unified_q2_zh], axis=0)
df.head()

Unnamed: 0,q1,q2
0,postgraduate,True
1,undergraduate,True
2,postgraduate,True
3,other,False
4,undergraduate,True


In [22]:
q2_text = f"Q{question_no}: Are you interested in the latest academic progress and may <br> check the news occasionally?"
q2_data = df.q2.value_counts().to_dict()
form_pie_chart(
    labels=list(q2_data.keys()), 
    values=list(q2_data.values()),
    title=q2_text
)

In [23]:
def print_next_question():
    """Print the English column title at index ``question_no``, if one exists.

    ``question_no`` is one-based for the current question, so used as a
    zero-based index it addresses the following column.
    """
    if question_no >= len(column_names_en):
        return
    print(f"Next Question: {column_names_en[question_no]}")
        
print_next_question()

## Question 3 - What are your primary sources to get information from?

In [24]:
def fullwidth_to_halfwidth(s: str) -> str:
    """Map full-width forms (U+FF01..U+FF5E) to their half-width counterparts.

    Characters outside that range are returned unchanged.
    """
    # The full-width block mirrors ASCII at a fixed offset of 0xFEE0.
    return "".join(
        chr(ord(char) - 0xFEE0) if 0xFF01 <= ord(char) <= 0xFF5E else char
        for char in s
    )

In [25]:
import re

def other_regex(text: str):
    """Extract the comma-separated entries between the first 〖 … 〗 pair.

    When a pair is found, its content is printed and returned split on ",".
    When no pair is found, ``text`` is returned unchanged (a str, not a list).
    """
    # Non-greedy match, so the capture stops at the first closing bracket.
    found = re.search(r'〖(.*?)〗', text)
    if found is None:
        return text
    inner = found.group(1)
    print(inner)
    return inner.split(",")

def remove_brackets(tmp: str, split_str: str) -> list[str]:
    """Drop parenthesised remarks from a multi-choice answer and split it.

    Args:
        tmp: the raw answer string (already converted to half-width).
        split_str: separator between choices. ``", "`` selects the English
            format; any other value selects the Mandarin format, where the
            free-text wrappers ``其他〖…〗`` / ``都没有!若有其他请填空〖…〗`` are
            unwrapped and their content is split on ",".

    Returns:
        The list of individual choices.
    """
    kept = []
    pos = 0
    while pos < len(tmp):
        if tmp[pos] == "(":
            closing = tmp.find(")", pos + 1)
            if closing != -1:
                # Skip the whole "(...)" span, up to the first ")".
                pos = closing + 1
                continue
            # Unmatched "(": keep it verbatim instead of running off the end
            # of the string (previously an IndexError).
        kept.append(tmp[pos])
        pos += 1
    multi = "".join(kept)

    if split_str == ", ":
        # Removing "(...)" can leave one trailing space on a choice; trim it.
        # endswith() also copes with an empty element, where elem[-1] raised.
        return [
            elem[:-1] if elem.endswith(" ") else elem
            for elem in multi.split(split_str)
        ]

    ret = []
    for elem in multi.split(split_str):
        unwrapped = elem.removeprefix("其他〖") \
            .removeprefix("都没有!若有其他请填空〖") \
            .removesuffix("〗")
        ret.extend(unwrapped.split(","))
    return ret

In [26]:
def simplify(s: str, split_s: str = ", ") -> str:
    """Normalise one multi-choice answer and split it into its choices.

    The answer is first converted to half-width characters, then handed to
    ``remove_brackets`` together with ``split_s``; that result is returned.
    NOTE(review): ``remove_brackets`` builds a list, so the ``str`` return
    annotation looks inaccurate.
    """
    return remove_brackets(tmp=fullwidth_to_halfwidth(s), split_str=split_s)

def get_value_count_dict(local_df: DataFrame, lang: str = "en"):
    """Count how often each choice occurs in a multi-choice column.

    Missing entries and the literal "(跳过)" are dropped, every remaining
    answer is split with ``simplify`` (", " for English, "┋" otherwise), and
    the exploded choices are tallied.

    Returns:
        The ``value_counts()`` result of the exploded choices (despite the
        function name, this is not converted to a dict here).
    """
    separator = ", " if lang == "en" else "┋"
    answered = local_df.loc[local_df.notnull() & (local_df != "(跳过)")]
    choices = answered.map(lambda val: simplify(val, split_s=separator)).explode()
    return choices.value_counts()

def multi_choice_value_counts(idx: int):
    """Print and return the per-choice frequencies of one multi-choice question.

    Args:
        idx: zero-based column position of the question in both surveys.

    Returns:
        A pair ``(english_counts, mandarin_counts)`` of plain dicts mapping
        each choice to its count.
    """
    column_en, column_zh = get_col_df(idx=idx)
    surveys = (
        ("==== English Survey ====", column_en, "en"),
        ("==== Mandarin Survey ====", column_zh, "zh"),
    )
    tallies = []
    for header, column, lang in surveys:
        print(header)
        tally = get_value_count_dict(local_df=column, lang=lang)
        print(tally)
        tallies.append(tally.to_dict())

    return tallies[0], tallies[1]

In [27]:
question_no = 3
df_q3_en, df_q3_zh = get_col_df(idx=question_no - 1)
options_q3_en, options_q3_zh = multi_choice_value_counts(idx=question_no - 1)

In [28]:
# Unified labels for the English Q3 choices, aligned by position with the
# order of options_q3_en.
new_q3_options_en = [
    "Google Recommendation", "LinkedIn", "Twitter", "Subscription",
    "Stackoverflow", "Zhihu", "Weibo", "Quora", "Article", 
    "Medium", "Tiktok","Local news channels","Google search"
]
# label -> count, taking the count at the same position as the label.
q3_buckets_en = {
    label: list(options_q3_en.values())[pos]
    for pos, label in enumerate(new_q3_options_en)
}

In [29]:
# Unified labels for the Mandarin Q3 choices, aligned by position with the
# order of options_q3_zh. Labels shared with the English list must be spelled
# identically, because the merge in the next cell combines buckets by key:
# "Subscription" was previously misspelled "Subcription" and therefore showed
# up as a separate slice instead of being added to the English count.
new_q3_options_zh = [
    "Zhihu", "Subscription", "Twitter", "Weibo",
    "Stackoverflow", "LinkedIn", "Quora", "Others", 
    "Hacker News", "Bilibili", "Arxiv", "Sci-Hub", "Wechat",
    "Nature", "Science", "ResearchGate"
]
# label -> count, one entry per observed Mandarin choice.
q3_buckets_zh = {
    new_q3_options_zh[pos]: count
    for pos, count in enumerate(options_q3_zh.values())
}

In [30]:
# Start from the English counts, then add the Mandarin counts: labels present
# in both surveys are summed, labels only in the Mandarin survey are appended.
q3_bucket = q3_buckets_en.copy()

for key, zh_count in q3_buckets_zh.items():
    q3_bucket[key] = q3_bucket[key] + zh_count if key in q3_bucket else zh_count

In [31]:
q3_text = f"Q{question_no}: What are your primary sources to get information from?"
q3_data = q3_bucket.copy()
form_pie_chart(
    labels=list(q3_data.keys()), 
    values=list(q3_data.values()),
    title=q3_text
)

In [32]:
print_next_question()

## Question 4 - Are you interested in a platform for gathering and presenting up-to-date academic progress?

In [33]:
df_en = df_en.fillna(value={column_names_en[3] : "No"})

In [34]:
question_no = 4
options_q4_en, options_q4_zh = naive_value_counts(idx=question_no - 1)
df_q4_en, df_q4_zh = get_col_df(idx=question_no - 1)

In [35]:
new_q4_options_en = [True, False]
unified_q4_en = unify_values_in_en(
    local_df=df_q4_en,
    info_dict=options_q4_en,
    new_role_options=new_q4_options_en
)

In [36]:
new_q4_options_zh = [True, False, False]
unified_q4_zh = unify_values_in_en(
    local_df=df_q4_zh,
    info_dict=options_q4_zh,
    new_role_options=new_q4_options_zh
)

In [37]:
df[f'q{question_no}'] = pd.concat([unified_q4_en, unified_q4_zh], axis=0)
df.head()

Unnamed: 0,q1,q2,q4
0,postgraduate,True,True
1,undergraduate,True,True
2,postgraduate,True,True
3,other,False,False
4,undergraduate,True,True


In [38]:
q4_text = f"Q{question_no}: Are you interested in a platform for gathering and presenting <br> up-to-date academic progress?"
q4_data = df.q4.value_counts().to_dict()
form_pie_chart(
    labels=list(q4_data.keys()), 
    values=list(q4_data.values()),
    title=q4_text
)

In [39]:
print_next_question()

## Question 5 - Say you have found a piece of academic news or paper interesting. Would you like to know more about related news/ paper?

In [40]:
df_en = df_en.fillna(value={column_names_en[4] : "No"})

In [41]:
question_no = 5
options_q5_en, options_q5_zh = naive_value_counts(idx=question_no - 1)
df_q5_en, df_q5_zh = get_col_df(idx=question_no - 1)

In [42]:
new_q5_options_en = [True, False]
unified_q5_en = unify_values_in_en(
    local_df=df_q5_en,
    info_dict=options_q5_en,
    new_role_options=new_q5_options_en
)

In [43]:
new_q5_options_zh = [True, False, False]
unified_q5_zh = unify_values_in_en(
    local_df=df_q5_zh,
    info_dict=options_q5_zh,
    new_role_options=new_q5_options_zh
)

In [44]:
df[f'q{question_no}'] = pd.concat([unified_q5_en, unified_q5_zh], axis=0)
q5_text = f"Q{question_no}: Say you have found a piece of academic news or paper interesting. <br> Would you like to know more about related news/paper?"
q5_data = df.q5.value_counts().to_dict()
form_pie_chart(
    labels=list(q5_data.keys()), 
    values=list(q5_data.values()),
    title=q5_text
)

In [45]:
print_next_question()

## Question 6 - How often do you read academic papers?

In [46]:
df_en = df_en.fillna(value={column_names_en[5] : "I never read one LOL"})

In [47]:
question_no = 6
options_q6_en, options_q6_zh = naive_value_counts(idx=question_no - 1)
df_q6_en, df_q6_zh = get_col_df(idx=question_no - 1)

In [48]:
new_q6_options_en = ["monthly", "weekly", "never"]
unified_q6_en = unify_values_in_en(
    local_df=df_q6_en,
    info_dict=options_q6_en,
    new_role_options=new_q6_options_en
)

In [49]:
new_q6_options_zh = ["weekly", "monthly", "never", "daily", "never"]
unified_q6_zh = unify_values_in_en(
    local_df=df_q6_zh,
    info_dict=options_q6_zh,
    new_role_options=new_q6_options_zh
)

In [50]:
df[f'q{question_no}'] = pd.concat([unified_q6_en, unified_q6_zh], axis=0)
q6_text = f"Q{question_no}: {column_names_en[question_no - 1]}"
q6_data = df.q6.value_counts().to_dict()
form_pie_chart(
    labels=list(q6_data.keys()), 
    values=list(q6_data.values()),
    title=q6_text
)

In [51]:
print_next_question()

## Question 7 - Have you ever used these platforms?

In [52]:
question_no = 7
options_q7_en, options_q7_zh = multi_choice_value_counts(idx=question_no - 1)
df_q7_en, df_q7_zh = get_col_df(idx=question_no - 1)

In [53]:
def build_bucket(data: dict) -> dict:
    """Normalise the labels of a count dict into lower-case, dash-separated keys.

    Each key has "/ " collapsed to "/", spaces replaced by "-", and is
    lower-cased. Labels that become identical after normalisation (for example
    the same answer typed with different capitalisation) have their counts
    added together instead of the later one overwriting the earlier one.

    Args:
        data: mapping of raw label -> count.

    Returns:
        A new dict of normalised label -> summed count, in first-seen order.
    """
    ret = {}
    for key, value in data.items():
        normalized = key.replace("/ ", "/").replace(" ", "-").lower()
        ret[normalized] = ret.get(normalized, 0) + value
    return ret
q7_bucket_en = build_bucket(data=options_q7_en)
# Rename the run-together English key so it matches the dash-separated
# 'papers-with-codes' key produced for the Mandarin survey.
q7_bucket_en['papers-with-codes'] = q7_bucket_en.pop('paperswithcodes')
q7_bucket_en

{'google-scholar': 23,
 'none/other': 8,
 'semantic-scholar': 7,
 'connected-papers': 2,
 'papers-with-codes': 5}

In [54]:
q7_bucket_zh = build_bucket(data=options_q7_zh)
q7_bucket_zh

{'google-scholar': 64,
 'papers-with-codes': 20,
 'semantic-scholar': 11,
 'connected-papers': 10,
 'paper-weekly': 8,
 '都没有!若有其他请填空': 3,
 'huggingface': 1,
 'jstor': 1}

In [55]:
def merge_buckets(bkt_1: dict, bkt_2: dict) -> dict:
    """Combine two count dicts into a new one.

    Keys present in both have their values added; keys only in ``bkt_2`` are
    appended after those of ``bkt_1``. Neither input is modified.
    """
    merged = bkt_1.copy()
    for key in bkt_2:
        merged[key] = merged[key] + bkt_2[key] if key in merged else bkt_2[key]
    return merged

q7_bucket = merge_buckets(bkt_1=q7_bucket_en, bkt_2=q7_bucket_zh)
# Fold the Mandarin "none of these / please specify" option into the shared
# 'none/other' bucket. The count is taken from the bucket itself rather than a
# hard-coded 3, so it stays correct when the survey data changes, and a
# missing key on either side no longer raises KeyError.
q7_bucket['none/other'] = (
    q7_bucket.get('none/other', 0) + q7_bucket.pop('都没有!若有其他请填空', 0)
)

In [56]:
q7_text = f"Q{question_no}: {column_names_en[question_no - 1]}"
q7_data = q7_bucket.copy()
form_pie_chart(
    labels=list(q7_data.keys()), 
    values=list(q7_data.values()),
    title=q7_text
)

In [57]:
print_next_question()

## Question 8 - Who/ what do you rely on to know what papers to read? (yes, exclude the times when you don't know what to read)

In [58]:
question_no = 8
options_q8_en, options_q8_zh = multi_choice_value_counts(idx=question_no - 1)
df_q8_en, df_q8_zh = get_col_df(idx=question_no - 1)

In [59]:
q8_bucket_en = build_bucket(data=options_q8_en)
print(q8_bucket_en)

In [60]:
# Mandarin Q8 counts keyed by the normalised English labels so they merge with
# q8_bucket_en.
# NOTE(review): these numbers are literals; options_q8_zh computed in the
# previous cells is not used here. Presumably they were transcribed by hand
# from its printed output -- confirm, and update them if survey_2.xlsx changes.
q8_bucket_zh = {
    "friends'-or-peers'-suggestions": 28,
    'most-recently-published-papers-on-leading-venues-or-journals': 44,
    'recommended-reading-lists-from-the-web': 33,
    'tutors-or-seniors': 63,
    'advice-from-chatgpt': 29
}

In [61]:
q8_bucket = merge_buckets(q8_bucket_en, q8_bucket_zh)
print(q8_bucket)

In [62]:
q8_text = f"Q{question_no}: Who/ what do you rely on to know what papers to read?<br>(yes, exclude the times when you don't know what to read)"
q8_data = q8_bucket.copy()
form_pie_chart(
    labels=list(q8_data.keys()), 
    values=list(q8_data.values()),
    title=q8_text
)

In [63]:
print_next_question()

## Question 9 - Where do you get papers to read?

In [64]:
question_no = 9
options_q9_en, options_q9_zh = multi_choice_value_counts(idx=question_no - 1)
df_q9_en, df_q9_zh = get_col_df(idx=question_no - 1)

In [65]:
q9_bucket_en = build_bucket(data=options_q9_en)
print(q9_bucket_en)

In [66]:
# Mandarin Q9 counts keyed by the normalised English labels so they merge with
# q9_bucket_en.
# NOTE(review): these numbers are literals; options_q9_zh is not used here.
# Presumably transcribed by hand from its printed output -- confirm, and update
# them if survey_2.xlsx changes.
q9_bucket_zh = {
    'official-websites-of-databases': 64,
    'libraries': 45,
    'word-of-mouth/shared-by-friends-or-people': 26,
    'sci-hub': 23,
    'others': 3
}
q9_bucket = merge_buckets(q9_bucket_en, q9_bucket_zh)

In [67]:
q9_text = f"Q{question_no}: {column_names_en[question_no - 1]}"
q9_data = q9_bucket.copy()
form_pie_chart(
    labels=list(q9_data.keys()), 
    values=list(q9_data.values()),
    title=q9_text
)
print_next_question()

## Question 10 - Apart from directly reading the paper, do you prefer other approaches to understand latest proposed ideas?

In [68]:
question_no = 10
options_q10_en, options_q10_zh = multi_choice_value_counts(idx=question_no - 1)
df_q10_en, df_q10_zh = get_col_df(idx=question_no - 1)

In [69]:
q10_bucket_en = build_bucket(data=options_q10_en)
print(q10_bucket_en)

In [70]:
# Mandarin Q10 counts keyed by the normalised English labels so they merge with
# q10_bucket_en.
# NOTE(review): these numbers are literals; options_q10_zh is not used here.
# Presumably transcribed by hand from its printed output -- confirm, and update
# them if survey_2.xlsx changes.
q10_bucket_zh = {
    'explanation-blogs-are-easier-to-understand-and-more-efficient': 46,
    'reading-them-by-myself-is-still-the-best-way-to-understand-them-well': 52,
    'take-classes-or-watch-videos': 35,
    'attending-academic-seminars-is-also-a-good-choice': 22
}
q10_bucket = merge_buckets(q10_bucket_en, q10_bucket_zh)

In [71]:
q10_text = f"Q{question_no}: Apart from directly reading the paper, do you prefer other <br> approaches to understand latest proposed ideas?"
q10_data = q10_bucket.copy()
form_pie_chart(
    labels=list(q10_data.keys()), 
    values=list(q10_data.values()),
    title=q10_text
)
print_next_question()

## Question 11 - When you begin to explore an unfamiliar field of research, would it help to have a tool to refine your words for precise searching?

In [72]:
df_en = df_en.fillna(value={column_names_en[10] : "Yes"})

In [73]:
question_no = 11
options_q11_en, options_q11_zh = naive_value_counts(idx=question_no - 1)
df_q11_en, df_q11_zh = get_col_df(idx=question_no - 1)

In [74]:
def binary_question_util(original_df: DataFrame, count_dict: dict, lang: str = "en", 
                         en_update: list[bool] = [True, False], zh_update: list[bool] = [True, True, False]):
    """Relabel a yes/no-style question with booleans via ``unify_values_in_en``.

    Args:
        original_df: the answers to relabel.
        count_dict: answer -> count dict whose key order defines the alignment
            with the chosen boolean list.
        lang: "en" selects ``en_update``; any other value selects ``zh_update``.
        en_update: booleans for the English answers, by position.
        zh_update: booleans for the Mandarin answers, by position.

    Returns:
        Whatever ``unify_values_in_en`` returns for the chosen list.
    """
    # The list defaults are shared between calls; they are only passed along
    # here, never modified.
    if lang == "en":
        options = en_update
    else:
        options = zh_update
    return unify_values_in_en(
        local_df=original_df,
        info_dict=count_dict,
        new_role_options=options,
    )

In [75]:
unified_q11_en = binary_question_util(original_df=df_q11_en, count_dict=options_q11_en)
unified_q11_zh = binary_question_util(original_df=df_q11_zh, count_dict=options_q11_zh, lang="zh")
df[f'q{question_no}'] = pd.concat([unified_q11_en, unified_q11_zh], axis=0)
q11_text = f"Q{question_no}: When you begin to explore an unfamiliar field of research, <br> would it help to have a tool to refine your words for precise searching?"
q11_data = df.q11.value_counts().to_dict()
form_pie_chart(
    labels=list(q11_data.keys()), 
    values=list(q11_data.values()),
    title=q11_text
)
print_next_question()

## Question 12 - Is a summary of a paper, containing only its introduction and conclusion (even simpler than an abstract), enough to understand the context of the paper and let you decide if you want to continue to read on?

In [76]:
df_en = df_en.fillna(value={column_names_en[11] : "Yes"})
question_no = 12
options_q12_en, options_q12_zh = multi_choice_value_counts(idx=question_no - 1)
df_q12_en, df_q12_zh = get_col_df(idx=question_no - 1)

In [77]:
q12_bucket_en = build_bucket(data=options_q12_en)
print(q12_bucket_en)

In [78]:
# Mandarin Q12 counts keyed like q12_bucket_en so the two merge by key.
# NOTE(review): these numbers are literals; options_q12_zh is not used here.
# Presumably transcribed by hand -- confirm, and update if survey_2.xlsx changes.
q12_bucket_zh = {'yes': 50, 'maybe': 18, 'no': 4}
q12_bucket = merge_buckets(q12_bucket_en, q12_bucket_zh)

In [79]:
q12_text = f"Q{question_no}: Is a summary of a paper, containing only its introduction <br> and conclusion (even simpler than an abstract), enough to understand the context of the paper and let you decide if you want to continue to read on?"
q12_data = q12_bucket.copy()
form_pie_chart(
    labels=list(q12_data.keys()), 
    values=list(q12_data.values()),
    title=q12_text
)
print_next_question()

## Question 13 - How useful would it be to you, if you were to be recommended papers to read based on your history, rather than to search by yourself in a sea of paper?

In [80]:
df_en = df_en.fillna(value={column_names_en[12] : "Yes"})
question_no = 13
options_en, options_zh = multi_choice_value_counts(idx=question_no - 1)
df_q_en, df_q_zh = get_col_df(idx=question_no - 1)

In [81]:
q_bucket_en = build_bucket(data=options_en)
q_bucket_en['no'] = 0
print(q_bucket_en)

In [82]:
# Mandarin Q13 counts keyed like q_bucket_en so the two merge by key.
# NOTE(review): these numbers are literals; options_zh is not used here.
# Presumably transcribed by hand -- confirm, and update if survey_2.xlsx changes.
q_bucket_zh = {'yes': 45, 'maybe': 20, 'no': 7}
q_bucket = merge_buckets(q_bucket_en, q_bucket_zh)

In [95]:
q_text = f"Q{question_no}: How useful would it be to you, if you were to be recommended <br> papers to read based on your history, rather than to search by yourself in a sea of paper?"
q_data = q_bucket.copy()
print(q_text)
form_pie_chart(
    labels=list(q_data.keys()), 
    values=list(q_data.values()),
    title=q_text
)
print_next_question()

## Question 14 - Do you think it would help if a knowledge graph/ tree is built on how other papers are related to the one you are currently reading (Prior & Derivative Work)?

In [84]:
df_en = df_en.fillna(value={column_names_en[13] : "Yes"})
question_no = 14
options_en, options_zh = multi_choice_value_counts(idx=question_no - 1)
df_q_en, df_q_zh = get_col_df(idx=question_no - 1)

In [85]:
q_bucket_en = build_bucket(data=options_en)
q_bucket_zh = build_bucket(data=options_zh)
q_bucket = merge_buckets(q_bucket_en, q_bucket_zh)

In [86]:
q_text = f"Q{question_no}: Do you think it would help if a knowledge graph/ tree is built on <br> how other papers are related to the one you are currently reading (Prior & Derivative Work)?"
q_data = q_bucket.copy()
print(q_text)
form_pie_chart(
    labels=list(q_data.keys()), 
    values=list(q_data.values()),
    title=q_text
)
print_next_question()

## Question 15 - Have you ever used any Q&A chatbots for platform instructions and found them helpful?

In [87]:
df_en = df_en.fillna(value={column_names_en[14] : "No"})
question_no = 15
options_en, options_zh = naive_value_counts(idx=question_no - 1)
df_q_en, df_q_zh = get_col_df(idx=question_no - 1)

In [88]:
unified_en = binary_question_util(original_df=df_q_en, count_dict=options_en, lang="en", en_update=[False, True])
unified_zh = binary_question_util(original_df=df_q_zh, count_dict=options_zh, lang="zh", zh_update=[True, False, False])

In [89]:
df[f'q{question_no}'] = pd.concat([unified_en, unified_zh], axis=0)
q_text = f"Q{question_no}: Have you ever used any Q&A chatbots for platform instructions <br> and found them helpful?"
q_data = df.q15.value_counts().to_dict()
form_pie_chart(
    labels=list(q_data.keys()), 
    values=list(q_data.values()),
    title=q_text
)
print_next_question()

## Question 16 - Do you like customizing the theme your workspaces and applications for aesthetic purposes (colour, font, etc.)? 

In [90]:
df_en = df_en.fillna(value={column_names_en[15] : "Yes"})
question_no = 16
options_en, options_zh = naive_value_counts(idx=question_no - 1)
df_q_en, df_q_zh = get_col_df(idx=question_no - 1)

In [91]:
unified_en = binary_question_util(original_df=df_q_en, count_dict=options_en, lang="en", en_update=[False, True])
unified_zh = binary_question_util(original_df=df_q_zh, count_dict=options_zh, lang="zh", zh_update=[True, True, False])

In [92]:
df[f'q{question_no}'] = pd.concat([unified_en, unified_zh], axis=0)
q_text = f"Q{question_no}: Do you like customizing the theme your workspaces and <br> applications for aesthetic purposes (colour, font, etc.)?"
q_data = df.q16.value_counts().to_dict()
form_pie_chart(
    labels=list(q_data.keys()), 
    values=list(q_data.values()),
    title=q_text
)
print_next_question()

## Questions 17 & 18 - Suggestions & Additional Comments

In [96]:
# Keep the English respondents who filled in column 16, then take that column
# and everything after it (presumably the suggestion and contact questions
# named in the section heading -- confirm against the sheet).
add_df_en = df_en.loc[df_en.iloc[:, 16].notnull()]
sug_df_en = add_df_en.iloc[:, 16:]
sug_df_en.to_csv(f"{suggestion_folder}/en.csv", header=True)
print(sug_df_en.to_dict())

In [97]:
# Keep the Mandarin respondents whose column 17 is neither missing nor one of
# the placeholders "(跳过)" / "(空)", then take that column and everything
# after it.
add_df_zh = df_zh.loc[
    df_zh.iloc[:, 17].notnull() & ~df_zh.iloc[:, 17].isin(["(跳过)", "(空)"])
]
sug_df_zh = add_df_zh.iloc[:, 17:]
sug_df_zh.to_csv(f"{suggestion_folder}/ch.csv", header=True)
print(sug_df_zh.to_dict())

In [113]:
# Remove individual rows by index label before converting to plain arrays.
# NOTE(review): the labels [0, 39] and [3, 44, 74] are hard-coded with no
# recorded reason; presumably these answers were judged unusable by hand.
# drop() raises KeyError if a label is absent, so this cell is tied to the
# current spreadsheets -- confirm and document the selection criterion.
sug_val_en = sug_df_en.drop([0, 39], axis=0).to_numpy()
print(sug_val_en)
sug_val_zh = sug_df_zh.drop([3, 44, 74], axis=0).to_numpy()
print(sug_val_zh)

In [117]:
import numpy as np

# Stack the English and Mandarin suggestion rows and label them with the
# English column titles (this requires both arrays to have the same number of
# columns as sug_df_en).
sug_final_df = pd.DataFrame(np.concatenate([sug_val_en, sug_val_zh]), columns=sug_df_en.columns)
sug_final_df.to_csv(f"{suggestion_folder}/sug.csv", header=True)
# NOTE(review): the frame includes the respondents' contact-detail column, so
# both sug.csv and the rendered output below contain personal data -- confirm
# this may be stored/shared, or drop or redact that column first.
sug_final_df.head(30)

Unnamed: 0,Are there any improvements/ comments/ features that you'd like to contribute/ see?,Add your contact number/ email address so we can contact you for a reward in case your suggestion is being utilized by our platform! (obviously only if the previous question has been answered)
0,Search by multiple keywords would also be very...,e1275107@u.nus.edu
1,Darkmode,86066299
2,"Filters/sorting by journals, paper citations, ...",
3,这个是否涉及版权问题,如果不涉及的话，完全可以搞
4,自动追踪某个研究领域的主要专家名称并给出其近五年内的研究方向和主要课题,(空)
5,文档归类，收藏过文章的第一作者和通讯作者最新文章通知,(空)
6,可以参考 elicit.org,depykung@gmail.com
7,笔记类型的评论和分享 针对外文有类似扇贝阅读的读者评论性质的翻译 以助力于阅读非本专业的外...,741657697@qq.com
8,以论文主题为聚类中心，然后以鱼骨图展示作品的衍生关系,gwyuan93@qq.com
9,针对对自己的论文新领域，阅读新领域的论文往往是比较痛苦的，因为直接上手去硬读往往事半功倍，需...,feng_qingtian@u.nus.edu
