In [None]:
#basics
import pandas as pd
import numpy as np

#stats
import itertools
import scipy.stats
import statsmodels.stats.multitest
import statannotations.Annotator
from scipy.stats import fisher_exact
# from scipy.stats import chi2_contingency
# from scipy.stats import chi2

#tableone
from tableone import TableOne, load_dataset 

#graphing
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
mpl.rcParams['pdf.fonttype'] = 42  # edit-able in illustrator
mpl.rcParams['font.sans-serif'] = 'Arial'
pd.set_option('display.max_columns', None)
import plotly.express as px
import plot_likert

#display all outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Import Data

In [None]:
# Load the raw survey export. Replace the {filename} placeholder with the real workbook name.
survey = pd.read_excel("{filename}.xlsx")

In [None]:
#take only columns of interest


In [None]:
survey = survey.iloc[:,5:11]

In [None]:
survey = survey.iloc[1:,:]

In [None]:
#rename columns easier
# Give the six retained columns short names, assigned by column position.
short_names = ['Role', 'Used_before', 'Interested', 'Education', 'Research', 'Healthcare']
mapping = {survey.columns[pos]: label for pos, label in enumerate(short_names)}
survey = survey.rename(columns=mapping)

## Clean 

In [None]:
survey.iloc[:,0].value_counts()

In [None]:
# take most clinical role 
# Collapse each listed multi-role answer onto the single role on the right.
for combined_role, kept_role in [
    ('Clinical Faculty; Research Faculty', 'Clinical Faculty'),
    ('Clinical Faculty; Administrative Staff', 'Clinical Faculty'),
    ('Clinical Faculty; Other', 'Clinical Faculty'),
    ('Medical Student, Resident, Fellow; Graduate Student, Postdoc Researcher; Research Faculty', 'Medical Student, Resident, Fellow'),
    ('Medical Student, Resident, Fellow; Graduate Student, Postdoc Researcher; Other', 'Medical Student, Resident, Fellow'),
]:
    survey.loc[survey.Role == combined_role, 'Role'] = kept_role

In [None]:
# take most academic role 
# Collapse each listed multi-role answer onto the single role on the right.
for combined_role, kept_role in [
    ('Research Faculty; Administrative Staff', 'Research Faculty'),
    ('Research Faculty; Other', 'Research Faculty'),
    ('Graduate Student, Postdoc Researcher; Administrative Staff', 'Graduate Student, Postdoc Researcher'),
]:
    survey.loc[survey.Role == combined_role, 'Role'] = kept_role

In [None]:
# clean misc other
survey.loc[survey.Role=='Administrative Staff; Other', 'Role'] = 'Administrative Staff'


In [None]:
role_counts = survey.iloc[:,0].value_counts().reset_index()

In [None]:
role_counts['total'] = 420

In [None]:
# value_counts().reset_index() names the count column 'Role' on pandas < 2.0 but 'count'
# on pandas >= 2.0 (where 'Role' holds the labels), so pick the counts by position.
role_counts['ratio'] = role_counts.iloc[:, 1] / role_counts.total

In [None]:
role_counts

In [None]:
# rewrite capitalization
survey.loc[survey.Interested=='To a Great Extent', 'Interested'] = 'To a great extent'

In [None]:
# rewrite long answer
survey.loc[survey.Healthcare=='Yes, it can only be used to help write administrative content such as emails to insurance companies or to patients', 'Healthcare'] = 'Yes, it can be used for administrative purposes'

In [None]:
# set categorical order

# Turn every answer column into an ordered categorical whose levels follow the
# listed order (pd.Categorical sets values outside the listed categories to NaN).
ordered_levels = {
    'Role': ['Medical Student, Resident, Fellow', 'Graduate Student, Postdoc Researcher',
             'Clinical Faculty', 'Research Faculty', 'Administrative Staff', 'Other'],
    'Interested': ['Not at all', 'Very little', 'Somewhat', 'To a great extent'],
    'Education': ['No, it should be banned',
                  "I don't know, it is too early to make a statement",
                  'Yes, it should be actively incorporated'],
    'Research': ['No, it should not be used at all',
                 "I don't know, it is too early to make a statement",
                 'Yes, but it should only be used to help brainstorm',
                 "Yes, as long as its use is transparently disclosed",
                 "Yes, disclosure is NOT needed"],
    'Healthcare': ['No, it should not be used at all',
                   "I don't know, it is too early to make a statement",
                   'Yes, it can be used for administrative purposes',
                   "Yes, it can be used for any purpose"],
}
for column, levels in ordered_levels.items():
    survey[column] = pd.Categorical(survey[column], categories=levels, ordered=True)

In [None]:
#binarize responses for fishers exact testing


In [None]:
survey.Healthcare.value_counts()

In [None]:
survey['Education_binary'] = np.where(survey.Education=='Yes, it should be actively incorporated','Yes','No+unsure')

In [None]:
# Binarize Research: any answer containing 'Yes' becomes 'Yes', everything else 'No+unsure'.
# na=False keeps missing answers out of the 'Yes' group: without it str.contains returns
# NaN for them and np.where treats NaN as truthy.
survey['Research_binary'] = np.where(
    survey.Research.str.contains('Yes', na=False),
    'Yes', 'No+unsure')

In [None]:
# Binarize Healthcare: any answer containing 'Yes' becomes 'Yes', everything else 'No+unsure'.
# na=False keeps missing answers out of the 'Yes' group: without it str.contains returns
# NaN for them and np.where treats NaN as truthy.
survey['Healthcare_binary'] = np.where(
    survey.Healthcare.str.contains('Yes', na=False),
    'Yes', 'No+unsure')

In [None]:
survey.sample(3)

In [None]:
sns.set_context(context='talk', font_scale=1, rc=None)

In [None]:
sns.set_palette('viridis')

# Table1

In [None]:
mytable = TableOne(survey, 
                   columns=[ 'Role', 'Used_before',  'Education_binary', 'Research_binary',
       'Healthcare_binary'], 
                   categorical=[  'Used_before', 'Education_binary', 'Research_binary',
       'Healthcare_binary'       ], 
                   nonnormal=[ ],
                   groupby='Role', missing=False, pval=True,pval_test_name=True)
mytable

In [None]:
fisher_exact([[5,13],[23,22]])[1] #med trainee vs faculty healthcare use

In [None]:
fisher_exact([[5+23,39],[25+37,20+28]])[1] #med+grad trainees vs faculty on education 

In [None]:
mytable = TableOne(survey, 
                   columns=['Role',  'Used_before',  'Education_binary', 'Research_binary',
       'Healthcare_binary'], 
                   categorical=[ 'Role',  'Used_before', 'Education_binary', 'Research_binary',
       'Healthcare_binary'       ], 
                   nonnormal=[ ],
                   groupby='Used_before', missing=False, pval=True,pval_test_name=True)
mytable

In [None]:
mytable = TableOne(survey, 
                   columns=[ 'Role', 'Used_before', 'Interested', 'Education', 'Research',
       'Healthcare'], 
                   categorical=[  'Used_before', 'Interested', 'Education', 'Research',
       'Healthcare'       ], 
                   nonnormal=[ ],
                   groupby='Role', missing=False, pval=True,pval_test_name=True)
mytable

In [None]:
mytable.to_csv("groupby_role.csv")

In [None]:
mytable = TableOne(survey, 
                   columns=[ 'Role', 'Used_before', 'Interested', 'Education', 'Research',
       'Healthcare'], 
                   categorical=[  'Role', 'Interested', 'Education', 'Research',
       'Healthcare'       ], 
                   nonnormal=[ ],
                   groupby='Used_before', missing=False, pval=True,pval_test_name=True)
mytable

In [None]:
mytable.to_csv("groupby_used.csv")

# Visualization and stats

In [None]:
plt.figure(figsize=(8,6))
sns.histplot(data=survey, y='Role', hue='Used_before', multiple="stack")
plt.show()

In [None]:
survey.groupby('Role').size().reset_index()

In [None]:
#flip to proportions
# Count respondents per (Role, Used_before), then pivot so each Role is one row with
# 'No' and 'Yes' count columns (after reset_index the size column is named 0).
used = survey.groupby(['Role','Used_before']).size().reset_index().pivot(columns='Used_before', index='Role', values=0).reset_index()
# Share of each role that answered 'Yes' to Used_before.
used['percent_used'] = used['Yes']/(used['Yes']+used['No'])
# Constant column of 1; plotted below as the full-height grey bar behind percent_used.
used['total']=1
used

In [None]:

used['Role'] = pd.Categorical(used['Role'],
                                   categories=['Medical Student, Resident, Fellow', 'Graduate Student, Postdoc Researcher', 
                                              'Clinical Faculty', 'Research Faculty',  'Administrative Staff', 'Other'],
                                   ordered=True)

In [None]:
# Histogram of the number of respondents in each role.
fig, ax = plt.subplots(figsize=(8, 4))

# Draw on the axes created above explicitly instead of relying on the current-axes state.
sns.histplot(data=survey, x='Role', ax=ax)

# Rotate the long role labels and shift them slightly right so they end under their ticks.
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)

ax.set_ylabel("Respondents", size=18)
ax.set_xlabel("  ", size=16)
# Saved under its own name: 'used_byrole.pdf' is also written by the later
# Used_before-by-Role proportion plot, which would overwrite this figure.
fig.savefig('respondents_byrole.pdf')

In [None]:
fig, ax = plt.subplots(figsize=(8, 4))

bar1 = sns.barplot(x="Role",  y="total", data=used, color='grey')
bar2 = sns.barplot(x="Role", y="percent_used", data=used, color='tab:blue')
#legend
top_bar = mpatches.Patch(color='tab:blue', label='Have tried ChatGPT')
bottom_bar = mpatches.Patch(color='grey', label='Have not tried')
plt.legend(handles=[top_bar, bottom_bar])
ax.legend_.set_bbox_to_anchor((1, 0.8))
#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)

ax.set_ylabel("Proportion used", size=18)
ax.set_xlabel("Role ", size=16)
fig.savefig('proportion_used_byrole.pdf')

In [None]:
survey.groupby(['Role','Used_before']).size().reset_index().pivot(columns='Used_before', index='Role', values=0).apply(lambda x: x/sum(x)*100, axis=1).reset_index()

In [None]:
used_role = survey.groupby(['Role','Used_before']).size().reset_index().pivot(columns='Used_before', index='Role', values=0).apply(lambda x: x/sum(x)*100, axis=1).reset_index()

In [None]:
pd.melt(used_role, id_vars=['Role'], value_vars=['No','Yes'],
        var_name='Used before', value_name='Percent')

In [None]:
used_role_melt = pd.melt(used_role, id_vars=['Role'], value_vars=['No','Yes'],
        var_name='Used before', value_name='Percent')

In [None]:

fig = px.bar(used_role_melt, y='Role', x='Percent', color='Used before',
             color_discrete_sequence=px.colors.qualitative.Safe,
             # text = [i + str(j)+' %' for i,j in zip(interest_melt.Used_before, interest_melt.percent)],
             orientation='h',)
fig.write_image('used_byrole_plotly.pdf')
fig.show()

It looks like students have used ChatGPT more than faculty or staff; let's check this with pairwise Fisher's exact tests.

In [None]:
# Pairwise Fisher's exact tests on the Yes/No "used before" counts for every pair of roles.
stats_results = []
for d1, d2 in itertools.combinations(used.Role.unique(), 2):
    yes1 = used.Yes[used.Role==d1].values[0]
    yes2 = used.Yes[used.Role==d2].values[0]
    no1 = used.No[used.Role==d1].values[0]
    no2 = used.No[used.Role==d2].values[0]

    # Index [1] of the fisher_exact result is the p-value.
    pval = fisher_exact([[yes1, no1],
                         [yes2, no2]])[1]
    stats_results.append([d1, d2, pval])

stats_results = pd.DataFrame(stats_results, columns=["group1", "group2","pval"])
# Several pairs are tested at once, so also report Benjamini-Hochberg FDR-adjusted
# p-values, as the Mann-Whitney comparison of interest scores in this notebook does.
stats_results["pval_adj"] = statsmodels.stats.multitest.fdrcorrection(stats_results.pval, alpha=0.05)[1]
stats_results

In [None]:
#meds+grads vs clin+res faculty
fisher_exact(
    [[30+9,23+5],
     [14+32,31+33]])[1]

In [None]:
plt.figure(figsize=(8,6))
ax = sns.histplot(data=survey, y='Interested', hue='Role',multiple='stack')
ax.legend_.set_bbox_to_anchor((1, 0.8))
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))
fig = sns.histplot(data=survey, y='Role', x='Interested',cbar=True, )
ax.set_ylabel("Role", size=18)
ax.set_xlabel("Interest in using ChatGPT ", size=20)

plt.savefig('interested_byrole.pdf')

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))
fig = sns.histplot(data=survey, y='Role', x='Healthcare',cbar=True, )
ax.set_ylabel("Role", size=18)
ax.set_xlabel("Can ChatGPT be used in healthcare? ", size=16)
#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)
plt.savefig('healthcare_byrole.pdf')

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))
fig = sns.histplot(data=survey, y='Research', x='Healthcare',cbar=True, )
# ax.set_ylabel("Role", size=18)
# ax.set_xlabel("Can ChatGPT be used in healthcare? ", size=16)
#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)
plt.savefig('healthcare_research.pdf')

In [None]:
#show specific values
interested = survey.groupby(['Role','Interested']).size().reset_index().pivot(columns='Interested', index='Role', values=0).reset_index()
interested

In [None]:
#calculate percents 
interested=interested.set_index('Role').fillna(0).apply(lambda x: x/sum(x)*100, axis=1).reset_index()
interested

In [None]:
plt.figure(figsize=(6,6))
# Stacked respondent counts per Used_before answer, split by Interested level.
ax = sns.histplot(data=survey, x='Used_before', hue='Interested',multiple='stack', )
# Re-anchor the legend at axes coordinates (1, 0.8).
ax.legend_.set_bbox_to_anchor((1, 0.8))

ax.set_xlabel("")
# NOTE(review): the labels are assigned by position, so this assumes the 'Yes' bar is drawn
# first; the grouped bar plots later in the notebook label the bars in the opposite order
# ("Have not tried" first) - confirm the bar order here.
ax.set_xticklabels(["Have tried ChatGPT", "Have not tried"], size=16)
plt.savefig('interested_byused.pdf')

In [None]:
plt.figure(figsize=(8,6))
ax = sns.histplot(data=survey, x='Role', hue='Research',multiple='stack', )
ax.legend_.set_bbox_to_anchor((1, 0.8))

ax.set_xlabel("")
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)
# ax.set_xticklabels(["Have tried ChatGPT", "Have not tried"], size=16)
# plt.savefig('interested_byused.pdf')

In [None]:
#show specific values
interested = survey.groupby(['Used_before','Interested']).size().reset_index().pivot(columns='Interested', index='Used_before', values=0).reset_index()
interested

In [None]:
interested=interested.set_index('Used_before').fillna(0).apply(lambda x: x/sum(x)*100, axis=1).reset_index()
interested

In [None]:
interest_melt = pd.melt(interested, id_vars=['Used_before'], value_vars=['Not at all', 'Very little', 'Somewhat', 'To a great extent'],
        var_name='Interest', value_name='Percent').rename(columns={'Used_before':'Used before'})
        

In [None]:

fig = px.bar(interest_melt, y='Used before', x='Percent', color='Interest',
             color_discrete_sequence=px.colors.qualitative.Safe,
             # text = [i + str(j)+' %' for i,j in zip(interest_melt.Used_before, interest_melt.percent)],
             orientation='h',)
fig.write_image('interested_byused_plotly.pdf')
fig.show()

In [None]:
# Fisher's exact test on a hard-coded 2x2 table; [1] selects the p-value.
# NOTE(review): the first row sums to 252 but the second to 67+161=228, while the other
# hard-coded Used_before tables in this notebook use group sizes 252 and 168 -
# confirm whether 161 should be 101.
fisher_exact(
    [[40,252-40],
     [67,161]])[1]

In [None]:
survey.Used_before.value_counts()

In [None]:
168/420

In [None]:
# Fisher's exact test p-value: Used_before (rows: Yes, No) against the Education answer
# (columns: "actively incorporated", "banned"); other Education answers are not counted.
used_yes = survey.Used_before == 'Yes'
used_no = survey.Used_before == 'No'
edu_incorporate = survey.Education == 'Yes, it should be actively incorporated'
edu_ban = survey.Education == 'No, it should be banned'
fisher_exact(
    [[(used_yes & edu_incorporate).sum(), (used_yes & edu_ban).sum()],
     [(used_no & edu_incorporate).sum(), (used_no & edu_ban).sum()]])[1]

In [None]:
survey.groupby('Role')['Interested'].value_counts(normalize=True).mul(100).round(2).unstack().reset_index()

In [None]:
data = survey.groupby('Role')['Interested'].value_counts(normalize=True).round(2).unstack().reset_index()

In [None]:
plt.rcParams['figure.figsize'] = [8, 4]
# Stacked bars: proportion of each Interested level within every Role.
ax = survey.groupby('Role')['Interested'].value_counts(normalize=True).round(2).unstack().reset_index().plot(x='Role', kind='bar', stacked=True,)
ax.legend(title='Interest in using ChatGPT')
ax.legend_.set_bbox_to_anchor((1, 0.8))
ax.set_ylabel("Response proportions")
#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)

# Save before plt.show(): once the figure has been shown, a later plt.savefig()
# acts on a new, empty figure and writes a blank PDF.
ax.get_figure().savefig('interested_byrole.pdf')
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = [8, 4]
# Stacked bars: proportion of each Used_before answer within every Role.
ax = survey.groupby('Role')['Used_before'].value_counts(normalize=True).round(2).unstack().reset_index().plot(x='Role', kind='bar', stacked=True, color=['grey','teal'])
ax.legend(title='Used before')
ax.legend_.set_bbox_to_anchor((1, 0.8))
ax.set_ylabel("Response proportions")
#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(30)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)

# Save before plt.show(): once the figure has been shown, a later plt.savefig()
# acts on a new, empty figure and writes a blank PDF.
ax.get_figure().savefig('used_byrole.pdf')
plt.show()

In [None]:
questions = ['Used_before','Interested', 'Research', 'Education', 'Healthcare']

In [None]:
# One stacked proportion bar chart per question, grouped by Role, each saved to its own PDF.
for x in questions:
    plt.rcParams['figure.figsize'] = [8, 4]
    ax = survey.groupby('Role')[x].value_counts(normalize=True).round(2).unstack().reset_index().plot(x='Role', kind='bar', stacked=True,)

    # Reverse the order of handles and labels in the legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(reversed(handles), reversed(labels), loc='lower center',bbox_to_anchor=(.5, 1.0),title=f'{x} and ChatGPT')

    ax.set_ylabel("Response proportions")
    #twist axis
    ax.tick_params(axis='x', labelsize=12)
    trans = mpl.transforms.Affine2D().translate(6, 0)
    for t in ax.get_xticklabels():
        t.set_rotation(30)
        t.set_horizontalalignment("right")
        t.set_transform(t.get_transform() + trans)

    # Save before plt.show(): once the figure has been shown, a later plt.savefig()
    # acts on a new, empty figure and writes a blank PDF.
    ax.get_figure().savefig(f'{x}_byrole.pdf')
    plt.show()

In [None]:
for x in questions:
    data=survey.groupby('Used_before')[x].value_counts(normalize=True).round(2).unstack().reset_index()
    plt.rcParams['figure.figsize'] = [6, 4]
    ax = data.plot(x='Used_before', kind='bar', stacked=True,rot=0)
    
    # Reverse the order of handles and labels in the legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(reversed(handles), reversed(labels), title=f'{x} and ChatGPT')
    ax.legend_.set_bbox_to_anchor((1, 0.8))
    
    ax.set_ylabel("Response proportions")
    ax.set_xlabel(" ")
    ax.set_xticklabels(["Have not tried", "Have tried ChatGPT"], size=16)
    #twist axis
    # ax.tick_params(axis='x', labelsize=12)
    # trans = mpl.transforms.Affine2D().translate(6, 0)
    # for t in ax.get_xticklabels():
    #     t.set_rotation(10)
    #     t.set_horizontalalignment("right")
    #     t.set_transform(t.get_transform() + trans)

    plt.savefig(f'{x}_by_used.pdf')

In [None]:
len(survey[(survey.Used_before=='Yes')&(survey.Interested=='To a great extent')])

In [None]:
fisher_exact( 
    [ [  len(survey[(survey.Used_before=='Yes')&(survey.Interested=='To a great extent')]) , len(survey[(survey.Used_before=='Yes')&(survey.Interested!='To a great extent')])], 
     [len(survey[(survey.Used_before=='No')&(survey.Interested=='To a great extent')]) , len(survey[(survey.Used_before=='No')&(survey.Interested!='To a great extent')])] ])[1]

In [None]:
question1 = 'Research'
question2 = 'Education'

# Fisher's exact test p-value comparing how often the "I don't know" answer was chosen
# for question1 (first row) and question2 (second row): [unsure, any other answer].
# The first row must index question1; the bare name `question` is not defined.
unsure = "I don't know, it is too early to make a statement"
fisher_exact(
    [[len(survey[survey[question1] == unsure]), len(survey[survey[question1] != unsure])],
     [len(survey[survey[question2] == unsure]), len(survey[survey[question2] != unsure])]])[1]


In [None]:
question1 = 'Research'
question2 = 'Healthcare'

# Fisher's exact test p-value comparing how often the "I don't know" answer was chosen
# for question1 (first row) and question2 (second row): [unsure, any other answer].
# The first row must index question1; the bare name `question` is not defined.
unsure = "I don't know, it is too early to make a statement"
fisher_exact(
    [[len(survey[survey[question1] == unsure]), len(survey[survey[question1] != unsure])],
     [len(survey[survey[question2] == unsure]), len(survey[survey[question2] != unsure])]])[1]


In [None]:
survey.Research.value_counts()

In [None]:
# Number of respondents who chose "I don't know" for question1 (`question` is not defined).
len(survey[(survey[question1]=="I don't know, it is too early to make a statement")])

In [None]:
#simplify responses for fishers exact testing


In [None]:
survey.Healthcare.value_counts()

In [None]:
survey['Education_binary'] = np.where(survey.Education=='Yes, it should be actively incorporated','Yes','No+unsure')

In [None]:
# Binarize Research: any answer containing 'Yes' becomes 'Yes', everything else 'No+unsure'.
# na=False keeps missing answers out of the 'Yes' group: without it str.contains returns
# NaN for them and np.where treats NaN as truthy.
survey['Research_binary'] = np.where(
    survey.Research.str.contains('Yes', na=False),
    'Yes', 'No+unsure')

In [None]:
# Binarize Healthcare: any answer containing 'Yes' becomes 'Yes', everything else 'No+unsure'.
# na=False keeps missing answers out of the 'Yes' group: without it str.contains returns
# NaN for them and np.where treats NaN as truthy.
survey['Healthcare_binary'] = np.where(
    survey.Healthcare.str.contains('Yes', na=False),
    'Yes', 'No+unsure')

In [None]:
survey.sample(3)

In [None]:
survey.groupby('Used_before')['Research_binary'].value_counts()

In [None]:
survey.groupby('Used_before')['Research_binary'].value_counts(normalize=True)

In [None]:
fisher_exact([[189,63],[150,18]])[1]

In [None]:
89.3-75

In [None]:
survey.groupby('Used_before')['Healthcare_binary'].value_counts()

In [None]:
survey.groupby('Used_before')['Healthcare_binary'].value_counts(normalize=True)

In [None]:
fisher_exact([[123,129],[105,63]])[1]

In [None]:
62.5-48.8

In [None]:
survey.groupby('Used_before')['Education_binary'].value_counts()

In [None]:
survey.groupby('Used_before')['Education_binary'].value_counts(normalize=True)

In [None]:
fisher_exact([[107,61],[76,176]])[1]

In [None]:
63.9-30.2

In [None]:
for x in questions:
    data=survey.groupby('Used_before')[x].value_counts(normalize=True).round(2).unstack().reset_index()
    plt.rcParams['figure.figsize'] = [6, 4]
    ax = data.plot(x='Used_before', kind='bar', stacked=True,rot=0)
    
    # Reverse the order of handles and labels in the legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(reversed(handles), reversed(labels), title=f'{x} and ChatGPT')
    ax.legend_.set_bbox_to_anchor((1, 0.8))
    
    ax.set_ylabel("Response proportions")
    ax.set_xlabel(" ")
    ax.set_xticklabels(["Have not tried", "Have tried ChatGPT"], size=16)
    #twist axis
    # ax.tick_params(axis='x', labelsize=12)
    # trans = mpl.transforms.Affine2D().translate(6, 0)
    # for t in ax.get_xticklabels():
    #     t.set_rotation(10)
    #     t.set_horizontalalignment("right")
    #     t.set_transform(t.get_transform() + trans)

    plt.savefig(f'{x}_by_used.pdf')

In [None]:
g = sns.FacetGrid(survey, col="Used_before")
g.map(sns.histplot, "Education_binary", )

In [None]:
pd.melt(survey, id_vars=['Used_before'], value_vars=['Education_binary', 'Research_binary', 'Healthcare_binary'])

In [None]:
binary_melt = pd.melt(survey, id_vars=['Used_before'], value_vars=['Education_binary', 'Research_binary', 'Healthcare_binary'])

In [None]:
sns.histplot(data=binary_melt, x='variable', y='value')

In [None]:
# Count each combination of the three binary answers within every Used_before group.
# Columns are selected with a list: pandas >= 2.0 rejects the bare-tuple form.
survey.groupby(['Used_before'])[['Education_binary', 'Research_binary', 'Healthcare_binary']].value_counts()

In [None]:
questions_binary = ['Used_before','Education_binary', 'Research_binary', 'Healthcare_binary']

In [None]:
for x in questions_binary:
    data=survey.groupby('Used_before')[x].value_counts(normalize=True).round(2).unstack().reset_index()
    plt.rcParams['figure.figsize'] = [6, 4]
    ax = data.plot(x='Used_before', kind='bar', stacked=True,rot=0)
    
    # Reverse the order of handles and labels in the legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(reversed(handles), reversed(labels), loc='lower center',bbox_to_anchor=(0.5, 1.05),title=f'{x} and ChatGPT')
    
    ax.set_ylabel("Response proportions")
    ax.set_xlabel(" ")
    ax.set_xticklabels(["Have not tried", "Have tried ChatGPT"], size=16)
    #twist axis
    # ax.tick_params(axis='x', labelsize=12)
    # trans = mpl.transforms.Affine2D().translate(6, 0)
    # for t in ax.get_xticklabels():
    #     t.set_rotation(10)
    #     t.set_horizontalalignment("right")
    #     t.set_transform(t.get_transform() + trans)

    plt.savefig(f'{x}_by_used_binary.pdf')

In [None]:
for x in questions_binary:
    data=survey.groupby('Role')[x].value_counts(normalize=True).round(2).unstack().reset_index()
    plt.rcParams['figure.figsize'] = [6, 4]
    ax = data.plot(x='Role', kind='bar', stacked=True,rot=0)
    
    # Reverse the order of handles and labels in the legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(reversed(handles), reversed(labels), loc='lower center',bbox_to_anchor=(0.5, 1.05),title=f'{x} and ChatGPT')
    
    ax.set_ylabel("Response proportions")
    ax.set_xlabel(" ")
    # ax.set_xticklabels(["Have not tried", "Have tried ChatGPT"], size=16)
    #twist axis
    ax.tick_params(axis='x', labelsize=12)
    trans = mpl.transforms.Affine2D().translate(6, 0)
    for t in ax.get_xticklabels():
        t.set_rotation(10)
        t.set_horizontalalignment("right")
        t.set_transform(t.get_transform() + trans)

    plt.savefig(f'{x}_by_role_binary.pdf')

In [None]:
survey.to_csv("survey-cleaned3-12-23.csv")

In [None]:
#numerical plotting David idea

In [None]:
survey.Interested.value_counts()

In [None]:
# create a dictionary to map text to numbers
interested_dictionary = {'Not at all': 0, 'Very little': 1, 'Somewhat': 2, 'To a great extent':3}

# Map each answer to its score in a new column. Going through object dtype with .map
# avoids Series.replace on a categorical column, whose behaviour is deprecated in pandas 2.2.
survey['interested_numerical'] = survey['Interested'].astype(object).map(interested_dictionary)

In [None]:
survey['interested_numerical']=survey['interested_numerical'].astype(int)

In [None]:
# Swarm plot of the numeric interest score per role, annotated with the pairwise
# Mann-Whitney U comparisons that remain significant after FDR correction.
fig, ax = plt.subplots(figsize=(14,8))

# Plot configuration: data frame, grouping column (x) and numeric column (y).
data = survey
x = 'Role'
y = 'interested_numerical'
# color='asper_growth_flag'

stats_results = []

# Compare y between every pair of distinct x values.
for d1, d2 in itertools.combinations(data[x].unique(), 2):
        days1 = data[y][data[x]==d1].dropna()
        days2 = data[y][data[x]==d2].dropna()
        # Skip pairs where either group has no observations.
        if days1.size == 0 or days2.size == 0:
            continue
        pval = scipy.stats.mannwhitneyu(days1, days2).pvalue
        stats_results.append([d1, d2, days1.size, days2.size, pval])

stats_results = pd.DataFrame(stats_results, columns=["group1", "group2",
                                                   "group1_size", "group2_size", "pval"])
# Element [1] of the fdrcorrection result holds the adjusted p-values.
stats_results["pval_adj"] = statsmodels.stats.multitest.fdrcorrection(stats_results.pval, alpha=0.05)[1]
# Keep only the pairs whose adjusted p-value is below 0.05; these are the ones annotated.
stat_results_sign = stats_results.loc[stats_results.pval_adj < 0.05, :]
pairs = []
for _, r in stat_results_sign.iterrows():
        pairs.append((r.group1, r.group2))


# No ax argument is passed, so seaborn draws on the current axes.
sns.swarmplot(data=data, x=x, y=y, )

#twist axis
ax.tick_params(axis='x', labelsize=12)
trans = mpl.transforms.Affine2D().translate(6, 0)
for t in ax.get_xticklabels():
    t.set_rotation(10)
    t.set_horizontalalignment("right")
    t.set_transform(t.get_transform() + trans)

# Annotate the significant pairs with their adjusted p-values.
annotator = statannotations.Annotator.Annotator(
    ax, 
    pairs, 
    data=data, 
    x=x,
    y=y, 
    verbose=False
)
annotator._verbose = False
annotator.configure(line_width=1)
# The labels are built from stat_results_sign in the same row order as `pairs`.
annotator.set_custom_annotations([f"p={x:.2e}" for x in stat_results_sign.pval_adj])
annotator.annotate();

In [None]:
# Interest response options, ordered from least to most interested.
interest_scale = [
    'Not at all',
    'Very little',
    'Somewhat',
    'To a great extent',
]

In [None]:
# Likert chart of the interest responses across all respondents, with
# percentage bars and bar labels.
plot_likert.plot_likert(
    survey.Interested,
    interest_scale,
    plot_percentage=True,
    bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(10, 5),
)
     

In [None]:
# Count respondents per (prior use, interest level) pair, then spread the
# interest levels into columns. After reset_index() the size column is named 0.
interested = (
    survey.groupby(['Used_before', 'Interested'])
    .size()
    .reset_index()
    .pivot(index='Used_before', columns='Interested', values=0)
    .reset_index()
)

In [None]:
# Index by prior use, treat missing combinations as 0, and convert each row of
# counts into percentages of that row's total.
interested = (
    interested.set_index('Used_before')
    .fillna(0)
    .apply(lambda row: row / sum(row) * 100, axis=1)
)

In [None]:
# Arrange the interest columns in scale order, lowest to highest.
interested = interested.loc[
    :, ['Not at all', 'Very little', 'Somewhat', 'To a great extent']
]

In [None]:
# Fix the row order: 'No' first, then 'Yes'.
interested = interested.reindex(index=['No', 'Yes'])

In [None]:
# Likert-style stacked bar chart of interest in ChatGPT, one bar per row of
# `interested` (the prior-use groups).
ax = plot_likert.plot_counts(
    interested,
    interest_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(8, 4),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(1, .8), title='Interest in using ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=12)
ax.set_ylabel("Used ChatGPT before", size=16)

In [None]:
# Write the figure that owns `ax` to a PDF file.
ax.figure.savefig('interested_byused.pdf')

In [None]:
# Count respondents per (role, interest level) pair, then spread the interest
# levels into columns. After reset_index() the size column is named 0.
interested = (
    survey.groupby(['Role', 'Interested'])
    .size()
    .reset_index()
    .pivot(index='Role', columns='Interested', values=0)
    .reset_index()
)

In [None]:
# Index by role, treat missing combinations as 0, and convert each row of
# counts into percentages of that row's total.
interested = (
    interested.set_index('Role')
    .fillna(0)
    .apply(lambda row: row / sum(row) * 100, axis=1)
)

In [None]:
# Arrange the interest columns in scale order, lowest to highest.
interested = interested.loc[
    :, ['Not at all', 'Very little', 'Somewhat', 'To a great extent']
]

In [None]:
# Fix the row order of the roles for plotting.
interested = interested.reindex(
    index=[
        'Medical Student, Resident, Fellow',
        'Graduate Student, Postdoc Researcher',
        'Clinical Faculty',
        'Research Faculty',
        'Administrative Staff',
        'Other',
    ]
)

In [None]:
# Likert-style stacked bar chart of interest in ChatGPT, one bar per row of
# `interested` (the roles).
ax = plot_likert.plot_counts(
    interested,
    interest_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(8, 4),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(1, .8), title='Interest in using ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=14)
ax.set_ylabel("Role", size=16)

In [None]:
# Write the figure that owns `ax` to a PDF file.
ax.figure.savefig('interested_byrole.pdf')

In [None]:
# Share of each Healthcare answer within every role: proportions are rounded to
# two decimals, pivoted to one column per answer, gaps filled with 0, and
# finally scaled to percent.
healthcare = (
    survey.groupby('Role')['Healthcare']
    .value_counts(normalize=True)
    .round(2)
    .unstack()
    .fillna(0)
    * 100
)

In [None]:
# Order the answer columns from most negative to most permissive.
healthcare = healthcare.loc[
    :,
    [
        'No, it should not be used at all',
        "I don't know, it is too early to make a statement",
        'Yes, it can be used for administrative purposes',
        "Yes, it can be used for any purpose",
    ],
]

In [None]:
# Healthcare answer options in plotting order.
healthcare_scale = [
    'No, it should not be used at all',
    "I don't know, it is too early to make a statement",
    'Yes, it can be used for administrative purposes',
    "Yes, it can be used for any purpose",
]

In [None]:
# Fix the row order of the roles for plotting.
healthcare = healthcare.reindex(
    index=[
        'Medical Student, Resident, Fellow',
        'Graduate Student, Postdoc Researcher',
        'Clinical Faculty',
        'Research Faculty',
        'Administrative Staff',
        'Other',
    ]
)

In [None]:
# Likert-style stacked bar chart of the Healthcare answers, one bar per row of
# `healthcare` (the roles), using an explicit colour list whose first entry is
# transparent.
ax = plot_likert.plot_counts(
    healthcare,
    healthcare_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=[
        plot_likert.colors.TRANSPARENT,
        "firebrick",
        "silver",
        "cornflowerblue",
        "darkblue",
    ],
    figsize=(10, 5),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(0.2, 1), title='Healthcare and ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=14)

In [None]:
# Write the figure that owns `ax` to a PDF file.
ax.figure.savefig('healthcare_byrole.pdf')

In [None]:
# Share of each Healthcare answer within the prior-use groups: proportions are
# rounded to two decimals, pivoted to one column per answer, gaps filled with
# 0, and finally scaled to percent.
healthcare = (
    survey.groupby('Used_before')['Healthcare']
    .value_counts(normalize=True)
    .round(2)
    .unstack()
    .fillna(0)
    * 100
)

In [None]:
# Order the answer columns from most negative to most permissive.
healthcare = healthcare.loc[
    :,
    [
        'No, it should not be used at all',
        "I don't know, it is too early to make a statement",
        'Yes, it can be used for administrative purposes',
        "Yes, it can be used for any purpose",
    ],
]

In [None]:
# Healthcare answer options in plotting order.
healthcare_scale = [
    'No, it should not be used at all',
    "I don't know, it is too early to make a statement",
    'Yes, it can be used for administrative purposes',
    "Yes, it can be used for any purpose",
]

In [None]:
# Fix the row order: 'No' first, then 'Yes'.
healthcare = healthcare.reindex(index=['No', 'Yes'])

In [None]:
# Likert-style stacked bar chart of the Healthcare answers, one bar per row of
# `healthcare` (the prior-use groups).
ax = plot_likert.plot_counts(
    healthcare,
    healthcare_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(10, 4),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(1, .8), title='Healthcare and ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=12)

ax.set_ylabel("Used ChatGPT before", size=16)

In [None]:
# Share of each Research answer within the prior-use groups: proportions are
# rounded to two decimals, pivoted to one column per answer, gaps filled with
# 0, and finally scaled to percent.
Research = (
    survey.groupby('Used_before')['Research']
    .value_counts(normalize=True)
    .round(2)
    .unstack()
    .fillna(0)
    * 100
)

In [None]:
# Order the answer columns from most negative to most permissive.
Research = Research.loc[
    :,
    [
        'No, it should not be used at all',
        "I don't know, it is too early to make a statement",
        'Yes, but it should only be used to help brainstorm',
        "Yes, as long as its use is transparently disclosed",
        "Yes, disclosure is NOT needed",
    ],
]

In [None]:
# Research answer options in plotting order.
Research_scale = [
    'No, it should not be used at all',
    "I don't know, it is too early to make a statement",
    'Yes, but it should only be used to help brainstorm',
    "Yes, as long as its use is transparently disclosed",
    "Yes, disclosure is NOT needed",
]

In [None]:
# Fix the row order: 'No' first, then 'Yes'.
Research = Research.reindex(index=['No', 'Yes'])

In [None]:
# Likert-style stacked bar chart of the Research answers, one bar per row of
# `Research` (the prior-use groups).
ax = plot_likert.plot_counts(
    Research,
    Research_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(10, 4),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(1, .8), title='Research and ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=12)

ax.set_ylabel("Used ChatGPT before", size=16)

In [None]:
# Share of each Education answer within the prior-use groups: proportions are
# rounded to two decimals, pivoted to one column per answer, gaps filled with
# 0, and finally scaled to percent.
Education = (
    survey.groupby('Used_before')['Education']
    .value_counts(normalize=True)
    .round(2)
    .unstack()
    .fillna(0)
    * 100
)

In [None]:
# Order the answer columns from most negative to most permissive.
Education = Education.loc[
    :,
    [
        'No, it should be banned',
        "I don't know, it is too early to make a statement",
        'Yes, it should be actively incorporated',
    ],
]

In [None]:
# Education answer options in plotting order, preceded by an empty-string entry.
# NOTE(review): with the leading '' this list has four entries while only three
# Education answer columns are selected for plotting — confirm the extra entry
# is intentional.
Education_scale = [
    '',
    'No, it should be banned',
    "I don't know, it is too early to make a statement",
    'Yes, it should be actively incorporated',
]

In [None]:
# Fix the row order: 'No' first, then 'Yes'.
Education = Education.reindex(index=['No', 'Yes'])

In [None]:
# Likert-style stacked bar chart of the Education answers, one bar per row of
# `Education` (the prior-use groups).
ax = plot_likert.plot_counts(
    Education,
    Education_scale,
    plot_percentage=True,  # bar_labels=True,
    colors=plot_likert.colors.default_with_darker_neutral,
    figsize=(10, 4),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(1, .8), title='Education and ChatGPT', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=12)

ax.set_ylabel("Used ChatGPT before", size=16)

In [None]:
# Percentage of 'No' / 'Yes' prior-use answers within each role.
# fillna(0) turns a role/answer combination with no respondents into a count
# of 0; without it the NaN would propagate through sum(row) and make that
# role's entire row NaN.
used = (
    survey.groupby(['Role', 'Used_before'])
    .size()
    .reset_index()
    .pivot(columns='Used_before', index='Role', values=0)
    .fillna(0)
    .apply(lambda row: row / sum(row) * 100, axis=1)
)

In [None]:
# Prior-use answer options in plotting order.
scale = ['No', 'Yes']

In [None]:
# Fix the row order of the roles for plotting.
used = used.reindex(
    index=[
        'Medical Student, Resident, Fellow',
        'Graduate Student, Postdoc Researcher',
        'Clinical Faculty',
        'Research Faculty',
        'Administrative Staff',
        'Other',
    ]
)

In [None]:
# Likert-style stacked bar chart of prior ChatGPT use, one bar per row of
# `used` (the roles), using an explicit colour list whose first entry is
# transparent.
ax = plot_likert.plot_counts(
    used,
    scale,
    plot_percentage=True,  # bar_labels=True,
    colors=[plot_likert.colors.TRANSPARENT, 'lightcoral', 'cornflowerblue'],
    figsize=(10, 5),
)

# Single legend call carrying both the placement and the font settings.
ax.legend(bbox_to_anchor=(0.2, 1), title='Used ChatGPT before?', title_fontsize=16, fontsize=14)
# Re-apply the existing tick labels with explicit font sizes.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=14)

In [None]:
# Write the figure that owns `ax` to a PDF file.
ax.figure.savefig('used_byrole.pdf')

In [None]:
# Switch seaborn to its 'talk' plotting context (font_scale and rc stay at
# their defaults of 1 and None).
sns.set_context('talk')

In [None]:
# Convert Role into an ordered categorical whose categories follow the order
# listed below.
survey['Role'] = pd.Categorical(
    survey['Role'],
    ordered=True,
    categories=[
        'Medical Student, Resident, Fellow',
        'Graduate Student, Postdoc Researcher',
        'Clinical Faculty',
        'Research Faculty',
        'Administrative Staff',
        'Other',
    ],
)

In [None]:
# Build the respondent-role figure in this cell before saving it, so the file
# written does not depend on whichever object `fig` was last bound to by
# another cell.
# NOTE(review): the chart type (respondent counts per role) is an assumption
# based on the output filename — confirm it matches the intended figure.
role_fig, role_ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=survey, y='Role', ax=role_ax)
role_ax.set_ylabel("Role", size=18)
role_ax.set_xlabel("Number of respondents", size=18)

role_fig.savefig('respondent_role.pdf')

In [None]:
# Two-dimensional histogram of role against interest level, with a colour bar.
fig, ax = plt.subplots(figsize=(12, 6))
# Draw on the axes created above and leave `fig` bound to the Figure:
# histplot returns the Axes, so assigning its result to `fig` would shadow it.
sns.histplot(data=survey, y='Role', x='Interested', cbar=True, ax=ax)
ax.set_ylabel("Role", size=18)
ax.set_xlabel("Interest in using ChatGPT ", size=20)

# Saved under its own name: 'interested_byrole.pdf' is already written by the
# Likert chart of interest by role, and reusing it would overwrite that figure.
fig.savefig('interested_byrole_heatmap.pdf')