In [None]:
import pandas as pd
import numpy as np
import plotly.express as px


In [None]:
def load_file(x):
  """Read a CSV into a DataFrame.

  ``x`` is handed straight to ``pd.read_csv``, so it can be anything that
  function accepts (a path string, a file-like object, ...). All other
  ``read_csv`` options are left at their defaults.
  """
  return pd.read_csv(x)

In [None]:
#creating a dataframe from the value_counts Series that pandas
#returns for the 'feats' column
def getfeat_countsdf(DF):
  """Count how often each distinct value occurs in ``DF['feats']``.

  Returns a one-column DataFrame wrapping the ``value_counts`` Series:
  the index holds the distinct ``feats`` values and the single column
  holds their frequencies (most frequent first, as ``value_counts``
  orders them).
  """
  occurrences = DF['feats'].value_counts()
  return pd.DataFrame(occurrences)


In [None]:
#for this to work, your DataFrame columns
#must be labeled as
# 'feats' (the column containing the features)
# and 'count' (the column containing the counts)
def get_featurelist(DF):
  """Split every row's pipe-delimited ``feats`` string into a list.

  Rows that contain a missing value in *any* column are dropped first.
  The split is performed while building the result instead of being
  assigned back into the filtered frame; assigning into the output of
  ``dropna()`` is what makes pandas emit ``SettingWithCopyWarning``, and
  the intermediate column was never used for anything but ``tolist()``.
  The caller's DataFrame is not modified.

  Parameters
  ----------
  DF : pandas.DataFrame
      Must contain a ``feats`` column whose non-missing values are
      strings with sub-features separated by ``"|"``.

  Returns
  -------
  list of list of str
      One inner list per retained row, in row order.

  Raises
  ------
  KeyError
      If ``DF`` has no ``feats`` column.
  AttributeError
      If a retained ``feats`` value is not a string.
  """
  complete_rows = DF.dropna()
  return [feats.split("|") for feats in complete_rows['feats']]  #returns a list of lists


In [None]:
#flattens our list of lists
#into one level
def flattenList(llist):
  """Collapse one level of nesting.

  Walks ``llist`` in order and appends the items of every inner iterable
  to a single new list, which is returned. Inner items are not flattened
  any further.
  """
  flat = []
  for inner in llist:
    flat.extend(inner)
  return flat


In [None]:
#now that we have split up the features
#we can count them individually.
#This just makes them easier to break up visually;
#they are almost always encoded together by Stanza,
#so it seemed good to split them
def feats_dict(blist):
  """Bucket feature strings by morphological category.

  For each category, every item of ``blist`` that contains the category's
  search text as a substring is collected, in input order. An item can
  land in several buckets if it contains several search texts, and items
  matching none are ignored.

  Returns a dict whose keys appear in the order listed below and whose
  values are lists of the matching items.
  """
  # (output key, substring looked for). The key 'Verbform' is spelled
  # differently from its search text 'VerbForm' on purpose: downstream
  # cells index the result by the lowercase-f key.
  categories = (
      ('Voice', 'Voice'),
      ('Tense', 'Tense'),
      ('Verbform', 'VerbForm'),
      ('Case', 'Case'),
      ('Number', 'Number'),
      ('Degree', 'Degree'),
      ('Gender', 'Gender'),
      ('PronType', 'PronType'),
      ('Reflex', 'Reflex'),
      ('Person', 'Person'),
      ('Mood', 'Mood'),
      ('Aspect', 'Aspect'),
  )
  return {
      key: [feat for feat in blist if needle in feat]
      for key, needle in categories
  }


In [None]:
#making a dataframe of our split-up features
#with their individual counts
def makefeats_df(dict_):
  """Turn a ``{name: list}`` mapping into a DataFrame with one column per key.

  The mapping is first loaded with its keys as rows (``orient='index'``),
  which tolerates lists of different lengths by padding the shorter ones
  with missing values, and is then transposed so that each key becomes a
  column.
  """
  keys_as_rows = pd.DataFrame.from_dict(dict_, orient='index')
  return keys_as_rows.transpose()


In [None]:
##renaming columns for our individual feature dataframes
##once the columns are consistent across
##every frame we can concatenate them into one
##big dataframe
# NOTE(review): `featuresdf` is not assigned in any cell visible in this
# notebook. It is presumably the output of
# makefeats_df(feats_dict(flattenList(get_featurelist(load_file(...)))))
# -- confirm and add that cell so Restart & Run All works.
def _feature_counts(frame, column):
  """Tabulate the distinct values of ``frame[column]``.

  Returns a DataFrame with exactly two columns: ``Feature`` (the distinct
  non-missing values) and ``Count`` (how often each occurs), ordered as
  ``value_counts`` orders them.

  The index is named before it is reset and the counts column is named
  through ``reset_index(name=...)``. This does not rely on the labels
  pandas gives to ``value_counts().reset_index()``, which changed in
  pandas 2.0 (from ``index``/``<column>`` to ``<column>``/``count``).
  """
  return (
      frame[column]
      .value_counts()
      .rename_axis("Feature")
      .reset_index(name="Count")
  )


# One Feature/Count table per morphological category. The variable names
# are kept as-is because the concat cell below refers to them.
voicedf = _feature_counts(featuresdf, 'Voice')
tensedf = _feature_counts(featuresdf, 'Tense')
verbformdf = _feature_counts(featuresdf, 'Verbform')
casedf = _feature_counts(featuresdf, 'Case')
numberdf = _feature_counts(featuresdf, 'Number')
degree = _feature_counts(featuresdf, 'Degree')
genderdf = _feature_counts(featuresdf, 'Gender')
prontypedf = _feature_counts(featuresdf, 'PronType')
reflexdf = _feature_counts(featuresdf, 'Reflex')
persondf = _feature_counts(featuresdf, 'Person')
aspectdf = _feature_counts(featuresdf, 'Aspect')
mooddf = _feature_counts(featuresdf, 'Mood')

In [None]:
#making our DataFrame: stack every per-category Feature/Count table
#row-wise into one frame. persondf is part of the list so that the
#Person counts built above are not silently left out of the result.
allfcounts = pd.concat(
    [voicedf, tensedf, verbformdf, casedf, numberdf, degree, genderdf,
     prontypedf, reflexdf, persondf, mooddf, aspectdf],
    axis=0,
)
allfcounts

In [None]:
##adding each feature's share of the grand total
##of all morphological feature counts.
##Note: despite the column name, the value is a fraction of the
##total (it is not multiplied by 100).
grand_total = allfcounts['Count'].sum()
allfcounts['normalized_percent'] = allfcounts['Count'] / grand_total

In [None]:
##saving the reformatted DataFrame to my Google Drive
##(the DataFrame index is written as the first CSV column, the to_csv default)
allfcounts.to_csv(
    path_or_buf='/content/drive/MyDrive/Thesis2023/Viz_Graphics/SunburstCharts/tlg0018.tlg003.featviz.csv'
)

In [None]:
##creating a sunburst chart with our results
import plotly.express as px
import numpy as np
# Hierarchy runs Feature -> Count (root to leaf); wedge sizes come from Count.
fig = px.sunburst(
    allfcounts,
    path=['Feature', 'Count'],
    values='Count',
    color_discrete_sequence=px.colors.qualitative.Antique_r,
)
fig.update_traces(insidetextorientation='radial')
# Fixed 1000x1000 canvas with 25px margins on every side; uniform label
# size with a minimum of 16, and uniformtext_mode='hide' for labels that
# cannot be drawn at that size.
fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin={'t': 25, 'l': 25, 'r': 25, 'b': 25},
    uniformtext_minsize=16,
    uniformtext_mode='hide',
)
fig.show()

