In [28]:
!pip install krippendorff

Collecting krippendorff
  Downloading krippendorff-0.8.1-py3-none-any.whl.metadata (3.0 kB)
Downloading krippendorff-0.8.1-py3-none-any.whl (18 kB)
Installing collected packages: krippendorff
Successfully installed krippendorff-0.8.1


In [29]:
import pandas as pd
import krippendorff

In [2]:
results = pd.read_csv("Reliability Step 2_April 23, 2025_19.20.csv")

In [3]:
results.columns

Index(['StartDate', 'EndDate', 'Status', 'IPAddress', 'Progress',
       'Duration (in seconds)', 'Finished', 'RecordedDate', 'ResponseId',
       'RecipientLastName', 'RecipientFirstName', 'RecipientEmail',
       'ExternalReference', 'LocationLatitude', 'LocationLongitude',
       'DistributionChannel', 'UserLanguage', 'Coder_ID', 'Post_ID',
       'Attribution_Presence', 'Attribution_Number', 'AttributionType_1',
       'SenderAccountMatch_1', 'Evaluation_1', 'PresenceIndividual_1',
       'SpecificIndividual_1', 'PresenceCollective_1', 'SpecificCollective_1',
       'PresenceSystems_1', 'SpecificSystem_1', 'PresenceNetwork_1',
       'SpecificNetworks_1', 'Economy_1', 'SocialPolicies_1',
       'PoliticsHumanRight_1', 'ExternalRelations_1', 'PastTense_1',
       'PresentTense_1', 'FutureTense_1'],
      dtype='object')

In [4]:
results['Coder_ID'].unique()

array(['Coder ID', '{"ImportId":"QID1"}', 'Luke', 'Francesco', 'Ada'],
      dtype=object)

In [5]:
results = results.loc[:, 'Coder_ID':'FutureTense_1']

In [7]:
results = results.iloc[2:]

In [8]:
results.head()

Unnamed: 0,Coder_ID,Post_ID,Attribution_Presence,Attribution_Number,AttributionType_1,SenderAccountMatch_1,Evaluation_1,PresenceIndividual_1,SpecificIndividual_1,PresenceCollective_1,...,SpecificSystem_1,PresenceNetwork_1,SpecificNetworks_1,Economy_1,SocialPolicies_1,PoliticsHumanRight_1,ExternalRelations_1,PastTense_1,PresentTense_1,FutureTense_1
2,Luke,1,Yes,First,Affirmed causal responsibility,No,Negatively,No,,Yes,...,,,,No,No,Yes,,No,Yes,
3,Luke,2,Yes,First,Affirmed causal responsibility,No,Negatively,No,,Yes,...,,,,No,No,Yes,,No,Yes,
4,Luke,3,Yes,First,Affirmed causal responsibility,Yes,Negatively,No,,Yes,...,,,,No,No,Yes,,Yes,,
5,Francesco,1,Yes,First,Affirmed causal responsibility,Yes,Neutrally,,,,...,,,,,,,,,,
6,Luke,4,Yes,First,Affirmed causal responsibility,Yes,Positively,Yes,Other Individuals,,...,,,,No,No,Yes,,No,Yes,


In [10]:
results_filtered = results[results['Coder_ID'] != 'Francesco']

In [18]:
results_filtered

Unnamed: 0,Coder_ID,Post_ID,Attribution_Presence,Attribution_Number,AttributionType_1,SenderAccountMatch_1,Evaluation_1,PresenceIndividual_1,SpecificIndividual_1,PresenceCollective_1,...,SpecificSystem_1,PresenceNetwork_1,SpecificNetworks_1,Economy_1,SocialPolicies_1,PoliticsHumanRight_1,ExternalRelations_1,PastTense_1,PresentTense_1,FutureTense_1
2,Luke,1,Yes,First,Affirmed causal responsibility,No,Negatively,No,,Yes,...,,,,No,No,Yes,,No,Yes,
3,Luke,2,Yes,First,Affirmed causal responsibility,No,Negatively,No,,Yes,...,,,,No,No,Yes,,No,Yes,
4,Luke,3,Yes,First,Affirmed causal responsibility,Yes,Negatively,No,,Yes,...,,,,No,No,Yes,,Yes,,
6,Luke,4,Yes,First,Affirmed causal responsibility,Yes,Positively,Yes,Other Individuals,,...,,,,No,No,Yes,,No,Yes,
7,Luke,5,Yes,First,Affirmed causal responsibility,Yes,Negatively,No,,Yes,...,,,,No,No,Yes,,No,No,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,Ada,34,Yes,First,Affirmed causal responsibility,Yes,Neutrally,,,,...,,,,,,,,,,
112,Ada,35,Yes,First,Affirmed causal responsibility,No,Neutrally,,,,...,,,,,,,,,,
113,Ada,35,Yes,Second,Affirmed causal responsibility,No,Negatively,No,,No,...,,No,,,,,,,,
114,Ada,36,Yes,First,Negated causal responsibility,Yes,Negatively,Yes,Donald Trump,,...,,,,No,No,No,No,,,


In [34]:
#Create a Unique Attribution Identifier
## This ensures annotations on the same post + attribution number
results_filtered['PostAttr_ID'] = results_filtered['Post_ID'].astype(str) + '_' + results_filtered['Attribution_Number']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_filtered['PostAttr_ID'] = results_filtered['Post_ID'].astype(str) + '_' + results_filtered['Attribution_Number']


In [44]:
pivot_df = results_filtered.pivot_table(
    index='PostAttr_ID',
    columns='Coder_ID',
    values='AttributionType_1',  # or any other column you want to compare
    aggfunc='first'  # just in case there's only one per coder
).dropna()  # drop rows where either coder didn’t code

In [45]:
agreement = (pivot_df['Ada'] == pivot_df['Luke']).mean()
print(f'Percent agreement: {agreement:.2%}')

Percent agreement: 93.33%


In [46]:
# Each inner list is a unit (i.e., one PostAttr_ID)
data = pivot_df.transpose().values.tolist()

# Calculate K-alpha (nominal scale)
alpha = krippendorff.alpha(reliability_data=data, level_of_measurement='nominal')

print(f"Krippendorff's alpha: {alpha:.3f}")

Krippendorff's alpha: 0.372


In [31]:
def get_disagreement_rows(df):
    return df[df.nunique(axis=1) > 1]

disagreement_rows = get_disagreement_rows(pivot_df)
print(disagreement_rows)

Coder_ID                               Ada                            Luke
PostAttr_ID                                                               
17_First     Negated causal responsibility  Affirmed causal responsibility
36_First     Negated causal responsibility  Affirmed causal responsibility
8_Second     Negated causal responsibility  Affirmed causal responsibility
