In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import nltk
from textblob import TextBlob

In [30]:
import scipy.stats as stats
from statsmodels.stats.anova import AnovaRM
import random

In [8]:
def calculate_metrics(df):
    """
    Calculates the average sentence length, average grammatical complexity, and average sentiment
    score for each row of text in the input DataFrame.
    
    Parameters:
        df (Pandas DataFrame): A DataFrame with a single column of text
    
    Returns:
        Pandas DataFrame: A new DataFrame with the columns 'average_sentence_length', 'average_grammatical_complexity',
        and 'average_sentiment'
    """
    # Define a function to calculate the POS tag count for a given sentence
    def pos_tag_count(sentence):
        pos_tags = nltk.pos_tag(nltk.word_tokenize(sentence))
        tag_count = len(pos_tags)
        return tag_count
    
    # Define a function to calculate the sentiment score for a given sentence
    def sentiment_score(sentence):
        blob = TextBlob(sentence)
        score = blob.sentiment.polarity
        return score
    
    # Tokenize the text into sentences
    sentences = df['prompt'].apply(nltk.sent_tokenize)
    
    # Calculate the average sentence length for each row
    df['average_sentence_length'] = sentences.apply(lambda x: sum(len(sentence.split()) for sentence in x)/len(x))
    
    # Calculate the average POS tag count for each row
    df['average_grammatical_complexity'] = sentences.apply(lambda x: sum(pos_tag_count(sentence) for sentence in x)/len(x))
    
    # Calculate the average sentiment score for each row
    df['average_sentiment'] = sentences.apply(lambda x: sum(sentiment_score(sentence) for sentence in x)/len(x))
    
    # Drop the 'sentences' column
    #df = df.drop('sentences', axis=1)
    
    return df[['average_sentence_length', 'average_grammatical_complexity', 'average_sentiment']]


In [42]:
df = pd.read_excel("3class.xlsx")

metrics = calculate_metrics(df)

In [43]:
df = df.drop(columns=['prompt'])

In [47]:
df

Unnamed: 0,completion,average_sentence_length,average_grammatical_complexity,average_sentiment
0,college,24.437500,28.062500,0.287285
1,college,15.057143,16.628571,0.072698
2,college,24.045455,27.500000,0.274590
3,10thgrade,14.978723,17.234043,0.107646
4,6thgrade,20.409091,24.136364,0.207507
...,...,...,...,...
295,college,21.138889,23.777778,0.191063
296,college,19.421053,21.789474,0.125174
297,6thgrade,19.434783,21.000000,0.342120
298,6thgrade,20.236842,22.710526,0.070582


In [51]:
import pandas as pd
from scipy.stats import friedmanchisquare

# Load your data into a DataFrame
#df = pd.read_csv('data.csv')

# Define your variables
completion = 'completion'
metrics = ['average_sentence_length', 'average_grammatical_complexity', 'average_sentiment']

# Perform the Friedman test for each metric
for metric in metrics:
    data = []
    for level in df[completion].unique():
        data.append(df[df[completion]==level][metric].values)
    stat, p = friedmanchisquare(*data)
    print(f'Friedman test for {metric}: chi-squared={stat:.3f}, p={p:.10f}')


Friedman test for average_sentence_length: chi-squared=38.000, p=0.0000000056
Friedman test for average_grammatical_complexity: chi-squared=41.060, p=0.0000000012
Friedman test for average_sentiment: chi-squared=7.740, p=0.0208583694


In [13]:
df2 = pd.read_excel("human_ai.xlsx")

metrics2 = calculate_metrics(df2)

In [14]:
df2

Unnamed: 0,prompt,completion,average_sentence_length,average_grammatical_complexity,average_sentiment
0,\n\nThe NBA is home to some of the most amazin...,ai,17.975610,20.878049,0.180748
1,Evelyn McHale’s Most Beautiful Suicide\n\nA ph...,human,16.428571,18.968254,0.107971
2,\n\nIn a world where technology has become an ...,ai,19.000000,21.638889,0.184185
3,An Analysis of the Role of Media Coverage of t...,human,39.333333,43.166667,-0.009496
4,\n\nThe Familial Blackmail Of A Popular Senato...,ai,16.292683,18.097561,0.072050
...,...,...,...,...,...
445,"\n\nIn today's ever-evolving world, communicat...",ai,20.485714,23.685714,0.084739
446,\n\nThe study of Anglo-Saxon culture and histo...,ai,23.457143,25.971429,0.093564
447,\n\nA Vacation to Mars: The Biggest Scam in Mo...,ai,21.029412,23.235294,0.024079
448,\n\nThe Winter Goddess\n\nWinter is one of the...,ai,19.828571,22.028571,0.081576


In [32]:
# Get 50 random entries from each category
human_data = df2[df2['completion'] == 'human'].sample(n=50, random_state=42)
ai_data = df2[df2['completion'] == 'ai'].sample(n=50, random_state=42)

# Combine the two dataframes
df_new = pd.concat([human_data, ai_data])

# Perform dependent samples t-test between categories 1 and 2
t_stat1, p_val1 = stats.ttest_rel(df_new[df_new['completion'] == 'human']['average_sentence_length'], 
                                  df_new[df_new['completion'] == 'ai']['average_sentence_length'])

# Perform dependent samples t-test between categories 1 and 3
t_stat2, p_val2 = stats.ttest_rel(df_new[df_new['completion'] == 'human']['average_grammatical_complexity'], 
                                  df_new[df_new['completion'] == 'ai']['average_grammatical_complexity'])

# Perform dependent samples t-test between categories 2 and 3
t_stat3, p_val3 = stats.ttest_rel(df_new[df_new['completion'] == 'human']['average_sentiment'], 
                                  df_new[df_new['completion'] == 'ai']['average_sentiment'])

# Print results
print('T-statistic for average_sentence_length:', t_stat1)
print('P-value for average_sentence_length:', p_val1)
print('T-statistic for caverage_grammatical_complexity:', t_stat2)
print('P-value for average_grammatical_complexity:', p_val2)
print('T-statistic for average_sentiment:', t_stat3)
print('P-value for average_sentiment:', p_val3)

T-statistic for average_sentence_length: 2.3814547798227776
P-value for average_sentence_length: 0.021173470428243146
T-statistic for caverage_grammatical_complexity: 2.8082946812703815
P-value for average_grammatical_complexity: 0.007132242100184873
T-statistic for average_sentiment: -2.567474969176779
P-value for average_sentiment: 0.013346831895158473
