<a href="https://colab.research.google.com/github/dataskeptic/similarities/blob/main/nilc_wordembeddings_distances.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pot

In [None]:
!pip install spacy

In [None]:
!python -m spacy download pt_core_news_lg

In [None]:
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import SnowballStemmer
import pandas as pd
import numpy as np
import nltk
import re
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')

In [5]:
import spacy
nlp = spacy.load('pt_core_news_lg')

In [6]:
def to_lowercase(data):
    return data.str.lower()

def remove_punct(data):
    return data.str.replace(r'[^\w\s]', ' ', regex=True)

def remove_stopwords(text):
    stop_words = spacy.lang.pt.stop_words.STOP_WORDS
    doc = nlp(text)
    return ' '.join([token.text for token in doc if token.text.lower() not in stop_words])

def lemmatize(text):
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text, language='portuguese')
    return ' '.join([lemmatizer.lemmatize(token) for token in tokens])

def stem(text):
    stemmer = SnowballStemmer('portuguese')
    tokens = word_tokenize(text, language='portuguese')
    return ' '.join([stemmer.stem(token) for token in tokens])

def preprocess_text(data):
    data = remove_punct(data)
    data = to_lowercase(data)
    data = data.apply(remove_stopwords)
    data = data.apply(lemmatize)
    #data = data.apply(stem)
    return data

In [7]:
path_reference_answers = "drive/MyDrive/data/poo/reference_answers_extended.xlsx"
path_student_answers = "drive/MyDrive/data/poo/student_answers.xlsx"

In [8]:
reference_answers = pd.read_excel(path_reference_answers)
student_answers = pd.read_excel(path_student_answers)

print(reference_answers)
print("---------------------------------------")
print(student_answers)

    question_id                                        refans_text  \
0             1  Polimorfismo é uma característica das linguage...   
1             1  Polimorfismo é a capacidade de uma classe se c...   
2             1  Polimorfismo refere-se à capacidade de um méto...   
3             1  Em programação orientada a objetos, polimorfis...   
4             1  O polimorfismo é um conceito em programação or...   
5             2  Classe são modelos descritivos para a criação ...   
6             2  Uma classe é uma descrição abstrata de um tipo...   
7             2  Classes são estruturas fundamentais em program...   
8             2  Em linguagens orientadas a objetos, uma classe...   
9             2  Classes em programação orientada a objetos ser...   
10            3  Herança é um conceito que permite que uma clas...   
11            3  Herança na programação orientada a objetos é u...   
12            3  Herança é um mecanismo que permite que uma nov...   
13            3  Her

In [9]:
reference_answers['refans_preprocess'] = preprocess_text(reference_answers['refans_text'])

In [10]:
print(reference_answers['refans_preprocess'])

0     polimorfismo característica linguagens orienta...
1     polimorfismo capacidade classe comportar difer...
2     polimorfismo refere capacidade método várias i...
3     programação orientada objetos polimorfismo pri...
4     polimorfismo conceito programação orientada ob...
5     classe modelos descritivos criação tipos lingu...
6     classe descrição abstrata objeto definindo atr...
7     class estruturas fundamentais programação orie...
8     linguagens orientadas objetos classe modelo pr...
9     class programação orientada objetos servem mol...
10    herança conceito permite classe absorva estrut...
11    herança programação orientada objetos princípi...
12    herança mecanismo permite classe herde campos ...
13    herança capacidade classe herdar atributos mét...
14    herança oop permite classe subclasse herde car...
15    private gt permite acesso própria classepublic...
16    existe 4 tipos modificadores acesso public cla...
17    java modificadores acesso determinam visib

In [11]:
student_answers['answer_preprocess'] = preprocess_text(student_answers['answer_text'])

In [12]:
print(student_answers['answer_preprocess'])

0      polimorfismo nome sugere múltiplas formas mane...
1      class modelos estruturas coisas transcritos có...
2      herança poo fácil entendida olhada contexto us...
3      possíveis modificadores acesso 4 public privat...
4      polimorfismo estático permite criemos métodos ...
                             ...                        
119                                                  nao
120    polimorfismo permite contrato definido classe ...
121    class estruturas dado reunem conjunto métodos ...
122    herança permite atributos métodos classe sejam...
123    public permite método atributo classe seja cha...
Name: answer_preprocess, Length: 124, dtype: object


In [None]:
from ot import emd2

In [14]:
from gensim.models import KeyedVectors
model_cbow = KeyedVectors.load_word2vec_format("drive/MyDrive/wordembeddings/cbow_s300.txt")

In [None]:
model_cbow.init_sims(replace=True)

In [16]:
model_glove = KeyedVectors.load_word2vec_format("drive/MyDrive/wordembeddings/glove_s300.txt")

In [None]:
model_glove.init_sims(replace=True)

In [18]:
model_skip = KeyedVectors.load_word2vec_format("drive/MyDrive/wordembeddings/skip_s300.txt")

In [None]:
model_skip.init_sims(replace=True)

In [20]:
from gensim.similarities import WmdSimilarity


In [81]:

def compute_wmd_similarities(student_df, reference_df, model, prefix):
    wmd_similarities = pd.DataFrame()

    for questionId in range(1, 5):
        temp_student_answers = student_df[student_df['question_id'] == questionId].copy()
        refans_answers = reference_df['refans_preprocess'][reference_df['question_id'] == questionId].copy()
        refans_answers = [ref.split() for ref in refans_answers]  # Split just once for each questionId
        wmd_similarity = WmdSimilarity(refans_answers, model)

        for index, row in temp_student_answers.iterrows():
            temp_answer = row['answer_preprocess']
            similarities = wmd_similarity[temp_answer.split()]

            similarity = max(similarities), np.mean(similarities), np.median(similarities), min(similarities)
            columns = [f"max_{prefix}", f"mean_{prefix}", f"median_{prefix}", f"min_{prefix}"]
            temp_student_answers.loc[index, columns] = similarity

        wmd_similarities = pd.concat([wmd_similarities, temp_student_answers])

    return wmd_similarities[columns]


final_df = student_answers.copy()

columns_cbow = compute_wmd_similarities(student_answers, reference_answers, model_cbow, "cbow")
final_df = pd.concat([final_df, columns_cbow], axis=1)

columns_glove = compute_wmd_similarities(student_answers, reference_answers, model_glove, "glove")
final_df = pd.concat([final_df, columns_glove], axis=1)

columns_skip = compute_wmd_similarities(student_answers, reference_answers, model_skip, "skip")
final_df = pd.concat([final_df, columns_skip], axis=1)
wmd_similarities = final_df.copy()
print(wmd_similarities)


     question_id                                        answer_text  notas  \
0              1  Polimorfismo é, como o nome sugere (múltiplas ...   1.11   
1              2  Classes são modelos/"estruturas" de coisas tra...   1.11   
2              3  Herança, em POO, é bem fácil de ser entendida ...   1.00   
3              4  Os possíveis modificadores de acesso são 4: pu...   1.11   
4              1  O polimorfismo estático ele permite que nós cr...   0.40   
..           ...                                                ...    ...   
119            4                                            Nao sei   0.00   
120            1  Polimorfismo permite que a partir de um contra...   0.70   
121            2  Classes são estruturas de dados, que reunem um...   0.60   
122            3  Herança permite que atributos ou métodos de um...   1.11   
123            4  public - Permite que qualquer método ou atribu...   0.75   

                                     answer_preprocess  max_cbo

In [40]:

def compute_cosine_similarities(student_df, reference_df, model, prefix):
    cosine_similarities = pd.DataFrame()

    for questionId in range(1, 5):
        temp_student_answers = student_df[student_df['question_id'] == questionId].copy()
        refans_answers = reference_df['refans_preprocess'][reference_df['question_id'] == questionId].copy()
        refans_answers = [ref.split() for ref in refans_answers]  # Split just once for each questionId

        for index, row in temp_student_answers.iterrows():
            temp_answer = row['answer_preprocess'].split()
            similarities = ([model.n_similarity(temp_answer, ref) for ref in refans_answers])

            similarity = max(similarities), np.mean(similarities), np.median(similarities), min(similarities)
            columns = [f"max_{prefix}", f"mean_{prefix}", f"median_{prefix}", f"min_{prefix}"]
            temp_student_answers.loc[index, columns] = similarity

        cosine_similarities = pd.concat([cosine_similarities, temp_student_answers])

    return cosine_similarities[columns]

# Assuming student_answers and reference_answers are your dataframes and model_cbow, model_glove, and model_skip are your models

final_df = student_answers.copy()

columns_cbow = compute_cosine_similarities(student_answers, reference_answers, model_cbow, "cbow")
final_df = pd.concat([final_df, columns_cbow], axis=1)

columns_glove = compute_cosine_similarities(student_answers, reference_answers, model_glove, "glove")
final_df = pd.concat([final_df, columns_glove], axis=1)

columns_skip = compute_cosine_similarities(student_answers, reference_answers, model_skip, "skip")
final_df = pd.concat([final_df, columns_skip], axis=1)
cosine_similarities = final_df.copy()
print(cosine_similarities)

     question_id                                        answer_text  notas  \
0              1  Polimorfismo é, como o nome sugere (múltiplas ...   1.11   
1              2  Classes são modelos/"estruturas" de coisas tra...   1.11   
2              3  Herança, em POO, é bem fácil de ser entendida ...   1.00   
3              4  Os possíveis modificadores de acesso são 4: pu...   1.11   
4              1  O polimorfismo estático ele permite que nós cr...   0.40   
..           ...                                                ...    ...   
119            4                                            Nao sei   0.00   
120            1  Polimorfismo permite que a partir de um contra...   0.70   
121            2  Classes são estruturas de dados, que reunem um...   0.60   
122            3  Herança permite que atributos ou métodos de um...   1.11   
123            4  public - Permite que qualquer método ou atribu...   0.75   

                                     answer_preprocess  max_cbo

In [89]:
def assign_grades(column):
    median = wmd_similarities[column].median()
    std = wmd_similarities[column].std()
    lower_bound = median - std
    upper_bound = median + std

    def grade(similarity):
        if similarity < lower_bound:
            return 0
        elif similarity < median:
            return 1
        elif similarity < upper_bound:
            return 2
        else:
            return 3

    return wmd_similarities[column].apply(grade)

graded_wmd = wmd_similarities.copy()

# Assign grades to each column in the dataframe
for column in wmd_similarities.columns[4:]:
    graded_wmd[f"{column}_grade"] = assign_grades(column)

print(graded_wmd[['max_cbow_grade', 'mean_cbow_grade', 'median_cbow_grade',
       'min_cbow_grade', 'max_glove_grade', 'mean_glove_grade',
       'median_glove_grade', 'min_glove_grade', 'max_skip_grade',
       'mean_skip_grade', 'median_skip_grade', 'min_skip_grade']])

     max_cbow_grade  mean_cbow_grade  median_cbow_grade  min_cbow_grade  \
0                 2                2                  2               2   
1                 1                1                  1               1   
2                 2                2                  2               3   
3                 3                3                  3               2   
4                 2                2                  2               2   
..              ...              ...                ...             ...   
119               0                0                  0               0   
120               1                2                  2               2   
121               1                1                  1               1   
122               2                2                  2               2   
123               2                2                  2               2   

     max_glove_grade  mean_glove_grade  median_glove_grade  min_glove_grade  \
0                  2

In [104]:
def assign_grades(column):
    median = cosine_similarities[column].median()
    std = cosine_similarities[column].std()
    lower_bound = median - std
    upper_bound = median + std

    def grade(similarity):
        if similarity < lower_bound+0.1:
            return 0
        elif similarity < median-0.05:
            return 1
        elif similarity < upper_bound-0.1:
            return 2
        else:
            return 3

    return cosine_similarities[column].apply(grade)

graded_cosines = cosine_similarities.copy()

# Assign grades to each column in the dataframe
for column in cosine_similarities.columns[4:]:
    graded_cosines[f"{column}_grade"] = assign_grades(column)

print(graded_cosines[['max_cbow_grade', 'mean_cbow_grade', 'median_cbow_grade',
       'min_cbow_grade', 'max_glove_grade', 'mean_glove_grade',
       'median_glove_grade', 'min_glove_grade', 'max_skip_grade',
       'mean_skip_grade', 'median_skip_grade', 'min_skip_grade']])

     max_cbow_grade  mean_cbow_grade  median_cbow_grade  min_cbow_grade  \
0                 3                3                  3               3   
1                 2                2                  2               2   
2                 3                3                  3               3   
3                 3                2                  3               2   
4                 2                2                  2               3   
..              ...              ...                ...             ...   
119               0                0                  0               0   
120               2                2                  2               2   
121               2                2                  2               3   
122               2                2                  2               2   
123               2                1                  1               0   

     max_glove_grade  mean_glove_grade  median_glove_grade  min_glove_grade  \
0                  2

In [60]:
columns = ['max', 'mean', 'median', 'min']

for size in columns:
  mean_cbow_cbow = graded_cosines[f'{size}_cbow'].mean()
  median_cbow_cbow = graded_cosines[f'{size}_cbow'].median()
  std_cbow_cbow = graded_cosines[f'{size}_cbow'].std()
  min_cbow_cbow = graded_cosines[f'{size}_cbow'].min()
  max_cbow_cbow = graded_cosines[f'{size}_cbow'].max()

  print(f'{size.capitalize()} statistics:')
  print(f'Mean cbow: {mean_cbow_cbow:.2f}')
  print(f'Median cbow: {median_cbow_cbow:.2f}')
  print(f'Min cbow: {min_cbow_cbow:.2f}')
  print(f'Max cbow: {max_cbow_cbow:.2f}')
  print(f'Standard deviation cbow: {std_cbow_cbow:.2f}')
  print()

Max statistics:
Mean cbow: 0.66
Median cbow: 0.74
Min cbow: 0.04
Max cbow: 1.00
Standard deviation cbow: 0.22

Mean statistics:
Mean cbow: 0.60
Median cbow: 0.68
Min cbow: -0.01
Max cbow: 0.86
Standard deviation cbow: 0.21

Median statistics:
Mean cbow: 0.61
Median cbow: 0.68
Min cbow: -0.01
Max cbow: 0.87
Standard deviation cbow: 0.21

Min statistics:
Mean cbow: 0.54
Median cbow: 0.61
Min cbow: -0.05
Max cbow: 0.82
Standard deviation cbow: 0.21



In [106]:
def similarity_to_score(similarity):
    if similarity <= 0.3 :
        score = 0
    elif similarity <= 0.6:
        score = 1
    elif similarity < 0.9:
        score = 2
    else:
        score = 3
    return score

graded_wmd['grade'] = graded_wmd['notas'].apply(similarity_to_score)

In [107]:
def similarity_to_score(similarity):
    if similarity <= 0.3 :
        score = 0
    elif similarity <= 0.6:
        score = 1
    elif similarity < 0.9:
        score = 2
    else:
        score = 3
    return score

graded_cosines['grade'] = graded_cosines['notas'].apply(similarity_to_score)

In [48]:
from sklearn.metrics import cohen_kappa_score

In [108]:
Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_cbow_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_cbow_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_cbow_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_cbow_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max cbow Cohen's Kappa:", Kappa_max)
print("Mean cbow Cohen's Kappa:", Kappa_mean)
print("Median cbow Cohen's Kappa:", Kappa_median)
print("Min cbow Cohen's Kappa:", Kappa_min)
print('\n')

Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_glove_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_glove_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_glove_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_glove_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max glove Cohen's Kappa:", Kappa_max)
print("Mean glove Cohen's Kappa:", Kappa_mean)
print("Median glove Cohen's Kappa:", Kappa_median)
print("Min glove Cohen's Kappa:", Kappa_min)
print('\n')


Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_skip_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_skip_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_skip_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_skip_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max skip Cohen's Kappa:", Kappa_max)
print("Mean skip Cohen's Kappa:", Kappa_mean)
print("Median skip Cohen's Kappa:", Kappa_median)
print("Min skip Cohen's Kappa:", Kappa_min)
print('\n')

Max cbow Cohen's Kappa: 0.3282844220048384
Mean cbow Cohen's Kappa: 0.3810846969229186
Median cbow Cohen's Kappa: 0.39016393442622943
Min cbow Cohen's Kappa: 0.3852509812022309


Max glove Cohen's Kappa: 0.34791754312158174
Mean glove Cohen's Kappa: 0.3710421149707961
Median glove Cohen's Kappa: 0.3774590163934426
Min glove Cohen's Kappa: 0.38133936837773885


Max skip Cohen's Kappa: 0.33123887318043765
Mean skip Cohen's Kappa: 0.37733142037302725
Median skip Cohen's Kappa: 0.39016393442622943
Min skip Cohen's Kappa: 0.38651433234724064




In [109]:
Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_cbow_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_cbow_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_cbow_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_cbow_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max cbow Cohen's Kappa:", Kappa_max)
print("Mean cbow Cohen's Kappa:", Kappa_mean)
print("Median cbow Cohen's Kappa:", Kappa_median)
print("Min cbow Cohen's Kappa:", Kappa_min)
print('\n')

Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_glove_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_glove_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_glove_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_glove_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max glove Cohen's Kappa:", Kappa_max)
print("Mean glove Cohen's Kappa:", Kappa_mean)
print("Median glove Cohen's Kappa:", Kappa_median)
print("Min glove Cohen's Kappa:", Kappa_min)
print('\n')


Kappa_max = cohen_kappa_score(graded_wmd['grade'], graded_wmd['max_skip_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_wmd['grade'], graded_wmd['mean_skip_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_wmd['grade'], graded_wmd['median_skip_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_wmd['grade'], graded_wmd['min_skip_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max skip Cohen's Kappa:", Kappa_max)
print("Mean skip Cohen's Kappa:", Kappa_mean)
print("Median skip Cohen's Kappa:", Kappa_median)
print("Min skip Cohen's Kappa:", Kappa_min)
print('\n')

Max cbow Cohen's Kappa: 0.4498119417019276
Mean cbow Cohen's Kappa: 0.5075591586327782
Median cbow Cohen's Kappa: 0.5121793489976287
Min cbow Cohen's Kappa: 0.5140431090790333


Max glove Cohen's Kappa: 0.47691586594797275
Mean glove Cohen's Kappa: 0.5074578469520103
Median glove Cohen's Kappa: 0.5121793489976287
Min glove Cohen's Kappa: 0.5265381282495667


Max skip Cohen's Kappa: 0.4577154772937905
Mean skip Cohen's Kappa: 0.5027097333622372
Median skip Cohen's Kappa: 0.5121793489976287
Min skip Cohen's Kappa: 0.5166525243954179




In [110]:
Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_cbow_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_cbow_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_cbow_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_cbow_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max cbow Cohen's Kappa:", Kappa_max)
print("Mean cbow Cohen's Kappa:", Kappa_mean)
print("Median cbow Cohen's Kappa:", Kappa_median)
print("Min cbow Cohen's Kappa:", Kappa_min)
print('\n')

Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_glove_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_glove_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_glove_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_glove_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max glove Cohen's Kappa:", Kappa_max)
print("Mean glove Cohen's Kappa:", Kappa_mean)
print("Median glove Cohen's Kappa:", Kappa_median)
print("Min glove Cohen's Kappa:", Kappa_min)
print('\n')


Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_skip_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_skip_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_skip_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_skip_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max skip Cohen's Kappa:", Kappa_max)
print("Mean skip Cohen's Kappa:", Kappa_mean)
print("Median skip Cohen's Kappa:", Kappa_median)
print("Min skip Cohen's Kappa:", Kappa_min)
print('\n')

Max cbow Cohen's Kappa: 0.4364239118810823
Mean cbow Cohen's Kappa: 0.4177796327212019
Median cbow Cohen's Kappa: 0.4488305640808551
Min cbow Cohen's Kappa: 0.3597994987468671


Max glove Cohen's Kappa: 0.3828583403185247
Mean glove Cohen's Kappa: 0.3720194133783499
Median glove Cohen's Kappa: 0.3678789152827412
Min glove Cohen's Kappa: 0.3233113673805601


Max skip Cohen's Kappa: 0.40740740740740755
Mean skip Cohen's Kappa: 0.39669153421991565
Median skip Cohen's Kappa: 0.4093959731543625
Min skip Cohen's Kappa: 0.3332650553052029




In [111]:
Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_cbow_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_cbow_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_cbow_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_cbow_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max cbow Cohen's Kappa:", Kappa_max)
print("Mean cbow Cohen's Kappa:", Kappa_mean)
print("Median cbow Cohen's Kappa:", Kappa_median)
print("Min cbow Cohen's Kappa:", Kappa_min)
print('\n')

Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_glove_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_glove_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_glove_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_glove_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max glove Cohen's Kappa:", Kappa_max)
print("Mean glove Cohen's Kappa:", Kappa_mean)
print("Median glove Cohen's Kappa:", Kappa_median)
print("Min glove Cohen's Kappa:", Kappa_min)
print('\n')


Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_skip_grade'], weights='quadratic')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_skip_grade'], weights='quadratic')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_skip_grade'], weights='quadratic')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_skip_grade'], weights='quadratic')


# Print the cbow Cohen's Kappa metric
print("Max skip Cohen's Kappa:", Kappa_max)
print("Mean skip Cohen's Kappa:", Kappa_mean)
print("Median skip Cohen's Kappa:", Kappa_median)
print("Min skip Cohen's Kappa:", Kappa_min)
print('\n')

Max cbow Cohen's Kappa: 0.547228144989339
Mean cbow Cohen's Kappa: 0.5203218485661234
Median cbow Cohen's Kappa: 0.5710691823899371
Min cbow Cohen's Kappa: 0.45518954729481054


Max glove Cohen's Kappa: 0.4870503597122302
Mean glove Cohen's Kappa: 0.4420126815299652
Median glove Cohen's Kappa: 0.44051220004033076
Min glove Cohen's Kappa: 0.41174145087962044


Max skip Cohen's Kappa: 0.5380338078291815
Mean skip Cohen's Kappa: 0.5171127687582273
Median skip Cohen's Kappa: 0.5316596311930684
Min skip Cohen's Kappa: 0.4388189738625363




In [None]:
Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_cbow_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_cbow_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_cbow_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_cbow_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max cbow Cohen's Kappa:", Kappa_max)
print("Mean cbow Cohen's Kappa:", Kappa_mean)
print("Median cbow Cohen's Kappa:", Kappa_median)
print("Min cbow Cohen's Kappa:", Kappa_min)
print('\n')

Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_glove_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_glove_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_glove_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_glove_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max glove Cohen's Kappa:", Kappa_max)
print("Mean glove Cohen's Kappa:", Kappa_mean)
print("Median glove Cohen's Kappa:", Kappa_median)
print("Min glove Cohen's Kappa:", Kappa_min)
print('\n')


Kappa_max = cohen_kappa_score(graded_cosines['grade'], graded_cosines['max_skip_grade'], weights='linear')
Kappa_mean = cohen_kappa_score(graded_cosines['grade'], graded_cosines['mean_skip_grade'], weights='linear')
Kappa_median = cohen_kappa_score(graded_cosines['grade'], graded_cosines['median_skip_grade'], weights='linear')
Kappa_min = cohen_kappa_score(graded_cosines['grade'], graded_cosines['min_skip_grade'], weights='linear')


# Print the cbow Cohen's Kappa metric
print("Max skip Cohen's Kappa:", Kappa_max)
print("Mean skip Cohen's Kappa:", Kappa_mean)
print("Median skip Cohen's Kappa:", Kappa_median)
print("Min skip Cohen's Kappa:", Kappa_min)
print('\n')

Max Cohen's Kappa: 0.5146926729115873
Mean Cohen's Kappa: 0.4729764019284446
Median Cohen's Kappa: 0.49342525213838884
Min Cohen's Kappa: 0.44695528068506185


In [66]:
zero = 0
um = 0
dois = 0
tres = 0

def count_result(result):
    global zero, um, dois, tres
    if result == 0:
        zero = zero + 1
    elif result == 1:
        um = um + 1
    elif result == 2:
        dois = dois + 1
    elif result == 3:
        tres = tres + 1
    return result
graded_cosines['grade'].apply(count_result)
print(zero, um, dois, tres)

21 9 56 38


In [113]:
zero = 0
um = 0
dois = 0
tres = 0

def count_result(result):
    global zero, um, dois, tres
    if result == 0:
        zero = zero + 1
    elif result == 1:
        um = um + 1
    elif result == 2:
        dois = dois + 1
    elif result == 3:
        tres = tres + 1
    return result
graded_cosines['max_cbow_grade'].apply(count_result)
print(zero, um, dois, tres)

27 13 64 20
