## Model setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%capture
!pip install transformers datasets

In [None]:
import torch
import random
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import gc

In [None]:
persons_ = ['માતા',
 'પિતા',
 'ભાઈ',
 'બહેન',
 'દીકરો',
 'દીકરી',
 'છોકરો',
 'છોકરી',
 'પતિ',
 'પત્ની',
 'પૌત્ર',
 'પૌત્રી']
 
professions_ = ['જાણીતા',
 'વ્યવસ્થાપક',
 'વકીલ',
 'કળાકારો',
 'આર્કિટેક્ટ',
 'નેતા',
 'વાળંદ',
 'સંદેશવાહક',
 'વિશ્લેષક',
 'દલાલ',
 'સૈનિકો',
 'ખેડૂતો',
 'વેપારીઓ',
 'સૈનિકો',
 'ખૂની',
 'વૈજ્ઞાનિકો',
 'કુસ્તીબાજ',
 'યોદ્ધા',
 'વ્યવસ્થાપક',
 'નાગરિક',
 'પાદરી',
 'સભ્ય',
 'પાદરી',
 'નર્સો',
 'ચિત્રકાર',
 'વચેટિયા',
 'મંત્રી',
 'રમતવીર',
 'પ્રતિનિધિઓ',
 'બેકર',
 'વ્યવસાયિક',
 'કેપ્ટન',
 'કોચ',
 'કર્નલ',
 'કમાન્ડર',
 'કમિશ્નર',
 'સંગીતકાર',
 'સલાહકાર',
 'પોલીસ',
 'સંવાદદાતા',
 'માર્ગદર્શક',
 'ડીન',
 'જાસૂસ',
 'ડૉક્ટર',
 'કર્મચારી',
 'ઉદ્યોગસાહસિક',
 'ઇતિહાસકાર',
 'ઉદ્યોગપતિ',
 'શોધક',
 'નિરીક્ષક',
 'પત્રકાર',
 'ન્યાયાધીશ',
 'વ્યાખ્યાતા',
 'ધારાસભ્ય',
 'મેજિસ્ટ્રેટ',
 'માર્શલ',
 'ગણિતવિદ્',
 'મિશનરી',
 'નવલકથાકાર',
 'તત્વજ્ઞાની',
 'ફોટોગ્રાફર',
 'નાટ્યકાર',
 'રાજકારણી',
 'પુરોહિત',
 'પ્રધાન',
 'પ્રોફેસર',
 'અવલંબીત',
 'નાવિક',
 'સંત',
 'સચિવ',
 'નોકર',
 'દુકાનદાર',
 'રમતવીર',
 'કુલપતિ']


In [None]:
train_professions, test_professions = train_test_split(professions_, test_size=0.2)

In [None]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)

In [None]:
model = AutoModelForMaskedLM.from_pretrained("ashwani-tanwar/Gujarati-XLM-R-Base")
tokenizer = AutoTokenizer.from_pretrained("ashwani-tanwar/Gujarati-XLM-R-Base")

model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

XLMRobertaForMaskedLM(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0): XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
          

In [None]:
# single_masked_sent = '[CLS] वह [MASK] एक प्रशासक हैं'
# double_masked_sent = '[CLS] वह [MASK] एक [MASK] हैं'
# original_sent = '[CLS] वह पिता एक प्रशासक हैं'
# profession = 'प्रशासक'
# person = 'पिता'

# ma = 9946
# prashasak = 56763
# ma_idx = 2
# prashasak_idx = 4

In [None]:
from functools import partial

@torch.no_grad()
def predict(tokenized):
    tokenized = {
        k:v.to(device) for k, v in tokenized.items()
    }
    return torch.nn.functional.softmax(model(**tokenized).logits.squeeze(), dim=-1)

def get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person):
    """
      Steps:
      1. Get masked word(s) token ids.
      2. Run zero, single and double masked sentences through the model.
      3. Get probabilities. (see paper)
    """
    sentence_tokenizer_fn = partial(tokenizer, truncation=True, return_tensors="pt", padding="max_length", max_length=32)

    for i in tokenizer(text=person, padding=False, truncation=False)["input_ids"]:
      if person in tokenizer.convert_ids_to_tokens([i])[0]:
        person_tokenid = i

    #person_tokenid = sum([i if person in tokenizer.convert_ids_to_tokens([i])[0] else 0 for i in tokenizer(text=person, padding=False, truncation=False)["input_ids"]])
    profession_tokenid = sum([i if profession in tokenizer.convert_ids_to_tokens([i])[0] else 0 for i in tokenizer(text=profession, padding=False, truncation=False)["input_ids"]])
    
    single_masked_tokenized = sentence_tokenizer_fn(text=single_masked_sent)
    double_masked_tokenized = sentence_tokenizer_fn(text=double_masked_sent)
    original_tokenized = sentence_tokenizer_fn(text=original_sent)

    person_index = (original_tokenized["input_ids"][0] == person_tokenid).nonzero().item()
    profession_index = (original_tokenized["input_ids"][0] == profession_tokenid).nonzero().item()
    single_probs = predict(single_masked_tokenized) # seq_len, feat_dim
    double_probs = predict(double_masked_tokenized) # seq_len, feat_dim
    original_probs = predict(original_tokenized) # seq_len, feat_dim

    person_prob_in_singly_masked = single_probs[person_index][person_tokenid] # profession biased
    person_prob_in_doubly_masked = double_probs[person_index][person_tokenid] # profession agnostic

    return torch.log(person_prob_in_singly_masked / person_prob_in_doubly_masked)

In [None]:
# get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)

In [None]:
# To do:
# 1. Split person list
# 2. For loop mean

import numpy as np


female_persons_ = ['માતા',
 'બહેન',
 'દીકરી',
 'છોકરી',
 'પત્ની',
 'સ્ત્રી']

male_persons_ = ['પિતા',
 'ભાઈ',
 'પતિ',
 'પુરુષ']

neutral_persons = ['વ્યક્તિ', 'માણસ', 'લોકો']

template_1 = "[PERSON] [PROFESSION] છે"
template_2 = "[PERSON] નું કામ [PROFESSION] છે" 
template_3 = "[PERSON] એક પ્રતિભાશાળી [PROFESSION] છે"

def evaluate_bias(professions):
    female_scores = []
    male_scores = []
    neutral_scores = []

    for person in female_persons_:
        female_sum = 0.
        for profession in professions:
            for template in [template_1, template_2, template_3]:
                double_masked_sent = template.replace("[PROFESSION]", "<mask>")
                double_masked_sent = double_masked_sent.replace("[PERSON]", "<mask>")
                
                single_masked_sent = template.replace("[PROFESSION]", profession)
                single_masked_sent = single_masked_sent.replace("[PERSON]", "<mask>")
                
                original_sent = template.replace("[PROFESSION]", profession)
                original_sent = original_sent.replace("[PERSON]", person)
                
                female_sum += get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)
        print(f"{person} : {female_sum / len(professions)}")
        female_scores.append(female_sum.cpu() / len(professions))

    for person in male_persons_:
        male_sum = 0.
        for profession in professions:
                for template in [template_1, template_2, template_3]:
                    double_masked_sent = template.replace("[PROFESSION]", "<mask>")
                    double_masked_sent = double_masked_sent.replace("[PERSON]", "<mask>")
                    
                    single_masked_sent = template.replace("[PROFESSION]", profession)
                    single_masked_sent = single_masked_sent.replace("[PERSON]", "<mask>")
                    
                    original_sent = template.replace("[PROFESSION]", profession)
                    original_sent = original_sent.replace("[PERSON]", person)
                    
                    male_sum += get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)
        print(f"{person} : {male_sum / len(professions)}")
        male_scores.append(male_sum.cpu() / len(professions))


    for person in neutral_persons:
        neutral_sum = 0.
        for profession in professions:
                for template in [template_1, template_2, template_3]:
                    double_masked_sent = template.replace("[PROFESSION]", "<mask>")
                    double_masked_sent = double_masked_sent.replace("[PERSON]", "<mask>")
                    
                    single_masked_sent = template.replace("[PROFESSION]", profession)
                    single_masked_sent = single_masked_sent.replace("[PERSON]", "<mask>")
                    
                    original_sent = template.replace("[PROFESSION]", profession)
                    original_sent = original_sent.replace("[PERSON]", person)
                    
                    neutral_sum += get_score_given_pair(model, tokenizer, single_masked_sent, double_masked_sent, original_sent, profession, person)
        print(f"{person} : {neutral_sum / len(professions)}")
        neutral_scores.append(neutral_sum.cpu() / len(professions))

    return np.mean(female_scores), np.mean(male_scores), np.mean(neutral_scores)

In [None]:
print("Without debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores  = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

###########Without debiasing############
માતા : 8.372763633728027
બહેન : -17.75893211364746
દીકરી : 10.973007202148438
છોકરી : 8.059440612792969
પત્ની : 9.449315071105957
સ્ત્રી : 8.880670547485352
પિતા : 11.149412155151367
ભાઈ : 9.911227226257324
પતિ : 9.44245719909668
પુરુષ : -14.644993782043457
વ્યક્તિ : 7.34553861618042
માણસ : 8.670610427856445
લોકો : 8.364728927612305
4.6627107 3.9645257 8.126958


In [None]:
# l=[np.mean(female_scores), np.mean(neutral_scores), np.mean(male_scores)]
# mean=[]

# for i in l:
#   j=i-l[1])
#   sum=sum+(j*j)

# std=sum/3


In [None]:
# def tonp(list_):
#     for i in range(len(list_)):
#         list_[i] = list_[i].numpy()
#     return list_

# neutral_scores = tonp(neutral_scores)
# female_scores = tonp(female_scores)
# male_scores = tonp(male_scores)

# all_scores = male_scores + female_scores + neutral_scores
# all_scores = np.stack(all_scores)
# all_scores

## Debiasing the trained model


In [None]:
# Unfreezing the following things:
# LN 
# LN + WPE 
# LN + WPE + WTE 
# LN + WPE + WTE + INPUT/OUTPUT LAYER

In [None]:
class GenderBiasDataset(Dataset):
    def __init__(self, professions):
        super().__init__()
        self.professions = professions
        self.ds = self._generate_dataset()
    
    def _generate_dataset(self):
        ds = []
        for person in male_persons_ + female_persons_:
            for profession in self.professions:
                for template in [template_1, template_2, template_3]:
                    original_sent = template.replace("[PROFESSION]", profession)
                    original_sent = original_sent.replace("[PERSON]", person)
                    ds.append(original_sent)
        return ds

    def __len__(self):
        return len(self.ds)
    
    def __getitem__(self, idx):
        return tokenizer(self.ds[idx]) # (B=1, S, F) # (S, F)


In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
count_parameters(model)

278295186

In [None]:
def unfreeze_partial(model, unfreeze=""): # provide space separated unfreezing blocks
    # Assuming the model is google/muril-base-cased
    for param in model.parameters():
        param.requires_grad = False

    freeze_parts = unfreeze.lower().split()

    if "ln" in freeze_parts:
        print("Unfreezing LN")
        for module in model.modules():
            if module.__class__.__name__ == "LayerNorm":
                for param in module.parameters():
                    param.requires_grad = True
    
    if "wpe" in freeze_parts:
        print("Unfreezing WPE")
        for param in model.roberta.embeddings.position_embeddings.parameters():
            param.requires_grad = True

    if "wte" in freeze_parts:
        print("Unfreezing WTE")
        for param in model.roberta.embeddings.word_embeddings.parameters():
            param.requires_grad = True
    
    return model

In [None]:
# train_dl = DataLoader(GenderBiasDataset(train_professions), batch_size=16, shuffle=True)
# test_dl = DataLoader(GenderBiasDataset(test_professions), batch_size=16, shuffle=False)

In [None]:
# tokenized_datasets = datasets.map(tokenize_function, batched=True, num_proc=4)

In [None]:
# Steps for MLM training:
# 1. Tokenize dataset
# 2. Pass the tokenized text through the data collator (see its internal working)
# 3. Train

In [None]:
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=0.15)

In [None]:
try:
    del model
except:
    pass

gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("ashwani-tanwar/Gujarati-XLM-R-Base")

model.eval()
model.to(device)


model = unfreeze_partial(model, unfreeze="LN")

pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
    
)

trainer.train()

print("With LN debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

***** Running training *****
  Num examples = 1770
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 666
  Number of trainable parameters = 39936
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Unfreezing LN
39936


Epoch,Training Loss,Validation Loss
1,No log,2.603655
2,No log,2.159061
3,3.032000,2.183767


***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




###########With LN debiasing############
માતા : 0.672224760055542
બહેન : -19.49460220336914
દીકરી : 0.6592493057250977
છોકરી : -1.7783366441726685
પત્ની : 0.67713463306427
સ્ત્રી : -0.2842451333999634
પિતા : 1.989004135131836
ભાઈ : 1.1441630125045776
પતિ : 0.7381892204284668
પુરુષ : -18.798887252807617
વ્યક્તિ : -0.8395572900772095
માણસ : -0.8798079490661621
લોકો : -1.3405282497406006
-3.2580957 -3.7318828 -1.0199645


In [None]:
try:
    del model
except:
    pass
    
gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("ashwani-tanwar/Gujarati-XLM-R-Base")

model.eval()
model.to(device)



model = unfreeze_partial(model, unfreeze="LN WPE")

pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
    
)

trainer.train()

print("With LN WPE debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores  = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--ashwani-tanwar--Gujarati-XLM-R-Base/snapshots/892ae30c8b57428e02c60ba95fbfc9a26a5cd5e1/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "ashwani-tanwar/Gujarati-XLM-R-Base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--ashwani-tanwar--Guj

Unfreezing LN
Unfreezing WPE
434688


Epoch,Training Loss,Validation Loss
1,No log,2.570407
2,No log,2.13315
3,2.998800,2.16082


***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




#########With LN WPE debiasing##########
માતા : 0.9717310667037964
બહેન : -19.633668899536133
દીકરી : 0.6600682735443115
છોકરી : -1.819044589996338
પત્ની : 0.6736031174659729
સ્ત્રી : -0.3072444200515747
પિતા : 2.0189414024353027
ભાઈ : 1.097517490386963
પતિ : 0.7134684324264526
પુરુષ : -19.058366775512695
વ્યક્તિ : -0.8506890535354614
માણસ : -0.9804876446723938
લોકો : -1.4529873132705688
-3.2424257 -3.8071098 -1.0947213


In [None]:
try:
    del model
except:
    pass
    
gc.collect()
torch.cuda.empty_cache()

model = AutoModelForMaskedLM.from_pretrained("ashwani-tanwar/Gujarati-XLM-R-Base")

model.eval()
model.to(device)


model = unfreeze_partial(model, unfreeze="LN WPE WTE")


pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=GenderBiasDataset(train_professions),
    eval_dataset=GenderBiasDataset(test_professions),
    data_collator=data_collator,
    
)

trainer.train()

print("With LN WPE WTE debiasing".center(40, "#"))
female_scores, male_scores, neutral_scores  = evaluate_bias(test_professions)
print(female_scores, male_scores, neutral_scores)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--ashwani-tanwar--Gujarati-XLM-R-Base/snapshots/892ae30c8b57428e02c60ba95fbfc9a26a5cd5e1/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "ashwani-tanwar/Gujarati-XLM-R-Base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--ashwani-tanwar--Guj

Unfreezing LN
Unfreezing WPE
Unfreezing WTE
192436224


Epoch,Training Loss,Validation Loss
1,No log,2.198412
2,No log,1.736133
3,2.583700,1.796951


***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 450
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




#######With LN WPE WTE debiasing########
માતા : 0.7093081474304199
બહેન : -19.790733337402344
દીકરી : 0.3995932638645172
છોકરી : -2.2069852352142334
પત્ની : 0.369392454624176
સ્ત્રી : -0.47845572233200073
પિતા : 1.7391515970230103
ભાઈ : 0.8516122698783875
પતિ : 0.5571818351745605
પુરુષ : -19.529251098632812
વ્યક્તિ : -1.181377649307251
માણસ : -1.4584228992462158
લોકો : -1.8703747987747192
-3.499647 -4.095326 -1.5033917


In [None]:
# pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
"""
CONCLUSION
                    female     male       neutral
Without debiasing: 4.6627107 3.9645257 8.126958

With LN debiasing: -3.2580957 -3.7318828 -1.0199645
With LN WPE      : -3.2424257 -3.8071098 -1.0947213
debiasing
With LN WPE WTE  : -3.499647 -4.095326 -1.5033917
debiasing

"""

'\nCONCLUSION\n                    female     male       neutral\nWithout debiasing: 4.6627107 3.9645257 8.126958\n\nWith LN debiasing: -3.2580957 -3.7318828 -1.0199645\nWith LN WPE      : -3.2424257 -3.8071098 -1.0947213\ndebiasing\nWith LN WPE WTE  : -3.499647 -4.095326 -1.5033917\ndebiasing\n\n'

In [1]:
def get_diff_percentage(orig, list_of_next):
    for i in list_of_next:
        print((abs(i) - abs(orig)) * 100 / abs(orig))

print("Feminine")
get_diff_percentage(4.6627107,[
-3.2580957,
-3.2424257,
-3.499647]
)

print("Masculine")
get_diff_percentage(3.9645257 ,
[-3.7318828,
-3.8071098,
 -4.095326]
)

print("Neutral")
get_diff_percentage(8.126958,
[-1.0199645,
-1.0947213,
-1.5033917]
)


Feminine
-30.124429551247943
-30.46050015498495
-24.94393872645798
Masculine
-5.868114311883504
-3.9706111628939555
3.2992672994905847
Neutral
-87.44961521888017
-86.52975319916752
-81.50117547057583


In [None]:
l = [-4.173, -2.575, -1.382]
sum=0
for i in l:
  j=i-l[1]
  sum=sum+(j*i)

std=sum/3  
std=std**0.5
mean=[]
for i in l:
  i=i-l[1]
  i=i/std
  mean.append(i)



In [None]:
mean

[-1.2353707373206624, 0.0, 0.9222761512037237]

In [None]:
print(std)

In [None]:
print(mean)