In [1]:
!pip install spacy



In [2]:
import re
from nltk.corpus import stopwords
from gensim.models import Word2Vec
import gensim
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import nltk
from nltk import pos_tag
from nltk.tokenize import word_tokenize
import spacy

In [3]:
# character name
# data of character name & gender
df = pd.read_csv('data/character.metadata.tsv', sep='\t', header=None)
df_name_gender = df.iloc[:, [3, 5]].dropna()
df_name_gender.columns = ['Name','Gender']
print(df_name_gender.head(5))

                         Name Gender
0                    Akooshay      F
1  Lieutenant Melanie Ballard      F
2         Desolation Williams      M
3          Sgt Jericho Butler      M
4             Bashira Kincaid      F


In [4]:
# build a name dict
name_list = df.iloc[:, [3]].dropna()
name_list=name_list.squeeze().tolist()
name_dict = set()
for item in list(name_list):
    names = item.split()
    for name in names:
            name_dict.add(name.lower())

In [5]:
# English vocabulary specific to a certain gender
# female_nouns.txt contains common words that are only used to refer to females, like 'mother'
female_words=set()
with open('data/female_nouns.txt', 'r') as file:
    for line in file:
        words = re.split('[^a-zA-Z]', line)
        for word in words:
            if word:
                female_words.add(word.lower())                
                
# male_nouns.txt contains common words that are only used to refer to males, like 'uncle'     
male_words=set()
with open('data/male_nouns.txt', 'r') as file:
    for line in file:
        words = re.split('[^a-zA-Z]', line)
        for word in words:
            if word:
                male_words.add(word.lower())

In [6]:
# Define a list of words representing females/males
# females
df_female_name = df_name_gender[df_name_gender['Gender']=='F']['Name']
female_name_dict = set()
for item in df_female_name:
    names = item.split()
    for name in names:
        # remove the names like 'Alice's father' that can have a negative impact on the result
        if not name.lower() in female_words | male_words:
            if not name.lower().endswith("'s"):
                female_name_dict.add(name.lower())
female_cleaned_list = [re.sub('[^a-zA-Z]', '', s) for s in female_name_dict]


# males
df_male_name = df_name_gender[df_name_gender['Gender']=='M']['Name']
male_name_dict = set()
for item in df_male_name:
    names = item.split()
    for name in names:
        if not name.lower() in female_words | male_words:
            if not name.lower().endswith("'s"):
                male_name_dict.add(name.lower())
male_cleaned_list = [re.sub('[^a-zA-Z]', '', s) for s in male_name_dict]

In [7]:
#some characters have the same family name but different gender,remove the family names
female_name_dict_cleaned = set(female_cleaned_list)
male_name_dict_cleaned = set(male_cleaned_list)

intersection_set = male_name_dict_cleaned & female_name_dict_cleaned
male_name_dict = male_name_dict_cleaned - intersection_set
female_name_dict = female_name_dict_cleaned - intersection_set
cleaned_list = [re.sub('[^a-zA-Z]', '', s) for s in name_dict]
print(len(cleaned_list))
cleaned_list = set(cleaned_list) - intersection_set
print(len(cleaned_list))

68422
53838


In [8]:
# code for storing the names
'''with open('data/names from movies.txt', 'w', encoding='utf-8') as file:
    for item in cleaned_list:
        file.write(item + '\n')
with open('data/female_name_dict1.txt', 'w', encoding='utf-8') as file:
    for name in female_name_dict:
        file.write(name + '\n')
with open('data/male_name_dict1.txt', 'w', encoding='utf-8') as file:
    for name in male_name_dict:
        file.write(name + '\n')'''

"with open('data/names from movies.txt', 'w', encoding='utf-8') as file:\n    for item in cleaned_list:\n        file.write(item + '\n')\nwith open('data/female_name_dict1.txt', 'w', encoding='utf-8') as file:\n    for name in female_name_dict:\n        file.write(name + '\n')\nwith open('data/male_name_dict1.txt', 'w', encoding='utf-8') as file:\n    for name in male_name_dict:\n        file.write(name + '\n')"

In [9]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [10]:
# data preprocessing
# Set the stopwords
stop_words = set(stopwords.words('english'))

# a list of words representing females/males
female_names_words = female_words | female_name_dict
male_names_words = male_words | male_name_dict

# remove adjs in the name list
adjs_set = set()
with open('data/common_adjs.txt', 'r') as file:
    for line in file:
        word = line.strip().lower()
        adjs_set.add(word)
female_names_words = female_names_words - adjs_set
male_names_words = male_names_words - adjs_set

# function used to tokenize, replace words representing females with 'she' and males with 'he', and remove stopwords
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return ['she' if word in female_names_words else 'he' if word in male_names_words else word for word in text.split() if word not in stop_words]

# apply the fuction to the summaries
documents = []
with open('data/plot_summaries.txt', 'r', encoding='utf-8') as file:
    for line in file:
        _, text = line.split('\t', 1)
        processed_text = preprocess_text(text)
        documents.append(processed_text)

In [11]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/syldayang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [12]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [13]:
# code to extract adjs(the outcome is stored in data/adjectives.txt, so you can skip this)
# 
female_name_dict = []
with open('data/female_name_dict1.txt', 'r', encoding='utf-8') as file:
    for line in file:
        female_name_dict.append(line.strip())
female_name_dict=set(female_name_dict)
male_name_dict = []
with open('data/male_name_dict1.txt', 'r', encoding='utf-8') as file:
    for line in file:
        male_name_dict.append(line.strip())
male_name_dict=set(male_name_dict)

#
# data preprocessing, very time-consuming

# Define a list of words representing females/males
female_names_words = female_words | female_name_dict
male_names_words = male_words | male_name_dict
remove_dict=(stop_words | female_names_words | male_names_words)
#from https://www.merriam-webster.com/thesaurus/

def preprocess_text_adj(text):
    # lower the words and remove punctuation
    text_token = word_tokenize(text)
    tagged = pos_tag(text_token)
    adjectives = [word for word, pos in tagged if (pos.startswith('JJ'))]
    text = [word.lower() for word in adjectives if word.isalpha()]
    # tokenize, replace words representing females with 'she' and males with 'he', and remove stopwords.
    return [word for word in text if word not in remove_dict]


documents_adj=[]
with open('data/plot_summaries.txt', 'r', encoding='utf-8') as file:
    for line in file:
        text = line
        documents_adj.append(preprocess_text_adj(text))
documents_adj = [word for sublist in documents_adj for word in sublist]

vocabulary=list(set(documents_adj))
word_counts = {word: documents_adj.count(word) for word in vocabulary}
word_count_df = pd.DataFrame(list(word_counts.items()), columns=['Word', 'Count'])
# only use the words appear at least twice
documents_adj=list(word_count_df[word_count_df['Count']>1]['Word'])

In [14]:
# remove some words in case of incorrect classification
lines = []

# file from https://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/names/
with open('data/female name.txt', 'r', encoding='utf-8') as file:
    for line in file:
        line=line.lower()
        lines.append(line.strip())
documents_adj = set(documents_adj)-set(lines)

lines = []
with open('data/male name.txt', 'r', encoding='utf-8') as file:
    for line in file:
        line=line.lower()
        lines.append(line.strip())
documents_adj = set(documents_adj)-set(lines)


lines = []
'''with open('data/names from movies.txt', 'r', encoding='utf-8') as file:
    for line in file:
        line=line.lower()
        lines.append(line.strip())
documents = set(documents)-(set(lines) & set(documents))'''

print(len(documents_adj))
other_common_words = set(['child', 'orphan', 'baby', 'girls', 'lover', 'mute','child', 'orphan', 'bride', 
                          'baby', 'girls', 'lover', 'mute', 'housekeeper', 'cousin', 'neighbour', 'parent', 
                          'sibling', 'pregnant', 'housewife', 'relative', 'servant', 'waif', 'grandson', 
                          'stranger', 'courtesan', 'servant','cousin', 'neighbour', 'parent', 'sibling', 
                          'housewife','nurse', 'stepsister', 'housekeeper','grandson','servant','grandchildren',
                          'relatives', 'uncles', 'orphans', 'childbirth', 'scoundrel', 'foreigner', 'stepfamily', 
                          'tuberculosis', 'mallaya', 'triplet', 'grandchildren', 'outcast', 'neighbours', 'huanhuan', 
                          'lakshmiammal', 'jaipal', 'himal', 'pasarian', 'leukemia'])
documents_adj = set(documents_adj)-set(other_common_words)

8671


In [15]:
# save data
with open('data/adjectives.txt', 'w', encoding='utf-8') as file:
    for item in documents_adj:
        file.write(item + '\n')

In [16]:
# load the data adjectives.txt (extracted from the text)
adjectives = []
with open('data/adjectives.txt', 'r', encoding='utf-8') as file:
    for line in file:
        adjectives.append(line.strip())

In [17]:
# train the word2vec model
model = Word2Vec(documents, vector_size=100, window=5, min_count=1, workers=4)

# save the model
model.save("word2vec.model")

In [18]:
# apply the model
model = Word2Vec.load("word2vec.model")
adjectives = [word for word in adjectives if word in model.wv.key_to_index]
adj_vectors = [model.wv[word] for word in adjectives]
vector_she = model.wv['she']
vector_he = model.wv['he']

In [19]:
# example: lovely
similarity = cosine_similarity(vector_she.reshape(1,-1), model.wv['lovely'].reshape(1,-1))
print(f'similarity between female and lovely: {similarity}')
similarity = cosine_similarity(vector_he.reshape(1,-1), model.wv['lovely'].reshape(1,-1))
print(f'similarity between male and lovely: {similarity}')

similarity between female and lovely: [[0.4278197]]
similarity between male and lovely: [[0.19654715]]


In [20]:
female_reference_vector = vector_she.reshape(1, -1)
female_similarities = [cosine_similarity(female_reference_vector, vec.reshape(1, -1))[0][0] for vec in adj_vectors]
male_reference_vector = vector_he.reshape(1, -1)
male_similarities = [cosine_similarity(male_reference_vector, vec.reshape(1, -1))[0][0] for vec in adj_vectors]

In [21]:
similaritie = pd.DataFrame({'Female':female_similarities,'Male':male_similarities})
similaritie.index=adjectives
print(similaritie)

             Female      Male
provided   0.066867  0.114790
fashioned  0.022361  0.073489
spiritual  0.026383 -0.067133
yeshwant   0.237445  0.232663
combined  -0.120892  0.019519
...             ...       ...
radical   -0.002280 -0.027109
isolated   0.196052  0.104083
warmest    0.073296  0.082389
severely   0.165978  0.255125
untoward   0.359431  0.253286

[8646 rows x 2 columns]


In [22]:
similarity_diff=similaritie['Female']-similaritie['Male']
top_100_female_adj = similarity_diff.sort_values(ascending=False).head(100).index
print(top_100_female_adj)
top_100_male_adj = similarity_diff.sort_values(ascending=True).head(100).index
print(top_100_male_adj)

Index(['loving', 'beautiful', 'somewhat', 'unhappy', 'lonely', 'shy',
       'neighbor', 'flighty', 'intimate', 'fiancé', 'passionate', 'deeply',
       'lovely', 'desire', 'intrigued', 'jealousy', 'caring', 'shared',
       'sweetheart', 'lifestyle', 'cheerful', 'unmarried', 'neglected', 'deaf',
       'charming', 'sexual', 'overprotective', 'domineering', 'promiscuous',
       'seduced', 'behaviour', 'precocious', 'affair', 'teenage', 'grown',
       'attracted', 'genuinely', 'aloof', 'twin', 'fiancée', 'insecure',
       'divorced', 'fond', 'babysat', 'nun', 'imbalanced', 'parentless',
       'disturbed', 'older', 'insensitive', 'rekindled', 'introverted',
       'neglectful', 'teenager', 'repressed', 'tormented', 'carefree',
       'dreams', 'overbearing', 'socialite', 'homosexual', 'smitten', 'timid',
       'marital', 'younger', 'widower', 'loved', 'emotional', 'autistic',
       'unattractive', 'excitement', 'uncomfortable', 'émilie', 'devoted',
       'depressed', 'pretty', 'af

In [23]:
import nltk
from nltk import pos_tag
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet

nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

def get_adjectives(words):
    # Tokenize the words and perform part-of-speech tagging
    tagged_words = pos_tag(words)
    
    # Keep only adjectives
    adjectives = [word for word, pos in tagged_words if pos in ['JJ', 'JJR', 'JJS']]
    
    return adjectives

# Filter out non-adjectives
filtered_female_adj = get_adjectives(top_100_female_adj)
filtered_male_adj = get_adjectives(top_100_male_adj)

print("Female Adjectives after POS:", filtered_female_adj)
print("Male Adjectives after POS:", filtered_male_adj)

Female Adjectives after POS: ['beautiful', 'unhappy', 'shy', 'desire', 'jealousy', 'sweetheart', 'lifestyle', 'neglected', 'sexual', 'overprotective', 'promiscuous', 'precocious', 'aloof', 'fiancée', 'parentless', 'disturbed', 'older', 'introverted', 'neglectful', 'tormented', 'carefree', 'socialite', 'homosexual', 'timid', 'marital', 'younger', 'widower', 'emotional', 'autistic', 'unattractive', 'uncomfortable', 'depressed', 'affectionate', 'eldest', 'flirtatious', 'upbringing', 'paternal', 'bisexual', 'curious', 'unmanageable', 'tremulous', 'infatuated', 'platonic', 'comfortable', 'enigmatic', 'snobbish']
Male Adjectives after POS: ['tank', 'alert', 'armament', 'automatic', 'unloaded', 'fastest', 'federal', 'choppy', 'explosive', 'atomic', 'saradian', 'aerial', 'heavy', 'tactical', 'arsenal', 'ss', 'finish', 'net', 'barbed', 'maximum', 'trench', 'smokescreen', 'destroys', 'georgian', 'outlaws', 'general']


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /Users/syldayang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [24]:
#use textblob to do sentimental analysis

!pip install textblob

from textblob import download_corpora
download_corpora.main()

Finished.


[nltk_data] Downloading package brown to /Users/syldayang/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to /Users/syldayang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     /Users/syldayang/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


In [25]:
from textblob import TextBlob

def analyze_sentiments(adjective_list):
    sentiments = {}
    for adj in adjective_list:
        blob = TextBlob(adj)
        sentiments[adj] = blob.sentiment.polarity
    return sentiments

# Perform sentiment analysis on the adjectives
top_100_female_sentiments = analyze_sentiments(top_100_female_adj)
top_100_male_sentiments = analyze_sentiments(top_100_male_adj)

print("Sentiments for female adjectives:")
for adj, score in top_100female_sentiments.items():
    print(f"{adj}: {score}")

print("\nSentiments for male adjectives:")
for adj, score in top_100_male_sentiments.items():
    print(f"{adj}: {score}")

Sentiments for female adjectives:
beautiful: 0.85
unhappy: -0.6
shy: -0.5
desire: 0.0
jealousy: 0.0
sweetheart: 0.0
lifestyle: 0.0
neglected: 0.0
sexual: 0.5
overprotective: 0.0
promiscuous: 0.0
precocious: 0.0
aloof: 0.0
fiancée: 0.0
parentless: 0.0
disturbed: 0.0
older: 0.16666666666666666
introverted: 0.0
neglectful: 0.0
tormented: 0.0
carefree: 0.0
socialite: 0.0
homosexual: 0.0
timid: 0.0
marital: 0.0
younger: 0.0
widower: 0.0
emotional: 0.0
autistic: -0.2
unattractive: 0.0
uncomfortable: -0.5
depressed: 0.0
affectionate: 0.0
eldest: 0.0
flirtatious: 0.0
upbringing: 0.0
paternal: 0.0
bisexual: 0.0
curious: -0.1
unmanageable: 0.0
tremulous: 0.0
infatuated: -0.2
platonic: 0.0
comfortable: 0.4
enigmatic: 0.1
snobbish: 0.0

Sentiments for male adjectives:
tank: 0.0
alert: 0.0
armament: 0.0
automatic: 0.0
unloaded: 0.0
fastest: 0.0
federal: 0.0
choppy: -0.2
explosive: 0.0
atomic: 0.0
saradian: 0.0
aerial: 0.0
heavy: -0.2
tactical: 0.0
arsenal: 0.0
ss: 0.0
finish: 0.0
net: 0.0
barbed: 0

In [26]:
def calculate_average_polarity(sentiments):
    # Filter out the zero scores
    non_zero_sentiments = [score for score in sentiments.values() if score != 0]
    
    # Calculate the average polarity if there are non-zero sentiment scores
    if non_zero_sentiments:
        average_polarity = sum(non_zero_sentiments) / len(non_zero_sentiments)
    else:
        average_polarity = 0
    
    return average_polarity

# Calculate the average sentiment polarity for female and male adjectives
avg_polarity_female = calculate_average_polarity(female_sentiments)
avg_polarity_male = calculate_average_polarity(male_sentiments)

print(f"Average sentiment polarity for female adjectives: {avg_polarity_female}")
print(f"Average sentiment polarity for male adjectives: {avg_polarity_male}")

Average sentiment polarity for female adjectives: -0.007575757575757586
Average sentiment polarity for male adjectives: -0.11666666666666665


In [27]:
#use BERT to do sentimental analysis

!pip install transformers
!pip install xformer



In [28]:
from transformers import pipeline

sentiment_pipeline = pipeline("sentiment-analysis")


female_sentiments = sentiment_pipeline(filtered_female_adj)
male_sentiments = sentiment_pipeline(filtered_male_adj)

print("Female sentiments:", female_sentiments)
print("Male sentiments:", male_sentiments)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


Female sentiments: [{'label': 'POSITIVE', 'score': 0.9998807907104492}, {'label': 'NEGATIVE', 'score': 0.999789297580719}, {'label': 'NEGATIVE', 'score': 0.9961496591567993}, {'label': 'POSITIVE', 'score': 0.9836276769638062}, {'label': 'NEGATIVE', 'score': 0.9980547428131104}, {'label': 'POSITIVE', 'score': 0.999849796295166}, {'label': 'POSITIVE', 'score': 0.9985740184783936}, {'label': 'NEGATIVE', 'score': 0.9997264742851257}, {'label': 'POSITIVE', 'score': 0.987690806388855}, {'label': 'NEGATIVE', 'score': 0.9992198944091797}, {'label': 'NEGATIVE', 'score': 0.9821943044662476}, {'label': 'NEGATIVE', 'score': 0.9885442852973938}, {'label': 'NEGATIVE', 'score': 0.9984350800514221}, {'label': 'POSITIVE', 'score': 0.997830331325531}, {'label': 'NEGATIVE', 'score': 0.9885744452476501}, {'label': 'NEGATIVE', 'score': 0.9994720816612244}, {'label': 'NEGATIVE', 'score': 0.9919120073318481}, {'label': 'NEGATIVE', 'score': 0.9969197511672974}, {'label': 'NEGATIVE', 'score': 0.999451220035553

In [29]:
def sentiment_proportions(sentiments):
    # Count the number of positive and negative sentiments
    positive = sum(1 for sentiment in sentiments if sentiment['label'] == 'POSITIVE')
    negative = sum(1 for sentiment in sentiments if sentiment['label'] == 'NEGATIVE')
    total = positive + negative
    # Calculate the proportions
    positive_proportion = positive / total
    negative_proportion = negative / total
    return positive_proportion, negative_proportion

female_positive, female_negative = sentiment_proportions(female_sentiments)
male_positive, male_negative = sentiment_proportions(male_sentiments)

print(f"Female Positive: {female_positive:.2f}, Female Negative: {female_negative:.2f}")
print(f"Male Positive: {male_positive:.2f}, Male Negative: {male_negative:.2f}")

Female Positive: 0.43, Female Negative: 0.57
Male Positive: 0.65, Male Negative: 0.35


The significantly higher proportion of negative sentiments in female-associated adjectives and a correspondingly lower proportion of negative sentiments in male-associated adjectives suggest the presence of gender bias in the dataset or in societal perceptions.

These results may reflect existing gender stereotypes in society. Language often mirrors societal attitudes, and this could be a manifestation of underlying biases.

In [30]:
def average_sentiment_score(sentiments):
    # Calculate the average score
    scores = [sentiment['score'] for sentiment in sentiments]
    average_score = sum(scores) / len(scores)
    return average_score

female_avg_score = average_sentiment_score(female_sentiments)
male_avg_score = average_sentiment_score(male_sentiments)

print(f"Average sentiment score for female adjectives: {female_avg_score:.2f}")
print(f"Average sentiment score for male adjectives: {male_avg_score:.2f}")

Average sentiment score for female adjectives: 0.99
Average sentiment score for male adjectives: 0.98


This suggests that even though female adjectives have more negative sentiments, their average sentiment score is high. This could mean that the positive sentiments are very strong. For male adjectives, there's a higher proportion of positive sentiments, but the average score is slightly lower.

We may ckeck in a bigger dataset, not only the top 100 to analyze the question in a broader way. (same methods

In [31]:
# Words more related to females, which are words where the value in similarity_diff is positive, 
female_related_words = similarity_diff[similarity_diff > 0].sort_values(ascending=False).index.tolist()

# Words more related to males, which are words where the value in similarity_diff is negative, 
male_related_words = similarity_diff[similarity_diff < 0].sort_values(ascending=True).index.tolist()

In [32]:
filtered_female_adj_all = get_adjectives(female_related_words)
filtered_male_adj_all = get_adjectives(male_related_words)

In [33]:
female_all_sentiments = sentiment_pipeline(filtered_female_adj_all)
male_all_sentiments = sentiment_pipeline(filtered_male_adj_all)

print("All female sentiments:", female_all_sentiments)
print("All male sentiments:", male_all_sentiments)

All female sentiments: [{'label': 'POSITIVE', 'score': 0.9998807907104492}, {'label': 'NEGATIVE', 'score': 0.999789297580719}, {'label': 'NEGATIVE', 'score': 0.9961496591567993}, {'label': 'POSITIVE', 'score': 0.9836276769638062}, {'label': 'NEGATIVE', 'score': 0.9980547428131104}, {'label': 'POSITIVE', 'score': 0.999849796295166}, {'label': 'POSITIVE', 'score': 0.9985740184783936}, {'label': 'NEGATIVE', 'score': 0.9997264742851257}, {'label': 'POSITIVE', 'score': 0.987690806388855}, {'label': 'NEGATIVE', 'score': 0.9992198944091797}, {'label': 'NEGATIVE', 'score': 0.9821943044662476}, {'label': 'NEGATIVE', 'score': 0.9885442852973938}, {'label': 'NEGATIVE', 'score': 0.9984350800514221}, {'label': 'POSITIVE', 'score': 0.997830331325531}, {'label': 'NEGATIVE', 'score': 0.9885744452476501}, {'label': 'NEGATIVE', 'score': 0.9994720816612244}, {'label': 'NEGATIVE', 'score': 0.9919120073318481}, {'label': 'NEGATIVE', 'score': 0.9969197511672974}, {'label': 'NEGATIVE', 'score': 0.99945122003

In [34]:
female_all_positive, female_all_negative = sentiment_proportions(female_all_sentiments)
male_all_positive, male_all_negative = sentiment_proportions(male_all_sentiments)

print(f"All Female Positive: {female_all_positive:.2f}, All Female Negative: {female_all_negative:.2f}")
print(f"All Male Positive: {male_all_positive:.2f}, All Male Negative: {male_all_negative:.2f}")

All Female Positive: 0.50, All Female Negative: 0.50
All Male Positive: 0.55, All Male Negative: 0.45


In [35]:
female_all_avg_score = average_sentiment_score(female_all_sentiments)
male_all_avg_score = average_sentiment_score(male_all_sentiments)

print(f"Average sentiment score for all female adjectives: {female_all_avg_score:.2f}")
print(f"Average sentiment score for all male adjectives: {male_all_avg_score:.2f}")

Average sentiment score for all female adjectives: 0.96
Average sentiment score for all male adjectives: 0.96
