# Introduction

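The topic difference between genders is measured with cosine similarity: each set of topics is joined into a single text, both texts are embedded, and the difference is reported as 1 − cosine similarity of the two embeddings. For consistency, the all-MiniLM-L12-v2 model is used again to generate the embeddings.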

In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
from scipy.spatial.distance import cosine

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Location of the all-MiniLM-L12-v2 weights, relative to this notebook.
EMBEDDING_MODEL_PATH = '../models/all-MiniLM-L12-v2'

# Shared sentence-embedding model used by calculate_topic_difference().
embedding_model = SentenceTransformer(EMBEDDING_MODEL_PATH)

def calculate_topic_difference(topic_set1, topic_set2, model=None):
    """Compare two topic lists via the cosine similarity of their embeddings.

    Each list is joined into one space-separated string, both strings are
    encoded on the CPU, and the cosine distance between the two vectors is
    returned together with its complement (the cosine similarity).

    Args:
        topic_set1: Sequence of topic strings for the first group.
        topic_set2: Sequence of topic strings for the second group.
        model: Optional encoder exposing ``encode(text, device=...)``.
            When omitted, the notebook-level ``embedding_model`` is used.

    Returns:
        A ``(difference, similarity)`` tuple of built-in floats rounded to
        four decimals, where ``difference == 1 - similarity`` before
        rounding. If either input is empty or ``None`` the result is
        ``(1.0, 0.0)``.
    """
    # protect nulls
    if not topic_set1 or not topic_set2:
        return 1.0, 0.0

    # Resolve the encoder only after the guard so empty inputs never need it.
    encoder = embedding_model if model is None else model

    text1 = ' '.join(topic_set1)
    text2 = ' '.join(topic_set2)
    vec1 = encoder.encode(text1, device='cpu')
    vec2 = encoder.encode(text2, device='cpu')

    # scipy's cosine() is already the distance (1 - similarity), so use it
    # directly instead of subtracting from 1 twice. Cast to a built-in float
    # first: round() on a NumPy float32 scalar stays float32, and such values
    # render as e.g. 0.48649999499320984 instead of 0.4865.
    difference = float(cosine(vec1, vec2))
    similarity = 1.0 - difference

    return round(difference, 4), round(similarity, 4)

In [8]:
# ir_al
topic1 = ["Peace", "International Agreements", "Public Safety", "Public Services","Welfare"]
topic2 = ["Public Service", "International Leadership", "National Politics"]

diff_ir_al, sim_ir_al = calculate_topic_difference(topic1, topic2)
# Format explicitly to 4 decimals: NumPy float32 scalars are converted to
# Python floats by f-string formatting and would otherwise print ~17 digits.
print(f"ir_al：sim={sim_ir_al:.4f}，diff={diff_ir_al:.4f}")

ir_al：sim=0.48649999499320984，diff=0.5134999752044678


In [9]:
# ir_ms
topic3 = ["Government", "Media", "Asylum","Immigration","Accommodation", "Child Protection","Religion"]
topic4 = ["Economy","Social Policy", "Elections","Party Politics", "Israel–Gaza Conflict","International Relations"]

diff_ir_ms, sim_ir_ms = calculate_topic_difference(topic3, topic4)
# Format explicitly to 4 decimals: NumPy float32 scalars are converted to
# Python floats by f-string formatting and would otherwise print ~17 digits.
print(f"ir_ms：sim={sim_ir_ms:.4f}，diff={diff_ir_ms:.4f}")

ir_ms：sim=0.24160000681877136，diff=0.758400022983551


In [11]:
# np_al
topic5 = ["National Elections", "Parliamentary Leadership", "Government Appointments","Parliamentary Nominations"]
# NOTE(review): "Law, Foreign Affairs" is a single item here, whereas topic8
# lists "Law" and "Foreign Affairs Leadership" separately — confirm intended.
topic6 = ["Political Parties","Law, Foreign Affairs", "Party Leadership", "Conventions"]

diff_np_al, sim_np_al = calculate_topic_difference(topic5, topic6)
# Format explicitly to 4 decimals: NumPy float32 scalars are converted to
# Python floats by f-string formatting and would otherwise print ~17 digits.
print(f"np_al：sim={sim_np_al:.4f}，diff={diff_np_al:.4f}")

np_al：sim=0.42080000042915344，diff=0.579200029373169


In [12]:
# np_ms
topic7 = ["Party Leadership","Internal Elections", "Climate Policy", "Parliamentary Delegations", "Leadership"]
topic8 = ["Political Parties","Law","Foreign Affairs Leadership", "Elections","Political Leadership"]

diff_np_ms, sim_np_ms = calculate_topic_difference(topic7, topic8)
# Format explicitly to 4 decimals: NumPy float32 scalars are converted to
# Python floats by f-string formatting and would otherwise print ~17 digits.
print(f"np_ms：sim={sim_np_ms:.4f}，diff={diff_np_ms:.4f}")

np_ms：sim=0.7095999717712402，diff=0.2903999984264374
