## Co-Occurrence Analysis

The following code counts how often a term from one entity type and a term from a different entity type are mentioned together in the same sentence of the corpus, and then ranks the resulting pairs by frequency.

### Loading the Corpus

In [1]:
# Importing the required modules
import pickle

# Read the pickled list of parsed description documents from disk.
# NOTE(review): unpickling executes code embedded in the file, so this file
# should only ever come from a trusted source.
corpus_path = "./data/descriptions_docs.pkl"
with open(corpus_path, mode="rb") as corpus_file:
    docs = pickle.load(corpus_file)

### Social Relations - Material Entities

In [None]:
# Importing required libraries
from collections import Counter

# Counter to store co-occurrence frequencies.
# Keys are (material entity, social relation) word pairs; each value is the
# running count accumulated by the counting loop later in this cell.
co_occurrence_counter = Counter()

# Lists of material entities and social relations (manually created)
#
# NOTE(review), things to keep in mind when consuming these lists:
#   * 'cloth' is listed twice in `material_entities`; code that iterates the
#     list directly will process that word twice.
#   * Some entries contain a space ('chick pea') or a hyphen ('bill-hook',
#     'cane-splitter', 'toddy-drawer', 'chatter-box'). When entries are compared
#     with individual token texts, whether these can ever match depends on how
#     the tokenizer splits such words — TODO confirm.
#   * Singular/plural forms and spelling variants are separate entries and are
#     therefore treated as different words (e.g. 'ash'/'ashes',
#     'jewellery'/'jewelry', 'artisan'/'artizan', 'coolie'/'cooly').
#   * All entries are lower-case, so a case-sensitive comparison will not match
#     capitalised occurrences.
material_entities = ['bullet', 'smoke', 'dumbbell', 'lawn', 'gun', 'throne', 'fibre', 'bed', 'pānchagavyam', 'bodice', 'kamba', 'matchlock', 'juice', 'hedge', 'soot', 'chank', 'bedi', 'beef', 'dūli', 'gate', 'shadow', 'gurigi', 'upparige', 'mound', 'wall', 'ceiling', 'fence', 'pile', 'sirdachi', 'bali', 'axe', 'pūkāre', 'switch', 'clod', 'podi', 'pudding', 'dew', 'banner', 'club', 'garb', 'cymbal', 'stave', 'nishāni', 'track', 'mask', 'hoof', 'trouser', 'doll', 'slush', 'paddock', 'gallow', 'whip', 'rein', 'reflection', 'yoke', 'bridle', 'stool', 'board', 'stake', 'prize', 'weapon', 'feather', 'fan', 'towel', 'spurs', 'football', 'pillar', 'manuscript', 'patent', 'ship', 'cargo', 'vest', 'share', 'oven', 'shield', 'wax', 'boat', 'drawers', 'pestle', 'kitti', 'coral', 'bowl', 'bracelet', 'collyrium', 'bottu', 'dhornam', 'bran', 'chaplet', 'dagger', 'shoe', 'canopy', 'fenugreek', 'curds', 'condiment', 'tamarind', 'pepper', 'māla', 'biruthu', 'ladle', 'powder', 'braid', 'utensil', 'lathe', 'railway', 'robe', 'bow', 'air', 'cross', 'dandam', 'tong', 'stamp', 'sālagrāma', 'pipe', 'ganja', 'hemp', 'opium', 'beard', 'nāmam', 'vōli', 'gundam', 'pool', 'barrier', 'ināms', 'rent', 'tent', 'peg', 'trident', 'straw', 'pith', 'crown', 'expense', 'timber', 'saw', 'table', 'rung', 'ladder', 'kistvaen', 'site', 'bundle', 'truffle', 'spade', 'fund', 'border', 'trimming', 'frock', 'gown', 'blouse', 'jacket', 'velvet', 'tweed', 'skirt', 'cream', 'satin', 'chair', 'mattress', 'pillow', 'mortar', 'patch', 'prong', 'heap', 'bar', 'puddle', 'necklace', 'gunny', 'file', 'rear', 'catalogue', 'phonograph', 'trapping', 'bit', 'wire', 'roll', 'lobe', 'hatchet', 'flute', 'sickle', 'petticoat', 'chick pea', 'dhupati', 'crowbar', 'tobacco', 'horn', 'streamer', 'chintz', 'orange', 'storey', 'pole', 'screen', 'cavity', 'ārathi', 'bill-hook', 'sugar', 'jaggery', 'costume', 'hook', 'bread', 'wine', 'clothing', 'spear', 'edge', 'crevice', 'coin', 'whey', 'cake', 'wick', 'castor', 'hole', 'plait', 
'ware', 'handkerchief', 'cord', 'flint', 'socket', 'bough', 'ember', 'hearth', 'rain', 'tray', 'strip', 'nīsal', 'dhubati', 'kachche', 'tenai', 'sāmai', 'tovarai', 'fragment', 'sheaf', 'elemukkuththi', 'deposit', 'furrow', 'urine', 'camphor', 'tāli', 'badge', 'grain', 'pack', 'comb', 'loom', 'bag', 'food', 'platter', 'liquor', 'stimulant', 'cup', 'metal', 'spirit', 'thread', 'jewellery', 'umbrella', 'gift', 'money', 'fanam', 'panam', 'earth', 'water', 'fire', 'pot', 'ground', 'corpse', 'dung', 'cloth', 'jewel', 'plank', 'string', 'garland', 'shell', 'instrument', 'feast', 'planet', 'asterism', 'moon', 'asterisms', 'rajju', 'idol', 'letter', 'patrika', 'pariyam', 'clay', 'paste', 'oil', 'rice', 'lamp', 'wind', 'wealth', 'stone', 'roller', 'pan', 'milk', 'post', 'pāl', 'stick', 'ring', 'cloth', 'plate', 'gold', 'silver', 'seat', 'ball', 'ashtamangalam', 'vessel', 'mirror', 'ankusam', 'flapper', 'flag', 'drum', 'star', 'bier', 'dead', 'palanquin', 'vegetable', 'cultivation', 'turban', 'fee', 'torch', 'object', 'car', 'sword', 'attire', 'bayonet', 'bangle', 'cart', 'salt', 'flesh', 'picture', 'rope', 'bristle', 'hair', 'screw', 'diaphragm', 'plough', 'crop', 'brass', 'arrow', 'vehicle', 'stock', 'anna', 'mutton', 'pork', 'alcohol', 'fish', 'net', 'pearl', 'bead', 'meal', 'floor', 'wood', 'property', 'medicine', 'text', 'butter', 'dish', 'silk', 'egg', 'pyre', 'cinder', 'drink', 'payment', 'fine', 'tax', 'sadābāth', 'dresser', 'knapsack', 'drug', 'knife', 'carbon', 'razor', 'box', 'pill', 'land', 'mortgage', 'marakkāls', 'paddy', 'glass', 'tweezer', 'soap', 'edict', 'ornament', 'dress', 'fuel', 'jewelry', 'poison', 'nectar', 'meat', 'door', 'gong', 'uppu', 'alm', 'ivory', 'blanket', 'vase', 'purāna', 'poem', 'linga', 'lingam', 'lock', 'key', 'basket', 'image', 'saffron', 'turmeric', 'mark', 'cotton', 'pumpkin', 'ghee', 'honey', 'disc', 'slab', 'sack', 'mud', 'nut', 'chaudike', 'rod', 'book', 'bell', 'copper', 'lead', 'tin', 'iron', 'load', 'steel', 'ashti', 'bone', 
'toddy', 'pāni', 'āsu', 'indigo', 'fabric', 'tea', 'coffee', 'wage', 'newspaper', 'debt', 'barley', 'coat', 'pay', 'income', 'rāgi', 'piece', 'coffin', 'rupee', 'curry', 'stuff', 'pie', 'fruit', 'carcase', 'cromlech', 'incense', 'taxis', 'cromlechs', 'pit', 'platform', 'mandaikallu', 'art', 'musk', 'roof', 'store', 'boulder', 'brick', 'mandhe', 'litter', 'manure', 'rock', 'soil', 'surface', 'emblem', 'herd', 'wilderness', 'wreath', 'thorn', 'wheat', 'beer', 'ganji', 'onion', 'garlic', 'potato', 'cash', 'capital', 'flour', 'harvest', 'dowry', 'faggot', 'revenue', 'hoe', 'implement', 'ratchai', 'cot', 'heirloom', 'sandal', 'cane', 'ash', 'ashes', 'produce', 'cap', 'device', 'necklet', 'armlet', 'tattoo', 'pigment', 'impression', 'needle', 'yam', 'charcoal', 'cicatrix', 'keloid', 'wool', 'mustard', 'gudu', 'dairy', 'garment', 'wound', 'bolt', 'honē', 'brim', 'fluid', 'mat', 'pulp', 'photograph', 'product', 'tank', 'bund', 'embankment', 'pulse', 'perfume', 'lingas', 'lime']
social_relations = ['prey', 'sportsman', 'organiser', 'wrestler', 'highwayman', 'heir', 'author', 'ejamān', 'herdsman', 'constable', 'taliāri', 'panchayatdar', 'defaulter', 'agriculturist', 'police', 'housebreaker', 'historian', 'thief', 'marksman', 'dhiyani', 'sweeper', 'grandmother', 'convict', 'nomad', 'soothsayer', 'bachelor', 'budu', 'accuser', 'giant', 'leveller', 'landowner', 'youngster', 'outsider', 'manager', 'mukhtesar', 'forger', 'scoundrel', 'makkala', 'aliya', 'housewife', 'slayer', 'warrior', 'laird', 'squire', 'artisan', 'prince', 'treasurer', 'cartman', 'lender', 'sowcar','maker', 'cane-splitter', 'orphan', 'custodian', 'invader', 'butcher', 'consort', 'swāmis', 'coolie', 'baker', 'butler', 'inspector', 'runner', 'hawker', 'grandson', 'governor', 'monk', 'nun', 'abstainer', 'gōpi', 'disciple', 'teacher', 'follower', 'trader', 'zamindar', 'overlord', 'expert', 'zemindar', 'officer', 'complainant', 'architect', 'blacksmith', 'conqueror', 'hillman', 'steward', 'dwarf', 'scapegoat', 'performer', 'bearer', 'tribesman', 'grandfather', 'babe', 'grandparent', 'informant', 'guardian', 'widower', 'cousin', 'winner', 'abetter', 'abductor', 'correspondent', 'suitor', 'virgin', 'partner', 'adult', 'infant', 'neighbour', 'betrothed', 'thrower', 'patriarch', 'celebrant', 'traveller', 'devotee', 'baby', 'novice', 'weaver', 'father', 'minister', 'barber', 'attendant', 'son', 'mendicant', 'munsiff', 'headman', 'bride', 'sage', 'servant', 'priest', 'goddess', 'maid', 'singer', 'reader', 'reciter', 'slave', 'vassal', 'patron', 'client', 'patient', 'cultivator', 'descendant', 'immigrant', 'brother', 'householder', 'landholder', 'king', 'wife', 'husband', 'master', 'child', 'ancestor', 'ruler', 'offspring', 'mother', 'sister', 'bridegroom', 'astrologer', 'enemy', 'purōhit', 'friend', 'bridesmaid', 'seller', 'banker', 'usurer', 'washerman', 'ryot', 'guru', 'preceptor', 'parent', 'messenger', 'washerwoman', 'uncle', 'cowherd', 'clergyman', 'scholar', 'forefather', 
'soldier', 'commander', 'daughter', 'widow', 'driver', 'worker', 'eater', 'labourer', 'artist', 'albino', 'leper', 'fisherman', 'merchant', 'kempuava', 'saint', 'watchman', 'hunter', 'peon', 'mason', 'kavalgār', 'elder', 'writer', 'resident', 'surgeon', 'bystander', 'midwife', 'giver', 'pūjāri', 'spectator', 'pauper', 'chief', 'pilgrim', 'employer', 'shaver', 'cutter', 'physician', 'operator', 'accoucheuse', 'matron', 'customer', 'chatter-box', 'negotiator', 'potter', 'villager', 'official', 'carpenter', 'doctor', 'engineer', 'nobleman', 'sorcerer', 'chieftain', 'guest', 'niece', 'nephew', 'spouse', 'karta', 'mourner', 'beggar', 'bricklayer', 'fief', 'farmer', 'aunt', 'tradesman', 'founder', 'precursor', 'owner', 'victim', 'poet', 'progenitor', 'keeper', 'husbandmen', 'agnate', 'prostitute', 'dancer', 'tutor', 'dyer', 'clerk', 'artizan', 'toddy-drawer', 'rishi', 'dāsis', 'schoolmaster', 'contractor', 'painter', 'sawyer', 'tailor', 'gardener', 'guard', 'scavenger', 'pupil', 'judge', 'planter', 'idler', 'lawyer', 'adversary', 'witness', 'colonist', 'inhabitant', 'landlord', 'native', 'jumper', 'accountant', 'leader', 'stranger', 'cooly', 'colleague', 'president', 'delinquent', 'student', 'goldsmith', 'convert', 'onlooker', 'palol', 'necromancer', 'inmate', 'diviner', 'celibate', 'assistant', 'successor', 'donor', 'candidate']

# Creating a table of co-occurrences and frequencies
#
# For every sentence of every document, each (material entity, social relation)
# pair whose two words both appear among the sentence's token texts is counted
# once. Matching is exact and case-sensitive on `token.text`.
# NOTE(review): `docs` is presumably a list of spaCy Doc objects (the loop only
# relies on `.sents` and on tokens exposing `.text`) — confirm against the
# loading cell.

# Drop repeated list entries while keeping first-occurrence order, so that a
# word listed twice (e.g. 'cloth') is not counted twice for the same sentence.
unique_material_entities = list(dict.fromkeys(material_entities))
unique_social_relations = list(dict.fromkeys(social_relations))

for doc in docs:
    for sent in doc.sents:

        # Extracting tokens (words) from the sentence
        tokens = [token.text for token in sent]
        # Set copy for constant-time membership tests
        token_set = set(tokens)

        # Keep only the list words present in this sentence (in list order),
        # instead of testing every possible pair against the token list.
        materials_in_sentence = [word for word in unique_material_entities if word in token_set]
        if not materials_in_sentence:
            continue
        relations_in_sentence = [word for word in unique_social_relations if word in token_set]

        # Check for co-occurrence of words from list-A and list-B
        for word1 in materials_in_sentence:
            for word2 in relations_in_sentence:
                # Increment the co-occurrence counter
                co_occurrence_counter[(word1, word2)] += 1

# Print co-occurrence pairs and their frequencies
for (word1, word2), frequency in co_occurrence_counter.items():
    print(f"Co-occurrence found: {word1} and {word2}, Frequency: {frequency}")

In [3]:
# Saving the co-occurrences to a CSV

# Importing the required libraries
import csv

# Rank the (word1, word2) pairs from most to least frequent; pairs with equal
# counts keep the order in which they were first counted.
sorted_co_occurrences = co_occurrence_counter.most_common()

# Write the ranked pairs to disk, one row per pair
csv_filename = "./data/social_material_co_occurrences.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)
    # Header row first, then one (Word1, Word2, Frequency) row per pair
    csv_writer.writerow(["Word1", "Word2", "Frequency"])
    csv_writer.writerows((*pair, count) for pair, count in sorted_co_occurrences)

print(f"Co-occurrence frequencies saved to {csv_filename}")

Co-occurrence frequencies saved to ./data/social_material_co_occurrences.csv
