# Step 1: Install Necessary Libraries

In [1]:
!pip install -r requirements.txt


Collecting spacy (from -r requirements.txt (line 9))
  Downloading spacy-3.8.2-cp312-cp312-win_amd64.whl.metadata (27 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy->-r requirements.txt (line 9))
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy->-r requirements.txt (line 9))
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy->-r requirements.txt (line 9))
  Downloading murmurhash-1.0.10-cp312-cp312-win_amd64.whl.metadata (2.0 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy->-r requirements.txt (line 9))
  Downloading cymem-2.0.8-cp312-cp312-win_amd64.whl.metadata (8.6 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy->-r requirements.txt (line 9))
  Downloading preshed-3.0.9-cp312-cp312-win_amd64.whl.metadata (2.2 kB)
Collecting thinc<8.4.0,>=8.3.0 (from spacy->-r requirements.txt (line 9))
  Downloading thinc-8.3.2-cp312-cp312-win_amd64.whl

# Step 2: Import Libraries

In [2]:
import pandas as pd        
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import spacy
import datetime as dt 
import os
import glob 
import re


# Step 3: Load the Text File

In [4]:
# Load the raw text file.
# The encoding is stated explicitly: without it open() uses the platform
# default, which would mis-decode typographic dashes and quotes on systems
# whose default is not UTF-8.
# NOTE(review): assumes the file was saved as UTF-8 - confirm against its source.
with open('key_events_20th_century.txt', 'r', encoding='utf-8') as file:
    text = file.read()


# Step 4: Evaluate and Clean the Text


In [5]:
# Quick sanity check: show only the opening slice of the loaded text
preview_length = 1000
print(text[:preview_length])

Jump to content
Main menu
Search
Donate
Appearance
Create account
Log in
Personal tools
Key Events of the 20th Century
Add languages
Article
Talk
Tools
From Wikipedia, the free encyclopedia
Look for Key Events of the 20th Century on one of Wikipedia's sister projects:
Wiktionary (dictionary)
Wikibooks (textbooks)
Wikiquote (quotations)
Wikisource (library)
Wikiversity (learning resources)
Commons (media)
Wikivoyage (travel guide)
Wikinews (news source)
Wikidata (linked database)
Wikispecies (species directory)
Wikipedia does not have an article with this exact name. Please search for Key Events of the 20th Century in Wikipedia to check for alternative titles or spellings.
You need to log in or create an account and be autoconfirmed to create new articles. Alternatively, you can use the article wizard to submit a draft for review, or request a new article.
Search for "Key Events of the 20th Century" in existing articles.
Look for pages within Wikipedia that link to this title.
Other rea

In [6]:
# Country names to look for among the recognised entities
country_list = [
    "United States",
    "Canada",
    "United Kingdom",
    "France",
    "Germany",
]


In [7]:
# Text cleaning: map typographic punctuation to plain ASCII in a single pass
# (em dash -> hyphen, curly single quotes -> straight apostrophe)
punctuation_map = str.maketrans({"—": "-", "‘": "'", "’": "'"})
text = text.translate(punctuation_map)


In [8]:
# Save the cleaned text (if modified).
# UTF-8 is requested explicitly so that any non-ASCII characters still present
# in the text can always be written; the platform default encoding may not be
# able to represent them and would raise UnicodeEncodeError.
with open('twentieth-century-cleaned.txt', 'w', encoding='utf-8') as file:
    file.write(text)

# Step 5: Create a Named Entity Recognition (NER) Object

In [10]:
# Download the en_core_web_sm model package through spaCy's command-line interface.
# NOTE(review): "!python" is resolved through the shell's PATH, which may not be
# the interpreter backing this kernel - confirm the model lands in the right environment.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     - -------------------------------------- 0.5/12.8 MB 1.9 MB/s eta 0:00:07
     --- ------------------------------------ 1.0/12.8 MB 1.9 MB/s eta 0:00:07
     ---- ----------------------------------- 1.3/12.8 MB 1.8 MB/s eta 0:00:07
     ----- ---------------------------------- 1.8/12.8 MB 1.8 MB/s eta 0:00:07
     ------ --------------------------------- 2.1/12.8 MB 1.8 MB/s eta 0:00:07
     ------- -------------------------------- 2.4/12.8 MB 1.8 MB/s eta 0:00:06
     --------- ------------------------------ 2.9/12.8 MB 1.7 MB/s eta 0:00:06
     --------- ------------------------------ 3.1/12.8 MB 1.8 MB/s eta 0:00:06
     ---------- ----------------------------- 3.

In [11]:
# Name of the spaCy English pipeline used for entity recognition
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

# Run the pipeline over the text; the resulting object carries sentences and entities
doc = nlp(text)

# Step 6: Split Sentence Entities from the NER Object

In [12]:
# Pair every sentence's text with the (text, label) tuples of the entities found in it
sentences_entities = [
    (sent.text, [(ent.text, ent.label_) for ent in sent.ents])
    for sent in doc.sents
]

# Peek at the first five sentence/entity pairs
sentences_entities[:5]

[("Jump to content\nMain menu\nSearch\nDonate\nAppearance\nCreate account\nLog in\nPersonal tools\nKey Events of the 20th Century\nAdd languages\nArticle\nTalk\nTools\nFrom Wikipedia, the free encyclopedia\nLook for Key Events of the 20th Century on one of Wikipedia's sister projects:\n",
  [('the 20th Century\nAdd', 'DATE'),
   ('Wikipedia', 'ORG'),
   ('Look for Key Events of', 'WORK_OF_ART'),
   ('the 20th Century', 'DATE'),
   ('one', 'CARDINAL'),
   ('Wikipedia', 'ORG')]),
 ('Wiktionary (dictionary)\nWikibooks (textbooks)\nWikiquote (quotations)\nWikisource (library)\nWikiversity (learning resources)\nCommons (media)\nWikivoyage (travel guide)\nWikinews (news source)\nWikidata (linked database)\nWikispecies (species directory)\nWikipedia does not have an article with this exact name.',
  [('Wikivoyage', 'PERSON'), ('Wikipedia', 'ORG')]),
 ('Please search for Key Events of the 20th Century in Wikipedia to check for alternative titles or spellings.\n',
  [('Key Events', 'ORG'), ('th

# Step 7: Filter Entities for Country Names

In [13]:
def _normalize_entity(name):
    """Return an entity string in the form used by ``country_list``.

    Runs of whitespace (including newlines inside an entity span) are collapsed
    to single spaces and a leading "the" is dropped, so that a span such as
    "the United\\nStates" compares equal to "United States".
    """
    words = name.split()
    if words and words[0].lower() == "the":
        words = words[1:]
    return " ".join(words)


# Set lookup keeps the membership test cheap regardless of list length
country_names = set(country_list)

# Keep only GPE entities that name a listed country, and only the sentences
# that contain at least one such entity. Entity text is normalised first
# because an exact comparison misses spans with a leading "the" or stray
# whitespace; the normalised name is what gets stored.
filtered_entities = []
for sentence, entities in sentences_entities:
    countries = []
    for entity, label in entities:
        country = _normalize_entity(entity)
        if label == 'GPE' and country in country_names:
            countries.append((country, label))
    if countries:
        filtered_entities.append((sentence, countries))

# Display filtered results
filtered_entities[:5]

[]

# Step 8: Create the Relationships DataFrame

In [14]:
# Column layout of the relationships table. It is passed to the DataFrame
# constructor explicitly so the frame (and the CSV written from it) keeps these
# columns even when no country pairs were found.
relationship_columns = ['Sentence', 'Country1', 'Country2']

# One record per pair of distinct countries mentioned in the same sentence
relationships = []

for sentence, entities in filtered_entities:
    if len(entities) > 1:  # A pair needs at least two country mentions
        for i in range(len(entities) - 1):
            for j in range(i + 1, len(entities)):
                # The same country mentioned twice is not a relationship with itself
                if entities[i][0] == entities[j][0]:
                    continue
                relationships.append({
                    'Sentence': sentence,
                    'Country1': entities[i][0],
                    'Country2': entities[j][0]
                })

# Build the DataFrame once from the collected records
relationships_df = pd.DataFrame(relationships, columns=relationship_columns)

# Show the first few rows
relationships_df.head()


# Step 9: Save and Export the DataFrame

In [15]:
# Export the relationships table as CSV, leaving out the row index column
relationships_df.to_csv(path_or_buf='country_relationships.csv', index=False)