In [2]:
import spacy

In [20]:
# Sample narration used as input for every cell below. The literal is split
# into one adjacent string per sentence for readability; Python joins adjacent
# literals at compile time, so the resulting value is a single line of text.
script_text = (
    "Alex had always dreamed of reaching the top of Eagle's Peak, "
    "a mountain so tall many said it can't be climbed without years of training. "
    "But Alex wasn't an expert. "
    "Just a person with a dream and a backpack full of hope. "
    "The first steps were easy. "
    "The path was clear. "
    "But soon the trail grew steeper. "
    "Rocks blocked the way. "
    "The wind howled. "
    "Doubt crept in."
)

print(script_text)

Alex had always dreamed of reaching the top of Eagle's Peak, a mountain so tall many said it can't be climbed without years of training. But Alex wasn't an expert. Just a person with a dream and a backpack full of hope. The first steps were easy. The path was clear. But soon the trail grew steeper. Rocks blocked the way. The wind howled. Doubt crept in.


In [21]:
# Load the `en_core_web_sm` pipeline; the cells below read its lemmas,
# sentence boundaries, named entities, POS tags and dependency labels.
nlp = spacy.load("en_core_web_sm")

In [23]:
# Run the loaded pipeline over the script text. The resulting `doc` is the
# object every later cell iterates over (tokens, sentences, entities).
doc = nlp(script_text)

In [24]:
# Tokenization, stop word removal, and lemmatization.
# Keep the lemma of every content-bearing token. Stop words and punctuation
# are dropped, and so are whitespace-only tokens: spaCy emits those for
# newlines / repeated spaces, and since they are neither stop words nor
# punctuation they would otherwise leak into the list for multi-line input.
tokens = [
    token.lemma_
    for token in doc
    if not (token.is_stop or token.is_punct or token.is_space)
]
print("Processed Tokens:", tokens)

Processed Tokens: ['Alex', 'dream', 'reach', 'Eagle', 'Peak', 'mountain', 'tall', 'say', 'climb', 'year', 'training', 'Alex', 'expert', 'person', 'dream', 'backpack', 'hope', 'step', 'easy', 'path', 'clear', 'soon', 'trail', 'grow', 'steep', 'rock', 'block', 'way', 'wind', 'howl', 'Doubt', 'creep']


In [28]:
# Show the sentence segmentation: one bulleted sentence per output line
for sentence in doc.sents:
    print(f"- {sentence.text}")

- Alex had always dreamed of reaching the top of Eagle's Peak, a mountain so tall many said it can't be climbed without years of training.
- But Alex wasn't an expert.
- Just a person with a dream and a backpack full of hope.
- The first steps were easy.
- The path was clear.
- But soon the trail grew steeper.
- Rocks blocked the way.
- The wind howled.
- Doubt crept in.


In [29]:
# List each named entity found in the text together with its entity label
for entity in doc.ents:
    print(f"{entity.text} → {entity.label_}")

Alex → PERSON
Eagle → ORG
years → DATE
Alex → PERSON
first → ORDINAL


In [30]:
# Tabulate every token: text, coarse POS, fine-grained tag, and the
# human-readable description spaCy gives for that fine-grained tag.
# The first three columns are left-padded to fixed widths so rows line up.
row_format = "{:10} {:10} {:6} {}"
for tok in doc:
    print(row_format.format(tok.text, tok.pos_, tok.tag_, spacy.explain(tok.tag_)))

Alex       PROPN      NNP    noun, proper singular
had        AUX        VBD    verb, past tense
always     ADV        RB     adverb
dreamed    VERB       VBN    verb, past participle
of         ADP        IN     conjunction, subordinating or preposition
reaching   VERB       VBG    verb, gerund or present participle
the        DET        DT     determiner
top        NOUN       NN     noun, singular or mass
of         ADP        IN     conjunction, subordinating or preposition
Eagle      PROPN      NNP    noun, proper singular
's         PART       POS    possessive ending
Peak       PROPN      NNP    noun, proper singular
,          PUNCT      ,      punctuation mark, comma
a          DET        DT     determiner
mountain   NOUN       NN     noun, singular or mass
so         ADV        RB     adverb
tall       ADJ        JJ     adjective (English), other noun-modifier (Chinese)
many       ADJ        JJ     adjective (English), other noun-modifier (Chinese)
said       VERB       VBD   

In [31]:
# For every token, show its dependency relation and the text of the head
# token it attaches to
for tok in doc:
    relation, head_text = tok.dep_, tok.head.text
    print(f"{tok.text:10} {relation:15} → Head: {head_text}")

Alex       nsubj           → Head: dreamed
had        aux             → Head: dreamed
always     advmod          → Head: dreamed
dreamed    ROOT            → Head: dreamed
of         prep            → Head: dreamed
reaching   pcomp           → Head: of
the        det             → Head: top
top        dobj            → Head: reaching
of         prep            → Head: top
Eagle      poss            → Head: Peak
's         case            → Head: Eagle
Peak       pobj            → Head: of
,          punct           → Head: Peak
a          det             → Head: mountain
mountain   nsubj           → Head: said
so         advmod          → Head: tall
tall       amod            → Head: many
many       nsubj           → Head: said
said       conj            → Head: dreamed
it         nsubjpass       → Head: climbed
ca         aux             → Head: climbed
n't        neg             → Head: climbed
be         auxpass         → Head: climbed
climbed    ccomp           → Head: said
without

In [40]:
# Build one record per sentence: the sentence text, its PERSON entities,
# its place-like entities (GPE / LOC / FAC), verb lemmas and noun lemmas.
structured_script = []

for sent in doc.sents:
    people = [ent.text for ent in sent.ents if ent.label_ == "PERSON"]
    places = [ent.text for ent in sent.ents if ent.label_ in ("GPE", "LOC", "FAC")]
    verbs = [tok.lemma_ for tok in sent if tok.pos_ == "VERB"]
    nouns = [tok.lemma_ for tok in sent if tok.pos_ == "NOUN"]

    # Key order is kept as-is because it determines the layout of the JSON dump.
    structured_script.append(
        {
            "sentence": sent.text,
            "characters": people,
            "locations": places,
            "actions": verbs,
            "nouns": nouns,
            # the sentence text is stored a second time under this key
            "raw": sent.text,
        }
    )



In [45]:
# Per-sentence records: sentence text, PERSON entities ("characters"),
# verb lemmas ("actions") and noun lemmas ("objects").
structured = [
    {
        "sentence": sent.text,
        "characters": [ent.text for ent in sent.ents if ent.label_ == "PERSON"],
        "actions": [tok.lemma_ for tok in sent if tok.pos_ == "VERB"],
        "objects": [tok.lemma_ for tok in sent if tok.pos_ == "NOUN"],
    }
    for sent in doc.sents
]

In [46]:
# Display the list of per-sentence records held in `structured`
structured

[{'sentence': "Alex had always dreamed of reaching the top of Eagle's Peak, a mountain so tall many said it can't be climbed without years of training.",
  'characters': ['Alex'],
  'actions': ['dream', 'reach', 'say', 'climb'],
  'objects': ['top', 'mountain', 'year', 'training']},
 {'sentence': "But Alex wasn't an expert.",
  'characters': ['Alex'],
  'actions': [],
  'objects': ['expert']},
 {'sentence': 'Just a person with a dream and a backpack full of hope.',
  'characters': [],
  'actions': [],
  'objects': ['person', 'dream', 'backpack', 'hope']},
 {'sentence': 'The first steps were easy.',
  'characters': [],
  'actions': [],
  'objects': ['step']},
 {'sentence': 'The path was clear.',
  'characters': [],
  'actions': [],
  'objects': ['path']},
 {'sentence': 'But soon the trail grew steeper.',
  'characters': [],
  'actions': ['grow'],
  'objects': ['trail']},
 {'sentence': 'Rocks blocked the way.',
  'characters': [],
  'actions': ['block'],
  'objects': ['rock', 'way']},
 {

In [43]:
import json

In [44]:
# Write the per-sentence records in `structured_script` to disk as JSON.
file_path = "output.json"
# Pin the file encoding instead of relying on the platform default, and keep
# non-ASCII characters as-is (ensure_ascii=False) so typographic quotes or
# accented names in a script stay readable rather than being written as
# \uXXXX escapes. The data a JSON parser reads back is the same either way.
with open(file_path, "w", encoding="utf-8") as json_file:
    json.dump(structured_script, json_file, indent=4, ensure_ascii=False)