## spaCy model installation / loading

In [4]:
!pip install spacy
!pip install spacy-transformers
!python3 -m spacy download en_core_web_sm # change with the desired spacy model
import spacy
model = spacy.load("en_core_web_sm") # change with the desired spacy model

if model :
    print("Installation successful")
else:
    print("Installation error")



Collecting en-core-web-sm==3.4.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl (12.8 MB)
[K     |████████████████████████████████| 12.8 MB 6.0 MB/s eta 0:00:01                    | 1.5 MB 2.7 MB/s eta 0:00:05MB 6.0 MB/s eta 0:00:01


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Successful installation !


## Loading the file with sentence pairs

In [5]:
import pandas as pd

# Sentence-pair file to annotate (a .csv in the same folder); change to the desired file.
pairs_file = "en_fr_pairs.csv"
data = pd.read_csv(pairs_file)
data # check that everything's fine

Unnamed: 0,EN,FR,Good,POS EN,POS EN.1,POS FR
0,She advanced to the semi-final where she defea...,Elle avança aux semi-finales où elle gagna sur...,True,,,Elle PRON\navança VERB\naux ADP\nsemi-finales ...
1,In 2008 Davidson was elected to the Naismith M...,En 2008 Davidson fut intronisé au Naismith Mem...,True,,,En ADP\n2008 NUM\nDavidson PROPN\nfut AUX\nint...
2,Sterling raised in a poor family was a young a...,Sterling élevé dans une famille pauvre était u...,True,,,Sterling PROPN\nélevé ADJ\ndans ADP\nune DET\n...
3,He treated in a masterly manner (Bulletin astr...,Son traitement analytique de la capture des co...,True,,,Son DET\ntraitement NOUN\nanalytique ADJ\nde A...
4,On February 26 2007 The Walt Disney Company an...,Le 26 février 2007 The Walt Disney Company ann...,True,,,Le DET\n26 NUM\nfévrier NOUN\n2007 NUM\nThe X\...
...,...,...,...,...,...,...
95,(Morning Musume version) Koko ni Iruzee!,Singles de Morning Musume Koko ni Iruzee!,False,meaning unclear,,
96,The British Theatre Guide.,The British Theatre Guide.,False,only english,,
97,Minimal arrays containing all sub-array combin...,Minimal arrays containing all sub-array combin...,False,only english,,
98,I Raise My Eyes to Say Yes.,Pour les articles homonymes voir Dis-moi oui.,False,x,,


#### POS tagging

In [9]:
def pos_tag_sentence(sentence):
    """Return one "<token> <POS>" line per token of `sentence`, as tagged by `model`.

    Non-string input (e.g. the NaN pandas produces for an empty CSV cell) yields
    an empty annotation instead of being handed to the spaCy pipeline, which
    expects text.
    """
    if not isinstance(sentence, str):
        return ""
    # Build the annotation in one pass rather than growing a string with `+=`.
    return "".join(f"{token} {token.pos_}\n" for token in model(sentence))


# List of the POS annotations, one entry per row of `data`;
# rows whose pair is not flagged as well-aligned get an empty string.
parsed = [
    # `== True` is deliberate: a missing (NaN) flag is truthy but must not count as aligned.
    pos_tag_sentence(sentence) if is_correct == True else ""
    for sentence, is_correct in zip(data["EN"], data["Good"]) # change columns names if needed
]

print(parsed)

["She PRON\nadvanced VERB\nto ADP\nthe DET\nsemi ADJ\n- ADJ\nfinal ADJ\nwhere SCONJ\nshe PRON\ndefeated VERB\nNew PROPN\nBrunswick PROPN\n's PART\nAndrea PROPN\nKelly PROPN\n. PUNCT\n", 'In ADP\n2008 NUM\nDavidson PROPN\nwas AUX\nelected VERB\nto ADP\nthe DET\nNaismith PROPN\nMemorial PROPN\nBasketball PROPN\nHall PROPN\nof ADP\nFame PROPN\nas ADP\na DET\ncontributor NOUN\nfor ADP\nhis PRON\nsuccesses NOUN\nas ADP\nan DET\nowner NOUN\nof ADP\nthe DET\nPistons PROPN\nand CCONJ\nShock PROPN\n. PUNCT\n', 'Sterling NOUN\nraised VERB\nin ADP\na DET\npoor ADJ\nfamily NOUN\nwas AUX\na DET\nyoung ADJ\nadventurer NOUN\nwith ADP\ndreams NOUN\nof ADP\nfame NOUN\nand CCONJ\nfortune NOUN\nwho PRON\ndecided VERB\nto PART\nbecome VERB\na DET\npirate NOUN\n. PUNCT\n', 'He PRON\ntreated VERB\nin ADP\na DET\nmasterly ADJ\nmanner NOUN\n( PUNCT\nBulletin PROPN\nastronomique ADJ\n1889 NUM\n) PUNCT\nthe DET\ntheory NOUN\nof ADP\nthe DET\ncapture NOUN\nof ADP\ncomets NOUN\nby ADP\nthe DET\nlarger ADJ\nplanet

In [10]:
# Column of `data` that receives the annotations built in the previous cell.
pos_column = "POS EN" # change column name if needed
data[pos_column] = parsed
data # check that the annotations have been added

Unnamed: 0,EN,FR,Good,POS EN,POS EN.1,POS FR
0,She advanced to the semi-final where she defea...,Elle avança aux semi-finales où elle gagna sur...,True,She PRON\nadvanced VERB\nto ADP\nthe DET\nsemi...,,Elle PRON\navança VERB\naux ADP\nsemi-finales ...
1,In 2008 Davidson was elected to the Naismith M...,En 2008 Davidson fut intronisé au Naismith Mem...,True,In ADP\n2008 NUM\nDavidson PROPN\nwas AUX\nele...,,En ADP\n2008 NUM\nDavidson PROPN\nfut AUX\nint...
2,Sterling raised in a poor family was a young a...,Sterling élevé dans une famille pauvre était u...,True,Sterling NOUN\nraised VERB\nin ADP\na DET\npoo...,,Sterling PROPN\nélevé ADJ\ndans ADP\nune DET\n...
3,He treated in a masterly manner (Bulletin astr...,Son traitement analytique de la capture des co...,True,He PRON\ntreated VERB\nin ADP\na DET\nmasterly...,,Son DET\ntraitement NOUN\nanalytique ADJ\nde A...
4,On February 26 2007 The Walt Disney Company an...,Le 26 février 2007 The Walt Disney Company ann...,True,On ADP\nFebruary PROPN\n26 NUM\n2007 NUM\nThe ...,,Le DET\n26 NUM\nfévrier NOUN\n2007 NUM\nThe X\...
...,...,...,...,...,...,...
95,(Morning Musume version) Koko ni Iruzee!,Singles de Morning Musume Koko ni Iruzee!,False,,,
96,The British Theatre Guide.,The British Theatre Guide.,False,,,
97,Minimal arrays containing all sub-array combin...,Minimal arrays containing all sub-array combin...,False,,,
98,I Raise My Eyes to Say Yes.,Pour les articles homonymes voir Dis-moi oui.,False,,,


#### Export results

In [7]:
# Write the annotated table to disk without the DataFrame index.
output_file = "en_fr_with_pos.csv" # change output file name with desired name
data.to_csv(output_file, index=False)