# Initial setup


In [1]:
# Use Google Colab secrets to store API keys.
from google.colab import userdata

import urllib.parse

import requests

import json

# Pull the LingQ token out of the Colab secrets store so it never appears in the notebook itself.
lingq_api_key = userdata.get('LINGQ_API_KEY')

# Headers shared by every request made to the LingQ API from this notebook.
headers = {}
headers['Authorization'] = f'Token {lingq_api_key}'
headers['Content-Type'] = 'application/json'


# Helper Functions

In [2]:
def get_json_response (url, timeout=30):
  """GET `url` with the notebook's shared `headers` and return the decoded JSON body.

  Args:
    url: Absolute URL to request.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout `requests` may block indefinitely on an unresponsive server.

  Returns:
    Whatever `response.json()` yields for the response body.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within `timeout` seconds.
  """
  response = requests.get(url, headers=headers, timeout=timeout)
  # Fail here on an HTTP error (e.g. a bad token) instead of handing an error
  # payload to callers, where it would surface later as a confusing KeyError.
  response.raise_for_status()
  return response.json()

def print_json (json_parsed, indent=4):
  """Pretty-print a JSON-serialisable object to stdout.

  Args:
    json_parsed: Any object `json.dumps` can serialise (dict, list, ...).
    indent: Number of spaces per nesting level; defaults to 4.

  Non-ASCII characters (e.g. Polish letters in lesson text) are printed
  as-is rather than as `\\uXXXX` escapes, so the output stays readable.
  """
  print (json.dumps(json_parsed, indent = indent, ensure_ascii = False))



# Languages


In [3]:
# Ask LingQ for the language list available to this account.
languages = get_json_response('https://www.lingq.com/api/v2/languages/')

# Build (label, code) pairs for every language whose known-word count is not
# zero; the label shows the title followed by that count in parentheses.
languages_with_known_words = [
    (f'{entry["title"]} ({entry["knownWords"]})', entry["code"])
    for entry in languages
    if entry["knownWords"] != 0
]
print_json(languages_with_known_words)



[
    [
        "Polish (1134)",
        "pl"
    ],
    [
        "English (169)",
        "en"
    ]
]


# Select Language

In [4]:
import ipywidgets as widgets
from IPython.display import display

# Dropdown over the (label, code) pairs built above; it starts on the code of
# the first pair in the list.
first_language_code = languages_with_known_words[0][1]
lang_selector = widgets.Dropdown(
    options=languages_with_known_words,
    value=first_language_code,
    description='Language:',
)
display(lang_selector)

Dropdown(description='Language:', options=(('Polish (1134)', 'pl'), ('English (169)', 'en')), value='pl')

In [5]:
# Capture the language code currently selected in the `lang_selector` dropdown.
# Re-run this cell after changing the selection so later cells see the new value.
language_code = lang_selector.value
# Echo the value as the cell output.
language_code

'pl'

# Courses for Language

In [6]:

# Fetch the "my" collections listing for the selected language.
courses_url = f'https://www.lingq.com/api/v2/{language_code}/collections/my/'
courses_in_language = get_json_response(courses_url)

# Show each result by title; the widget value is the result's id, and the
# dropdown starts on the first result.
course_results = courses_in_language["results"]
course_options = [(course["title"], course["id"]) for course in course_results]
course_selector = widgets.Dropdown(
    options=course_options,
    value=course_results[0]["id"],
    description="Course:",
)
display(course_selector)



Dropdown(description='Course:', options=(('A Polish Family', 1424980), ('Daily Polish Listening', 1456660), ('…

In [7]:
# Capture the id of the course currently selected in the `course_selector` dropdown.
course_id = course_selector.value
# Echo the value as the cell output.
course_id


1424980

# Lessons from Course

In [8]:
# Fetch the selected collection; its "lessons" list feeds the dropdown below.
lessons_url = f'https://www.lingq.com/api/v2/{language_code}/collections/{course_id}'
lessons_in_course = get_json_response(lessons_url)

# Show each lesson by title; the widget value is the lesson's "url" field, and
# the dropdown starts on the first lesson.
course_lessons = lessons_in_course["lessons"]
lesson_options = [(entry["title"], entry["url"]) for entry in course_lessons]
lesson_selector = widgets.Dropdown(
    options=lesson_options,
    value=course_lessons[0]["url"],
    description="Lesson:",
)
display(lesson_selector)


Dropdown(description='Lesson:', options=(('A Polish Family S1:E1', 'https://www.lingq.com/api/v3/pl/lessons/24…

In [9]:
# URL of the lesson currently selected in `lesson_selector` (the dropdown's
# option values are each lesson's "url" field).
lesson_url = lesson_selector.value

# Lesson

In [10]:

lesson = get_json_response(lesson_url)

# Collect the "text" of the first element of every tokenizedText entry,
# skipping entries whose first token carries an "opentag" key.
# NOTE(review): only element [0] of each entry is read — confirm entries never
# hold more than one sentence, otherwise the rest is dropped.
sentences = []
for entry in lesson["tokenizedText"]:
  leading = entry[0]
  if "opentag" in leading["tokens"][0]:
    continue
  sentences.append(leading["text"])

# Join everything into one space-separated string for the NLP step.
text = " ".join(sentences)
text

'A to gdzie? W szpitalu. Złamałeś sobie rękę. Miałeś sześć lat. Nie pamiętasz? Była zima, chciałem zeskoczyć z zaspy i złamałem rękę. - Ale nie pamiętam jak. - No tak, że zeskoczyłeś z zaspy. - To nie była moja wina. - To była wina zaspy. Dawaj! Czekam na dobry wiatr. - Poszedł! - Uwaga! - Cześć. - Cześć. Uwaga! Rozumiem, że jest zima, że są trudne warunki, ale… Ciesz się, że wyperswadowałem im dach. Wiatr jest dobry! I… poszedł! W poniedziałek nie mogę, bo mam zebranie. - A we wtorek? - Wtorek… Rano tak, ale po południu odpada. - Ale… - Co „ale”? Ja mam taką pracę. Nic nie wymyślisz. Wychodzi na to, że tydzień będę siedział w domu. - Ale piątek masz wolny. - Dopiero! Dopiero? Cały tydzień siedzisz w domu. Pracujesz na zlecenie. Nie musisz codziennie biegać do biura i oglądać swojego szefa. Pocierp chociaż prywatnie, okej? Rozumiem. Jak nie pracuję, nie przynoszę pieniędzy. A ty jak nie pracujesz, zawsze coś przynosisz. Magia wolnego zawodu zaczęła ci doskwierać? Może będzie ciężko, al

# Load spaCy (select the model appropriate for your language)


In [11]:
# Install spaCy
!pip install spacy

#Using this time an English model - select a language model for your language from here.
#A small one will be sufficient.
# https://spacy.io/usage/models#languages
!python -m spacy download en_core_web_sm


import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')

2023-12-20 04:38:24.770923: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-20 04:38:24.770996: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-20 04:38:24.772469: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-20 04:38:24.781660: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Collecting en-core-web-sm==3.6.0
  Downloading https:

# Natural Language Processing Using spaCy


In [12]:
# Run the loaded spaCy pipeline over the joined lesson text.
doc = nlp(text)


In [13]:

import pandas as pd

# One row per spaCy token: surface text, lemma, coarse POS tag, spaCy's
# explanation of that tag, and the token's morphological analysis.
cols = ("text", "lemma", "POS", "explain", "morphology")
rows = [
    [token.text, token.lemma_, token.pos_, spacy.explain(token.pos_), token.morph]
    for token in doc
]

df = pd.DataFrame(data=rows, columns=cols)

df

Unnamed: 0,text,lemma,POS,explain,morphology
0,A,a,NOUN,noun,(Number=Sing)
1,to,to,ADP,adposition,()
2,gdzie,gdzie,VERB,verb,(VerbForm=Inf)
3,?,?,PUNCT,punctuation,(PunctType=Peri)
4,W,W,PROPN,proper noun,(Number=Sing)
...,...,...,...,...,...
2421,.,.,PUNCT,punctuation,(PunctType=Peri)
2422,Podobno,Podobno,PROPN,proper noun,(Number=Sing)
2423,grypa,grypa,PROPN,proper noun,(Number=Sing)
2424,żołądkowa,żołądkowa,PROPN,proper noun,(Number=Sing)


In [14]:
# Materialise the sentence spans spaCy found in the document.
sentence_spans = list(doc.sents)
# Render them with displaCy's dependency ("dep") style inside the notebook.
displacy.render(sentence_spans, style="dep", jupyter=True)