# Initial set up


In [1]:
#Using Google colab secrets to store keys.
from google.colab import userdata

import urllib.parse

import requests

import json

lingq_api_key=userdata.get('LINGQ_API_KEY');
headers = {
    'Authorization': f'Token {lingq_api_key}',
    'Content-Type': 'application/json'
}


# Helper Functions

In [2]:
def get_json_response (url):
  response = requests.get(url, headers=headers)
  return response.json()

def print_json (json_parsed):
  print (json.dumps(json_parsed, indent = 4))



# Languages


In [3]:
languages = get_json_response('https://www.lingq.com/api/v2/languages/')
languages_with_known_words = [(f'{item["title"]} ({item["knownWords"]})', item["code"]) for item in languages if item["knownWords"] != 0]
print_json(languages_with_known_words)



[
    [
        "Polish (1132)",
        "pl"
    ],
    [
        "English (169)",
        "en"
    ]
]


# Select Language

In [4]:
import ipywidgets as widgets
from IPython.display import display
lang_selector = widgets.Dropdown(
    options=languages_with_known_words,
    value=languages_with_known_words[0][1],
    description='Language:',
)
display(lang_selector)

Dropdown(description='Language:', options=(('Polish (1132)', 'pl'), ('English (169)', 'en')), value='pl')

In [31]:
language_code = lang_selector.value
language_code

'en'

# Courses for Language

In [32]:

courses_in_language = get_json_response (f'https://www.lingq.com/api/v2/{language_code}/collections/my/')

course_selector = widgets.Dropdown(
    options= [(item["title"], item["id"]) for item in courses_in_language["results"]],
    value = courses_in_language["results"][0]["id"],
    description = "Course:")
display(course_selector)



Dropdown(description='Course:', options=(('The Guardian', 1447171),), value=1447171)

In [33]:
course_id = course_selector.value
course_id


1447171

# Lessons from Course

In [34]:
lessons_in_course = get_json_response(f'https://www.lingq.com/api/v2/{language_code}/collections/{course_id}')
lesson_selector = widgets.Dropdown(
    options= [(item["title"], item["url"]) for item in lessons_in_course["lessons"]],
    value = lessons_in_course["lessons"][0]["url"],
    description = "Lesson:")
display(lesson_selector)


Dropdown(description='Lesson:', options=(('Slovakia’s pro-Russia former PM reaches deal to form coalition gove…

In [35]:
lesson_url = lesson_selector.value

# Lesson

In [36]:

lesson = get_json_response(lesson_url)
#print_json(lesson["tokenizedText"])
sentences = [sentence[0]["text"] for sentence in lesson["tokenizedText"] if not ("opentag" in sentence[0]["tokens"][0])]
text = " ".join(sentences)
text

"Chuck out your decluttering manuals. In the past few months, the pavement outside my flat has been taken over by stuff: baby baths, filing systems, books, stools. Clutter has emerged, dusty and triumphant, as a defining byproduct of the pandemic. Howard, who works from home, has witnessed (among other things) what she calls a “renaissance” in encyclopedias used as laptop stands. Cluttercore devotee Amy-Louise Holton, 36, who lives in Brighton, East Sussex, makes and sells clothes on Etsy. For a generation that rent rather than buy, clutter can be a lifeline. TikTok's depiction of cluttercore is often bedroom-based, and advocates honesty over aspiration. Stuck inside during febrile times, our social lives much diminished, we may be more likely to fall prey to consumerism. Joseph Ferrari, who studies the psychological impact of clutter at DePaul University in Chicago, describes home as a “situation for living” and a foundation for identity. Typically, an abundance of clutter has the pow

# Load spaCy (select Model appropriate for your language)


In [38]:
# Install spaCy
!pip install spacy

#Using this time an English model - select a language model for your language from here.
#A small one will be sufficient.
# https://spacy.io/usage/models#languages
!python -m spacy download en_core_web_sm


import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')

2023-12-19 20:42:17.547981: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-19 20:42:17.548043: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-19 20:42:17.549454: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Collecting en-core-web-sm==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load

# Natural Language Processing Using Spacy


In [39]:
doc = nlp(text)


In [40]:

import pandas as pd

cols = ("text", "lemma", "POS", "explain", "morphology")
rows = []

for t in doc:
    row = [t.text, t.lemma_, t.pos_, spacy.explain(t.pos_), t.morph]
    rows.append(row)

df = pd.DataFrame(rows, columns=cols)

df

Unnamed: 0,text,lemma,POS,explain,morphology
0,Chuck,chuck,VERB,verb,(VerbForm=Inf)
1,out,out,ADP,adposition,()
2,your,your,PRON,pronoun,"(Person=2, Poss=Yes, PronType=Prs)"
3,decluttering,decluttering,NOUN,noun,(Number=Sing)
4,manuals,manual,NOUN,noun,(Number=Plur)
...,...,...,...,...,...
387,your,your,PRON,pronoun,"(Person=2, Poss=Yes, PronType=Prs)"
388,home,home,NOUN,noun,(Number=Sing)
389,with,with,ADP,adposition,()
390,stuff,stuff,NOUN,noun,(Number=Sing)


In [41]:
sentence_spans = list(doc.sents)
displacy.render(sentence_spans, style="dep", jupyter=True)