

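You need to provide your LingQ API key, which this notebook reads from a Colab secret named `LINGQ_API_KEY` (grant the notebook access to the secret when prompted). You can get the key from here: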

https://www.lingq.com/en/accounts/apikey/


To run the script select "Run all" from the "Runtime" menu above.

# Initial set up


In [1]:
import html
import json
import urllib.parse

import requests
# Using Google Colab secrets to store the LingQ API key.
from google.colab import userdata


# Helper Functions

In [2]:
def get_json_response(url):
  """Send an authenticated GET request to `url` and return the decoded JSON body.

  The API key is read from the Colab secret `LINGQ_API_KEY` on every call and
  sent as a `Token` authorization header. A `page_size=1000` query parameter is
  always added to the request.

  Args:
    url: Endpoint URL to fetch. It may already contain a query string.

  Returns:
    The parsed JSON response (whatever `response.json()` yields for the endpoint).

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not answer within the timeout.
  """
  lingq_api_key = userdata.get('LINGQ_API_KEY')
  headers = {
    'Authorization': f'Token {lingq_api_key}',
    'Content-Type': 'application/json'
  }
  # Let requests build the query string: appending '?page_size=1000' by hand
  # produces a malformed URL whenever `url` already carries a query.
  response = requests.get(url, params={'page_size': 1000}, headers=headers, timeout=60)
  # Fail loudly on auth/lookup errors instead of handing an error payload to callers.
  response.raise_for_status()
  return response.json()

def print_json(json_parsed):
  """Pretty-print a JSON-serialisable object with a 4-space indent.

  Non-ASCII characters (e.g. Polish letters in course titles) are printed
  as-is rather than as `\\uXXXX` escapes so the output stays readable.

  Args:
    json_parsed: Any object `json.dumps` can serialise.
  """
  print(json.dumps(json_parsed, indent=4, ensure_ascii=False))


# Languages


In [3]:
# Fetch the language list, then keep the code of every language whose
# known-word count is non-zero.
languages = get_json_response('https://www.lingq.com/api/v2/languages/')
languages_with_known_words = [
    language["code"]
    for language in languages
    if language["knownWords"] != 0
]
print_json(languages_with_known_words)



[
    "pl"
]


In [None]:
# Install PyDrive2
#!pip install PyDrive2

# Import necessary libraries
#from pydrive2.auth import GoogleAuth
#from pydrive2.drive import GoogleDrive
#from google.colab import auth
#from oauth2client.client import GoogleCredentials
#import os

# Authenticate and create the PyDrive2 client
#auth.authenticate_user()
#gauth = GoogleAuth()
#gauth.credentials = GoogleCredentials.get_application_default()
#drive = GoogleDrive(gauth)

# My courses

In [4]:

# For each language with known words, fetch and print the "my" collections.
# NOTE: courses_in_language is reassigned on every pass, so after the loop it
# holds only the response for the last language.
for language_code in languages_with_known_words:
    print(f'Fetching language {language_code}')
    my_courses_url = f'https://www.lingq.com/api/v2/{language_code}/collections/my/'
    courses_in_language = get_json_response(my_courses_url)
    print_json(courses_in_language)


Fetching language pl
{
    "count": 36,
    "next": null,
    "previous": null,
    "results": [
        {
            "id": 1554093,
            "title": "031A-DailyPolishStory",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1554385,
            "title": "031A-DailyPolishStory",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1554420,
            "title": "033A-DailyPolishStory",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1562567,
            "title": "037A-DailyPolishStory",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1424980,
            "title": "A Polish Family",
            "imageUrl": "https://static.lingq.com/media/resources/contents/images/2023/09/21/AAAABdz1V7bpP

# Lessons

In [17]:
# Which course to analyse: the language code and collection id used in the API URL.
COURSE_LANGUAGE_CODE = 'pl'
COURSE_ID = 289027

# Fetch the course details; the cells below read course['lessons'].
course = get_json_response(f'https://www.lingq.com/api/v2/{COURSE_LANGUAGE_CODE}/collections/{COURSE_ID}')
print_json(course)

{
    "pk": 289027,
    "url": "https://www.lingq.com/api/v3/pl/collections/289027/",
    "title": "LingQ Mini Stories",
    "description": "A series of stories where you can practice the basic patterns of the Polish language. Lots of repetition. These can be used by beginners and for later review of grammatical patterns. Enjoy.",
    "audio": null,
    "image": "https://static.lingq.com/media/resources/collections/images/2021/11/17/1637183731_a9cba855.jpg",
    "imageUrl": "https://static.lingq.com/media/resources/collections/images/2021/11/17/1637183731_a9cba855.jpg",
    "originalImageUrl": "https://static.lingq.com/media/resources/collections/images/2021/11/17/1637183731_a9cba855-orig.jpg",
    "completedRatio": 14.4,
    "completedTimes": 10000000,
    "lessonsCount": 0,
    "newWordsCount": 1180,
    "difficulty": 50.64,
    "level": "Beginner 2",
    "price": 0,
    "providerImageUrl": "https://static.lingq.com/media/resources/photo/1669846878_7a1f4acf.webp",
    "sharedById": 2

# The meat of the sandwich

In [23]:
# Quick check: show the API URL of the first lesson in the fetched course.
first_lesson = course['lessons'][0]
first_lesson['url']

'https://www.lingq.com/api/v3/pl/lessons/24479157/'

In [27]:
import spacy
from ipywidgets import widgets
from IPython.display import display, HTML
#@title Choose a language model
model = "pl_core_news_lg" #@param ["fi_core_news_lg", "de_core_news_lg", "en_core_web_lg", "nl_core_news_lg", "sv_core_news_lg", "da_core_news_lg", "pl_core_news_lg", "ru_core_news_lg", "uk_core_news_lg"]
!python -m spacy download {model}

spacy.prefer_gpu()

nlp = spacy.load(model)

# Highlight colour per grammatical case. Order matters: when a token reports
# several cases, the first matching entry wins.
_CASE_COLORS = {
    "Nom": "blue",
    "Gen": "green",
    "Dat": "red",
    "Acc": "purple",
    "Ins": "orange",
    "Loc": "brown",
    "Voc": "pink",
}


def process_and_display_text(text):
    """Display `text` as HTML with every token coloured by grammatical case.

    The text is analysed with the notebook-level spaCy pipeline `nlp`. Each
    non-whitespace token becomes a <span> whose tooltip shows the lemma, the
    part of speech and the morphological features; tokens without a Case
    feature are shown in black.

    Args:
        text: Plain text to analyse. Newlines are rendered as line breaks.

    Returns:
        None. The generated HTML is shown with `display`.
    """
    doc = nlp(text)
    fragments = []
    for token in doc:
        if token.is_space:
            # Whitespace tokens carry the newlines between paragraphs; raw
            # newlines would be collapsed by the browser, so emit <br> instead.
            fragments.append(html.escape(token.text).replace("\n", "<br>"))
            fragments.append(token.whitespace_)
            continue

        # `cases` is a (possibly empty) list of Case values for this token.
        cases = token.morph.get("Case")
        color = next(
            (shade for case_name, shade in _CASE_COLORS.items() if case_name in cases),
            "black",  # Default colour
        )

        # Tooltip: lemma, part of speech and space-separated morphology.
        morph = str(token.morph).replace("|", " ")
        tooltip_text = f"{token.lemma_} {token.pos_} ({morph})"

        # Escape both the attribute and the visible text so tokens such as
        # '"' or '<' cannot break the generated markup. The token's own
        # trailing whitespace is reused so punctuation is not pushed away
        # from the preceding word.
        fragments.append(
            f'<span style="color: {color};" title="{html.escape(tooltip_text)}">'
            f'{html.escape(token.text)}</span>{token.whitespace_}'
        )

    display(HTML("".join(fragments)))


Collecting pl-core-news-lg==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/pl_core_news_lg-3.7.0/pl_core_news_lg-3.7.0-py3-none-any.whl (573.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m573.7/573.7 MB[0m [31m642.6 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: pl-core-news-lg
Successfully installed pl-core-news-lg-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('pl_core_news_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:


for lesson in course['lessons']:
  # Fetch the lesson's paragraphs; every paragraph holds a list of sentences.
  paragraphs = get_json_response(lesson['url'] + 'paragraphs/')
  # One line per paragraph: its sentences' clean text joined by single spaces.
  paragraph_lines = [
    ' '.join(sentence['cleanText'] for sentence in paragraph['sentences']) + "\n"
    for paragraph in paragraphs
  ]
  joined_paragraphs = "".join(paragraph_lines)
  process_and_display_text(joined_paragraphs)



In [None]:


    # Export every lesson of every course in `courses_in_language` to Google Drive
    # as "<lesson title>.txt" inside a per-course folder.
    #
    # NOTE(review): this cell uses `drive` and `language_folder`, neither of which
    # is defined in the cells visible here (the PyDrive2 setup cell is commented
    # out). It also relies on `language_code` and `courses_in_language` left over
    # from the "My courses" loop. Confirm the setup before running.
    def _drive_query_literal(value):
        """Escape `value` for use inside a single-quoted Drive search-query string."""
        return value.replace("\\", "\\\\").replace("'", "\\'")

    for course in courses_in_language["results"]:
        course_title = course["title"]
        course_id = course["id"]
        print(f'Fetching course {course_title} - {course_id}')

        lessons_in_course = get_json_response(f'https://www.lingq.com/api/v2/{language_code}/collections/{course_id}')

        # Create or find the course folder within the language folder.
        # The title has to be matched verbatim: URL-encoding it would search for
        # a percent-encoded name that never equals the title passed to CreateFile
        # below, so a duplicate folder would be created on every run.
        course_folder_name = course_title
        folder_query = (
            f"title='{_drive_query_literal(course_folder_name)}' "
            f"and '{language_folder['id']}' in parents and trashed=false"
        )
        folder_list = drive.ListFile({'q': folder_query}).GetList()
        if folder_list:
            course_folder = folder_list[0]
        else:
            course_folder = drive.CreateFile({'title': course_folder_name, 'parents': [{'id': language_folder['id']}], 'mimeType': 'application/vnd.google-apps.folder'})
            course_folder.Upload()

        print_json(lessons_in_course)

        for lesson in lessons_in_course["lessons"]:
            lesson_title = lesson["title"]
            lesson_url = lesson["url"]
            lesson_json = get_json_response(lesson_url)

            if "tokenizedText" in lesson_json:
                # Keep the text of each entry unless its first token is marked "opentag".
                # NOTE(review): assumes every entry has at least one token - confirm.
                sentences = [sentence[0]["text"] for sentence in lesson_json["tokenizedText"] if not ("opentag" in sentence[0]["tokens"][0])]
                text = " ".join(sentences)

                # Check if the file already exists within the course folder.
                # The title is escaped so an apostrophe cannot break the query.
                lesson_file_title = f'{lesson_title}.txt'
                file_query = (
                    f"title='{_drive_query_literal(lesson_file_title)}' "
                    f"and '{course_folder['id']}' in parents and trashed=false"
                )
                existing_files = drive.ListFile({'q': file_query}).GetList()

                if not existing_files:
                    # Create and write the text content to a text file
                    lesson_file = drive.CreateFile({'title': lesson_file_title, 'parents': [{'id': course_folder['id']}]})
                    lesson_file.SetContentString(text)  # Write text content to the file
                    lesson_file.Upload()  # Upload the file
                else:
                    print(f'File {lesson_title}.txt already exists in {course_folder_name}')