# Initial setup


In [14]:
# Use Google Colab secrets to store the API key.
from google.colab import userdata

import urllib.parse

import requests

import json

# Read the LingQ token from the Colab secret named LINGQ_API_KEY.
lingq_api_key = userdata.get('LINGQ_API_KEY')

# Request headers: token authorization plus a JSON content type.
headers = dict()
headers['Authorization'] = f'Token {lingq_api_key}'
headers['Content-Type'] = 'application/json'


# Helper Functions

In [15]:
def get_json_response(url, timeout=30):
  """GET `url` with the module-level `headers` and return the parsed JSON body.

  Args:
    url: Absolute URL to request.
    timeout: Seconds to wait for the server before giving up (default 30).

  Returns:
    The value produced by `response.json()`.

  Raises:
    requests.HTTPError: the server answered with a 4xx/5xx status.
    requests.exceptions.RequestException: connection failure or timeout.
    ValueError: a successful response whose body is not valid JSON.
  """
  response = requests.get(url, headers=headers, timeout=timeout)
  # Surface HTTP errors here, with the status code and URL, instead of
  # letting an error page fail later as an opaque JSON decode error.
  response.raise_for_status()
  return response.json()

def print_json(json_parsed):
  """Pretty-print a JSON-serialisable value to stdout with 4-space indentation.

  Non-ASCII characters (for example Polish titles) are printed as-is
  rather than as Unicode escape sequences, so the output stays readable.

  Args:
    json_parsed: Any value accepted by `json.dumps`.
  """
  print(json.dumps(json_parsed, indent=4, ensure_ascii=False))



# Languages


In [16]:
# Fetch the language list from the LingQ API.
languages = get_json_response('https://www.lingq.com/api/v2/languages/')

# Keep only the codes of languages whose knownWords count is non-zero.
languages_with_known_words = [
    language["code"]
    for language in languages
    if language["knownWords"] != 0
]
print_json(languages_with_known_words)



[
    "pl",
    "en"
]


# Courses for Language

In [21]:
# Install PyDrive2
!pip install PyDrive2

# Import necessary libraries
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os
import urllib.parse

# Authenticate and create the PyDrive2 client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Define the base folder in Google Drive where you want to save the files
base_folder_name = 'LingQ_Data'


# Create or find the base folder
base_folder = None
folder_list = drive.ListFile({'q': "title='" + base_folder_name + "' and mimeType='application/vnd.google-apps.folder' and trashed=false"}).GetList()
if len(folder_list) == 0:
    base_folder = drive.CreateFile({'title': base_folder_name, 'mimeType': 'application/vnd.google-apps.folder'})
    base_folder.Upload()
else:
    base_folder = folder_list[0]

# Iterate through languages_with_known_words
for language_code in languages_with_known_words:
    print(f'Fetching language {language_code}')
    courses_in_language = get_json_response(f'https://www.lingq.com/api/v2/{language_code}/collections/my/')

    # Create or find the language folder within the base folder
    language_folder_name = language_code
    language_folder = None
    folder_list = drive.ListFile({'q': f"title='{urllib.parse.quote(language_folder_name)}' and '{base_folder['id']}' in parents and trashed=false"}).GetList()
    if len(folder_list) == 0:
        language_folder = drive.CreateFile({'title': language_folder_name, 'parents': [{'id': base_folder['id']}], 'mimeType': 'application/vnd.google-apps.folder'})
        language_folder.Upload()
    else:
        language_folder = folder_list[0]

    print_json(courses_in_language)

    for course in courses_in_language["results"]:
        course_title = course["title"]
        course_id = course["id"]
        print(f'Fetching course {course_title} - {course_id}')

        # Encode the course name before including it in the URL
        encoded_course_title = urllib.parse.quote(course_title)
        lessons_in_course = get_json_response(f'https://www.lingq.com/api/v2/{language_code}/collections/{course_id}/{encoded_course_title}')

        # Create or find the course folder within the language folder
        course_folder_name = course_title
        course_folder = None
        folder_list = drive.ListFile({'q': f"title='{urllib.parse.quote(course_folder_name)}' and '{language_folder['id']}' in parents and trashed=false"}).GetList()
        if len(folder_list) == 0:
            course_folder = drive.CreateFile({'title': course_folder_name, 'parents': [{'id': language_folder['id']}], 'mimeType': 'application/vnd.google-apps.folder'})
            course_folder.Upload()
        else:
            course_folder = folder_list[0]

        print_json(lessons_in_course)

        for lesson in lessons_in_course["lessons"]:
            lesson_title = lesson["title"]
            lesson_url = lesson["url"]
            lesson_json = get_json_response(lesson_url)

            if "tokenizedText" in lesson_json:
                sentences = [sentence[0]["text"] for sentence in lesson_json["tokenizedText"] if not ("opentag" in sentence[0]["tokens"][0])]
                text = " ".join(sentences)

                # Check if the file already exists within the course folder
                existing_files = drive.ListFile({'q': f"title='{lesson_title}.txt' and '{course_folder['id']}' in parents and trashed=false"}).GetList()

                if not existing_files:
                    # Create and write the text content to a text file
                    lesson_file = drive.CreateFile({'title': f'{lesson_title}.txt', 'parents': [{'id': course_folder['id']}]})
                    lesson_file.SetContentString(text)  # Write text content to the file
                    lesson_file.Upload()  # Upload the file
                else:
                    print(f'File {lesson_title}.txt already exists in {course_folder_name}')

Fetching language pl
{
    "count": 12,
    "next": null,
    "previous": null,
    "results": [
        {
            "id": 1424980,
            "title": "A Polish Family",
            "imageUrl": "https://static.lingq.com/media/resources/contents/images/2023/09/21/AAAABdz1V7bpPzPwEaaUZGq4sWXj-WPI9FyGf3hm_14f7c715.webp"
        },
        {
            "id": 1456660,
            "title": "Daily Polish Listening",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1440209,
            "title": "Daily Polish Story",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1440204,
            "title": "John A1",
            "imageUrl": "https://static.lingq.com/static/images/default-content.webp"
        },
        {
            "id": 1478562,
            "title": "My Simplified Lessons (AI)",
            "imageUrl": "https://static.lingq.com/medi

JSONDecodeError: ignored