In [1]:
#!pip install requests beautifulsoup4


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
import openai
import os
import re
import requests
from bs4 import BeautifulSoup


# Get the individual page urls

One url for each page that describes a particular kind of grammar.

In [3]:
def get_links(url):

    # Make a request to the website
    response = requests.get(url)

    # If the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the page content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract all links
        links = [a['href'] for a in soup.find_all('a', href=True)]

    else:
        print("Failed to retrieve the webpage.")

    return links


In [6]:
a1_a2_links = get_links('https://learnenglish.britishcouncil.org/grammar/a1-a2-grammar')
a1_a2_grammar_pages = list(set(link for link in a1_a2_links if re.match(r"^/grammar/a1-a2-grammar/", link)))

In [11]:
b1_b2_links = get_links('https://learnenglish.britishcouncil.org/grammar/b1-b2-grammar')
b1_b2_grammar_pages = list(set(link for link in b1_b2_links if re.match(r"^/grammar/b1-b2-grammar/", link)))

In [15]:
'https://learnenglish.britishcouncil.org/grammar/c1-grammar'
c1_links = get_links('https://learnenglish.britishcouncil.org/grammar/c1-grammar')
c1_grammar_pages = list(set(link for link in c1_links if re.match(r"^/grammar/c1-grammar/", link)))

# Scrape each link and extract the text

In [43]:
def extract_text_from_element(element):
    """Recursively extract text from an element and its child elements."""
    if element.string:
        return element.string
    return ''.join([extract_text_from_element(child) for child in element.children])

In [44]:
page_texts = {}
for grammar_page in a1_a2_grammar_pages + b1_b2_grammar_pages + c1_grammar_pages:

    # URL of the page to be scraped
    url = f"https://learnenglish.britishcouncil.org/{grammar_page}"
    print(f"Scraping {url}...")

    # Make a request to the website
    response = requests.get(url)

    # If the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the page content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract all text by navigating through all text nodes
        texts = [extract_text_from_element(element) for element in soup.find_all(True) if element.string]
        
        # Joining and printing the extracted texts
        page_text = ' '.join(texts)

        # avoid some junk at the end of the page
        page_text = page_text.split('Language level')[0]
        
        page_texts[grammar_page] = page_text

    else:
        print("Failed to retrieve the webpage.")

Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/articles-the-or-no-article...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/past-continuous-past-simple...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/nouns-countable-uncountable...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/prepositions-of-time-at-in-on...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/verbs-followed-ing-or-infinitive...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/using-there-there-are...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/present-simple...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/present-simple-be...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/prepositions-place...
Scraping https://learnenglish.britishcouncil.org//grammar/a1-a2-grammar/possessive-s...
Scraping https://

In [47]:
# Save these locally
for grammar, page_text in page_texts.items():

    filename = grammar.split('/')[-1]
    filepath = f"resources/{filename}.txt"

    with open(filepath, 'w') as f:
        f.write(page_text)


# Summarise each page text, and write to a prompt file

In [45]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.getenv('OPENAI_API_KEY')

def get_completion(prompt, model="gpt-4"): 
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message["content"]

In [46]:
for grammar, page_text in page_texts.items():

    prompt = f"""
    I am teacher of English as a foreign language, and I am creating some exercises to help my students practice a particular aspect of English grammar.\
    The following text, delimited by backticks, contains some explanation of of this aspect of English works (as well as some irrelevant text from web scraping).\
    Summarize the relevant part of the text in the form of a single paragraph of around 100 words.\
    You should phrase the respose in the form of instructions to an English teacher, and begin with the words "Help the student understand":

    ```{page_text}```

    """

    # chat GPT summary
    try:
        response = get_completion(prompt)
        print(response)
    except:
        print(f"error with {grammar}")
        continue

    # write the output to a file with the same name as the last part of the URL
    output_filename = grammar.split("/")[-1]
    output_filepath = f"server/grammar-prompts/{output_filename}.txt"

    with open(output_filepath, 'w') as f:
        f.write(str(response))

Help the student understand the use of articles in English, specifically 'the' or no article. Explain that we commonly use articles in phrases and place names. For instance, we use 'the' before school, prison, and hospital. However, we don't normally use an article for continents, most countries, cities, towns, lakes, mountains, or universities. For example, we say 'University of Cape Town', 'University of Delhi', 'University of Tokyo' without an article before 'University'. Reinforce this understanding through grammar exercises.
Help the student understand the use of past continuous and past simple tenses in English. The past continuous tense shows an action that was already in progress at a certain time in the past, and can also indicate that an activity was in progress for some time. The past simple tense is used to show an action that happened in the middle of the past continuous action, while it was in progress. When used together, these tenses can show an action interrupting anot

# Lookup between grammar and nice labels

In [92]:
# lookup to match a grammar type to its group
grammar_group_lookup = {}
for page_ref in a1_a2_grammar_pages + b1_b2_grammar_pages + c1_grammar_pages:
    parts = page_ref.split('/')
    grammar_group = parts[2]
    grammar_name = parts[3]

    grammar_group_lookup[grammar_name] = grammar_group

In [100]:
grammar_group_lookup

{'articles-the-or-no-article': 'a1-a2-grammar',
 'past-continuous-past-simple': 'a1-a2-grammar',
 'nouns-countable-uncountable': 'a1-a2-grammar',
 'prepositions-of-time-at-in-on': 'a1-a2-grammar',
 'verbs-followed-ing-or-infinitive': 'a1-a2-grammar',
 'using-there-there-are': 'a1-a2-grammar',
 'present-simple': 'a1-a2-grammar',
 'present-simple-be': 'a1-a2-grammar',
 'prepositions-place': 'a1-a2-grammar',
 'possessive-s': 'a1-a2-grammar',
 'comparative-adjectives': 'a1-a2-grammar',
 'quantifiers-few-a-few-little-a-bit': 'a1-a2-grammar',
 'adjectives-ending-ed-ing': 'a1-a2-grammar',
 'infinitive-purpose': 'a1-a2-grammar',
 'adjectives-prepositions': 'a1-a2-grammar',
 'present-simple-have-got': 'a1-a2-grammar',
 'question-forms': 'a1-a2-grammar',
 'articles-a-an-the': 'a1-a2-grammar',
 'present-perfect-just-yet-still-already': 'b1-b2-grammar',
 'capital-letters-apostrophes': 'b1-b2-grammar',
 'present-perfect': 'b1-b2-grammar',
 'past-perfect': 'b1-b2-grammar',
 'reflexive-pronouns': 'b1

In [101]:
# these will (via cut and paste...) become values and labels in a dropdown menu
def extract_title_from_text(text):
    pattern = r'function.*?{ }\s*(.*?)\s*\|'
    match = re.search(pattern, text)
    if match:
        return match.group(1)  
    return None

a1_a2_labels = []
b1_b2_labels = []
c1_labels = []
for grammar, page_text in page_texts.items():
    val = grammar.split('/')[-1] 
    lab = extract_title_from_text(page_text)
    grammar_label = {"value": val, "label": lab}

    grammar_group = grammar_group_lookup[val]
    if grammar_group == 'a1-a2-grammar':
        a1_a2_labels.append(grammar_label)
    elif grammar_group == 'b1-b2-grammar':
        b1_b2_labels.append(grammar_label)
    elif grammar_group == 'c1-grammar':
        c1_labels.append(grammar_label)
    else:
        raise ValueError("Unknown grammar group: " + grammar_group)


# Results 

Just cut and paste into the relevant part of `grammarOptions.js`

In [114]:
for row in sorted(a1_a2_labels, key=lambda x: x['label']):
    print(str(row) + ",")

{'value': 'adjectives-prepositions', 'label': 'Adjectives and prepositions'},
{'value': 'adjectives-ending-ed-ing', 'label': "Adjectives ending in '-ed' and '-ing'"},
{'value': 'articles-a-an-the', 'label': "Articles: 'a', 'an', 'the'"},
{'value': 'articles-the-or-no-article', 'label': "Articles: 'the' or no article"},
{'value': 'comparative-adjectives', 'label': 'Comparative adjectives'},
{'value': 'infinitive-purpose', 'label': 'Infinitive of purpose'},
{'value': 'nouns-countable-uncountable', 'label': 'Nouns: countable and uncountable'},
{'value': 'past-continuous-past-simple', 'label': 'Past continuous and past simple'},
{'value': 'possessive-s', 'label': "Possessive 's"},
{'value': 'prepositions-place', 'label': "Prepositions of place – 'in', 'on', 'at'"},
{'value': 'prepositions-of-time-at-in-on', 'label': "Prepositions of time: 'at', 'in', 'on'"},
{'value': 'present-simple', 'label': 'Present simple'},
{'value': 'present-simple-have-got', 'label': "Present simple: 'have got'"},


In [123]:
for row in sorted(b1_b2_labels, key=lambda x: x['label'])[20:]:
    print(str(row) + ",")

{'value': 'present-perfect-simple-continuous', 'label': 'Present perfect simple and continuous'},
{'value': 'present-perfect-just-yet-still-already', 'label': "Present perfect: 'just', 'yet', 'still' and 'already'"},
{'value': 'question-tags', 'label': 'Question tags'},
{'value': 'reflexive-pronouns', 'label': 'Reflexive pronouns'},
{'value': 'relative-clauses-defining-relative-clauses', 'label': 'Relative clauses: defining relative clauses'},
{'value': 'relative-clauses-non-defining-relative-clauses', 'label': 'Relative clauses: non-defining relative clauses'},
{'value': 'reported-speech-questions', 'label': 'Reported speech: questions'},
{'value': 'reported-speech-reporting-verbs', 'label': 'Reported speech: reporting verbs'},
{'value': 'reported-speech-statements', 'label': 'Reported speech: statements'},
{'value': 'stative-verbs', 'label': 'Stative verbs'},
{'value': 'future-degrees-certainty', 'label': 'The future: degrees of certainty'},
{'value': 'using-as-like', 'label': "Using

In [118]:
for row in sorted(c1_labels, key=lambda x: x['label']):
    print(str(row) + ",")

{'value': 'avoiding-repetition-text', 'label': 'Avoiding repetition in a text'},
{'value': 'ellipsis', 'label': 'Ellipsis'},
{'value': 'emphasis-cleft-sentences-inversion-auxiliaries', 'label': 'Emphasis: cleft sentences, inversion and auxiliaries'},
{'value': 'inversion-after-negative-adverbials', 'label': 'Inversion after negative adverbials'},
{'value': 'inversion-conditionals', 'label': 'Inversion and conditionals'},
{'value': 'modals-probability', 'label': 'Modals: probability'},
{'value': 'participle-clauses', 'label': 'Participle clauses'},
{'value': 'patterns-reporting-verbs', 'label': 'Patterns with reporting verbs'},
{'value': 'possession-noun-modifiers', 'label': 'Possession and noun modifiers'},
{'value': 'unreal-time', 'label': 'Unreal time'},
{'value': 'word-order-phrasal-verbs', 'label': 'Word order in phrasal verbs'},
