In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generate Learning Content for Language Teaching Materials
Prototype for Experiments
"""
__author__ = ["Leo S. Rüdian"]
__copyright__ = "2024, Rüdian"
__credits__ = ["Leo S. Rüdian"]
__license__ = "CC BY-NC-SA"
__version__ = "1.0.0"
__maintainer__ = ["Leo S. Rüdian"]
__email__ =["ruediasy@informatik.hu-berlin.de"]
__status__ = "Prototype"

In [6]:
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import hashlib
import json
import os
import random
import shutil # to save it locally
import time
from os.path import exists

import openai
import requests
from IPython.display import display, HTML
from PIL import Image

# Prefer the OPENAI_API_KEY environment variable so the key never has to be
# written into the notebook; the placeholder is kept as fallback so the cell
# behaves as before when the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', '[ADD YOUR KEY]')
'''
OpenAI API to access gpt-3.5-turbo
'''
def getChatGPT(txt,cat=0,force=False,max_retries=10,retry_delay=5):
    '''
    Send one user message to gpt-3.5-turbo and return the text of the reply.

    Parameters:
        txt: prompt, sent as the content of a single "user" message.
        cat: accepted for backward compatibility; not used by this function.
        force: accepted for backward compatibility; not used by this function.
        max_retries: number of additional attempts after the first failed request.
        retry_delay: seconds to wait between two attempts.

    Returns:
        The message content of the first choice in the response.

    Raises:
        The error of the last attempt once all attempts have failed.
    '''
    attempts = max(0, max_retries) + 1
    for attempt in range(1, attempts + 1):
        try:
            resp = openai.ChatCompletion.create(
              model="gpt-3.5-turbo",
              messages=[
                    {"role": "user", "content": txt},
                ]
            )
            return resp['choices'][0]['message']['content']
        # Exception instead of a bare except: KeyboardInterrupt must still be
        # able to stop the notebook while a request is being retried.
        except Exception as err:
            print('GPT ERROR', err)
            if attempt == attempts:
                # a permanent failure (e.g. invalid key) surfaces instead of retrying forever
                raise
            time.sleep(retry_delay)


In [42]:
'''
Helper functions
'''

'''
Convert textual response to list
'''
def response2list(t):
    '''
    Turn a line-based textual response into a list of entries.

    Every line that contains a space contributes one entry: the text after
    the first space (which drops a leading marker such as "1." or "-"), with
    every remaining "- " removed. Lines without a space are ignored.
    '''
    return [
        row.partition(' ')[2].replace('- ','')
        for row in t.split("\n")
        if ' ' in row
    ]

'''
Convert two-dimensional textual response to list
'''
def response2dim2list(t):
    '''
    Convert a two-dimensional textual response into a dict.

    Each usable line has the form "key: value, value, ...". The text before
    the first colon becomes the key, the comma-separated items after it
    become the list of values.

    Parameters:
        t: response text, one entry per line.

    Returns:
        dict mapping each key to its list of values. Keys and values are
        stripped of surrounding whitespace, a leading bullet ("-", "*", "•")
        is removed from the key, and empty values are dropped. Lines without
        a colon (blank lines, headings) and lines that yield no key or no
        value are skipped.
    '''
    lst = {}
    for line in t.split("\n"):
        # without the separator there is nothing to split; the former
        # unconditional index(':') raised ValueError on such lines
        if ':' not in line:
            continue
        key, _, rest = line.partition(':')
        key = key.strip().lstrip('-*•').strip()
        values = [item.strip() for item in rest.split(',')]
        values = [item for item in values if item]
        if key and values:
            lst[key] = values
    return lst

'''
Reverse the list from key to value
'''
def voc_reverse(voc_combine):
    '''
    Invert a key -> list-of-words mapping into word -> list-of-keys.

    Every word is stripped of surrounding whitespace; words of 20 or more
    characters are ignored. Keys are appended in the iteration order of
    voc_combine, once per occurrence of the word.
    '''
    reversed_map = {}
    for key, words in voc_combine.items():
        for raw in words:
            word = raw.strip()
            if len(word) >= 20:
                continue
            reversed_map.setdefault(word, []).append(key)
    return reversed_map

'''
Return intersection of lists
'''
def intersection(lst1, lst2):
    '''
    Return the elements that occur in both lists as a list.

    Duplicates are collapsed; the order of the result is the iteration
    order of the resulting set.
    '''
    common = set(lst1).intersection(set(lst2))
    return list(common)
    
'''
Select vocab that fulfills constraints
''' 
def selectVocab():
    '''
    Select the entries of the global voc_combine_rev that fulfil the constraints.

    Reads the globals voc_combine_rev (word -> list of keys) and voc_combine
    (only its keys are iterated).

    Steps:
      1. keep every entry of voc_combine_rev whose list has at least 4 items;
      2. for every key of voc_combine that occurs in fewer than 2 of the kept
         lists, remove it from all kept lists;
      3. return the kept entries whose list still has at least 2 items.

    Note: the working copy of voc_combine_rev is shallow, so the kept lists
    are the very list objects stored in voc_combine_rev; removals in step 2
    are therefore visible in the global as well.
    '''
    global voc_combine_rev
    working = voc_combine_rev.copy()

    # step 1: entries with at least 4 items
    candidate_a = {
        word: working[word]
        for word in voc_combine_rev
        if len(voc_combine_rev[word]) >= 4
    }

    # step 2: drop keys that occur in fewer than 2 candidate lists
    for key in voc_combine:
        occurrences = sum(1 for word in candidate_a if key in working[word])
        if occurrences >= 2:
            continue
        for word in candidate_a:
            members = candidate_a[word]
            if key in members:
                members.remove(key)

    # step 3: only entries that still have at least 2 items survive
    return {word: members for word, members in candidate_a.items() if len(members) >= 2}

'''
Select candidate of interest
'''
def select2er():
    '''
    Collect all ordered pairs of entries of the global voc_combine_rev whose
    lists share at least 2 items.

    Returns a list of [first, second, shared] triples, where shared is the
    shuffled intersection of the two lists. Each unordered pair appears
    twice, once per order.
    '''
    global voc_combine_rev
    pairs = []
    for first in voc_combine_rev:
        first_items = voc_combine_rev[first]
        for second in voc_combine_rev:
            if second == first:
                continue
            shared = intersection(first_items, voc_combine_rev[second])
            if len(shared) < 2:
                continue
            random.shuffle(shared)
            pairs.append([first, second, shared])

    return pairs

'''
Select random item
''' 
def selectrandom():
    '''
    Randomly pick two compatible entries from the global candicate_intersect.

    candidate_D_d is drawn at random. candidate_D_a is then drawn from the
    entries that share at least one of the first two elements with
    candidate_D_d but none of the items in its third element. If no such
    entry exists, the global maxrand is decremented and a new candidate_D_d
    is drawn, until maxrand reaches 0.

    Reads the globals candicate_intersect and maxrand; writes the globals
    candicate_intersect_filtered, candidate_D_a, candidate_D_d and maxrand.

    Returns:
        (candidate_D_a, candidate_D_d), each a list [first, second, shared].

    Raises:
        ValueError: if candicate_intersect is empty, or if no compatible pair
            was found before maxrand was used up (previously None was
            returned, which failed later when the caller unpacked it).
    '''
    global candicate_intersect,candicate_intersect_filtered,candidate_D_a,candidate_D_d,maxrand
    if not candicate_intersect:
        raise ValueError('selectrandom: candicate_intersect is empty')

    while True:
        drawn = random.choice(candicate_intersect)
        candidate_D_d = [drawn[0],drawn[1],drawn[2]]
        heads_d = (candidate_D_d[0], candidate_D_d[1])

        # keep entries that share one of the first two elements with D_d,
        # share none of its items and are not D_d itself
        candicate_intersect_filtered = [
            entry for entry in candicate_intersect
            if (entry[0] in heads_d or entry[1] in heads_d)
            and not any(item in entry[2] for item in candidate_D_d[2])
            and [entry[0],entry[1],entry[2]] != candidate_D_d
        ]

        if candicate_intersect_filtered:
            picked = random.choice(candicate_intersect_filtered)
            candidate_D_a = [picked[0],picked[1],picked[2]]
            return candidate_D_a,candidate_D_d

        maxrand -= 1
        if maxrand <= 0:
            raise ValueError('selectrandom: no compatible pair of candidates found')

'''
Select list [A,B,C,D,E,F,G,H,a,b,c,d] of possible candidates
'''
def makevoc(candidate_D_a,candidate_D_d):
    '''
    Build the vocabulary list [A,B,C,D,E,F,G,H,a,b,c,d] from two candidates.

    Each candidate is [first, second, items]. Reads the globals
    voc_combine_rev and voc_noun.

      a    : element shared by the first two positions of both candidates
             (if none of the first three position checks matches, the second
             element of candidate_D_a is used)
      b, c : the remaining element of candidate_D_d and candidate_D_a
      A, B : first two items of candidate_D_d
      C, D : first two items of candidate_D_a
      d    : random key of voc_combine_rev other than a, b, c whose list contains D
      E..H : four random entries of voc_noun after removing A, B, C, D

    Raises IndexError if no d exists or fewer than four entries remain.
    '''
    global voc_combine_rev, voc_noun
    a_first, a_second = candidate_D_a[0], candidate_D_a[1]
    d_first, d_second = candidate_D_d[0], candidate_D_d[1]

    # locate the shared element and the two remaining ones
    if a_first == d_first:
        a, b, c = a_first, d_second, a_second
    elif a_second == d_second:
        a, b, c = a_second, d_first, a_first
    elif a_first == d_second:
        a, b, c = a_first, d_first, a_second
    else:
        a, b, c = a_second, d_second, a_first

    A, B = candidate_D_d[2][0], candidate_D_d[2][1]
    C, D = candidate_D_a[2][0], candidate_D_a[2][1]

    # d must differ from a, b, c and be compatible with D
    candidate_d = [
        key for key in voc_combine_rev
        if key not in (a, b, c) and D in voc_combine_rev[key]
    ]
    random.shuffle(candidate_d)
    d = candidate_d[0]

    # remaining entries: everything except A, B, C, D (first occurrence each)
    voc_noun_rest = voc_noun.copy()
    for used in (A, B, C, D):
        if used in voc_noun_rest:
            voc_noun_rest.remove(used)
    random.shuffle(voc_noun_rest)
    E = voc_noun_rest[0]
    F = voc_noun_rest[1]
    G = voc_noun_rest[2]
    H = voc_noun_rest[3]
    return [A,B,C,D,E,F,G,H,a,b,c,d]

'''
Store course content in course/
'''
def storecourse(data,vocab):
    '''
    Store the course content as JSON in course/.

    The file is named course/<vocab>.json; if that file already exists, the
    first free name course/<vocab>#<n>.json (n = 1, 2, ...) is used instead.
    The course/ directory is created when it is missing.

    Parameters:
        data: JSON-serialisable course content.
        vocab: keyword used as file name.

    Returns:
        True if the file was written, False otherwise (the error is printed).
    '''
    try:
        # serialise first so that a failure does not leave an empty file behind
        payload = json.dumps(data)

        os.makedirs("course", exist_ok=True)
        coursefile = "course/"+vocab+".json"
        nr = 1
        while exists(coursefile):
            coursefile = "course/"+vocab+'#'+str(nr)+".json"
            nr += 1

        with open(coursefile, "w") as outfile:
            outfile.write(payload)

        return True
    # Exception instead of a bare except so that KeyboardInterrupt is not swallowed
    except Exception as err:
        print('Storage ERROR', err)
        return False

## Generate Learning Content of 200 Topics (Random Surfer)

In [None]:
vocab = '[ADD KEYWORD]'
number_of_microlearning_contents = 200
debug = True
random_voc = [vocab]

def _sentence_prompt_short(words):
    '''Prompt for one sentence of at most 6 words that contains the given words.'''
    return ("Erstelle einen Satz, der die Wörter \""+','.join(words)+"\" enthält." +
            " Der Satz darf maximal 6 Wörter enthalten. Verwende bei Substantiven einen Artikel. Sprachlevel A1")

def _sentence_prompt_long(words):
    '''Prompt for one sentence of at most 12 words that contains the given words.'''
    joined = ','.join(words)
    return ("Erstelle einen Satz, der die Wörter \""+joined+"\" enthält." +
            " Der Satz darf maximal 12 Wörter enthalten. Sprachlevel A1." +
            " Jeder Satz muss \""+joined+"\" enthalten. Sprachlevel A1")

# Random surfer: every round generates one course for the current keyword and
# then continues with a random noun of that course as the next keyword.
for i in range(number_of_microlearning_contents):
    try:
        if debug: print(vocab)
        voc_sent = {}
        voc_verb = response2list(getChatGPT("Erstelle eine Liste von 20 Verben zum Thema \""+vocab+"\". Sprachlevel A1",1))
        voc_noun = response2list(getChatGPT("Erstelle eine Liste von 20 Substantiven zum Thema \""+vocab+"\". Sprachlevel A1",2))

        voc_combine = (getChatGPT("Kombiniere Wörter \""+','.join(voc_noun)+"\" mit \""+','.join(voc_verb)+"\""+
                                              ". Die Liste soll nur das Substantiv und mindestens 4 Verben in einer Zeile enthalten. Verwende keine Nummern und keine Aufzählung.", 3))
        voc_combine = response2dim2list(voc_combine)
        if debug:print('Vocab downloaded')

        # the helper functions read voc_combine, voc_combine_rev and voc_noun as globals
        voc_combine_rev = voc_reverse(voc_combine)

        # 1st constraint: [ABCD]+[a]
        candidate_a = selectVocab()
        if debug:print('constraint 1 erfüllt: ', len(candidate_a)>0)
        if debug:print(candidate_a)

        # 2nd constraint: [AC]+[a], [BD]+[a], [AB]+[b], [CD]+[c]
        candicate_intersect = select2er()
        maxrand = 10  # retry budget consumed by selectrandom()
        candidate_D_a, candidate_D_d = selectrandom()

        # get vocab
        A,B,C,D,E,F,G,H,a,b,c,d = makevoc(candidate_D_a,candidate_D_d)
        random_voc.extend([A,B,C,D,E,F,G,H])

        if debug:print('A,B,C,D,E,F,G,H,a,b,c,d ready')

        # create sentences of 2 words
        # (the prompts used to end the word list with "\ " - an invalid escape
        # that sent a backslash instead of the closing quote)
        Aa = (getChatGPT(_sentence_prompt_short([A,a]),6)).strip()
        Ba = (getChatGPT(_sentence_prompt_short([B,a]),6)).strip()
        Ca = (getChatGPT(_sentence_prompt_short([C,a]),6)).strip()
        Da = (getChatGPT(_sentence_prompt_short([D,a]),6)).strip()
        if debug:print('4 2er sentences created')

        # create sentences of 3 words
        ABb = (getChatGPT(_sentence_prompt_long([A,B,b]),4)).strip()
        CDc = (getChatGPT(_sentence_prompt_long([C,D,c]),4)).strip()
        ACa = (getChatGPT(_sentence_prompt_long([A,C,a]),4)).strip()
        BDa = (getChatGPT(_sentence_prompt_long([B,D,a]),4)).strip()
        if debug:print('4 3er sentences created')

        voc_txt = (getChatGPT("Erstelle einen zusammenhängenden Text aus 10 Sätzen. Nutze folgende Wörter: \""+','.join([A,B,C,D,E,F,G,H,a,b,c,d])+"\". Jeder Satz darf maximal 7 Wörter enthalten. Sprachlevel A1. Jeder Satz in einer Zeile. Verwende keine Zahlen.",5)).strip().split("\n")
        if debug:print('Text created')

        # store all data
        data = {
            'A':A,'B':B,'C':C,'D':D,'E':E,'F':F,'G':G,'H':H,
            'a':a,'b':b,'c':c,'d':d,
            'Aa':Aa,'Ba':Ba,'Ca':Ca,'Da':Da,
            'ABb':ABb,'CDc':CDc,'ACa':ACa,'BDa':BDa,
            'txt':voc_txt
        }

        if storecourse(data,vocab) and debug: print('file stored')

        if debug: print()

        # select random vocab for next round (random surfer model)
        vocab = random.choice(voc_noun) # or [A,B,C,D,E,F,G,H]

    except Exception as e:
        # any failure in this round (unusable response, no candidates, ...)
        # restarts from a keyword that was already seen
        print(e)
        print('Restart')
        vocab = random.choice(random_voc)

## Generate Course Images

In [None]:
import deepl 
# Prefer the DEEPL_AUTH_KEY environment variable so the key never has to be
# written into the notebook; the placeholder is kept as fallback so the cell
# behaves as before when the variable is not set.
auth_key = os.environ.get('DEEPL_AUTH_KEY', '[DEEPL KEY]')
translator = deepl.Translator(auth_key)

'''
translates texts from German to English to create queries using DEEPL
'''
def translatetoenglish(txt,force=False):
    '''
    Translate a German text to English (EN-US) with the global DeepL translator.

    Results are cached in translate/<sha256 of txt>.json as
    {"from": txt, "to": translation}. The translate/ directory is created
    when it is missing.

    Parameters:
        txt: German text; an empty string is returned unchanged.
        force: if True, ignore an existing cache file and translate again.

    Returns:
        The English translation.
    '''
    global translator
    if txt == '': return ''
    transfile = 'translate/'+str(hashlib.sha256(txt.encode('utf-8')).hexdigest())+'.json'
    if exists(transfile) and not force:
        # context manager: the file is closed even if the JSON is unreadable
        with open(transfile) as cached:
            return json.load(cached)['to']

    target_language = 'EN-US'
    source_language = 'DE'
    result = translator.translate_text(txt, target_lang=target_language, source_lang=source_language)

    # make sure the cache directory exists, otherwise the finished
    # translation would be lost to a FileNotFoundError
    os.makedirs('translate', exist_ok=True)
    with open(transfile, 'w') as f:
        json.dump({'from':txt,'to':result.text}, f)

    return result.text

In [317]:
'''
OpenAI API to generate DALL·E2 images
'''
def getAIimage(query, forcenew=False):
    '''
    Generate one 256x256 image for the query with the OpenAI image API.

    The query is lower-cased, sent as prompt, and the resulting image URL is
    passed to storeimgdirectAI(query, image_url).

    Parameters:
        query: text prompt.
        forcenew: accepted for backward compatibility; not used by this function.

    Returns:
        The URL of the generated image, or None if the request or the
        storing step failed (the error is printed).
    '''
    query = query.lower()
    try:
        response = openai.Image.create(
          prompt=query,
          n=1,
          size="256x256"
        )
        image_url = response['data'][0]['url']
        # NOTE(review): storeimgdirectAI is not defined in the part of the
        # notebook shown here - confirm it exists; if it does not, the
        # NameError ends up in the handler below and None is returned.
        storeimgdirectAI(query,image_url)
        return image_url
    # Exception instead of a bare except so that KeyboardInterrupt is not swallowed
    except Exception as err:
        print('ERROR '+query, err)
        return None

In [None]:
'''
Generate Images of 200 Courses
'''
# res.txt: one course per line with '|'-separated fields; the first field is
# the course file name, fields 1-6 must all be 1 for the course to be used.
with open('res.txt', 'r') as f:
    courses_read = f.read()

cnt = 200  # maximum number of courses to generate images for
courses = courses_read.split("\n")
courses.pop(0)  # drop the first line

for i in courses:
    x = i.split('|')

    # all validated as correct and appropriate
    # (x[6] is read, so a usable line needs at least 7 fields; the former
    # check for 6 raised IndexError on lines with exactly 6 fields)
    # NOTE(review): the "already done" check looks at course_img_ai/ while the
    # result below is written to image/ - confirm this is intended.
    if len(x)>=7 and all(int(flag) == 1 for flag in x[1:7]) and not exists('course_img_ai/'+x[0]):
        print(x[0])
        if cnt > 0:
            dat = x[0]
            with open('course/'+dat, 'r') as f: course = json.load(f)

            img = {}
            # get word images
            for el in ['A','B','C','D','E','F','G','H','a','b','c','d']:
                el_en = translatetoenglish(course[el])
                if getAIimage(el_en):
                    img[el] = el_en.lower()

            # get images of the 2- and 3-word sentences
            for el in ['Aa','Ba','Ca','Da','ABb','CDc','ACa','BDa']:
                if course[el] != False:

                    sent = translatetoenglish(course[el])
                    if getAIimage(sent):
                        img[el] = sent.lower().replace('\'','').replace('?','')

            # get one image per sentence of the text; None marks a failed image
            txt_img = []
            for sentence in course['txt']:
                sent = translatetoenglish(sentence)
                if getAIimage(sent):
                    txt_img.append(sent.lower().replace('\'','').replace('?',''))

                else:
                    txt_img.append(None)
            img['txt'] = txt_img
            course['img']=img

            # = 30 images per course
            with open('image/'+dat, "w") as outfile:
                outfile.write(json.dumps(course))

            cnt -= 1