In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import smtplib
import os

from sklearn.cluster import KMeans
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from keys import keys

# Apply seaborn's "darkgrid" theme as the global plotting style for this notebook.
sns.set_theme(style="darkgrid")

In [8]:
# Load the two response exports from the working directory:
# the PBD-toolkit responses and the registration-form responses.
toolkit = pd.read_csv(
    filepath_or_buffer="CikguHub_Marketing Planner_2023 - PBD Toolkit Responses.csv"
)
registraton = pd.read_csv(
    filepath_or_buffer="CikguHub_Marketing Planner_2023 - Registration Form.csv"
)

In [9]:
# Slim down the column names so they're readable: each verbose bilingual survey
# question is mapped to a short column name (insertion order = output column order).
_QUESTION_TO_COLUMN = {
    '1. Anda mengajar di sekolah jenis? / Which school are you teaching in?': 'teaching_level',
    '2. Berapakah tahun anda menjadi pendidik? / How long have you been an educator?': 'experience',
    '3. Apakah jawatan anda di sekolah? / What is your role in school?': 'role',
    '4. Apakah kemahiran yang anda ingin bangunkan? / What are the skills you wish to develop?': 'wanted_skills',
}


def _slim_columns(responses):
    """Return a new frame with only the four profile questions, under short names.

    Parameters
    ----------
    responses : pandas.DataFrame
        Raw survey export containing the four question columns listed in
        ``_QUESTION_TO_COLUMN``.

    Returns
    -------
    pandas.DataFrame
        Columns ``teaching_level``, ``experience``, ``role``, ``wanted_skills``
        (in that order), sharing the index of ``responses``.

    Raises
    ------
    KeyError
        If any of the four question columns is absent from ``responses``.
    """
    return responses[list(_QUESTION_TO_COLUMN)].rename(columns=_QUESTION_TO_COLUMN)


# Both exports ask the same four questions, so they go through the same helper.
tool_users = _slim_columns(toolkit)
reg_users = _slim_columns(registraton)

# Lookup tables that turn the survey's verbose answers into compact codes.
# Multi-select answers (skills, roles) are matched AFTER every space has been
# stripped from the response, which is why those keys contain no spaces.
skills_code = {
    'KemahiranMengajar/TeachingSkills': 'T',
    'Bimbingan&Pementoran/Coaching&Mentoring': 'C',
    'Kepimpinan/Leadership': 'L',
    'KemahiranDigital/DigitalSkills(contoh:aplikasiMicrosoftWord/Excel/PowerPointdanGoogleDoc/Sheet/Slide)': 'D',
    'KemahiranMultimedia/MultimediaSkills(contoh:pembangunanvideo)': 'M',
}

exp_code = {
    'Kurang daripada 1 tahun / Less than 1 year': 0,
    '1 hingga 5 tahun / 1 to 5 years': 1,
    '6 hingga 10 tahun / 6 to 10 years': 2,
    'Lebih daripada 10 tahun / More than 10 years': 3,
}

role_code = {
    'GuruAkademikBiasa/AcademicTeacher': 'T',
    'KetuaPanitia/PanelHead': 'P',
    'Officer': 'O',
}

level_code = {
    'Other:': -1,
    'Saya bukan seorang cikgu / I am not a teacher': 0,
    'Sekolah Kebangsaan / National Primary School': 1,
    'Sekolah Menengah Kebangsaan / National Secondary School': 2,
}


def _code_choices(raw_answer, code_map):
    """Code one comma-separated multi-select answer into a list of symbols.

    Each choice has its spaces removed and is looked up in ``code_map``;
    anything unrecognised (free text) becomes ``'*'``. ``str()`` is applied
    first so a missing answer (NaN) is coded as ``['*']`` instead of crashing.
    """
    return [
        code_map.get(choice.replace(' ', ''), '*')  # '*' marks a free response
        for choice in str(raw_answer).split(',')
    ]


def code_responses(df):
    """Encode every survey row of ``df`` into compact codes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``teaching_level``, ``experience``,
        ``wanted_skills`` and ``role``.

    Returns
    -------
    tuple of four lists, one entry per row of ``df``:
        coded_exp    -- ``exp_code`` value, or the raw cell unchanged when the
                        answer is not a known option (including NaN).
        coded_skills -- list of skill letters per row, ``'*'`` for free text.
        coded_roles  -- list of role letters per row, ``'*'`` for free text.
        coded_level  -- ``level_code`` value; unrecognised or missing answers
                        get the ``'Other:'`` code instead of raising KeyError.
    """
    coded_exp = []
    coded_skills = []
    coded_roles = []
    coded_level = []

    other_level = level_code['Other:']

    # Walk the four columns in lock-step; no need to build a Series per row.
    answers = zip(df['teaching_level'], df['experience'], df['wanted_skills'], df['role'])
    for lvl, exp, skills, roles in answers:
        coded_level.append(level_code.get(lvl, other_level))
        coded_exp.append(exp_code.get(exp, exp))  # unknown / NaN passes through as-is
        coded_skills.append(_code_choices(skills, skills_code))
        coded_roles.append(_code_choices(roles, role_code))

    return coded_exp, coded_skills, coded_roles, coded_level


In [10]:

# Encode both surveys: suffix 1 = PBD-toolkit responses, suffix 2 = registration-form responses.
coded_exp1, coded_skills1, coded_roles1, coded_level1 = code_responses(tool_users)
coded_exp2, coded_skills2, coded_roles2, coded_level2 = code_responses(reg_users)

In [11]:
# Stack the coded answers of both surveys into one frame
# (toolkit respondents first, registration respondents after them).
user_model = pd.DataFrame({})
for column, (from_toolkit, from_registration) in {
    'experience': (coded_exp1, coded_exp2),
    'wanted_skills': (coded_skills1, coded_skills2),
    'roles': (coded_roles1, coded_roles2),
    'teaching_level': (coded_level1, coded_level2),
}.items():
    user_model[column] = from_toolkit + from_registration

# One-hot encode the multi-select answers: one 0/1 column per skill code and
# one per role code ('*' = free-text answer). Column order matters downstream.
one_hot_um = pd.DataFrame({})
one_hot_um['experience'] = user_model['experience']
one_hot_um['teaching_level'] = user_model['teaching_level']
for code in ('T', 'C', 'L', 'D', 'M', '*'):
    one_hot_um[code] = [int(code in wanted) for wanted in user_model['wanted_skills']]
for code in ('T', 'P', 'O', '*'):
    one_hot_um['roles_' + code] = [int(code in held) for held in user_model['roles']]

# user_interests = LearnerModel.skill_interests
# It might make more sense to store user interests as a list or dict than as a text field,
# but this function transforms the text from the response form into a list of letters.
def code_user_interests(user_interests):
    """Turn a comma-separated skill-interest string into a flat list of codes.

    Parameters
    ----------
    user_interests : str
        Comma-separated answer text, e.g.
        ``'Kemahiran Mengajar / Teaching Skills, Kepimpinan / Leadership'``.

    Returns
    -------
    list of str
        One code per comma-separated entry, in input order: ``'T'``, ``'C'``,
        ``'L'``, ``'D'`` or ``'M'`` for a known option, ``'*'`` for anything
        else (free response).
    """
    # Keys have no spaces because every space is stripped before the lookup.
    skills_code = {'KemahiranMengajar/TeachingSkills' : 'T',
                  'Bimbingan&Pementoran/Coaching&Mentoring': 'C',
                  'Kepimpinan/Leadership': 'L',
                  'KemahiranDigital/DigitalSkills(contoh:aplikasiMicrosoftWord/Excel/PowerPointdanGoogleDoc/Sheet/Slide)': 'D',
                  'KemahiranMultimedia/MultimediaSkills(contoh:pembangunanvideo)': 'M'}

    coded_skills = []
    for raw_skill in user_interests.split(','):  # look at each skill selected
        # One code per entry (previously the whole list was appended on every pass,
        # which produced a list of lists instead of a list of letters).
        coded_skills.append(skills_code.get(raw_skill.replace(' ', ''), '*'))
    return coded_skills

def create_user_df(max_track_num, ranking, user_open_tracks, user_interests, user_engagement, cluster_engagement):
    """Assemble the per-track feature table used to pick recommendations.

    Every argument is a dict or Series keyed by track code (the sample cell
    uses 'T', 'C', 'L', 'D', 'M'); the keys become the index of the result.

    Parameters
    ----------
    max_track_num : total number of tracks per category -> column 'Max'
    ranking : ranking score per category -> column 'Ranking'
    user_open_tracks : tracks the user has already opened -> column 'Open Tracks'
    user_interests : 1/0 interest flag per category -> column 'User Interest'
    user_engagement : the user's engagement score -> column 'User Engagement'
    cluster_engagement : the cluster's engagement score -> column 'Cluster'

    Returns
    -------
    pandas.DataFrame
        The six columns above plus 'Avaliable' (1 when 'Open Tracks' < 'Max',
        else 0). The column-name spelling is kept as-is because
        ``recommend_videos`` reads it under that name.
    """
    df = pd.DataFrame({'Open Tracks': user_open_tracks, 'Max': max_track_num, 'Ranking': ranking,
                       'Cluster': cluster_engagement, 'User Interest': user_interests,
                       'User Engagement': user_engagement})
    # Vectorised comparison instead of a row-by-row iterrows() loop.
    df['Avaliable'] = (df['Open Tracks'] < df['Max']).astype('int64')

    return df

def recommend_videos(df, n_recs=2):
    """Pick the track categories to recommend next for one user.

    Only rows with 'Avaliable' == 1 are considered. Categories the user marked
    as an interest ('User Interest' == 1) come first; if there are fewer than
    ``n_recs`` of them, the list is topped up with the best remaining
    categories ('User Interest' == 0). Within each group, rows are ordered by
    'Open Tracks', 'User Engagement', 'Cluster', 'Ranking', all descending.

    Parameters
    ----------
    df : pandas.DataFrame
        Table as built by ``create_user_df`` (the 'Avaliable' column keeps the
        spelling that function produces).
    n_recs : int, default 2
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``n_recs`` index labels of ``df``, best first.

    Raises
    ------
    ValueError
        If ``n_recs`` is negative.
    """
    if n_recs < 0:
        raise ValueError("n_recs must be non-negative")

    feature_order = ['Open Tracks', 'User Engagement', 'Cluster', 'Ranking'] # order in which we consider features

    available_tracks = df[df['Avaliable'] == 1]

    def ranked(interest_flag):
        # Build the mask from the filtered frame itself; masking it with a Series
        # from the full `df` makes pandas reindex the key and emit a UserWarning.
        subset = available_tracks[available_tracks['User Interest'] == interest_flag]
        return list(subset.sort_values(feature_order, ascending=False).index)

    chosen = ranked(1)
    if len(chosen) >= n_recs:
        return chosen[:n_recs]
    return chosen + ranked(0)[:n_recs - len(chosen)]
    
# sample system data
max_track_num = {'T':5, 'C':2, 'L':3, 'D':3, 'M':2} # number of tracks for each interest cat.
# Mean of each 0/1 skill column = share of respondents who want that skill.
# Columns are picked by label (the track codes) rather than by position, so the
# result does not depend on column order or on non-numeric columns elsewhere.
ranking = one_hot_um[list(max_track_num)].mean() # can replace this ranking with any ranking we want

# sample user data
user_open_tracks = {'T':2, 'C':1, 'L':2, 'D':0, 'M':0}
# Skill flags of the respondent in row 1; these are input by users and can be changed at any time.
user_interests = one_hot_um.iloc[1][list(max_track_num)]
user_engagement = {'T':0.4, 'C':0.7, 'L':0.2, 'D':0.6, 'M':0.4} # we need to calculate this metric
cluster_engagement = {'T':0.8, 'C':0.2, 'L':0.4, 'D':0.95, 'M':0.6} # we need to calculate this metric

df = create_user_df(max_track_num, ranking, user_open_tracks, user_interests, user_engagement, cluster_engagement)
df

Unnamed: 0,Open Tracks,Max,Ranking,Cluster,User Interest,User Engagement,Avaliable
T,2,5,0.846154,0.8,0,0.4,1
C,1,2,0.538462,0.2,0,0.7,1
L,2,3,0.576923,0.4,0,0.2,1
D,0,3,0.807692,0.95,1,0.6,1
M,0,2,0.807692,0.6,0,0.4,1


In [12]:
# Track categories recommended for the sample user (at most two index labels of `df`).
recommend_videos(df)

['D', 'T']

### Email System

In [23]:
def send_email(subject, body, sender, recipients, password,
               signature_name="Christopher Kok", logo_path='cikgu_logo.png'):
    """Send a plain-text email with an HTML sign-off and inline logo via Gmail.

    The message is a multipart email made of three parts, in order: the plain
    text ``body``, an HTML sign-off that references the logo by Content-ID, and
    the logo image itself. It is sent over SSL through smtp.gmail.com:465.

    Parameters
    ----------
    subject : str
        Subject line.
    body : str
        Plain-text message body.
    sender : str
        Address used both as the From header and as the SMTP login name.
    recipients : list of str, or str
        Destination address(es). A single string is treated as one recipient.
    password : str
        Password used for the SMTP login.
    signature_name : str, default "Christopher Kok"
        Name shown in bold in the sign-off.
    logo_path : str, default 'cikgu_logo.png'
        Path of the image embedded in the sign-off; its file name is used as
        the Content-ID.

    Raises
    ------
    OSError
        If ``logo_path`` cannot be read.
    smtplib.SMTPException
        On login or delivery failure (the connection is still closed).
    """
    if isinstance(recipients, str):
        # ', '.join() and sendmail() would otherwise iterate a bare string per character.
        recipients = [recipients]

    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = ', '.join(recipients)

    msg.attach(MIMEText(body))

    # Sign off with Logo
    logo_cid = os.path.basename(logo_path)
    sign_off = ('<br/><br/>Best regards,<br/><b>%s</b><br/>'
                '<img src="cid:%s" width="200"/><br/>' % (signature_name, logo_cid))
    msg.attach(MIMEText(sign_off, 'html'))

    with open(logo_path, 'rb') as fp:
        img = MIMEImage(fp.read())
    img.add_header('Content-ID', '<{}>'.format(logo_cid))
    msg.attach(img)

    # The context manager closes the connection even if login/sendmail raises.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp_server:
        smtp_server.login(sender, password)
        smtp_server.sendmail(sender, recipients, msg.as_string())

# Example recommendation email. Running this cell sends a real message to every
# address in `recipients`, logging in with the app password from `keys`.
# NOTE(review): recipient addresses are hard-coded here - confirm they should live in the notebook.
subject = "[Example] CikguHub: Here's what you should watch next!"
body = (
    "Dear X,\n\n"
    "Thank you for being one of our first supporters! "
    "Part of being a CikguHub member is getting personalized video recommendations "
    "based on your preferences and watch history. "
    "Here is the next video we think you should check out!\n\n"
    "Episode 1: Introduction to Collaborative Learning - "
    "https://www.cikguhub.my/courses/cikguhub/introduction_to_collaborative_learning/?cl=1"
)
sender = "cikguhub@gmail.com"
recipients = [
    "pherkok@gmail.com",
    "janice@myedvolution.com",
    "melissa@myedvolution.com",
    "jqs@umich.edu",
    "chriskok@umich.edu",
]
password = keys['google_app_pass']

send_email(
    subject=subject,
    body=body,
    sender=sender,
    recipients=recipients,
    password=password,
)