# Programmatically Generating Flex Weekly Check-In Message - Final

- Goal is to be able to generate (mostly) pre-filled messages to students based 
upon their milestone progress. 

>- Note: this is a cleaned-up version of [the original WIP notebook](https://colab.research.google.com/drive/19I8iK7f7AuzEfmL_5JdNbdvfGgZyGVBx?usp=sharing). 


## Imports and Functions

In [1]:
# !pip install -U fsds
# from fsds.imports import *

import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt

import datetime as dt
import os, sys, glob

# from google.colab import files

def get_student_milestones(df, student_name, as_dates=False,verbose=False,
                           drop_columns=['Instructor', 'Learn UUID',
                                         'First Name','Last Name','Current Pacing In Weeks']):
    
    ## Get student data
    if df['Full Name'].isin([student_name]).any():
        student_full = df.loc[ df["Full Name"] == student_name].copy()
    else:
        if verbose: print(f'Exact match not found for "{student_name}"", using a partial match.')
        student_full = df.loc[ df["Full Name"].str.contains(student_name)].copy()


    ## Set the correct index based on as_dates
    if as_dates:
        milestones = student_full.set_index('Due Date').sort_index()
    else:
        milestones = student_full.set_index(['Full Name','Ordinality']).sort_index()
        
  
    # Sorting through all milestones to generate milestone report
    drop_cols = [c for c in milestones.columns if "Last" in c]
    drop_cols.extend(drop_columns)
    
    return milestones.drop(columns=drop_cols)
    



def get_milestone_report(df,student_name,
                         report_cols = ['Ordinality','Milestone Name',
                                        'Due Date','Completed Date',
                                        'Milestone on Timeness'],verbose=True):
    
    ## Get single row for summary/personal info
    try:
        student = df.loc[ df["Full Name"] == student_name].iloc[0].copy()
    except:
        if verbose: print(f'[!] Exact match not found for "{student_name}", using a partial match instead.')
        student = df.loc[ df["Full Name"].str.contains(student_name)].iloc[0].copy()

    ## Get the milestones as dates
    mstones_ts = get_student_milestones(df,student_name=student_name,
                                        as_dates=True)
    mstones_ts.rename({'Ordinality':'Milestone #'},inplace=True,axis=1)
    

    ## Get last and next
    last_mstone= mstones_ts.loc[mstones_ts['Completed Date'].notnull()].tail(1).copy()
    next_mstone= mstones_ts.loc[mstones_ts['Completed Date'].isnull()].head(1).copy()

    ## Use today to slice out milestones
    today = dt.date.today()
    before_today = mstones_ts.loc[:today].copy()

    ## get past-due
    past_due = before_today.loc[before_today['Completed Date'].isna()].copy()#, 
                                # ['Ordinality','Milestone Name','Milestone on Timeness']].copy()
    ## Generate report df
    report_df = pd.concat({'last':last_mstone.reset_index().set_index('Milestone #'),
                           'next':next_mstone.reset_index().set_index('Milestone #'),
                           'past_due':past_due.reset_index().set_index('Milestone #')})
    
    report_df.drop(columns=['Full Name'],inplace=True)


    # report_df.set_index('Milestone #',inplace=True)
    # report = {'student':student_name,
    #         'milestones':mstones_ts,
    #           'before_today':before_today.reset_index(),
    #           'last':last_mstone.reset_index(),
    #           'next':next_mstone.reset_index(),
    #           'past_due':past_due.reset_index()}
        
    results = {'Full Name':student['Full Name'],
               'First Name':student['First Name'],
               'pace':student["Current Pacing In Weeks"],
               'instructor':student['Instructor'],
               'id':student['Learn UUID'],
               'milestones':mstones_ts,
            # 'milestones':mstones_ts,
            'report':report_df }
                                
    return results





def generate_detailed_message(df,student_name, unique_q = "I was wondering what topics are you planning on tackling this week?",
                        print_ =False,error = False):
    """Generates a weekly check-in message when applied across rows.
    
    Example Usage:
    >> df.apply(generate_message,axis=1)"""
    
    report = get_milestone_report(df,student_name)


    # student_full = df.loc[ df["Full Name"] == student_name].copy()
    # student = student_full.iloc[0].copy()

    # ## Calculating last completed milestones
    # last_df = df.set_index('Completed Date').sort_index()
    # last_df = last_df.groupby('Full Name').last()

    # student_last_mstone = last_df.loc[ student['Full Name']]

    ## SAVING MESSAGE COMPONENTS
    import datetime as dt
    today_date = dt.datetime.today()

    ## get day name
    day_of_week = today_date.strftime("%A")

    ## Get 1=Monday version of weekday
    if today_date.isoweekday() <= 2:
        greeting_q = "How was your weekend?"
    else:
        greeting_q = "How is your week going so far?"


    ## Format unique q
    unique_q = unique_q[0].lower() + unique_q[1:]
    if unique_q[:2] == 'i ':
        unique_q = unique_q.capitalize()

    report_reset = report['report'].reset_index(level=1)
    
    try:

        message = \
f"""Hey {report['First Name']},
:sunrise: Happy {day_of_week}!:sun-happy:

I just wanted to check in with you and see how everything is going. {greeting_q}

- Also, {unique_q}

- I also wanted to remind you of the milestones (base.flatironschool.com/milestones) for your {report['pace']}-week pace:
    - Your Last Milestone completed was:
        - {report_reset.loc['last','Milestone Name']}, which you completed {report_reset.loc['last','Milestone on Timeness']}.

    - Your next milestone is to complete:
        {report_reset.loc['next','Milestone Name']} by {report_reset.loc['next','Due Date'].strftime('%m-%d-%Y')}

    - Right now you are <INSTRUCTOR WRITTEN SUMMARY> your desired {report['pace']}-week pace.


- Finally, I just wanted to check if there is anything that you need or that I can assist you with?

Thanks!
"""
    except Exception as e:
        if error:
            msg = f"Error when processing {student_name}:\n"+e
            raise Exception(e)
        message = \
f"""Hey {report['First Name']},
:sunrise: Happy {day_of_week}!:sun-happy:

I just wanted to check in with you and see how everything is going. {greeting_q}

- Also, {unique_q}

- I also wanted to remind you of the milestones (base.flatironschool.com/milestones) for your {report['pace']}-week pace:

    - Your next milestone is to complete:
        {report_reset.loc['next','Milestone Name']} by {report_reset.loc['next','Due Date'].strftime('%m-%d-%Y')}

    - Right now you are <INSTRUCTOR WRITTEN SUMMARY> your desired {report['pace']}-week pace.


- Finally, I just wanted to check if there is anything that you need or that I can assist you with?

Thanks!
""" 
    
    if print:

        print('---'*20)
        print(report['Full Name'])#['First Name'] + " " + student["Last Name"])

        id = report["id"]
        mstone_url = f"https://guide.flatironschool.com/students/{id}/milestones"
        notes_url = f"https://guide.flatironschool.com/students/{id}/notes"

        print(notes_url)
        print()
        print(message)
        display(report['report'])
        # print('\n'*2)

    else:
        return message


## Download Looker Data and Load into Colab

- Go to the [Student Milestones Look](https://flatiron.looker.com/looks/1071?toggle=fil,pik)
    - change instructor filter to your name.
    - Click `Run`
    - Click on Gear Icon -> Download
    - Save as CSV with the folloowing options:
        - [ ] **As Displayed in Table**
        - [ ] Formatted
        - [ ] All results
        - Click Download.

    - Upload the file below.

In [2]:
fpath = "/Users/jamesirving/Downloads/"
files = sorted(glob.glob(fpath+'Student Milestones*'))
files

# df = 
# ## Upload milestones .csv
# uploaded = files.upload()

# for fn in uploaded.keys():
#   print('User uploaded file "{name}" with length {length} bytes'.format(
#       name=fn, length=len(uploaded[fn])))
  

# ## load csv as df
# if len(uploaded) == 1:
#     df = pd.read_csv(list(uploaded.keys())[0])
#     # display(df.head(2))
# else:
#     raise Exception('Uploading multiple csvs is not currently built into workflow.')


['/Users/jamesirving/Downloads/Student Milestones 2021-10-25T1130.csv',
 '/Users/jamesirving/Downloads/Student Milestones 2021-10-25T1135.csv']

In [3]:
df = pd.read_csv(files[-1])
print(f'[i] Loaded file: {files[-1]}')

## Clean up the column names 
base_header = "Base - Milestones and Paces"
df.columns = [c.replace('[Students] ','').replace(base_header,'').strip() for c in df.columns]

## convert to datetime
date_cols = ['Due Date','Completed Date',
             'Last Milestone Completed Date',
            'Most Recent Cohort Start Date']

for col in date_cols:
    try:
        df[col] = pd.to_datetime(df[col])
    except:
        print(f"[!] {col} not found")
df

[i] Loaded file: /Users/jamesirving/Downloads/Student Milestones 2021-10-25T1135.csv


Unnamed: 0,Instructor,Learn UUID,Full Name,First Name,Last Name,Ordinality,Milestone Name,Due Date,Completed Date,Milestone on Timeness,Current Pacing In Weeks,Most Recent Cohort Start Date,Last Milestone Completed,Last Milestone Completed Date,Timeliness of Last Completed Milestone
0,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,1,Topic 3: Data Serialization Formats - Cumulati...,2021-09-07,2021-09-13,6 days behind schedule,40,2021-08-30,Topic 3: Data Serialization Formats - Cumulati...,2021-09-13 23:43:48,6 days behind schedule
1,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,2,Topic 4: EDA with Pandas - Cumulative Lab,2021-09-17,2021-09-23,6 days behind schedule,40,2021-08-30,Topic 4: EDA with Pandas - Cumulative Lab,2021-09-23 22:34:36,6 days behind schedule
2,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,3,Topic 5: Pandas Data Cleaning - Cumulative Lab,2021-09-21,2021-10-04,13 days behind schedule,40,2021-08-30,Topic 5: Pandas Data Cleaning - Cumulative Lab,2021-10-04 12:54:22,13 days behind schedule
3,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,4,Topic 7: SQL - Cumulative Lab,2021-09-29,2021-10-11,12 days behind schedule,40,2021-08-30,Topic 7: SQL - Cumulative Lab,2021-10-11 23:02:44,12 days behind schedule
4,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,5,Topic 9: APIs - Cumulative Lab,2021-10-03,2021-10-13,10 days behind schedule,40,2021-08-30,Topic 9: APIs - Cumulative Lab,2021-10-13 23:08:35,10 days behind schedule
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,24,Phase 4 Blog Post,2022-09-12,NaT,due in 322 days,60,2021-10-11,,NaT,
724,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,25,Capstone Topic Decision and Data Gathering Dea...,2022-10-06,NaT,due in 346 days,60,2021-10-11,,NaT,
725,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,26,Capstone MVP Deadline,2022-10-27,NaT,due in 367 days,60,2021-10-11,,NaT,
726,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,27,Capstone Project Review,2022-11-29,NaT,due in 400 days,60,2021-10-11,,NaT,


# Selecting Students

In [4]:
## get students by start date
student_starts = df.groupby('Full Name').last()['Most Recent Cohort Start Date']
student_starts.sort_values()

Full Name
Alexander Casey         2021-07-19
Austin Towery           2021-07-19
Colm yeh                2021-07-19
Daniel Ross-Leutwyler   2021-07-19
Jim Petoskey            2021-08-09
Robert Cauvy            2021-08-09
Morgan Didjurgis        2021-08-09
Kregg Jackson           2021-08-09
Donica Miller           2021-08-09
Ferdinand Beaman        2021-08-09
Freddy Abrahamson       2021-08-09
James Pheby             2021-08-09
Samuel Oliver           2021-08-09
Alec Hing               2021-08-30
Claudia Tsai            2021-08-30
Andrew Martinez         2021-08-30
Alex Liang              2021-08-30
Louis Casanave          2021-09-20
Matthew Noonan          2021-09-20
Nick Winokur            2021-09-20
Noor Syed               2021-09-20
Kevin Culver            2021-09-20
Maliha Momtaj           2021-10-11
Monique Hercules        2021-10-11
Ryan Sajac              2021-10-11
Simran Kaur             2021-10-11
Name: Most Recent Cohort Start Date, dtype: datetime64[ns]

In [1]:
# all start dates present
start_dates_unique = student_starts.dt.strftime('%m-%d-%Y').unique()
start_dates_unique

NameError: name 'student_starts' is not defined

### Filter by Start

In [6]:
## Use this cell to determine the subset of students to process
# START_DATES_TO_USE = ['08-09-2021']
START_DATES_TO_USE = start_dates_unique

# students_to_msg = list(df['Full Name'].unique())
students_to_msg = student_starts[student_starts.isin(START_DATES_TO_USE)].index
students_to_msg

Index(['Donica Miller', 'Ferdinand Beaman', 'Freddy Abrahamson', 'James Pheby',
       'Jim Petoskey', 'Kregg Jackson', 'Morgan Didjurgis', 'Robert Cauvy',
       'Samuel Oliver'],
      dtype='object', name='Full Name')

### Filter by Pace

In [7]:
# ## Student Names in dataset
# students_included = list(df['Full Name'].unique())

# ## Save list of student_paces to use to select students to 
# student_paces = df.groupby('Full Name').last()['Current Pacing In Weeks']
# # student_paces
# student_paces#.head(5)

In [8]:
# ## Use this cell to determine the subset of students to process
# PACES_TO_USE = [20,40,60]

# # students_to_msg = list(df['Full Name'].unique())
# students_to_msg = student_paces[student_paces.isin(PACES_TO_USE)].index
# students_to_msg

## Producing Reports/Messages for Selected Students.

### Interactive Dropdown

In [9]:
students_to_msg

Index(['Donica Miller', 'Ferdinand Beaman', 'Freddy Abrahamson', 'James Pheby',
       'Jim Petoskey', 'Kregg Jackson', 'Morgan Didjurgis', 'Robert Cauvy',
       'Samuel Oliver'],
      dtype='object', name='Full Name')

In [11]:
df_to_use = df.drop(columns="Most Recent Cohort Start Date")
df_to_use

Unnamed: 0,Instructor,Learn UUID,Full Name,First Name,Last Name,Ordinality,Milestone Name,Due Date,Completed Date,Milestone on Timeness,Current Pacing In Weeks,Last Milestone Completed,Last Milestone Completed Date,Timeliness of Last Completed Milestone
0,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,1,Topic 3: Data Serialization Formats - Cumulati...,2021-09-07,2021-09-13,6 days behind schedule,40,Topic 3: Data Serialization Formats - Cumulati...,2021-09-13 23:43:48,6 days behind schedule
1,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,2,Topic 4: EDA with Pandas - Cumulative Lab,2021-09-17,2021-09-23,6 days behind schedule,40,Topic 4: EDA with Pandas - Cumulative Lab,2021-09-23 22:34:36,6 days behind schedule
2,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,3,Topic 5: Pandas Data Cleaning - Cumulative Lab,2021-09-21,2021-10-04,13 days behind schedule,40,Topic 5: Pandas Data Cleaning - Cumulative Lab,2021-10-04 12:54:22,13 days behind schedule
3,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,4,Topic 7: SQL - Cumulative Lab,2021-09-29,2021-10-11,12 days behind schedule,40,Topic 7: SQL - Cumulative Lab,2021-10-11 23:02:44,12 days behind schedule
4,James Irving,f7734eb5-4486-4402-b600-7a567504a56d,Alec Hing,Alec,Hing,5,Topic 9: APIs - Cumulative Lab,2021-10-03,2021-10-13,10 days behind schedule,40,Topic 9: APIs - Cumulative Lab,2021-10-13 23:08:35,10 days behind schedule
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,24,Phase 4 Blog Post,2022-09-12,NaT,due in 322 days,60,,NaT,
724,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,25,Capstone Topic Decision and Data Gathering Dea...,2022-10-06,NaT,due in 346 days,60,,NaT,
725,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,26,Capstone MVP Deadline,2022-10-27,NaT,due in 367 days,60,,NaT,
726,James Irving,175ad881-7589-4376-b7d7-600757d9b5f3,Simran Kaur,Simran,Kaur,27,Capstone Project Review,2022-11-29,NaT,due in 400 days,60,,NaT,


In [12]:
from ipywidgets import interact, interactive_output

@interact
def get_student_report_interactive(student = students_to_msg, just_milestones=True):

    if just_milestones:
            res = get_milestone_report(df_to_use, student)['milestones']
            res = res.set_index(['Full Name','Milestone #'])
            display(res.style.set_caption(student))
    else:
        generate_detailed_message(df_to_use,student,error=False)


interactive(children=(Dropdown(description='student', options=('Donica Miller', 'Ferdinand Beaman', 'Freddy Ab…

## Looping through all students.

- You can loop through your entire list of studnets, or create a subset of students.
    - e.g. All 20 week studnets

In [None]:
## Process all students
error = []
for i,student in enumerate(students_to_msg):
    generate_detailed_message(df,student,error=False)

    try:
        generate_detailed_message(df,student,error=True)

    except:
        print(f'\n\n[!] Problem with {student}\n\n')
        error.append(student)
        
    finally:
        print(f'\t student # {i+1} / {len(students_to_msg)}')
        print('\n'*2)

print("Students that error'd:")
[print(f'- {student}') for student in error];

# Extracting All Milestones for Every Student

In [None]:
## Use this cell to determine the subset of students to process
PACES_TO_USE = [20,40,60]

# students_to_msg = list(df['Full Name'].unique())
students_to_msg = student_paces[student_paces.isin(PACES_TO_USE)].index
students_to_msg

In [None]:
MILESTONES = {}
for student in students_to_msg:
    res = get_milestone_report(df, student)['milestones']
    MILESTONES[student] = res

    display(res.style.set_caption(student))

In [None]:
MILESTONES

# KNOWN ISSUES



## INCORRECT DUE DATES FOR STUDENTS THAT CHANGED PACE

- Donica Miller's milestones on Looker do not match her [milestones on guide](https://guide.flatironschool.com/students/c94227c8-d3f8-4b90-bc68-f71fabb48454/milestones)
- I think the column that doesn't match base/guide is the Completed Date column, but that the actual milestones completed is accurate. 

>- **After talking with Mtich Beeb, it seems to be something that is not actually stored in the system but that Mitch calculates.**
- See the  [Curriculum Team's Milestones Guide](https://docs.google.com/document/d/1LsKzSCKSPL7jcnzZG1nmN70FFe7Ro7U79XJV71udbdU/edit?usp=sharing) for where to find the raw milestone data. 
    - milestones.flatironschool.com

In [None]:
res = get_milestone_report(df, 'Donica')['milestones']
res

- Copied from Guide ojn 09/27/21

```
Cohort Start: 8/9/2021

Projected Completion: 5/30/2022

Cohort Completion: 11/9/2022


Data Science 40 Weeks
Topic 3: ⭐️ Data Serialization Formats - Cumulative Lab
Due: 8/13/2021

Topic 4: ⭐️ EDA with Pandas - Cumulative Lab
Due: 8/18/2021


Topic 5: ⭐️ Pandas Data Cleaning - Cumulative Lab
Due: 9/14/2021


Topic 7: ⭐️ SQL - Cumulative Lab
Due: 9/22/2021
```

In [None]:
# for i,row in df.iterrows():
#     print('---'*20)
    
#     id = row["Learn UUID"]
#     mstone_url = f"https://guide.flatironschool.com/students/{id}/milestones"
#     notes_url = f"https://guide.flatironschool.com/students/{id}/notes"

#     print(row['First Name'] + " " + row["Last Name"])
#     print(notes_url)
#     print(row['message'])

# APPENDIX - DEVELOPMENT

In [None]:
raise Exception("Development and Demo Code Below")

## Demo Functions WORKFLOW

- Demonstration of indiviudal functions

In [None]:
# Function to get 1 student's milestone - can do an exact OR partial name match
mstones = get_student_milestones(df,'Daniel',verbose=True)
mstones

In [None]:
## can also get with date index
mstones_ts = get_student_milestones(df,'Daniel',as_dates=True, verbose=True)
mstones_ts.head(3)

In [None]:
## get full report 
report = get_milestone_report(df,'Daniel')
report.keys()

In [None]:
## report of last/next/paste due mielstones
report['report']

In [None]:
## Example Student
student_name = students_included[0]
generate_detailed_message(df,student_name)

### Approach for Report

- Now that I have the time series verison of the miletones:
    1. Grab the last completed milestone and date completed
    2. Get a list of all milestones from before today that are not completed. 
    3. Grab the next milestone and due date.
    4. Summarize their current progress as how behind/ahead their last completed mstone was

In [None]:
## get milestones with due date as index
mstones_ts = get_student_milestones(df,'Daniel Ross-Leutwyler',as_dates=True)
mstones_ts.head()

In [None]:
## Get today as DATE (not date-time)
today = dt.date.today()
today

In [None]:
## get due before today 
before_today = mstones_ts.loc[:today].copy()
before_today

In [None]:
## get last_completed
last_mstone= mstones_ts.loc[mstones_ts['Completed Date'].notnull()].tail(1)
next_mstone= mstones_ts.loc[mstones_ts['Completed Date'].isnull()].head(1)

display(last_mstone,next_mstone)

In [None]:
## get past-due
past_due = before_today.loc[before_today['Completed Date'].isna(), 
                            ['Ordinality','Milestone Name','Milestone on Timeness']]
past_due