# Setup Student Roster
For importing into Google Sheets.

Merges data from the Albert roster and Google Form intake questionnaire to produce a consolidated student roster CSV file.

## Import dependencies


Import code dependencies

In [96]:
import numpy as np
import pandas as pd
from pathlib import Path

## Import data

Set up filenames of interest.

In [97]:
# Short course code; every input and output file name used by this notebook starts with it
# (e.g. 'wd', 'py', 'ad', 'se').
course_prefix = 'wd'

In [98]:
# All file paths for this course, built from course_prefix.
# The leading './' makes them relative to the notebook's working directory.
roster_file = f'./rosters/{course_prefix}-roster.csv' # input: the roster file
questionnaire_file = f'./questionnaires/{course_prefix}-intake.csv' # input: the intake questionnaire file
result_file = f'./results/{course_prefix}-result.csv' # output: the CSV file this notebook generates

Import the student roster from Albert.

In [99]:
# Load the roster CSV into a dataframe; resolve() turns the relative path into an absolute one.
roster_df = pd.read_csv(Path(roster_file).resolve())

Clean up student roster.

In [100]:
# Shorten verbose column headers: any column whose name begins with one of
# these prefixes is renamed to just the prefix.
field_prefixes = ['Email', 'First', 'Last']
roster_df = roster_df.rename(columns={
    column: prefix
    for column in roster_df.columns
    for prefix in field_prefixes
    if column.startswith(prefix)
})
roster_df.columns

Index(['Counter', 'Campus ID', 'Last', 'First', 'Pronoun', 'Name Recording',
       'Email', 'Units Taken', 'Plan Description', 'Academic Level',
       'Student Location', 'Status', 'Status Notes'],
      dtype='object')

Import the student intake questionnaire results.

In [101]:
# Load the intake questionnaire CSV into a dataframe; resolve() turns the relative path into an absolute one.
intake_df = pd.read_csv(Path(questionnaire_file).resolve())

Clean up the intake questionnaire data.

In [102]:
# Shorten the long questionnaire headers: any column whose name begins with one
# of these prefixes is renamed to just the prefix (e.g. a header starting with
# 'Discord' becomes simply 'Discord').
field_prefixes = ['Discord', 'GitHub', 'Email', 'First', 'Last']
intake_df = intake_df.rename(columns={
    column: prefix
    for column in intake_df.columns
    for prefix in field_prefixes
    if column.startswith(prefix)
})
intake_df.columns

Index(['Timestamp', 'Email', 'First', 'Discord', 'GitHub',
       'What are your motivations in taking this course? (You will not be judged in any way by your answers)',
       'What year are you in?',
       'Are you familiar with any of the following web programming languages?',
       'Have you ever used any of the following web frameworks?',
       'Are there any other programming languages you are comfortable with?',
       'Any other comments you'd like to share or interests you'd like to express?',
       'Last'],
      dtype='object')

## Merge datasets
Create one master dataframe.

In [103]:
# Join the roster (left side) with the intake responses on email address.
# Addresses collected by the form may differ from the roster in letter case or
# surrounding whitespace; an exact string match would silently leave those
# students without their intake answers, so match on a normalized copy instead.
roster_keyed = roster_df.assign(_email_key=roster_df['Email'].str.strip().str.lower())
intake_keyed = (
    intake_df
    .assign(_email_key=intake_df['Email'].str.strip().str.lower())
    .drop(columns='Email')  # the roster's Email column is the one kept in the result
)
# how='left' keeps every roster row even when there is no matching response.
# Columns present in both inputs keep the roster's name; the intake copy gets '_intake'.
df = (
    pd.merge(roster_keyed, intake_keyed, on='_email_key', how='left', suffixes=(None, '_intake'))
    .drop(columns='_email_key')  # helper key is not part of the output
)
df.sample(3, random_state=0) # random sample; fixed seed so the preview is the same on every run

Unnamed: 0,Counter,Campus ID,Last,First,Pronoun,Name Recording,Email,Units Taken,Plan Description,Academic Level,...,First_intake,Discord,GitHub,What are your motivations in taking this course? (You will not be judged in any way by your answers),What year are you in?,Are you familiar with any of the following web programming languages?,Have you ever used any of the following web frameworks?,Are there any other programming languages you are comfortable with?,Any other comments you'd like to share or interests you'd like to express?,Last_intake
6,7,N10219196,Chhabra,Anusha,,Record on File,ac9144@nyu.edu,4,UA-Coll of Arts & Sci-Economic,Senior,...,,,,,,,,,,
19,20,N18213542,Rentz,Adam,,,adr9694@nyu.edu,4,UB-Stern Schl Business-Ugrd-Bu,Junior,...,Adam,Adam Rentz,adamrentz2005,To explore whether web design is something int...,Sophomore,HTML,No - I have never used any frameworks,SQL,,Rentz
26,27,N16791592,Zhang,Kathy,,,tz2557@nyu.edu,4,UE-Steinhardt Sch Cul Ed &Hu-M,Junior,...,Kathy,kzhang1206,KChloee,To explore whether web design is something int...,Sophomore,HTML,No - I have never used any frameworks,None - I have never programmed before,,Zhang


Remove unnecessary columns.

In [104]:
# Trim the merged frame down to the few fields wanted in the final roster.
df = df.loc[:, ['Last', 'First', 'Email', 'GitHub', 'Discord']]
df.head() # first five rows (head's default)

Unnamed: 0,Last,First,Email,GitHub,Discord
0,Arjarasumpun,Nelly,na3723@nyu.edu,Na3723,Na3723
1,Bhate,Anvi,asb9991@nyu.edu,anvibhate,anvi_51413
2,Bin,Ryno,kb4625@nyu.edu,ryno5556,Ryno
3,Bogan,Ella,etb8700@nyu.edu,ellabogan,elbo_69324
4,Cao,Yuna,yc6773@nyu.edu,Yuna-cao,Yuna Cao


## Save to CSV
Save the dataframe to CSV for later use.

In [105]:
# Save df to a CSV file, without the dataframe index column.
output_path = Path(result_file).resolve()
# to_csv does not create missing folders, so make sure the output directory exists first.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)