In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json

from collections import Counter

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
def map_requested(row):
    """Serialize the per-major headcounts requested by a project to JSON.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the
            labels "CE", "Env.", "EE", "ME", "DE" and "Other".

    Returns:
        str: JSON object text holding those six keys, in that order.

    Raises:
        TypeError: If a value is neither JSON-serializable nor a NumPy-style
            scalar exposing ``.item()``.
    """
    def _to_builtin(value):
        # json.dumps only calls this hook for objects it cannot serialize on
        # its own. NumPy scalars (np.int32, np.int64, ...) fall in that group;
        # .item() returns the equivalent built-in Python number.
        if hasattr(value, "item"):
            return value.item()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    major_columns = ("CE", "Env.", "EE", "ME", "DE", "Other")
    requested = {column: row[column] for column in major_columns}
    return json.dumps(requested, default=_to_builtin)

def create_project_embeddings(row, model):
    """Encode a project's name and sponsor into one embedding.

    Args:
        row: Mapping-like object with 'Project name' and 'Sponsor' entries.
        model: Object exposing ``encode(text, convert_to_tensor=True)``
            (presumably a sentence-embedding model -- confirm with caller).

    Returns:
        Whatever ``model.encode`` returns for the combined text.
    """
    # Keep only real, non-blank strings: a missing cell (None/NaN) would
    # otherwise raise when concatenated with a string.
    parts = [
        text.strip()
        for text in (row['Project name'], row['Sponsor'])
        if isinstance(text, str) and text.strip()
    ]
    # Join with a space so the last word of the name does not fuse with the
    # first word of the sponsor (e.g. "Team #1AISC").
    soup = ' '.join(parts)
    embeddings = model.encode(soup, convert_to_tensor=True)
    return embeddings

# Load the raw project sheet. Missing headcounts become 0, whitespace-only
# cells anywhere in the frame become 0, then the headcount columns are cast
# to 32-bit integers.
headcount_columns = ["S", "CE", "Env.", "EE", "ME", "DE", "Other"]
projects = (
    pd.read_csv("projects_not_clean.csv")
    .fillna(dict.fromkeys(headcount_columns, 0))
    .replace(r'^\s*$', 0, regex=True)
    .astype(dict.fromkeys(headcount_columns, "int32"))
)

# Map majors requested
projects["majors_requested"] = projects.apply(map_requested, axis=1)
projects = projects.drop(columns=headcount_columns)

# projects['embeddings'] = projects.apply(lambda x: create_project_embeddings(x, model), axis=1)
projects

Unnamed: 0,ID,Project name,Sponsor,majors_requested
0,F23-01,AISC Steel Bridge - Team #1,AISC,"{""CE"": 7, ""Env."": 0, ""EE"": 0, ""ME"": 0, ""DE"": 0..."
1,F23-02,AISC Steel Bridge - Team #2,AISC,"{""CE"": 7, ""Env."": 0, ""EE"": 0, ""ME"": 0, ""DE"": 0..."
2,F23-03,ASCE Concrete Canoe,ASCE,"{""CE"": 7, ""Env."": 0, ""EE"": 0, ""ME"": 0, ""DE"": 0..."
3,F23-04,ASCE Timber Strong Competition,ASCE,"{""CE"": 5, ""Env."": 0, ""EE"": 0, ""ME"": 8, ""DE"": 0..."
4,F23-06,South Platte Renew Process Upgrades,RMWEA Competition,"{""CE"": 2, ""Env."": 2, ""EE"": 0, ""ME"": 0, ""DE"": 2..."
5,F23-07,Sodium Sulfate for A Circular Economy – Commun...,WERC Environmental Design Contest,"{""CE"": 2, ""Env."": 4, ""EE"": 0, ""ME"": 0, ""DE"": 0..."
6,F23-08,The Clean Water Challenge,Greenway Foundation,"{""CE"": 2, ""Env."": 2, ""EE"": 0, ""ME"": 2, ""DE"": 0..."
7,F23-09,Big Idea Challenge: Inflatable Systems for Lun...,NASA & Mines Space Resources,"{""CE"": 0, ""Env."": 0, ""EE"": 0, ""ME"": 8, ""DE"": 0..."
8,F23-10,Human Lander Challenge,NASA & Mines Space Resources,"{""CE"": 0, ""Env."": 0, ""EE"": 1, ""ME"": 8, ""DE"": 0..."
9,F23-11,NASA Robotic Mining Competition - Lunabotics,NASA & Mines Space Resources,"{""CE"": 0, ""Env."": 0, ""EE"": 1, ""ME"": 8, ""DE"": 0..."


In [8]:
# projects.to_csv('projects.csv')

In [6]:
# Labels of the per-project ranking columns in the student preference sheet.
# They are matched verbatim against the row keys in get_project_order and
# passed to DataFrame.drop afterwards, so the irregular spacing in some
# entries (e.g. 'F23- 85', 'F23 - 102') must not be normalized here --
# presumably it mirrors the CSV headers; verify against the file.
# NOTE(review): 'F23-02' appears in the projects table but not in this list --
# confirm whether that omission is intentional.
project_ids = [
    'F23-01','F23-03','F23-04','F23-06','F23-07','F23-08','F23-09','F23-10','F23-11','F23-12','F23-13','F23-14','F23-15',
    'F23-17.1','F23-17.2','F23-17.3','F23-17.4','F23-17.5','F23-20','F23-21','F23-22','F23-23','F23-24','F23-25','F23-26','F23-27',
    'F23-30','F23-31','F23-32','F23-33','F23-40','F23-41','F23-42','F23-50','F23-51','F23-52','F23-53','F23-60','F23-61','F23-62',
    'F23-63','F23-64','F23-65','F23-70','F23-71','F23-72','F23-73','F23-74','F23-75','F23-76','F23-77','F23-78','F23-80','F23-84',
    'F23- 85','F23-86','F23-87','F23-88','F23-89','F23-90','F23-91','F23-92','F23-100','F23-101','F23 - 102','F23-110','F23-111',
    'F23 - 112','F23- 114','F23- 115','F23-116',
]

# Labels of the secondary-skill columns in the student preference sheet.
# get_secondary_skill matches row keys against these strings exactly, and the
# same list is later passed to DataFrame.drop, so every entry must equal its
# column header character for character.
# NOTE(review): 'Software\xa0engineering' contains a non-breaking space
# (U+00A0) rather than a regular space -- presumably copied from the CSV
# header; confirm before "fixing" it.
secondary_skills = [
    'Aerospace experience',
    'Additive manufacturing',
    'Advance manufacturing courses, but not taking Minor',
    'ArcGIS',
    'AutoCAD',
    'Automotive experience',
    'Circuit design',
    'Construction engineering',
    'Computer Science classes (e.g. CSCI 261, 262)',
    'Ecology',
    'Electrical Drives and Electrical machinery',
    'Geotechnical engineering',
    'Groundwater-remediation',
    'Infrastructure design',
    'Instrumentation and Electrical measurements',
    'Machining proficient',
    'Microcontroller interfacing',
    'Microcontroller programming',
    'Modeling',
    'Optics',
    'Orthopedics',
    'PC board fabrication',
    'Power electronics',
    'Programming control systems',
    'Programming (other)',
    'Renewable energy',
    'Software\xa0engineering',
    'SOLIDWORKS Certification(s)',
    'SOLIDWORKS proficient',
    'Structural engineering',
    'Surveying',
    'Systems engineering experience',
    'Wastewater treatment',
    'Water reclamation',
    'Water resources engineering',
    'Water treatment',
    'Welding',
]

# Maps a short major code to the 'Degree' strings that belong to it.
# get_major does an exact, case-sensitive membership test against these
# lists; any degree not listed here resolves to 'Other'.
# The keys reuse four of the headcount labels serialized by map_requested
# ('CE', 'Env.', 'EE', 'ME').
# NOTE(review): 'MECHANICal ENGR' has unusual casing -- presumably it mirrors a
# raw value in the data; confirm before normalizing.
majors = {
    'EE': ['EE-Engy & Pwr Electrncs Spclty',
           'EE-Info Sys & Science Spclty',
           'EE-Intg Crcts & Elctrnc Spclty',
           'Electrical Engineering',
           'EE-Antenn & Wirels Comm Spclty',
          ],
    'ME': ['Mechanical Engineering',
           'Mechanical Eng - Aerospace',
           'Mechanical Eng - Energy',
           'MECHANICal ENGR',
           'Mechanical Eng - Automotive',
           'Mechanical Eng - Biomechanics',
           'Mechanical Eng - Manufacturing',
           'Mech Eng - Nuclear Energy',
          ],
    'CE': ['Civil Engineering'],
    'Env.': ['Environmental Engineering'],
}


def get_secondary_skill(row, secondary_skills):
    """List the secondary skills that have a value in a student row.

    Args:
        row: Row-like object exposing ``keys()`` and a ``values`` sequence
            (e.g. a pandas Series).
        secondary_skills: Collection of labels that denote skill columns.

    Returns:
        list: Labels from ``row`` that are in ``secondary_skills`` and whose
        cell is not missing, in row order.
    """
    def _is_missing(value):
        # None and float NaN both mark an empty cell. NaN has to be caught
        # explicitly because `NaN != None` evaluates to True, which would
        # count an empty cell as a reported skill.
        return value is None or (isinstance(value, float) and value != value)

    return [
        key
        for key, value in zip(row.keys(), row.values)
        if key in secondary_skills and not _is_missing(value)
    ]

def get_project_order(row, project_ids):
    """Return the project ids a student ranked, best rank first.

    Args:
        row: Row-like object exposing ``keys()`` and a ``values`` sequence
            (e.g. a pandas Series).
        project_ids: Collection of labels that denote project ranking columns.

    Returns:
        list: Labels from ``row`` that are in ``project_ids`` and hold a
        non-zero numeric rank, sorted by ascending rank. Equal ranks keep
        their row order. Ranks are truncated to integers before comparison.
    """
    numeric_types = (int, float, np.integer, np.floating)
    ranked = []
    for key, value in zip(row.keys(), row.values):
        if key not in project_ids:
            continue
        # Accept built-in and NumPy numbers alike (an exact type() comparison
        # would silently drop np.int64 / np.float64 ranks). Booleans are not
        # ranks even though bool is a subclass of int.
        if isinstance(value, (bool, np.bool_)) or not isinstance(value, numeric_types):
            continue
        # NaN marks an empty cell; int(NaN) would raise ValueError.
        if value != value:
            continue
        rank = int(value)
        if rank != 0:
            ranked.append((rank, key))

    # list.sort is stable, so ties stay in row order.
    ranked.sort(key=lambda pair: pair[0])
    return [key for _, key in ranked]

def get_major(row, majors):
    """Resolve a student's 'Degree' value to a major code.

    Args:
        row: Mapping-like object with a 'Degree' entry.
        majors: Dict mapping a major code to a collection of degree names.

    Returns:
        The first code (in dict order) whose collection contains the degree,
        or 'Other' when none does.
    """
    student_degree = row['Degree']
    matching_codes = (
        code
        for code, degree_names in majors.items()
        if student_degree in degree_names
    )
    return next(matching_codes, 'Other')

def create_student_embeddings(row, model):
    """Encode a student's degree, free-text description and skills.

    Args:
        row: Mapping-like object with 'Degree', 'Other_desc' and
            'secondary_skills' entries; the latter is expected to be a list
            of strings.
        model: Object exposing ``encode(text, convert_to_tensor=True)``
            (presumably a sentence-embedding model -- confirm with caller).

    Returns:
        Whatever ``model.encode`` returns for the combined text.
    """
    parts = []
    for field in ('Degree', 'Other_desc'):
        text = row[field]
        # Only real, non-blank strings contribute: None is skipped, and so is
        # NaN, which is truthy and would otherwise raise on concatenation.
        if isinstance(text, str) and text.strip():
            parts.append(text.strip())

    skills = row['secondary_skills']
    if isinstance(skills, (list, tuple)) and skills:
        parts.append(' '.join(skills))

    # Separate the pieces with spaces so adjacent words do not fuse
    # (e.g. "Mechanical EngineeringOther").
    soup = ' '.join(parts)
    embeddings = model.encode(soup, convert_to_tensor=True)
    return embeddings
    

# Pre-process
# Survey/bookkeeping columns that the later steps do not read.
dropped_columns = [
    "CWID", "Status", "Class", "Major", "2nd\nMajor", "Minor", "Email", "Recitation", "Project ID",
    "Your full name", "CWID.1", "GPA", "Major.1", "Response", "Other (please specify)", "Other", "Other.1", "ERROR HCDS",
]
students = (
    pd.read_csv("Cleaned Student Pref.csv")
    .reset_index()
    .rename(columns={"index": "id"})
    .dropna(subset=["Name", "F23-01"])  # Remove rows if important value missing
    .drop(columns=dropped_columns)
    .replace({np.nan: None})
)

# Map secondary skill
students["secondary_skills"] = students.apply(get_secondary_skill, axis=1, args=(secondary_skills,))
students = students.drop(columns=secondary_skills)

# Map project order
students["project_order"] = students.apply(get_project_order, axis=1, args=(project_ids,))
students = students.drop(columns=project_ids)

# Map major
students['Major'] = students.apply(get_major, axis=1, args=(majors,))

# students['embeddings'] = students.apply(lambda x: create_student_embeddings(x, model), axis=1)
students

Unnamed: 0,id,Name,Degree,Other_desc,secondary_skills,project_order,Major
0,0,"Dechant, Bradley",Mechanical Engineering,Other,"[Additive manufacturing, Circuit design, Geote...","[F23-11, F23-14, F23-50, F23-09, F23-64, F23-6...",ME
1,1,"Drosselmeyer, Dillon",Mechanical Eng - Aerospace,Professional project management experience,"[Advance manufacturing courses, but not taking...","[F23-50, F23-51, F23-87, F23-88, F23-77, F23-7...",ME
3,3,"Crane, Michael",Mechanical Eng - Aerospace,,"[Circuit design, Microcontroller interfacing, ...","[F23-63, F23-111, F23-75, F23-77, F23-73, F23-...",ME
4,4,"Elliott, Tanner",Mechanical Engineering,,"[Additive manufacturing, Advance manufacturing...","[F23-60, F23-72, F23-51, F23-116, F23-101, F23...",ME
5,5,"Apted, Michaelan",Mechanical Engineering,,"[Ecology, Structural engineering]","[F23-111, F23-92, F23-75, F23-80, F23 - 112, F...",ME
6,6,"Dowden, Donavan",Mechanical Engineering,,"[Aerospace experience, Advance manufacturing c...","[F23-71, F23-77, F23-86, F23-42, F23-20, F23-9...",ME
7,7,"Billy, Kiara",EE-Engy & Pwr Electrncs Spclty,"Protection & Controls experience, some Physica...","[Construction engineering, Geotechnical engine...","[F23-17.3, F23-13, F23-17.1, F23-17.2, F23-15,...",EE
8,8,"Misra, Anamika",Environmental Engineering,,[Aerospace experience],"[F23-30, F23-27, F23-21, F23-07, F23-06, F23-0...",Env.
9,9,"Durrani, Zafar",EE-Info Sys & Science Spclty,,"[Circuit design, Construction engineering, Eco...","[F23-13, F23-111, F23-92, F23-62, F23-87, F23-...",EE
10,10,"Yakimow, Samantha",Mechanical Engineering,Human Motion Capture Systems,"[Aerospace experience, AutoCAD, Automotive exp...","[F23-15, F23-64, F23-63, F23 - 112, F23-80, F2...",ME


In [7]:
# students.to_csv('students.csv')