# Grading

In [26]:
import pandas as pd

In [27]:
# Load the class roster and preview the first rows.
# "sec" is read as text rather than a number; presumably so that section codes
# such as "003" keep their leading zeros (later filters compare against such
# strings) -- TODO confirm against the workbook.
dfRos = pd.read_excel("./data/roster.xlsx", dtype={"sec": str})
dfRos.head()

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email
0,1,1,910787990,First1,Last1,email1@cmu.ac.th
1,2,1,877304244,First2,Last2,email2@cmu.ac.th
2,3,1,288785711,First3,Last3,email3@cmu.ac.th
3,4,1,129937341,First4,Last4,email4@cmu.ac.th
4,5,1,105284582,First5,Last5,email5@cmu.ac.th


In [28]:
# Inspect the roster's column dtypes and non-null counts.
dfRos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   no_reg      234 non-null    int64 
 1   sec         234 non-null    object
 2   student_id  234 non-null    int64 
 3   firstname   234 non-null    object
 4   lastname    234 non-null    object
 5   email       234 non-null    object
dtypes: int64(2), object(4)
memory usage: 11.1+ KB


In [29]:
# Check for duplicates.
# NOTE: duplicated() without arguments only counts rows that repeat in every
# column; a student_id appearing twice with different other fields would not
# be counted here.
dfRos.duplicated().sum()

0

In [30]:
# Load the student -> group assignment sheet and preview the first rows.
dfGroup = pd.read_excel("./data/student_groups.xlsx")
dfGroup.head()

Unnamed: 0,student_id,group_name
0,878130557,
1,272210128,Sec6: มี 2
2,682511317,
3,910787990,Sec1: วิดวะมช.รู้จักปะ
4,576447795,Sec4: eiei


In [31]:
# Inspect dtypes and non-null counts of the group sheet.
# Null group_name values are expected: not every student forms a group.
dfGroup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   student_id  234 non-null    int64 
 1   group_name  157 non-null    object
dtypes: int64(1), object(1)
memory usage: 3.8+ KB


In [32]:
# Check for duplicates.
# NOTE: duplicated() without arguments only counts rows that repeat in every
# column; a student_id listed twice with different group names would not be
# counted here.
dfGroup.duplicated().sum()

0

In [33]:
# The group name comes from user input which can have multiple spaces and leading and trailing spaces.
# I want to clean it.
import re


def formatGroupName(text):
    """Normalize a user-typed group name for display.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines) is collapsed to a single space.

    Parameters
    ----------
    text : str or None or float
        The raw group name. Missing values (``None`` or a float NaN, which is
        how pandas represents an empty cell) are accepted.

    Returns
    -------
    str
        The cleaned name, or ``""`` when ``text`` is missing.
    """
    # An empty spreadsheet cell arrives as None or float NaN; NaN is the only
    # float that is not equal to itself. Treat both as "no group".
    if text is None or (isinstance(text, float) and text != text):
        return ""
    # split() with no argument drops leading/trailing whitespace and splits on
    # any whitespace run, so re-joining with one space does both clean-ups.
    return " ".join(text.split())


# Replace missing group names with "" and normalize the whitespace of the
# rest, in one chained assignment.
dfGroup["group_name"] = (
    dfGroup["group_name"].fillna("").apply(formatGroupName)
)

In [34]:
# Derive a merge key from the group name so that merging is robust against
# accidental differences in spacing or capitalization of the group name.
# The same logic is applied to the other data sets as well.
def makeGroupKey(text):
    """Build a merge key from a group name.

    All whitespace is removed and the result is lower-cased, so two spellings
    of a name that differ only in spacing or letter case give the same key.
    """
    # split() cuts on every whitespace run; joining with "" removes them all.
    squeezed = "".join(text.split())
    return squeezed.lower()


# Add the merge key next to each group name, then count the members per key
# (the "" key collects the students without a group).
dfGroup["group_key"] = dfGroup["group_name"].map(makeGroupKey)
dfGroup["group_key"].value_counts()

group_key
                                            77
sec1:เซรั่มนารา                              6
sec6:มีหนุ่มตี๋เป็นเส้นชัย                   5
sec3:ไก่บักโจ้น                              5
sec6:jungdaikoboru                           5
sec1:ปาท่องโก๋ทอด                            5
sec4:abc                                     5
sec3:goodmorning                             5
sec3:ไออีสีชมพู                              5
sec6:oppaicy                                 5
sec4:จุบุจุบุ                                5
sec3:nguang-non                              5
sec1:กุ๊กๆกู๋                                5
sec1:เอนทาเนียร์ตัวจิ๋ว                      5
sec1:เหงาจัง                                 5
sec3:eiei                                    5
sec3:no123                                   5
sec1:หารู้ไม่                                5
sec6:ไม่บอกหรอก                              5
sec4:eiei                                    5
sec1:วิดวะมช.รู้จักปะ                        5
sec

In [35]:
# Attach each roster student's group (name and key) by student_id.
# A left join keeps every roster row; students missing from the group sheet
# get null group columns.
dfrs = pd.merge(
    dfRos,
    dfGroup[["student_id", "group_name", "group_key"]],
    on="student_id",
    how="left",
)
dfrs.head()

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key
0,1,1,910787990,First1,Last1,email1@cmu.ac.th,Sec1: วิดวะมช.รู้จักปะ,sec1:วิดวะมช.รู้จักปะ
1,2,1,877304244,First2,Last2,email2@cmu.ac.th,Sec1: หารู้ไม่,sec1:หารู้ไม่
2,3,1,288785711,First3,Last3,email3@cmu.ac.th,Sec1: เซรั่มนารา,sec1:เซรั่มนารา
3,4,1,129937341,First4,Last4,email4@cmu.ac.th,Sec1: กุ๊กๆกู๋,sec1:กุ๊กๆกู๋
4,5,1,105284582,First5,Last5,email5@cmu.ac.th,Sec1: กุ๊กๆกู๋,sec1:กุ๊กๆกู๋


In [36]:
# Restrict to sections 003, 006, 803, 806 and list the students whose
# group_name is null after the merge.
# Result: there are such students.
filtSec = dfrs["sec"].isin(["003", "006", "803", "806"])
dfrmFiltSec = dfrs.loc[filtSec]
# NOTE: despite its name, this mask is True where group_name IS null.
filtNotNull = dfrmFiltSec["group_name"].isna()
display(dfrmFiltSec.loc[filtNotNull])

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key
55,56,3,228248149,First56,Last56,email56@cmu.ac.th,,


In [37]:
# Keep only the group-sheet rows that actually carry a group name.
filtNotEmpty = dfGroup["group_name"].ne("")
dfGroupNotEmpty = dfGroup[filtNotEmpty]

# Of those, show the rows whose student_id does not occur in the roster.
filtMismatchId = ~dfGroupNotEmpty["student_id"].isin(dfRos["student_id"])
dfGroupNotEmpty.loc[filtMismatchId]

Unnamed: 0,student_id,group_name,group_key
16,943301355,Sec3: no 123,sec3:no123
233,466742761,Sec1: เซรั่มนารา,sec1:เซรั่มนารา


In [38]:
# Locate the group-sheet row holding the mismatched id. For this walkthrough
# the problematic id is assumed to be 943301355.
# In reality, I double checked with the student first.
filtStudent = dfGroup["student_id"] == 943301355
idx = dfGroup.index[filtStudent]
print(idx)

Index([16], dtype='int64')


In [39]:
# Overwrite the mistyped student_id on the row(s) found above (idx) with the
# id that appears in the roster.
dfGroup.loc[idx, "student_id"] = 228248149

In [40]:
# Rebuild the roster/group join now that the student_id has been corrected.
dfrs = pd.merge(
    dfRos,
    dfGroup[["student_id", "group_name", "group_key"]],
    on="student_id",
    how="left",
)

# Recheck sections 003, 006, 803, 806 for students with a null group_name.
# Now there is no problem.
filtSec = dfrs["sec"].isin(["003", "006", "803", "806"])
dfrmFiltSec = dfrs.loc[filtSec]
# NOTE: despite its name, this mask is True where group_name IS null.
filtNotNull = dfrmFiltSec["group_name"].isna()
display(dfrmFiltSec.loc[filtNotNull])

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key


In [41]:
# Look for students in those sections whose group_name is the empty string.
# Turns out there are.
# NOTE: despite its name, this mask is True where group_name IS empty.
filtNotEmpty = dfrmFiltSec["group_name"].eq("")
dfrmFiltSec.loc[filtNotEmpty]

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key
195,196,803,543046351,First196,Last196,email196@cmu.ac.th,,


In [42]:
# Show the group-sheet row of the student whose group name is empty.
# display() is required here: a bare expression that is not the last line of
# a cell produces no output.
filtStu = dfGroup["student_id"] == 543046351
display(dfGroup[filtStu])

# Update group_name.
# Again, I need to ask the student about the group name first.
dfGroup.loc[filtStu, "group_name"] = "Sec3: no 123"
# Derive the key from the name with the shared helper instead of typing it by
# hand, so the key can never drift from the name.
dfGroup.loc[filtStu, "group_key"] = dfGroup.loc[filtStu, "group_name"].apply(
    makeGroupKey
)

In [43]:
# Rerun the merge with the corrected group sheet.
dfrs = dfRos.merge(
    dfGroup[["student_id", "group_name", "group_key"]],
    left_on="student_id",
    right_on="student_id",
    how="left",
)

# Recheck the students in section 003, 006, 803, 806.
filtSec = dfrs["sec"].isin(["003", "006", "803", "806"])
dfrmFiltSec = dfrs[filtSec]

# Students with a null group_name. Now there are none.
# NOTE: despite its name, this mask is True where group_name IS null.
filtNotNull = dfrmFiltSec["group_name"].isnull()
display(dfrmFiltSec[filtNotNull])

# Students with an empty group_name. Now there are none either.
# NOTE: despite its name, this mask is True where group_name IS empty.
filtNotEmpty = dfrmFiltSec["group_name"] == ""
display(dfrmFiltSec[filtNotEmpty])

# Turn the visual checks into hard guarantees so that a later Run All with
# changed input files fails here instead of silently producing missing grades.
assert not filtNotNull.any(), "students in graded sections with no group row"
assert not filtNotEmpty.any(), "students in graded sections with empty group_name"

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key


Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key


In [44]:
# Preview the merged roster + group table after the corrections above.
dfrs.head()

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key
0,1,1,910787990,First1,Last1,email1@cmu.ac.th,Sec1: วิดวะมช.รู้จักปะ,sec1:วิดวะมช.รู้จักปะ
1,2,1,877304244,First2,Last2,email2@cmu.ac.th,Sec1: หารู้ไม่,sec1:หารู้ไม่
2,3,1,288785711,First3,Last3,email3@cmu.ac.th,Sec1: เซรั่มนารา,sec1:เซรั่มนารา
3,4,1,129937341,First4,Last4,email4@cmu.ac.th,Sec1: กุ๊กๆกู๋,sec1:กุ๊กๆกู๋
4,5,1,105284582,First5,Last5,email5@cmu.ac.th,Sec1: กุ๊กๆกู๋,sec1:กุ๊กๆกู๋


In [45]:
# Load the group grade sheet and preview its first three rows.
dfGroupGrade = pd.read_excel("./data/group_grade.xlsx")
dfGroupGrade.head(3)

Unnamed: 0,sec,group_name,project_title,project_note,proposal(4),monitoring(5),monitoring_note,noti(5),noti_note,control(5),...,storage(5),storage_note,logic(5),logic_note,slide(3),slide_note,present(3),present_note,total,abet
0,003/803,Sec3: Good morning,ระบบตรวจจับความเข้มข้นของ Gas,Use Light Sensor instead of pressure meter,4,4.0,OK,5.0,Telegram and sound,4.0,...,5.0,Two collections,4.5,OK,3,OK,3,OK,32.5,4.0
1,003/803,Sec3: no 123,ระบบตรวจจับสินค้าค้าง Stock,Should turn off auto noti,4,4.0,OK,4.0,OK,3.0,...,4.0,One collection,3.5,OK,3,OK,3,OK,28.5,2.0
2,003/803,Sec3: eiei,ระบบเปิดปิดไฟอัตโนมัติ,Should be able to turn the system on/off manua...,4,4.0,OK,4.0,OK,4.0,...,4.0,One collection,4.0,OK,3,OK,3,OK,30.0,4.0


In [46]:
# Give the grade sheet's group names the same clean-up as the group sheet:
# missing names become "" and whitespace is normalized.
dfGroupGrade["group_name"] = (
    dfGroupGrade["group_name"].fillna("").apply(formatGroupName)
)
dfGroupGrade.head(3)

Unnamed: 0,sec,group_name,project_title,project_note,proposal(4),monitoring(5),monitoring_note,noti(5),noti_note,control(5),...,storage(5),storage_note,logic(5),logic_note,slide(3),slide_note,present(3),present_note,total,abet
0,003/803,Sec3: Good morning,ระบบตรวจจับความเข้มข้นของ Gas,Use Light Sensor instead of pressure meter,4,4.0,OK,5.0,Telegram and sound,4.0,...,5.0,Two collections,4.5,OK,3,OK,3,OK,32.5,4.0
1,003/803,Sec3: no 123,ระบบตรวจจับสินค้าค้าง Stock,Should turn off auto noti,4,4.0,OK,4.0,OK,3.0,...,4.0,One collection,3.5,OK,3,OK,3,OK,28.5,2.0
2,003/803,Sec3: eiei,ระบบเปิดปิดไฟอัตโนมัติ,Should be able to turn the system on/off manua...,4,4.0,OK,4.0,OK,4.0,...,4.0,One collection,4.0,OK,3,OK,3,OK,30.0,4.0


In [47]:
# Build the merge key for the grade sheet with the same helper as before, then
# count the rows per key.
dfGroupGrade["group_key"] = dfGroupGrade["group_name"].map(makeGroupKey)
dfGroupGrade["group_key"].value_counts()

group_key
sec3:goodmorning              1
sec3:no123                    1
sec6:มี2                      1
sec6:มา3                      1
sec6:ไม่บอกหรอก               1
sec6:บิดหมดไม่สลดบิดอีก       1
sec6:print(a)                 1
sec6:oppaicy                  1
sec6:มีหนุ่มตี๋เป็นเส้นชัย    1
sec6:jungdaikoboru            1
sec3:ภูเพียง                  1
sec3:ไออีสีชมพู               1
sec3:นางฟ้าตัวน้อยของแม่      1
sec3:ไก่บักโจ้น               1
sec3:tew                      1
sec3:nguang-non               1
sec3:eiei                     1
sec6:ไม่ทันแล้ว               1
Name: count, dtype: int64

In [48]:
# Attach each group's grade row to its students via group_key.
# Notice the use of suffixes to avoid duplicate column names: the columns that
# exist on both sides ("sec", "group_name") keep their roster-side name, and
# the grade-sheet copies get a "_y" suffix and are dropped right after.
# validate="many_to_one" makes pandas raise a MergeError if a group_key occurs
# more than once in dfGroupGrade; without it such a duplicate would silently
# duplicate every student of that group in the output.
dfStuGrade = pd.merge(
    dfrs,
    dfGroupGrade,
    on="group_key",
    how="left",
    suffixes=("", "_y"),
    validate="many_to_one",
)
dfStuGrade = dfStuGrade.drop(columns=["sec_y", "group_name_y"])
dfStuGrade.head(3)

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key,project_title,project_note,...,storage(5),storage_note,logic(5),logic_note,slide(3),slide_note,present(3),present_note,total,abet
0,1,1,910787990,First1,Last1,email1@cmu.ac.th,Sec1: วิดวะมช.รู้จักปะ,sec1:วิดวะมช.รู้จักปะ,,,...,,,,,,,,,,
1,2,1,877304244,First2,Last2,email2@cmu.ac.th,Sec1: หารู้ไม่,sec1:หารู้ไม่,,,...,,,,,,,,,,
2,3,1,288785711,First3,Last3,email3@cmu.ac.th,Sec1: เซรั่มนารา,sec1:เซรั่มนารา,,,...,,,,,,,,,,


In [49]:
# Verify that every student in section 003, 006, 803, 806 received a score.
filtSec = dfStuGrade["sec"].isin(["003", "006", "803", "806"])
dfStuGradeFiltSec = dfStuGrade.loc[filtSec]

# Rows with a null "total" would be students without a grade; none show up.
filtNull = dfStuGradeFiltSec["total"].isna()
dfStuGradeFiltSec.loc[filtNull]

Unnamed: 0,no_reg,sec,student_id,firstname,lastname,email,group_name,group_key,project_title,project_note,...,storage(5),storage_note,logic(5),logic_note,slide(3),slide_note,present(3),present_note,total,abet


In [50]:
# Write the per-student grade table to an Excel file; index=False leaves the
# DataFrame index out of the sheet.
dfStuGrade.to_excel("out_stu_grade.xlsx", index=False)