In [1]:
from bs4 import BeautifulSoup
import pandas as pd

from course import Course, CourseLoader

In [2]:
with open('content.html', 'r', encoding='utf-8') as f:
    content = f.read()

In [3]:
soup = BeautifulSoup(content, 'html.parser')

In [4]:
main_content_div = soup.find('div', id='main-content')

In [5]:
courses = main_content_div.find('table').find('tbody').find_all('tr')

In [6]:
results = []
course_loader = None
for course in courses:
    fields = course.find_all('td')
    if len(fields) == 18:
      # 新的course
      if course_loader is not None:
        results.append(course_loader.load_item())
      course_loader = CourseLoader(Course())
      course_loader.add_value('index', fields[0])
      course_loader.add_value('department', fields[1])
      course_loader.add_value('id', fields[2])
      course_loader.add_value('name', fields[3])
      course_loader.add_value('type', fields[4])
      course_loader.add_value('discipline', fields[5])
      hour_credit = fields[6].get_text(strip=True)
      course_loader.add_value('hours', int(hour_credit.split('/')[0]))
      course_loader.add_value('credits', float(hour_credit.split('/')[1]))
      course_loader.add_value('limit_cnt', fields[7])
      course_loader.add_value('chosen_cnt', fields[8])
      arrange = (fields[9], fields[10], fields[11])
      course_loader.add_value('arrange', arrange)
      course_loader.add_value('teach_method', fields[12])
      course_loader.add_value('exam_method', fields[13])
      course_loader.add_value('chair_professor', fields[14])
      course_loader.add_value('teacher', fields[15])
      course_loader.add_value('assistant', fields[16])
      course_loader.add_value('remote_learning', fields[17])
    elif len(fields) == 3:
      arrange = (fields[0], fields[1], fields[2])
      course_loader.add_value('arrange', arrange)
    else:
      raise Exception('Unknown Format')
if course_loader is not None:
    results.append(course_loader.load_item())

In [7]:
print(f'总共获取到 {len(results)} 门课信息')

总共获取到 1718 门课信息


In [8]:
choice_list = ['0839X2M04001H', '081203M04003H', '083500M01001H-2', '081202MGX001H',  '030500MGB001H-13', '120400MGB001H-20', '081200MGB001H-1', '081202M04001H-1', '050200MGB003H-002']

chosen_course_list = list(filter(lambda course: course['id'] in choice_list, results))

In [9]:
# 检测冲突
conflict = False
for i in range(len(chosen_course_list)):
    for j in range(i + 1, len(chosen_course_list)):
        course_x = chosen_course_list[i]
        course_y = chosen_course_list[j]
        arrange_x = course_x['arrange']
        arrange_y = course_y['arrange']
        for period_x in arrange_x:
            if conflict:
                break
            for period_y in arrange_y:
                if period_x[0] == period_y[0] and period_x[1] == period_y[1]:
                    set_x = set(period_x[2])
                    set_y = set(period_y[2])
                    if set_x & set_y:
                        print(f'检测到 {course_x["name"]} 和 {course_y["name"]} 在 {period_x[0]} 周, 星期 {period_x[1]} 冲突: {period_x[2]} {period_y[2]}')
                        conflict = True
                        break
if not conflict:
    print('所选课程无冲突')

所选课程无冲突


In [10]:
%%HTML
<style type="text/css">
table {
    border-collapse: collapse !important;
}
th, td {
    padding: 8px !important;
    text-align: center !important;
    width: 120px !important
}
th {
    background-color: #f2f2f2 !important;
}
td {
    border: 1px solid #ddd !important;
}
</style>

In [11]:
import ipywidgets as widgets
from IPython.display import display, clear_output


button = widgets.Button(description="显示")
output = widgets.Output()
menu = widgets.Dropdown(
       options=list(range(1, 21)),
       value=1,
       description="周次")


def update_course_table():
    course_table = [[""] * 7 for _ in range(12)]  # 每天12节课
    for course in chosen_course_list:
        arrange = course["arrange"]
        for course_week, course_day, course_periods, classroom in arrange:
            if course_week == menu.value:
                for period in course_periods:
                    course_table[period - 1][course_day - 1] = course["name"]
    df_course_table = pd.DataFrame(
        course_table,
        index=range(1, 13),
        columns=["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"],
    )
    time_ranges = {
        1: "1 [8:30-9:20]",
        2: "2 [9:20-10:10]",
        3: "3 [10:30-11:20]",
        4: "4 [11:20-12:10]",
        5: "5 [13:30-14:20]",
        6: "6 [14:20-15:10]",
        7: "7 [15:30-16:20]",
        8: "8 [16:20-17:10]",
        9: "9 [18:10-19:00]",
        10: "10 [19:00-19:50]",
        11: "11 [20:10-21:00]",
        12: "12 [21:00-21:50]",
    }
    df_course_table = df_course_table.rename(index=time_ranges)
    return df_course_table

def redraw(_):
    course_table = update_course_table()
    with output:
        clear_output()
        print(f"第 {menu.value} 周课程表")
        course_table = update_course_table()
        display(course_table)


button.on_click(redraw)
redraw(None)
widgets.VBox([widgets.HBox([menu, button]), output])

VBox(children=(HBox(children=(Dropdown(description='周次', options=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1…

In [12]:
# 计算学分
speciality_credit = 0.
public_compulsory_credit = 0.
public_elective_credit = 0.

for course in chosen_course_list:
    if course['type'] == '公共必修课':
        public_compulsory_credit += course['credits']
    elif course['type'] == '公共选修课':
        public_elective_credit += course['credits']
    else:
        speciality_credit += course['credits']

print(f'专业学位课学分数: {speciality_credit}')
print(f'公共必修课学分数: {public_compulsory_credit}')
print(f'公共选修学分数: {public_elective_credit}')
print(f'总学分数: {speciality_credit + public_compulsory_credit + public_elective_credit}')

专业学位课学分数: 12.0
公共必修课学分数: 6.0
公共选修学分数: 1.0
总学分数: 19.0


In [13]:
# 分析选课顺序，以限选和已选之间的差作为排序依据
for course in chosen_course_list:
    course['sort_key'] = course['limit_cnt'] - course['chosen_cnt']

chosen_course_list.sort(key=lambda x: x['sort_key'])

chosen_course_df = pd.DataFrame(columns=['课程编码', '课程名称', '开课单位', '课程属性', '所属学科/专业', '学分', '学时', '首席教授', '授课方式', '考试方式', '限选', '已选', '相差'])

print("课程名 限选 已选 相差")
for index, course in enumerate(chosen_course_list):
    chosen_course_df.loc[index] = [course['id'], course['name'], course['department'], course['type'], course['discipline'], course['credits'], course['hours'], course.get('chair_professor') or course.get('teacher'), course['teach_method'], course['exam_method'], course['limit_cnt'], course['chosen_cnt'], course['limit_cnt'] - course['chosen_cnt']]
chosen_course_df

课程名 限选 已选 相差


Unnamed: 0,课程编码,课程名称,开课单位,课程属性,所属学科/专业,学分,学时,首席教授,授课方式,考试方式,限选,已选,相差
0,120400MGB001H-20,学术道德与学术写作规范-通论,公共政策与管理学院,公共必修课,公共管理,0.5,10,刘朝,授课、讨论,大开卷,300,301,-1
1,050200MGB003H-002,英语A-02班（怀）,外语系,公共必修课,外国语言文学,3.0,32,曹笃鑫,课堂讲授为主,其它（需说明）,29,29,0
2,081200MGB001H-1,学术道德与学术写作规范-分论,计算机科学与技术学院,公共必修课,计算机科学与技术,0.5,10,刘兴武,授课、讨论,读书报告,243,243,0
3,0839X2M04001H,自然语言处理,网络空间安全学院,专业核心课,信息内容安全,3.0,60,胡玥,课堂讲授为主,闭卷笔试,150,150,0
4,030500MGB001H-13,新时代中国特色社会主义理论与实践(西区),马克思主义学院,公共必修课,马克思主义理论,2.0,36,刘彦随等,课堂讲授为主,其它（需说明）,200,199,1
5,081203M04003H,高级人工智能,计算机科学与技术学院,专业核心课,计算机应用技术,3.0,60,沈华伟,课堂讲授为主,闭卷笔试,502,493,9
6,081202MGX001H,C++程序设计,计算机科学与技术学院,公共选修课,计算机软件与理论,1.0,40,杨力祥,课堂讲授为主,大开卷,214,200,14
7,083500M01001H-2,计算机算法设计与分析,计算机科学与技术学院,一级学科核心课,软件工程,3.0,60,刘玉贵,课堂讲授为主,闭卷笔试,154,132,22
8,081202M04001H-1,高级软件工程,计算机科学与技术学院,专业核心课,计算机软件与理论,3.0,60,魏峻,授课、讨论,闭卷笔试,214,67,147


In [None]:
# 分析公共必修课，公共必修课有很多班，选择很多，不应与已选其余课程冲突
course_name = '新时代中国特色社会主义理论与实践(西区)'
choice_list = list(filter(lambda course: course['name'] == course_name, results))

In [None]:
def check_conflict(candidate, course_list):
    for course in course_list:
        arrange_x = candidate['arrange']
        arrange_y = course['arrange']
        for period_x in arrange_x:
            for period_y in arrange_y:
                if period_x[0] == period_y[0] and period_x[1] == period_y[1]:
                    set_x = set(period_x[2])
                    set_y = set(period_y[2])
                    if set_x & set_y:
                        return True
    return False

In [None]:
actual_choice_list = []
for choice in choice_list:
    if check_conflict(choice, chosen_course_list):
        continue
    actual_choice_list.append(choice)

In [None]:
len(actual_choice_list)

3

In [None]:
actual_choice_list

[{'arrange': [(2, 4, [1, 2, 3, 4], '教一楼101'),
              (3, 4, [1, 2, 3, 4], '教一楼101'),
              (4, 4, [1, 2, 3, 4], '教一楼101'),
              (5, 4, [1, 2, 3, 4], '教一楼101'),
              (6, 4, [1, 2, 3, 4], '教一楼101'),
              (7, 4, [1, 2, 3, 4], '教一楼101'),
              (8, 4, [1, 2, 3, 4], '教一楼101'),
              (9, 4, [1, 2, 3, 4], '教一楼101'),
              (10, 4, [1, 2, 3, 4], '教一楼101')],
  'assistant': '张慧博',
  'chosen_cnt': 200,
  'credits': 2.0,
  'department': '马克思主义学院',
  'exam_method': '其它（需说明）',
  'hours': 36,
  'id': '030500MGB001H-07',
  'index': '84',
  'limit_cnt': 200,
  'name': '新时代中国特色社会主义理论与实践(西区)',
  'remote_learning': '否',
  'teach_method': '课堂讲授为主',
  'teacher': '樊杰等',
  'type': '公共必修课'},
 {'arrange': [(11, 4, [1, 2, 3, 4], '教一楼101'),
              (12, 4, [1, 2, 3, 4], '教一楼101'),
              (13, 4, [1, 2, 3, 4], '教一楼101'),
              (14, 4, [1, 2, 3, 4], '教一楼101'),
              (15, 4, [1, 2, 3, 4], '教一楼101'),
              (16, 4, [1,