# 初始化模块

In [4]:
import re
import numpy as np
import pandas as pd
import pymongo
import requests
import ipywidgets
from IPython import display
from bokeh import charts
from bokeh.io import push_notebook, show, output_notebook
# Configure bokeh to emit its output inline in this notebook.
output_notebook()

# Open a MongoDB client with pymongo's default connection settings and select
# the 'hfut' database; the cells below read from the global `db`.
client = pymongo.MongoClient()
db = client['hfut']

# 数据分析

## 数据总量

In [5]:
# Print the number of documents in each collection, one line per collection.
# Each entry pairs the printed label with the collection it describes.
collection_labels = [
    ('专业记录', 'major'),
    ('学期记录', 'term'),
    ('课程记录', 'course'),
    ('专业计划记录', 'plan'),
    ('教学班记录', 'class'),
    ('学生记录', 'student'),
    ('教学班-学生关系记录', 'class_student'),
]
for record_label, collection_name in collection_labels:
    print(record_label, db[collection_name].count(), '条')

专业记录 120 条
学期记录 10 条
课程记录 1231 条
专业计划记录 2201 条
教学班记录 10013 条
学生记录 13567 条
教学班-学生关系记录 822919 条


In [11]:
# http://bokeh.pydata.org/en/latest/docs/user_guide/charts.html
# Load all student documents, keeping only the three columns used below.
student_df = pd.DataFrame(list(db['student'].find()), columns=['学号', '姓名', '性别'])
# Integer-dividing the student number by 1,000,000 keeps its leading digits,
# which are stored as the enrolment-year column.
student_df['入学年份'] = student_df['学号'] // 1000000
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html
# Count students per (enrolment year, gender) and pivot gender into columns.
# A descriptive name is used instead of `_`: assigning to `_` shadows IPython's
# "last output" variable and stops IPython from updating it afterwards.
gender_by_year = student_df.groupby(['入学年份', '性别'])['学号'].count().unstack()
# Male-to-female ratio and total head count per enrolment year.
gender_by_year['比例'] = gender_by_year['男'] / gender_by_year['女']
gender_by_year['合计'] = gender_by_year['男'] + gender_by_year['女']
gender_by_year

性别,女,男,比例,合计
入学年份,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012,586,2037,3.476109,2623
2013,604,2332,3.860927,2936
2014,590,2412,4.088136,3002
2015,650,2346,3.609231,2996
2016,459,1551,3.379085,2010


In [7]:
# http://bokeh.pydata.org/en/latest/docs/reference/charts.html#bar
# Bar chart of student_df labelled by enrolment year; the gender column is
# counted and the bars are stacked by gender.
bar_options = dict(
    label='入学年份',
    values='性别',
    agg='count',
    stack='性别',
)
p = charts.Bar(student_df, **bar_options)
show(p)

## 学生数量及人数分布

In [8]:
class StudentQueryPanel:
    """Interactive widget panel that looks up one student by student number.

    The panel shows the student's photo, name and a table of the teaching
    classes the student is enrolled in. It reads from the notebook-level
    ``db`` MongoDB handle and downloads the photo over HTTP.
    """

    # Ten-character student number: 2012-2016, the literal "21", four digits.
    _STUDENT_CODE_PATTERN = r'201[2-6]21\d{4}'
    # Dividing a student number by this keeps its leading four digits.
    _YEAR_DIVISOR = 1000000
    _PHOTO_URL = 'http://222.195.8.201/student/photo/{:d}/{:d}.JPG'
    # Seconds to wait for the photo server before giving up.
    _PHOTO_TIMEOUT = 10
    _NO_NAME = '无'
    _NOT_FOUND_HTML = '<b>没有查询到信息</b>'
    # Fields that together identify one teaching class.
    _CLASS_KEY_FIELDS = ('学期代码', '课程代码', '教学班号')
    # Columns (and their order) of the rendered class table.
    _CLASS_COLUMNS = [
        '学期代码', '课程代码', '课程名称', '教学班号', '校区', '时间地点', '起止周',
        '开课单位', '任课老师', '优选范围', '性别限制', '禁选范围', '课程类型', '考核类型',
        '学分', '班级容量', '选中人数', '备注'
    ]

    def __init__(self):
        """Build the widgets, register the submit handler and display the panel."""
        self.code_text = ipywidgets.Text(description='学号:')
        self.code_text.on_submit(self.handle_text_submit)
        self.name_text = ipywidgets.Text(
            disabled=True, value='无', description='姓名:')
        self.image = ipywidgets.Image(format='jpg', width=260)
        self.html = ipywidgets.HTML(description='已选课程班级')
        self.panel = ipywidgets.VBox(
            [self.image, self.name_text, self.code_text, self.html])
        display.display(self.panel)

    def handle_text_submit(self, t):
        """Handle submission of the student-number text box.

        Args:
            t: The submitted widget; its ``value`` is the text typed by the user.

        The panel is first reset to its "not found" state. It is filled in only
        when the text is a well-formed student number and a matching student
        document exists.
        """
        self._reset()

        student_code = self._parse_student_code(t.value)
        if student_code is None:
            return
        student = db['student'].find_one(filter={'学号': student_code})
        if not student:
            return

        self.image.value = self._fetch_photo(student_code)
        name = student['姓名']
        if student['性别'] == '女':
            # Female students are marked with a trailing asterisk.
            name += '*'
        self.name_text.value = name
        self.html.value = self._load_classes(student_code)._repr_html_()

    def _reset(self):
        """Put the photo, name and class table back into the "not found" state."""
        self.image.value = b''
        self.name_text.value = self._NO_NAME
        self.html.value = self._NOT_FOUND_HTML

    @classmethod
    def _parse_student_code(cls, text):
        """Return the student number in ``text`` as an int, or None if malformed."""
        candidate = text.strip()
        # fullmatch, not match: match only anchors the start, so trailing junk
        # such as '2012210001abc' would pass and then crash int().
        if re.fullmatch(cls._STUDENT_CODE_PATTERN, candidate) is None:
            return None
        return int(candidate)

    def _fetch_photo(self, student_code):
        """Download the student's photo; return b'' if it cannot be retrieved."""
        url = self._PHOTO_URL.format(
            student_code // self._YEAR_DIVISOR, student_code)
        try:
            response = requests.get(url, timeout=self._PHOTO_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            # A missing or unreachable photo must not abort the rest of the lookup.
            return b''
        return response.content

    def _load_classes(self, student_code):
        """Return a DataFrame of the teaching classes the student is enrolled in."""
        enrolments = db['class_student'].find(filter={'学号': student_code})
        query = [
            {key: row[key] for key in self._CLASS_KEY_FIELDS}
            for row in enrolments
        ]
        if query:
            sort_spec = [('学期代码', pymongo.DESCENDING),
                         ('课程代码', pymongo.DESCENDING)]
            classes = list(db['class'].find({'$or': query}).sort(sort_spec))
        else:
            # MongoDB rejects an empty $or array, so skip the query entirely.
            classes = []
        return pd.DataFrame(classes, columns=self._CLASS_COLUMNS)


# Create the query panel; the constructor also displays it below this cell.
panel = StudentQueryPanel()

In [9]:
# NOTE(review): exploratory leftover. The expression evaluates to the cursor's
# bound `sort` method without calling it, so no query result is produced.
# Consider removing this cell from the finished notebook.
db['class'].find().sort

<bound method Cursor.sort of <pymongo.cursor.Cursor object at 0x7fdca4da5358>>