In [28]:
from matplotlib.pyplot import switch_backend
from selenium.webdriver.remote.webdriver import WebDriver as wd
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait as wdw
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains as AC
import selenium
from bs4 import BeautifulSoup as BS
import json

class WebVPN:
    def __init__(self, opt: dict, headless=False):
        self.root_handle = None
        self.driver: wd = None
        self.userid = opt["username"]
        self.passwd = opt["password"]
        self.headless = headless

    def login_webvpn(self):
        """
        Log in to WebVPN with the account specified in `self.userid` and `self.passwd`

        :return:
        """
        d = self.driver
        if d is not None:
            d.close()
        d = selenium.webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
        d.get("https://webvpn.tsinghua.edu.cn/login")
        username = d.find_elements(By.XPATH,
                                   '//div[@class="login-form-item"]//input'
                                   )[0]
        password = d.find_elements(By.XPATH,
                                   '//div[@class="login-form-item password-field" and not(@id="captcha-wrap")]//input'
                                   )[0]
        username.send_keys(str(self.userid))
        password.send_keys(self.passwd)
        d.find_element(By.ID, "login").click()
        self.root_handle = d.current_window_handle
        self.driver = d
        return d

    def access(self, url_input):
        """
        Jump to the target URL in WebVPN

        :param url_input: target URL
        :return:
        """
        d = self.driver
        url = By.ID, "quick-access-input"
        btn = By.ID, "go"
        wdw(d, 5).until(EC.visibility_of_element_located(url))
        actions = AC(d)
        actions.move_to_element(d.find_element(*url))
        actions.click()
        actions.\
            key_down(Keys.CONTROL).\
            send_keys("A").\
            key_up(Keys.CONTROL).\
            send_keys(Keys.DELETE).\
            perform()

        d.find_element(*url)
        d.find_element(*url).send_keys(url_input)
        d.find_element(*btn).click()

    def switch_another(self):
        """
        If there are only 2 windows handles, switch to the other one

        :return:
        """
        d = self.driver
        assert len(d.window_handles) == 2
        wdw(d, 5).until(EC.number_of_windows_to_be(2))
        for window_handle in d.window_handles:
            if window_handle != d.current_window_handle:
                d.switch_to.window(window_handle)
                return

    def to_root(self):
        """
        Switch to the home page of WebVPN

        :return:
        """
        self.driver.switch_to.window(self.root_handle)

    def close_all(self):
        """
        Close all window handles

        :return:
        """
        while True:
            try:
                l = len(self.driver.window_handles)
                if l == 0:
                    break
            except selenium.common.exceptions.InvalidSessionIdException:
                return
            self.driver.switch_to.window(self.driver.window_handles[0])
            self.driver.close()

    def login_info(self):
        """
        TODO: After successfully logged into WebVPN, login to info.tsinghua.edu.cn

        :return:
        """

        self.access("info.tsinghua.edu.cn")
        self.switch_another()
        d=self.driver
        user=d.find_element(By.ID,"userName")
        user.send_keys(self.userid)
        passwd=d.find_element(By.NAME,"password")
        passwd.send_keys(self.passwd)
        but= d.find_element(By.XPATH,"/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr/td[6]/input")
        but.click()
        wdw(d,5).until(EC.visibility_of_element_located((By.XPATH,"//*[@id=\"9-771_table\"]/div/ul/li[2]")))
        d.close()
        self.to_root()
        return 
        # Hint: - Use `access` method to jump to info.tsinghua.edu.cn
        #       - Use `switch_another` method to change the window handle
        #       - Wait until the elements are ready, then preform your actions
        #       - Before return, make sure that you have logged in successfully
        raise NotImplementedError

    def get_grades(self):
        """
        TODO: Get and calculate the GPA for each semester.

        Example return / print:
            2020-秋: *.**
            2021-春: *.**
            2021-夏: *.**
            2021-秋: *.**
            2022-春: *.**

        :return:
        """
        self.access("zhjw.cic.tsinghua.edu.cn/cj.cjCjbAll.do?m=bks_cjdcx&cjdlx=zw")
        self.switch_another()
        d=self.driver
        table = d.find_element(By.XPATH,"/html/body/center/table[2]/tbody")
        soup=BS(table.get_attribute("innerHTML"),'lxml')
        print(soup)
        return soup
        # Hint: - You can directly switch into
        #         `zhjw.cic.tsinghua.edu.cn/cj.cjCjbAll.do?m=bks_cjdcx&cjdlx=zw`
        #         after logged in
        #       - You can use Beautiful Soup to parse the HTML content or use
        #         XPath directly to get the contents
        #       - You can use `element.get_attribute("innerHTML")` to get its
        #         HTML code

        raise NotImplementedError

if __name__ == "__main__":
    # TODO: Write your own query process
    with open("settings.json") as f:
      dic = json.load(f)
      f.close()
    web = WebVPN(dic)
    web.login_webvpn()
    web.login_info()
    soup=web.get_grades()
    
    #raise NotImplementedError





[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [C:\Users\m1366\.wdm\drivers\chromedriver\win32\103.0.5060.53\chromedriver.exe] found in cache


<html><body><tr class="">
<th height="30" width="80">课程号</th>
<th width="">课程名</th>
<th width="50">学分</th>
<th width="70">成绩</th>
<th width="70">绩点</th>
<th width="70">学年-学期</th>
<!-- <th width="90">考试时间</th> -->
</tr>
<tr>
<td height="30">10240022</td>
<td style="text-align:left;">计算机科学基础</td>
<td>2</td>
<td style="text-align:left;">
		A
   			
   			
    </td>
<td>
    	
			4.0
		
		
    </td>
<td>

    2021-秋
    
    
    </td>
</tr>
<tr>
<td height="30">10421055</td>
<td style="text-align:left;">微积分A(1)</td>
<td>5</td>
<td style="text-align:left;">
		A+
   			
   			
    </td>
<td>
    	
			4.0
		
		
    </td>
<td>

    2021-秋
    
    
    </td>
</tr>
<tr>
<td height="30">10421324</td>
<td style="text-align:left;">线性代数</td>
<td>4</td>
<td style="text-align:left;">
		A
   			
   			
    </td>
<td>
    	
			4.0
		
		
    </td>
<td>

    2021-秋
    
    
    </td>
</tr>
<tr>
<td height="30">10680053</td>
<td style="text-align:left;">思想道德与法治</td>
<td>3</td>
<td style="text-align:left

In [62]:
a=soup.find_all('tr')
a=a[1:]
result={}
sems=[]
b=[]
for i in range(len(a)):
  temp=a[i].find_all("td")
  temp2=[]
  for j in temp:
    r=j.contents[0]
    temp2.append(str(r).replace('\n','').replace('\t','').replace(' ',''))
  b.append(temp2)
for k in b:
  sem=str(k[5])
  st=str(k[3])
  if st=='P' or st== 'F':
    continue
  if result.__contains__(sem):
    result[sem].append((int(k[2]),float(k[4])))
  else:
    result[sem]=[(int(k[2]),float(k[4]))]
    sems.append(sem)
for sem in sems:
  t=result[sem]
  sum=0.0
  score=0.0
  for course in t:
    sum+=course[0]
    score+=course[0]*course[1]
  score/=sum
  score=('%.2f'%score)
  result[sem]=score
result


{'2021-秋': '3.92', '2022-春': '3.97'}

In [53]:
a=soup.find_all('tr')
a=a[1:]
result={}
sems=[]
b=[]
for i in range(len(a)):
  temp=a[i].find_all("td")
  temp2=[]
  for j in temp:
    r=j.contents[0]
    temp2.append(str(r).replace('\n','').replace('\t','').replace(' ',''))
  b.append(temp2)
b

[['10240022', '计算机科学基础', '2', 'A', '4.0', '2021-秋'],
 ['10421055', '微积分A(1)', '5', 'A+', '4.0', '2021-秋'],
 ['10421324', '线性代数', '4', 'A', '4.0', '2021-秋'],
 ['10680053', '思想道德与法治', '3', 'A-', '4.0', '2021-秋'],
 ['10720011', '体育(1)', '1', 'A', '4.0', '2021-秋'],
 ['12090052', '军事理论', '2', 'B+', '3.6', '2021-秋'],
 ['14201082', '英语阅读写作（A）', '2', 'B+', '3.6', '2021-秋'],
 ['20240013', '离散数学(1)', '3', 'A', '4.0', '2021-秋'],
 ['30210041', '信息科学技术概论', '1', 'B+', '3.6', '2021-秋'],
 ['30240233', '程序设计基础', '3', 'A+', '4.0', '2021-秋'],
 ['00240311', '计算机系统研讨', '1', 'P', 'N/A', '2022-春'],
 ['02070012', '党的知识概论', '2', 'P', 'N/A', '2022-春'],
 ['10421065', '微积分A(2)', '5', 'A-', '4.0', '2022-春'],
 ['10421382', '高等线性代数选讲', '2', 'A', '4.0', '2022-春'],
 ['10430484', '大学物理B(1)', '4', 'A-', '4.0', '2022-春'],
 ['10610193', '中国近现代史纲要', '3', 'A-', '4.0', '2022-春'],
 ['10680011', '形势与政策', '1', 'A', '4.0', '2022-春'],
 ['10691342', '写作与沟通', '2', 'A', '4.0', '2022-春'],
 ['10720021', '体育(2)', '1', 'A', '4.0', '2022

KeyError: 'td'

In [2]:
d = selenium.webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - About to download new driver from https://chromedriver.storage.googleapis.com/103.0.5060.53/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\m1366\.wdm\drivers\chromedriver\win32\103.0.5060.53]


In [3]:
import json

In [4]:
d.get("https://webvpn.tsinghua.edu.cn/login")

In [5]:
username=d.find_element(By.ID,"user_name")

In [13]:
with open("settings.json") as f:
  lib = json.load(f)

In [7]:
username.send_keys(lib['username'])
passwd = d.find_element(By.NAME,"password")
passwd.send_keys(lib['password'])

In [8]:
btn= d.find_element(By.ID,"login")
btn.click()

In [31]:
d.get("https://webvpn.tsinghua.edu.cn")


In [9]:
btn2=d.find_element(By.CLASS_NAME,"vpn-content-block-panel__content")

<Response [200]>

<!DOCTYPE html>
<html lang="zh-cmn">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<title>清华大学WebVPN</title>
<meta content="清华大学VPN,清华大学WebVPN,清华大学内网访问,清华大学访问控制,清华大学内网登录,清华大学资源访问控制系统,WebVPN,WEBVPN,VPN,clientless vpn,网瑞达,北京网瑞达科技有限公司,资源访问控制系统,访问控制,堡垒机,SSLVPN,CASB,认证,成都网瑞达科技有限公司,图书馆认证,图书馆统计,图书馆下载分析,电子资源访问控制,论文下载,访问,图书馆" name="keywords"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name="viewport"/>
<meta content="webkit" name="renderer"/>
<link href="/wengine-vpn/js/css/normalize.css" rel="stylesheet"/>
<link href="/wengine-vpn/js/css/login.css?v=20200501" rel="stylesheet"/>
<link href="/wengine-vpn/js/layui/css/layui.css" rel="stylesheet"/>
<script src="/wengine-vpn/js/js/wechat-font.js"></script>
<style>
        #captcha-wrap{
            display:none;
        }
        #error-message{
            display: none;
        }
        body {
            -webkit-text-size-adjust:

In [16]:
d.get("https://webvpn.tsinghua.edu.cn/http/77726476706e69737468656265737421f9f9479369247b59700f81b9991b2631506205de/?wrdrecordvisit=1657962993000")

In [17]:
usr=d.find_element(By.ID,"userName")

In [16]:
usr

<selenium.webdriver.remote.webelement.WebElement (session="a52a9e8432d3bde26a194f29ea3c60f4", element="7e206aa7-4378-4860-a5a1-bd3f426a2f1f")>

In [18]:
usr.send_keys(lib['username'])

In [19]:
psw=d.find_element(By.NAME,"password")

In [20]:
psw.send_keys(lib['password'])

In [24]:
but= d.find_element(By.XPATH,"/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr/td[6]/input")

In [25]:
but.click()