# 引导区

In [1]:
import json
import requests
import pandas as pd
from pprint import pprint

# 页面解析函数

## 页面解析函数

### 查询
0. 备注
    * 数据全部以DF格式返回
1. query_ticket
    * 输入日期、起点站代码、目的站代码、车票类型(成人/?)，返回可以购买的车次的车票数据（多行）
    * 未解决：解析返回的复杂字符串（数据包含于其内）
        * result里的每一行车次数据的第一个复杂字符串是火车的信息，在后面提交订单的时候就是提交这段字符。这段字符是随机生成的，过几秒就会失效
2. query_price
    * 输入列车代码、起点站代码、目的站代码、`座位类型?`、日期，返回票价数据
3. query_station
    * 输入列车代码、起点站代码、目的站代码、日期，返回列车全程站点数据
4. query_map
    * 查询[站名映射表](https://kyfw.12306.cn/otn/resources/js/framework/station_name.js)，返回站点名称相关数据的DF

In [2]:
#输入日期、起点站代码、目的站代码、车票类型(成人/?)，返回可以购买的车次的车票数据（多行）
def query_ticket(train_date, from_station, to_station, ticket_type):
    url='https://kyfw.12306.cn/otn/leftTicket/query'
    payload = {'leftTicketDTO.train_date': train_date, #日期
           'leftTicketDTO.from_station': from_station, #起点站
           'leftTicketDTO.to_station': to_station, #目的站
           'purpose_codes': ticket_type} #车票类型
    r = requests.get(url, params=payload)
    return r.json()

#输入日期、起点站代码、目的站代码、车票类型，返回相关数据
def query_price(train_no, from_station_no, to_station_no, seat_types, train_date):
    url='https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice'
    payload = {'train_no': train_no, #列车代码
           'from_station_no': from_station_no, #起点站序号
           'to_station_no': to_station_no, #目的站序号
           'seat_types': seat_types, #座位类型
           'train_date': train_date} #日期
    r = requests.get(url, params=payload)
    return r.json()

#输入列车代码、起点站代码、目的站代码、日期，返回列车全程站点数据
def query_station(train_no, from_station, to_station, date):
    url='https://kyfw.12306.cn/otn/czxx/queryByTrainNo'
    payload = {'train_no': train_no, #列车代码
           'from_station_telecode': from_station, #起点站序号
           'to_station_telecode': to_station, #终点站序号
           'depart_date': date} #日期
    r = requests.get(url, params=payload)
    return r.json()

def query_map():
    url='https://kyfw.12306.cn/otn/resources/js/framework/station_name.js'
    r = requests.get(url).text.split("\'")[1]
    return r.split('|')

def query_train_list():
    url='https://kyfw.12306.cn/otn/resources/js/query/train_list.js'
    r = requests.get(url)
    return r.text.split('=')[1]

In [None]:
train_list = json.loads(query_train_list())

## 数据变换函数

### 数据变换
1. price_json_df
2. trans_ticket
3. map_df
    * `map_df(query_map())`返回一个站点名称的DF

In [3]:
#或许还是存为DF格式比较好？
def price_json_df(json_data):
    
    dict_data={}
    dict_data['商务/特等']=json_data['data'].get('A9')
    dict_data['一等座']=json_data['data'].get('M')
    dict_data['二等座']=json_data['data'].get('O')
    dict_data['高级软卧']=json_data['data'].get('A6')
    dict_data['软卧']=json_data['data'].get("A4")
    dict_data['动卧']=json_data['data'].get('F')
    dict_data['硬卧']=json_data['data'].get('A3')
    dict_data['软座']=json_data['data'].get('A2')
    dict_data['硬座']=json_data['data'].get('A1')
    dict_data['无座']=json_data['data'].get('WZ')
#     dict_data['其他']=json_data['data'][]
    
    return dict_data

#将query_ticket查询得到的数据根据映射关系写入DF
def trans_ticket(ticket_list):
    df_empty = pd.DataFrame(columns=['起点站', '列车编号', '终点站', '出发站', '到达站', '出发时间', '到达时间',
                                     '历时', '起点站发车日期', '是否可以预订', '备注', 'seat_types', '软卧',
                                     '无座', '商务座/特等座', '一等座', '二等座'])
    for ticket in ticket_list:
        ticket_split = ticket.split('|')
        print(ticket_split[3], ticket_split[30])
        pprint(ticket_split)
        df_empty.loc[ticket_split[3],['备注', '列车编号', '起点站', '终点站', '出发站',
                                      '到达站', '出发时间', '到达时间',
                                      '历时', '是否可以预订', '起点站发车日期', '软卧', '无座',
                                      '二等座', '一等座', '商务座/特等座', 'seat_types']] = [ticket_split[1], ticket_split[2], 
                                                      ticket_split[4],
                                                      ticket_split[5], ticket_split[6], ticket_split[7],
                                                      ticket_split[8], ticket_split[9], ticket_split[10], 
                                                      ticket_split[11], ticket_split[13], ticket_split[23],
                                                      ticket_split[26], ticket_split[30], ticket_split[31],
                                                      ticket_split[32], ticket_split[35]]

    return df_empty

#将query_map查询得到的数据写入DF
def map_df(station_list):
    df_empty = pd.DataFrame(columns=['@站点简拼', '站点名称', '站点拼音', '站点简拼'])
    df_empty.index.name='站点代码'
    
    nums = int((len(station_list)-1)/5)
    for num in range(nums): #最后一个元素不属于
         df_empty.loc[station_list[int(5*num+2)],['@站点简拼', '站点名称', '站点拼音', '站点简拼']] = [station_list[int(5*num)],
                        station_list[int(5*num+1)], station_list[int(5*num+3)], station_list[int(5*num+4)]]

    return df_empty

In [None]:
train_list = json.loads(query_train_list())

In [4]:
df_empty = pd.DataFrame(columns=['date','type','station_train_code','train_no'])
for day in train_list.keys():
    for ttype in train_list[day].keys():
#         print(day,ttype)
        for element in train_list[day][ttype]:
            df_empty = df_empty.append({'date':day, 
                             'type':ttype, 
                             'station_train_code':element['station_train_code'], 
                             'train_no':element['train_no']}, ignore_index=True)
            
df_empty

NameError: name 'train_list' is not defined

# 跳转函数

# 主函数

In [5]:
# data = query_ticket('2018-07-07', 'BJP', 'GZQ', 'ADULT')['data']['result'] #北京-广州
data = query_ticket('2018-07-07', 'CQW', 'GZQ', 'ADULT')['data']['result'] #重庆-广州
# data = query_ticket('2018-07-07', 'HBB', 'GZQ', 'ADULT')['data']['result'] #哈尔滨-广州
# data = query_ticket('2018-07-07', 'TJP', 'GZQ', 'ADULT')['data']['result'] #天津-广州
# pprint(data)
df_empty=trans_ticket(data)
df_empty
# js=query_price('240000T10911', '01', '09', '14163', '2018-07-07')
# js
# price_json(js)
# query_station('240000D9010F', 'BXP', 'IZQ', '2018-07-07')

K775 
['null',
 '列车停运',
 '8d0000K77800',
 'K775',
 'LAJ',
 'DMQ',
 'CUW',
 'GZQ',
 '24:00',
 '24:00',
 '99:59',
 'IS_TIME_NOT_BUY',
 '',
 '20180707',
 '',
 'J1',
 '09',
 '21',
 '0',
 '1',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '0']
K813 
['null',
 '预订',
 '770000K8130H',
 'K813',
 'RXW',
 'GZQ',
 'CUW',
 'GZQ',
 '20:43',
 '19:20',
 '22:37',
 'N',
 'j6MD25A%2Fu6gFXgtRhNnMGeJ6wrPzsHX6WSi40HnKZKNftmEwWlvqUcH2VrQ%3D',
 '20180707',
 '3',
 'W2',
 '05',
 '18',
 '0',
 '0',
 '',
 '',
 '',
 '无',
 '',
 '',
 '无',
 '',
 '无',
 '无',
 '',
 '',
 '',
 '',
 '10403010',
 '1431',
 '0']
K835 
['GH%2BpeUNc8AbS7EgaBXANKwS4mPXfX%2FioO37Upjmgzxo%2Bh5Rg8rsL9%2BfORelRE4CFj4oeLbiOGabd%0AdXQlhQ54FNKWY4QDDKuZ0iMTxLP2uNKU26so7NZuWSsRjkxlCGyGaLhRs6qIFO8%2FZVb%2FQ%2BSwl4KP%0AswEs6ceR%2F3UtDWVW9J89LtXz3UWYir5Z0YLDj6Dqbv9Iu0xbaN3kaGe8cZobDZwqzCwMZclT%2Bo4q%0AbRqPqRo4YH%2BgR9LcMDUEVjhVEDxmDQVO8rUE9kE%3D',
 '预订',
 '770000K83520',
 'K835',
 'CUW',
 'OSQ',
 'CUW',
 'GZQ',
 '21:30',
 '

Unnamed: 0,起点站,列车编号,终点站,出发站,到达站,出发时间,到达时间,历时,起点站发车日期,是否可以预订,备注,seat_types,软卧,无座,商务座/特等座,一等座,二等座
K775,LAJ,8d0000K77800,DMQ,CUW,GZQ,24:00,24:00,99:59,20180707,IS_TIME_NOT_BUY,列车停运,,,,,,
K813,RXW,770000K8130H,GZQ,CUW,GZQ,20:43,19:20,22:37,20180707,N,预订,1431.0,无,无,,,
K835,CUW,770000K83520,OSQ,CUW,GZQ,21:30,05:19,31:49,20180707,Y,预订,1413.0,无,有,,,
K585,CDW,760000K5880G,OSQ,CUW,GZQ,21:59,04:42,30:43,20180707,Y,预订,1413.0,无,有,,,
K358,CUW,770000K35806,GZQ,CUW,GZQ,23:33,05:12,29:39,20180707,Y,预订,1431.0,无,有,,,


In [None]:
map_df(query_map())