In [3]:
import requests
from requests.exceptions import RequestException
from lxml import etree
import pandas as pd
import numpy as np
from xpinyin import Pinyin
# 拼音对象初始化
p = Pinyin()

In [8]:
class Lianjia_crawler():
    def __init__(self, city, location, page):
        self.city = city
        self.location = location
        self.page = page

    def organize_url(self, i):
        '''
        组合url
        '''
        pinyin_city = p.get_initials(self.city, '').lower()  # 将城市名由中文改为首字母小写
        pinyin_location = p.get_pinyin(self.location, '')  # 将区域由中文改为拼音
        url = 'https://{}.lianjia.com/ershoufang/{}/pg{}sf1sf2sf3/'.format(
            pinyin_city, pinyin_location, str(i))
        return url

    def get_one_page_text(self, url):
        '''
        获取单个页面的html文本
        '''
        try:
            headers = {
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
            }
            response = requests.get(url, headers=headers)
            if (response.status_code == 200):
                return response.text
            return None
        except RequestException:
            return None

    def switch_to_xpath(self, url_text):
        '''
        将html文本转换为xpath解析对象
        '''
        html = etree.HTML(url_text)
        return html

    def get_house_list(self, html):
        '''
        获取单个页面的房源url列表
        '''
        one_page_url = html.xpath('//div[@class="title"]/a/@href')
        return one_page_url

    def get_whole_house_url(self):
        '''
        获取所有房源url列表
        '''
        url_list = []
        for i in range(1, int(self.page) + 1):
            url = self.organize_url(i)
            text = self.get_one_page_text(url)
            html = self.switch_to_xpath(text)
            url_list.extend(self.get_house_list(html))
        return url_list

In [31]:
def get_one_page(url):
    '''
    获取单个页面的html
    :param url: string类型，网址
    :return: string类型，单页html文本
    '''
    try:
        headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
}
        response = requests.get(url,headers=headers)
        if(response.status_code == 200):
            return response.text
        return None
    except RequestException:
        return None

In [32]:
crawler = Lianjia_crawler('杭州', '萧山', 5)
list=crawler.get_whole_house_url()
html_list = []
for url in list:
    text=get_one_page(url)
    html_list.append(etree.HTML(text))

In [82]:
html=html_list[6]

In [50]:
title = html.xpath('//div[@class="title-wrapper"]//h1[@class="main"]/@title')[0]
title

'北辰国颂府 4室2厅 南'

In [52]:
sub = html.xpath(
    '//div[@class="title-wrapper"]//div[@class="sub"]/@title')[0]
if sub == '链家网真房源，更多房源信息请联系经纪人':
    sub = '无'
sub

'无'

In [62]:
around_info = html.xpath('//div[@class="aroundInfo"]')[0]

community = around_info.xpath(
            '//div[@class="communityName"]//a[@class="info "]/text()')[0]
print(community)

北辰国颂府


In [63]:
main_area = around_info.xpath(
    '//div[@class="areaName"]//a[1]/text()')[0]
sub_area = around_info.xpath(
    '//div[@class="areaName"]//a[2]/text()')[0]
print(main_area,sub_area)

萧山 萧山市区


In [65]:
total_price = int(html.xpath(
    '//div[@class="price "]//span[@class="total"]/text()')[0])  # 价格（万元）
average_price = html.xpath(
    '//div[@class="price "]//span[@class="unitPriceValue"]/text()')[
        0]  # 平米单价（元）
print(total_price,average_price)

899 52132


In [71]:
build_time = html.xpath(
    '//div[@class="houseInfo"]//div[@class="area"]/div[@class="subInfo noHidden"]/text()'
)[0]  #建造时间

if build_time[:3].isdigit() == False:
    build_time = build_time[:4]
else:
    build_time = int(build_time[:4])
print(build_time)

未知年建


In [72]:
intro_content = html.xpath('//div[@class="introContent"]')[0]

In [85]:
total_area = intro_content.xpath(
    '//div[@class="base"]//ul/li/span[contains(text(),"建筑面积")]/../text()')[0][:-1]
total_area

'172.45'

In [86]:
inner_area = intro_content.xpath(
    '//div[@class="base"]//ul/li/span[contains(text(),"套内面积")]/../text()'
)[0][:-1]  # 套内面积（平米）
inner_area

'133.76'

In [89]:
lay_out = intro_content.xpath(
            '//div[@class="base"]//ul/li/span[contains(text(),"房屋户型")]/../text()'
        )[0]  # 户型
lay_out

'4室2厅1厨2卫'

In [93]:
a=2
type(a)

int

In [122]:
def is_null(feature):
    '''
    判断空特征
    '''
    if not feature:
        return ['暂无数据']
    else:
        return feature

In [113]:
intro_content = html.xpath('//div[@class="introContent"]')[0]

In [124]:
ownership = is_null(intro_content.xpath(
    '//div[@class="transaction"]//ul/li/span[contains(text(),"房屋途")]/../span[last()]/text()'
))[0]  # 产权所属
ownership

'暂无数据'

In [125]:
columns = [
            '标题', '介绍', '小区', '价格（万元）', '平米单价（元）', '建筑面积（平米）', '套内面积（平米）',
            '大区域', '小区域', '户型', '朝向', '所在楼层', '装修情况', '户型结构', '建筑类型', '建筑结构',
            '建造时间', '房屋用途', '挂牌时间', '上次交易时间', '房屋年限', '产权所属', '配备电梯', '梯户比例'
        ]

In [126]:
len(columns)

24

In [135]:
import tkinter as tk
from tkinter import filedialog
import tkinter.messagebox as msg
import configparser as cp
import ntpath

class CentralForm(tk.Toplevel):
    def __init__(self, master, my_height=80):
        super().__init__()
        self.master = master

        master_pos_x = self.master.winfo_x()
        master_pos_y = self.master.winfo_y()

        master_width = self.master.winfo_width()
        master_height = self.master.winfo_height()

        my_width = 300

        pos_x = (master_pos_x + (master_width // 2)) - (my_width // 2)
        pos_y = (master_pos_y + (master_height // 2)) - (my_height // 2)

        geometry = "{}x{}+{}+{}".format(my_width, my_height, pos_x, pos_y)
        self.geometry(geometry)


class AddSectionForm(CentralForm):
    def __init__(self, master):
        super().__init__(master)

        self.title("Add New Section")

        self.main_frame = tk.Frame(self, bg="lightgrey")
        self.name_label = tk.Label(self.main_frame, text="Section Name", bg="lightgrey", fg="black")
        self.name_entry = tk.Entry(self.main_frame, bg="white", fg="black")
        self.submit_button = tk.Button(self.main_frame, text="Create", command=self.create_section)

        self.main_frame.pack(expand=1, fill=tk.BOTH)
        self.name_label.pack(side=tk.TOP, fill=tk.X)
        self.name_entry.pack(side=tk.TOP, fill=tk.X, padx=10)
        self.submit_button.pack(side=tk.TOP, fill=tk.X, pady=(10,0), padx=10)

    def create_section(self):
        section_name = self.name_entry.get()
        if section_name:
            self.master.add_section(section_name)
            self.destroy()
            msg.showinfo("Section Added", "Section " + section_name + " successfully added")
        else:
            msg.showerror("No Name", "Please enter a section name", parent=self)


class AddItemForm(CentralForm):
    def __init__(self,  master):

        my_height = 120

        super().__init__(master, my_height)

        self.title("Add New Item")

        self.main_frame = tk.Frame(self, bg="lightgrey")
        self.name_label = tk.Label(self.main_frame, text="Item Name", bg="lightgrey", fg="black")
        self.name_entry = tk.Entry(self.main_frame, bg="white", fg="black")
        self.value_label = tk.Label(self.main_frame, text="Item Value", bg="lightgrey", fg="black")
        self.value_entry = tk.Entry(self.main_frame, bg="white", fg="black")
        self.submit_button = tk.Button(self.main_frame, text="Create", command=self.create_item)

        self.main_frame.pack(fill=tk.BOTH, expand=1)
        self.name_label.pack(side=tk.TOP, fill=tk.X)
        self.name_entry.pack(side=tk.TOP, fill=tk.X, padx=10)
        self.value_label.pack(side=tk.TOP, fill=tk.X)
        self.value_entry.pack(side=tk.TOP, fill=tk.X, padx=10)
        self.submit_button.pack(side=tk.TOP, fill=tk.X, pady=(10,0), padx=10)

    def create_item(self):
        item_name = self.name_entry.get()
        item_value = self.value_entry.get()
        if item_name and item_value:
            self.master.add_item(item_name, item_value)
            self.destroy()
            msg.showinfo("Item Added", item_name + " successfully added")
        else:
            msg.showerror("Missing Info", "Please enter a name and value", parent=self)


class IniEditor(tk.Tk):

    def __init__(self):
        super().__init__()

        self.title("Config File Editor")
        self.geometry("600x600")

        self.active_ini = ""
        self.active_ini_filename = ""
        self.ini_elements = {}

        self.menubar = tk.Menu(self, bg="lightgrey", fg="black")

        self.file_menu = tk.Menu(self.menubar, tearoff=0, bg="lightgrey", fg="black")
        self.file_menu.add_command(label="New", command=self.file_new, accelerator="Ctrl+N")
        self.file_menu.add_command(label="Open", command=self.file_open, accelerator="Ctrl+O")
        self.file_menu.add_command(label="Save", command=self.file_save, accelerator="Ctrl+S")

        self.menubar.add_cascade(label="File", menu=self.file_menu)

        self.config(menu=self.menubar)

        self.left_frame = tk.Frame(self, width=200, bg="grey")
        self.left_frame.pack_propagate(0)

        self.right_frame = tk.Frame(self, width=400, bg="lightgrey")
        self.right_frame.pack_propagate(0)

        self.file_name_var = tk.StringVar(self)
        self.file_name_label = tk.Label(self, textvar=self.file_name_var, fg="black", bg="white", font=(None, 12))
        self.file_name_label.pack(side=tk.TOP, expand=1, fill=tk.X, anchor="n")

        self.section_select = tk.Listbox(self.left_frame, selectmode=tk.SINGLE)
        self.section_select.configure(exportselection=False)
        self.section_select.pack(expand=1)
        self.section_select.bind("<<ListboxSelect>>", self.display_section_contents)

        self.section_add_button = tk.Button(self.left_frame, text="Add Section", command=self.add_section_form)
        self.section_add_button.pack(pady=(0,20))

        self.left_frame.pack(side=tk.LEFT, fill=tk.BOTH)
        self.right_frame.pack(side=tk.LEFT, expand=1, fill=tk.BOTH)

        self.right_frame.bind("<Configure>", self.frame_height)

        self.bind("<Control-n>", self.file_new)
        self.bind("<Control-o>", self.file_open)
        self.bind("<Control-s>", self.file_save)

    def add_section_form(self):
        if not self.active_ini:
            msg.showerror("No File Open", "Please open an ini file first")
            return

        AddSectionForm(self)

    def add_section(self, section_name):
        self.active_ini[section_name] = {}
        self.populate_section_select_box()

    def frame_height(self, event=None):
        new_height = self.winfo_height()
        self.right_frame.configure(height=new_height)

    def file_new(self, event=None):
        ini_file = filedialog.asksaveasfilename(filetypes=[("Configuration file", "*.ini")])

        while ini_file and not ini_file.endswith(".ini"):
            msg.showerror("Wrong Filetype", "Filename must end in .ini")
            ini_file = filedialog.askopenfilename()

        if ini_file:
            self.parse_ini_file(ini_file)

    def file_open(self, event=None):
        ini_file = filedialog.askopenfilename(filetypes=[("Configuration file", "*.ini")])

        while ini_file and not ini_file.endswith(".ini"):
            msg.showerror("Wrong Filetype", "Please select an ini file")
            ini_file = filedialog.askopenfilename()

        if ini_file:
            self.parse_ini_file(ini_file)

    def file_save(self, event=None):
        if not self.active_ini:
            msg.showerror("No File Open", "Please open an ini file first")
            return

        for section in self.active_ini:
            for key in self.active_ini[section]:
                try:
                    self.active_ini[section][key] = self.ini_elements[section][key].get()
                except KeyError:
                    # wasn't changed, no need to save it
                    pass

        with open(self.active_ini_filename, "w") as ini_file:
            self.active_ini.write(ini_file)

        msg.showinfo("Saved", "File Saved Successfully")

    def add_item_form(self):
        AddItemForm(self)

    def add_item(self, item_name, item_value):
        chosen_section = self.section_select.get(self.section_select.curselection())
        self.active_ini[chosen_section][item_name] = item_value
        self.display_section_contents()

    def parse_ini_file(self, ini_file):
        self.active_ini = cp.ConfigParser()
        self.active_ini.read(ini_file)
        self.active_ini_filename = ini_file
        self.populate_section_select_box()

        file_name = ": ".join([ntpath.basename(ini_file), ini_file])
        self.file_name_var.set(file_name)

        self.clear_right_frame()

    def clear_right_frame(self):
        for child in self.right_frame.winfo_children():
            child.destroy()

    def populate_section_select_box(self):
        self.section_select.delete(0, tk.END)

        for index, section in enumerate(self.active_ini.sections()):
            self.section_select.insert(index, section)
            self.ini_elements[section] = {}
        if "DEFAULT" in self.active_ini:
            self.section_select.insert(len(self.active_ini.sections()) + 1, "DEFAULT")
            self.ini_elements["DEFAULT"] = {}

    def display_section_contents(self, event=None):
        if not self.active_ini:
            msg.showerror("No File Open", "Please open an ini file first")
            return

        chosen_section = self.section_select.get(self.section_select.curselection())

        for child in self.right_frame.winfo_children():
            child.pack_forget()

        for key in sorted(self.active_ini[chosen_section]):
            new_label = tk.Label(self.right_frame, text=key, font=(None, 12), bg="black", fg="white")
            new_label.pack(fill=tk.X, side=tk.TOP, pady=(10,0))

            try:
                section_elements = self.ini_elements[chosen_section]
            except KeyError:
                section_elements = {}

            try:
                ini_element = section_elements[key]
            except KeyError:
                value = self.active_ini[chosen_section][key]

                if value.isnumeric():
                    spinbox_default = tk.IntVar(self.right_frame)
                    spinbox_default.set(int(value))
                    ini_element = tk.Spinbox(self.right_frame, from_=0, to=99999, textvariable=spinbox_default, bg="white", fg="black", justify="center")
                else:
                    ini_element = tk.Entry(self.right_frame, bg="white", fg="black", justify="center")
                    ini_element.insert(0, value)

                self.ini_elements[chosen_section][key] = ini_element

            ini_element.pack(fill=tk.X, side=tk.TOP, pady=(0,10))

        save_button = tk.Button(self.right_frame, text="Save Changes", command=self.file_save)
        save_button.pack(side=tk.BOTTOM, pady=(0,20))

        add_button = tk.Button(self.right_frame, text="Add Item", command=self.add_item_form)
        add_button.pack(side=tk.BOTTOM, pady=(0,20))


if __name__ == "__main__":
    ini_editor = IniEditor()
    ini_editor.mainloop()


In [136]:
import tkinter as tk
from tkinter import messagebox as msg
from tkinter.ttk import Notebook

import requests

class TranslateBook(tk.Tk):
    def __init__(self):
        super().__init__()

        self.title("Translation Book v1")
        self.geometry("500x300")

        self.notebook = Notebook(self)

        english_tab = tk.Frame(self.notebook)
        italian_tab = tk.Frame(self.notebook)

        self.translate_button = tk.Button(english_tab, text="Translate", command=self.translate)
        self.translate_button.pack(side=tk.BOTTOM, fill=tk.X)

        self.english_entry = tk.Text(english_tab, bg="white", fg="black")
        self.english_entry.pack(side=tk.TOP, expand=1)

        self.italian_copy_button = tk.Button(italian_tab, text="Copy to Clipboard", command=self.copy_to_clipboard)
        self.italian_copy_button.pack(side=tk.BOTTOM, fill=tk.X)

        self.italian_translation = tk.StringVar(italian_tab)
        self.italian_translation.set("")

        self.italian_label = tk.Label(italian_tab, textvar=self.italian_translation, bg="lightgrey", fg="black")
        self.italian_label.pack(side=tk.TOP, fill=tk.BOTH, expand=1)

        self.notebook.add(english_tab, text="English")
        self.notebook.add(italian_tab, text="Italian")

        self.notebook.pack(fill=tk.BOTH, expand=1)

    def translate(self, target_language="it", text=None):
        if not text:
            text = self.english_entry.get(1.0, tk.END)

        url = "https://translate.googleapis.com/translate_a/single?client=gtx&sl={}&tl={}&dt=t&q={}".format("en", target_language, text)

        try:
            r = requests.get(url)
            r.raise_for_status()
            translation = r.json()[0][0][0]
            self.italian_translation.set(translation)
            msg.showinfo("Translation Successful", "Text successfully translated")
        except Exception as e:
            msg.showerror("Translation Failed", str(e))

    def copy_to_clipboard(self, text=None):
        if not text:
            text = self.italian_translation.get()

        self.clipboard_clear()
        self.clipboard_append(text)
        msg.showinfo("Copied Successfully", "Text copied to clipboard")


if __name__ == "__main__":
    translatebook = TranslateBook()
    translatebook.mainloop()


In [137]:
import tkinter as tk
import tkinter.messagebox as msg

class Todo(tk.Tk):
    def __init__(self, tasks=None):
        super().__init__()

        if not tasks:
            self.tasks = []
        else:
            self.tasks = tasks

        self.tasks_canvas = tk.Canvas(self)

        self.tasks_frame = tk.Frame(self.tasks_canvas)
        self.text_frame = tk.Frame(self)

        self.scrollbar = tk.Scrollbar(self.tasks_canvas, orient="vertical", command=self.tasks_canvas.yview)

        self.tasks_canvas.configure(yscrollcommand=self.scrollbar.set)

        self.title("To-Do App v2")
        self.geometry("300x400")

        self.task_create = tk.Text(self.text_frame, height=3, bg="white", fg="black")

        self.tasks_canvas.pack(side=tk.TOP, fill=tk.BOTH, expand=1)
        self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.canvas_frame = self.tasks_canvas.create_window((0, 0), window=self.tasks_frame, anchor="n")

        self.task_create.pack(side=tk.BOTTOM, fill=tk.X)
        self.text_frame.pack(side=tk.BOTTOM, fill=tk.X)
        self.task_create.focus_set()

        todo1 = tk.Label(self.tasks_frame, text="--- Add Items Here ---", bg="lightgrey", fg="black", pady=10)
        todo1.bind("<Button-1>", self.remove_task)

        self.tasks.append(todo1)

        for task in self.tasks:
            task.pack(side=tk.TOP, fill=tk.X)

        self.bind("<Return>", self.add_task)
        self.bind("<Configure>", self.on_frame_configure)
        self.bind_all("<MouseWheel>", self.mouse_scroll)
        self.bind_all("<Button-4>", self.mouse_scroll)
        self.bind_all("<Button-5>", self.mouse_scroll)
        self.tasks_canvas.bind("<Configure>", self.task_width)

        self.colour_schemes = [{"bg": "lightgrey", "fg": "black"}, {"bg": "grey", "fg": "white"}]

    def add_task(self, event=None):
        task_text = self.task_create.get(1.0,tk.END).strip()

        if len(task_text) > 0:
            new_task = tk.Label(self.tasks_frame, text=task_text, pady=10)

            self.set_task_colour(len(self.tasks), new_task)

            new_task.bind("<Button-1>", self.remove_task)
            new_task.pack(side=tk.TOP, fill=tk.X)

            self.tasks.append(new_task)

        self.task_create.delete(1.0, tk.END)

    def remove_task(self, event):
        task = event.widget
        if msg.askyesno("Really Delete?", "Delete " + task.cget("text") + "?"):
            self.tasks.remove(event.widget)
            event.widget.destroy()
            self.recolour_tasks()

    def recolour_tasks(self):
        for index, task in enumerate(self.tasks):
            self.set_task_colour(index, task)

    def set_task_colour(self, position, task):
        _, task_style_choice = divmod(position, 2)

        my_scheme_choice = self.colour_schemes[task_style_choice]

        task.configure(bg=my_scheme_choice["bg"])
        task.configure(fg=my_scheme_choice["fg"])

    def on_frame_configure(self, event=None):
        self.tasks_canvas.configure(scrollregion=self.tasks_canvas.bbox("all"))

    def task_width(self, event):
        canvas_width = event.width
        self.tasks_canvas.itemconfig(self.canvas_frame, width = canvas_width)

    def mouse_scroll(self, event):
        if event.delta:
            self.tasks_canvas.yview_scroll(int(-1*(event.delta/120)), "units")
        else:
            if event.num == 5:
                move = 1
            else:
                move = -1

            self.tasks_canvas.yview_scroll(move, "units")

if __name__ == "__main__":
    todo = Todo()
    todo.mainloop()
