# Python 基础

## 1. 读取操作内存大小的文件

In [4]:
from mmap import mmap


def get_lines(fp):
    with open(fp, 'r+') as f:
        m = mmap(f.fileno(), 0)
        tmp = 0
        for i, char in enumerate(m):
            if char == b'\n':
                yield m[tmp:i + 1].decode()
                tmp = i + 1


if __name__ == "__main__":
    for i in get_lines(r'D:\WorkSpace\notebook\python\pip.ipynb'):
        print(i)

{

 "cells": [

  {

   "cell_type": "markdown",

   "metadata": {},

   "source": [

    "# pip"

   ]

  },

  {

   "cell_type": "markdown",

   "metadata": {},

   "source": [

    "## requirements\n",

    "\n",

    "``` bash\n",

    "# 1. 在项目中分析出所有依赖的库\n",

    "pip freeze > requirements.txt\n",

    "# 2. 下载包到DIR这个目录中\n",

    "pip download -d DIR -r requirements.txt\n",

    "pip wheel -w DIR -r requirements.txt\n",

    "# 3. 离线安装\n",

    "pip3 install --no-index --find-links=DIR -r requirements.txt\n",

    "# 4. 直接安装 requirements.txt\n",

    "pip install -r requirements.txt\n",

    "```\n"

   ]

  },

  {

   "cell_type": "markdown",

   "metadata": {},

   "source": [

    "## pip 镜像\n",

    "\n",

    "**修改 C:\\Users\\Administrator\\AppData\\Roaming\\pip\\pip.ini**\n",

    "```\n",

    "[global]\n",

    "timeout = 60000\n",

    "index-url = https://pypi.tuna.tsinghua.edu.cn/simple\n",

    "[install]\n",

    "use-mirrors = true\n",

    "mirrors = https://pypi.

## 2. 读取路径下所有文件全路径

In [6]:
def print_directory_contents(sPath):
    """
    这个函数接收文件夹的名称作为输入参数
    返回该文件夹中文件的路径
    以及其包含文件夹中文件的路径
    """
    import os
    for s_childs in os.listdir(sPath):
        s_child_path = os.path.join(sPath, s_childs)
        if os.path.isdir(s_child_path):
            print_directory_contents(s_child_path)
        else:
            print(s_child_path)


if __name__ == "__main__":
    print_directory_contents(r"D:\WorkSpace\notebook\database")

D:\WorkSpace\notebook\database\00 - 异常记录.md
D:\WorkSpace\notebook\database\mysql.ipynb
D:\WorkSpace\notebook\database\oracle.ipynb
D:\WorkSpace\notebook\database\redis.ipynb
D:\WorkSpace\notebook\database\sqls.ipynb
D:\WorkSpace\notebook\database\sqlserver.ipynb


## 3. 读取日期所在年的天

In [7]:
from datetime import date


def dayofyear():
    year = input("输入年份:")
    month = input("输入月份:")
    day = input("输入天:")
    date1 = date(year=int(year), month=int(month), day=int(day))
    date2 = date(year=int(year), month=1, day=1)
    return (date1 - date2).days + 1


if __name__ == "__main__":
    print(dayofyear())

298


## 4. 打乱一个list

In [8]:
import random
alist = [1, 2, 3, 4, 5]
random.shuffle(alist)
print(alist)

[3, 2, 1, 5, 4]


## 5. 按 dict 的 value 排序

In [1]:
d = {'a': 24, 'g': 52, 'i': 12, 'k': 33}
sorted(d.items(), key=lambda x: x[1])

[('i', 12), ('a', 24), ('k', 33), ('g', 52)]

## 6. 字典推导式

In [3]:
iterable = [('a', 1), ('b', 2)]
d = {key: value for (key, value) in iterable}
d

{'a': 1, 'b': 2}

## 7. 反转字符串

In [4]:
'aStr'[::-1]

'rtSa'

## 8. 字符串转字典

In [9]:
str1 = 'k:1|k1:2|k2:3|k3:4'


def str2dict(str1):
    dict1 = {}
    for items in str1.split('|'):
        key, value = items.split(':')
        dict1[key] = int(value)
    return dict1


d = str2dict(str1)
print(d)

d = {k: int(v) for t in str1.split('|') for k, v in (t.split(':'), )}
print(d)

{'k': 1, 'k1': 2, 'k2': 3, 'k3': 4}
{'k': 1, 'k1': 2, 'k2': 3, 'k3': 4}


## 9. list 按元素属性大小排序

In [10]:
alist = [{
    'name': 'a',
    'age': 20
}, {
    'name': 'b',
    'age': 30
}, {
    'name': 'c',
    'age': 25
}]


def sort_by_age(l):
    return sorted(alist, key=lambda x: x['age'], reverse=True)


print(sort_by_age(alist))

[{'name': 'b', 'age': 30}, {'name': 'c', 'age': 25}, {'name': 'a', 'age': 20}]


## 10. 切片

In [12]:
l = ['a', 'b', 'c', 'd', 'e']
l[10:]

[]

## 11. 生成一个公差为 11 的等差数列列表生成式

In [13]:
[x * 11 for x in range(10)]

[0, 11, 22, 33, 44, 55, 66, 77, 88, 99]

## 12. 两个列表的交集和差集

In [15]:
list1 = [1, 2, 3]
list2 = [3, 4, 5]
set1 = set(list1)
set2 = set(list2)
print(set1 & set2)
print(set1 ^ set2)

{3}
{1, 2, 4, 5}


## 13. 删除list 中的重复元素，保持原顺序

In [20]:
l1 = ['b', 'c', 'd', 'c', 'a', 'a']

sorted(set(l1), key=l1.index)

['b', 'c', 'd', 'a']

## 17. python 单例模式

In [1]:
# 使用装饰器
def singleton(cls):
    instances = {}

    def wrapper(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return wrapper


@singleton
class Foo(object):
    pass


foo1 = Foo()
foo2 = Foo()
print(foo1 is foo2)

True


In [2]:
# 使用基类
class Singleton(object):
    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, '_instance'):
            cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
        return cls._instance


class Foo(Singleton):
    pass


foo1 = Foo()
foo2 = Foo()
print(foo1 is foo2)

True


In [3]:
# 使用元类
class Singleton(type):
    def __call__(cls, *args, **kwargs):
        if not hasattr(cls, '_instance'):
            cls._instance = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instance


# python2
# class Foo(object):
#     __metaclass__ = Singleton


class Foo(metaclass=Singleton):
    pass


foo1 = Foo()
foo2 = Foo()
print(foo1 is foo2)

True


## 18. 反转一个整数

In [4]:
class Solution(object):
    def reverse(self, x):
        if -10 < x < 10:
            return x
        str_x = str(x)
        if str_x[0] != '-':
            str_x = str_x[::-1]
            x = int(str_x)
        else:
            str_x = str_x[1:][::-1]
            x = int(str_x)
            x = -x
        return x if -2147483648 < x < 2147483647 else 0


s = Solution()
reverse_int = s.reverse(-120)
print(reverse_int)

-21


## 19. 遍历目录，找到 .pyc 文件

In [2]:
# os.walk
import os


def get_files(dir, suffix):
    res = []
    for root, dirs, files in os.walk(dir):
        for filename in files:
            name, suf = os.path.splitext(filename)
            if suf == suffix:
                res.append(os.path.join(root, filename))
    print(res)


get_files('./..', '.pyc')

[]


In [5]:
# 使用 recursion
import os


def pick(obj, suffix):
    if obj.endswith(suffix):
        print(obj)


def scan_path(ph, suffix):
    file_list = os.listdir(ph)
    for obj in file_list:
        if os.path.isfile(os.path.join(ph, obj)):
            pick(obj, suffix)
        elif os.path.isdir(obj):
            scan_path(obj)


scan_path(r'D:\WorkSpace\notebook\lab\udf', '.py')

ei_maskkeys.py
hc_decode.py
hc_depoly.py
lic.py
uop_test.py
useroperation_a.py


In [7]:
# 使用 glob.iglob
from glob import iglob


def func(fp, suffix):
    for i in iglob(f'{fp}/**/*{suffix}', recursive=True):
        print(i)


func(r'D:\WorkSpace\notebook\lab\udf', '.py')

D:\WorkSpace\notebook\lab\udf\ei_maskkeys.py
D:\WorkSpace\notebook\lab\udf\hc_decode.py
D:\WorkSpace\notebook\lab\udf\hc_depoly.py
D:\WorkSpace\notebook\lab\udf\lic.py
D:\WorkSpace\notebook\lab\udf\uop_test.py
D:\WorkSpace\notebook\lab\udf\useroperation_a.py


## 20. 一行代码求 1-100 的和

In [9]:
sum(range(1, 101))

5050

## 21 过滤 list

In [13]:
a = [1, 2, 3, 4, 5, 6, 7, 8]

#filter
b = filter(lambda x: x > 5, a)
print(list(b))

# generater
b = [x for x in a if x > 5]
print(b)

# reverse
print(id(a))
for i in range(len(a) - 1, -1, -1):
    if a[i] <= 5:
        a.remove(a[i])
print(id(a))
print(a)

[6, 7, 8]
[6, 7, 8]
2283469873216
2283469873216
[6, 7, 8]


## 22. 缺失的英文字符

In [2]:
def get_missing_letter(a):
    import string
    letters = string.ascii_lowercase

    letters = ''.join(map(chr, range(ord('a'), ord('z') + 1)))

    letters = set('abcdefghijklmnopqrstuvwxyz')
    letters_a = set(a.lower())
    res = ''.join(sorted(letters - letters_a))
    return res


get_missing_letter('python')

'abcdefgijklmqrsuvwxz'

## 25. 求列表中的所有奇数

In [1]:
a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[x for x in a if x % 2 == 1]

[1, 3, 5, 7, 9]

## 26. 一行代码求和

In [2]:
from functools import reduce
reduce(lambda x, y: x + y, [1, 2, 3, 10248])

10254

In [4]:
sum([1, 2, 3, 10248])

10254

## 28. string 转 int

In [7]:
# str
def atoi(s):
    num = 0
    for v in s:
        for n in range(10):
            if v == str(n):
                num = num * 10 + n
    return num


atoi('123')

123

In [8]:
# ord
def atoi(s):
    num = 0
    for v in s:
        num = num * 10 + ord(v) - ord('0')
    return num


atoi('123')

123

In [9]:
# eval
def atoi(s):
    num = 0
    for v in s:
        num = num * 10 + eval(f'{v}')
    return num


atoi('123')

123

In [2]:
# reduce + ord
from functools import reduce


def atoi(s):
    return reduce(lambda num, v: num * 10 + ord(v) - ord('0'), s, 0)


atoi('123')

123

## 29. 找出数组中和为目标值的两个数

In [1]:
class Solution(object):
    def twoSum(self, nums, target):
        d = {}
        size = 0
        while size < len(nums):
            if target - nums[size] in d:
                if d[target - nums[size]] < size:
                    return [d[target - nums[size]], size]
            else:
                d[nums[size]] = size
            size += 1


solution = Solution()
print(solution.twoSum([2, 7, 11, 15], 9))

[0, 1]


In [2]:
class Solution(object):
    def twoSum(self, nums, target):
        for i in range(len(nums)):
            if target - nums[i] in nums[i + 1:]:
                return [i, nums.index(target - nums[i], i + 1)]


solution = Solution()
print(solution.twoSum([2, 7, 11, 15], 13))

[0, 2]


In [4]:
alist = [{
    "name": "a",
    "age": 20
}, {
    "name": "b",
    "age": 30
}, {
    "name": "c",
    "age": 25
}]
sorted(alist, key=lambda e: e.__getitem__('age'), reverse=True)

[{'name': 'b', 'age': 30}, {'name': 'c', 'age': 25}, {'name': 'a', 'age': 20}]

## 30. 删除一个 list 里面的重复元素

In [5]:
a = [1, 2, 4, 2, 4, 5, 7, 10, 5, 5, 7, 8, 9, 0, 3]
print(list(set(a)))
b = {}
print(list(b.fromkeys(a).keys()))

[0, 1, 2, 3, 4, 5, 7, 8, 9, 10]
[1, 2, 4, 5, 7, 10, 8, 9, 0, 3]


## 31. 统计文本中词频最高的 10 个单词

In [9]:
# using re
import re


def statistics(filePath):

    distone = {}
    with open(filePath) as f:
        for line in f:
            line = re.sub('\W+', ' ', line)
            lineone = line.split()
            for keyone in lineone:
                if not distone.get(keyone):
                    distone[keyone] = 1
                else:
                    distone[keyone] += 1

    num_ten = sorted(distone.items(), key=lambda x: x[1], reverse=True)[:10]
    return [x[0] for x in num_ten]


print(statistics(r'D:\WorkSpace\notebook\lab\interview\19.get_files.py'))

['print', 'size', 'os', 'suffix', 'obj', 'nums', 'def', 'in', 'path', 'if']


In [10]:
# using collections.Counter

import re
from collections import Counter


def statistics(filePath):
    with open(filePath) as f:
        return list(
            map(lambda c: c[0],
                Counter(re.sub('\W+', ' ', f.read()).split()).most_common(10)))


print(statistics(r'D:\WorkSpace\notebook\lab\interview\19.get_files.py'))

['print', 'size', 'os', 'suffix', 'obj', 'nums', 'def', 'in', 'path', 'if']


## 32. 筛选 list 中偶数位的偶数

In [11]:
def list_filter(nums):
    return [i for i in nums if i % 2 == 0 and nums.index(i) % 2 == 0]


nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
res = list_filter(nums)
print(res)

[0, 2, 4, 6, 8, 10]


## 33. 使用列表生成式筛选 list 中的偶数位的偶数

In [12]:
nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[x for x in nums[::2] if x % 2 == 0]

[0, 2, 4, 6, 8, 10]

## 34. 一行代码生成 [1,4,9,16,25,36,49,64,81,100]

In [13]:
[x * x for x in range(1, 11)]

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

## 36. 合并两个有序序列

In [15]:
def merge_sorted_list(l1, l2):
    tmp = []
    while len(l1) > 0 and len(l2) > 0:
        if l1[0] < l2[0]:
            tmp.append(l1[0])
            del l1[0]
        else:
            tmp.append(l2[0])
            del l2[0]
    while len(l1) > 0:
        tmp.append(l1[0])
        del l1[0]
    while len(l2) > 0:
        tmp.append(l2[0])
        del l2[0]
    return tmp


merge_sorted_list([1, 2, 3, 4, 5], [2, 3, 4, 5, 6])

[1, 2, 2, 3, 3, 4, 4, 5, 5, 6]

## 37. 数组重新排列，奇数升序在前，偶数降序在后

In [22]:
# using insert & pop
def func1(nums):
    if isinstance(nums, str):
        nums = [int(i) for i in nums]
    nums.sort(reverse=True)
    for i in range(len(nums)):
        if nums[i] % 2 > 0:
            nums.insert(0, nums.pop(i))
    return ''.join(str(x) for x in nums)


func1('1982376455')

'1355798642'

In [24]:
# using sorted
def func2(nums):
    return ''.join(
        sorted(nums, key=lambda x: int(x) % 2 == 0 and 20 - int(x) or int(x)))


func2('1982376455')

'1355798642'

## 38. 找出整数数组中第二大的数

In [27]:
def find_second_large_num(nums):
    sorted_nums = sorted(nums, reverse=True)
    return sorted_nums[1]


find_second_large_num([34, 11, 23, 56, 78, 0, 9, 12, 3, 7, 5])

56

In [28]:
def find_second_large_num(nums):
    one, two = nums[0], nums[0]
    for i in range(1, len(nums)):
        if nums[i] > one:
            two = one
            one = nums[i]
        elif nums[i] > two:
            two = nums[i]
    return two


find_second_large_num([34, 11, 23, 56, 78, 0, 9, 12, 3, 7, 5])

56

In [30]:
# using reduce & and、or
from functools import reduce


def find_second_large_num(nums):
    return reduce(
        lambda ot, x: ot[1] < x and (ot[1], x) or ot[0] < x and
        (x, ot[1]) or ot, nums, (0, 0))[0]


find_second_large_num([34, 11, 23, 56, 78, 0, 9, 12, 3, 7, 5])

56

## 39. 闭包

In [33]:
def multi():
    return [lambda x: i * x for i in range(4)]


# 闭包中的标量在内部函数被调用的时候被查找
print([m(3) for m in multi()])

[9, 9, 9, 9]


## 40. 统计一段字符串中字符出现的次数

In [35]:
def str_counter(data):
    dict_str = {}
    for i in data:
        dict_str[i] = dict_str.get(i, 0) + 1
    return dict_str


str_counter("AAABBCCAC")

{'A': 4, 'B': 2, 'C': 3}

In [37]:
from collections import Counter

Counter("AAABBCCAC").most_common()

[('A', 4), ('C', 3), ('B', 2)]

## 43. 遍历一个对象的所有属性，打印每个属性名

In [39]:
class Car(object):
    def __init__(self, name, loss):
        self.name = name
        self.loss = loss

    def getName(sefl):
        return self.name

    def getPrice(self):
        return self.loss[0]

    def getLoss(self):
        return self.loss[1] * self.loss[2]


bmw = Car('宝马', [60, 9, 500])
# print(getattr(bmw, 'name'))

print(dir(bmw))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'getLoss', 'getName', 'getPrice', 'loss', 'name']


## 44. 操作符类

In [40]:
class Array(object):
    __list = []

    def __init__(self):
        print('constructor')

    def __del__(self):
        print('destruct')

    def __str__(self):
        return 'this self-defind array class'

    def __getitem__(self, key):
        return self.__list[key]

    def __len__(self):
        return len(self.__list)

    def add(self, value):
        self.__list.append(value)

    def remove(self, index):
        del self.__list[index]

    def displayItems(self):
        print('show all items')
        for item in self.__list:
            print(item)

## 47. 动态获取和设置对象的属性

In [45]:
class Parent(object):
    x = 0


if hasattr(Parent, 'x'):
    print(getattr(Parent, 'x'))
    setattr(Parent, 'x', 3)

print(getattr(Parent, 'x'))

0
3


## 57. 判断时间的装饰器

In [3]:
from datetime import datetime


class TimeException(Exception):
    def __init__(self, exception):
        super().__init__()
        self.exception = exception

    def __str__(self):
        return self.exception


def timecheck(func):
    def wrapper(*args, **kwargs):
        if datetime.now().year == 2020:
            func(*args, **kwargs)
        else:
            raise TimeException('函数已超时')

    return wrapper


@timecheck
def test(name):
    print(f'Hello {name}, 2020 Happy')


test('backbp')

Hello backbp, 2020 Happy


## 58. filter 的使用

In [7]:
list(filter(lambda x: x % 2 == 0, range(10)))

[0, 2, 4, 6, 8]

## 67. 值传递

In [6]:
a = 10
b = 20
c = [a]
a = 15
c

[10]

## 69. map & reduce

In [10]:
list(map(lambda x: x * x, [1, 2, 3, 4, 5]))

[1, 4, 9, 16, 25]

In [12]:
from functools import reduce
reduce(lambda x, y: x * y, [1, 2, 3, 4])

24

## 77. 闭包的延迟加载

In [15]:
def mutipliers():
    return [lambda x: x * i for i in range(4)]


print([m(2) for m in mutipliers()])

[6, 6, 6, 6]


In [16]:
def multipliers():
    for i in range(4):
        yield lambda x: x * i


print([m(2) for m in multipliers()])

[0, 2, 4, 6]


## 79. 单例模式

In [1]:
class Singleton(object):
    __instance = None

    def __new__(cls, *args, **kwargs):
        if cls.__instance is None:
            cls.__instance = super().__new__(cls, *args, **kwargs)
        return cls.__instance

## 83. 计时装饰器

In [7]:
from functools import wraps
from datetime import datetime
import time

def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = datetime.now()
        res = func(*args, **kwargs)
        stop = datetime.now()
        print(f'used: {stop-start}')
        return res
    return wrapper


@timeit
def foo():
    print('in foo()')
    time.sleep(5)

foo()

in foo()
used: 0:00:05.006940


## 88. 将1-N 的整数数列以3为单位分组

In [13]:
N = 100
[[[x for x in range(1, N+1)] [i:i+3] for i in range(0, N+1, 3)]]

[[[1, 2, 3],
  [4, 5, 6],
  [7, 8, 9],
  [10, 11, 12],
  [13, 14, 15],
  [16, 17, 18],
  [19, 20, 21],
  [22, 23, 24],
  [25, 26, 27],
  [28, 29, 30],
  [31, 32, 33],
  [34, 35, 36],
  [37, 38, 39],
  [40, 41, 42],
  [43, 44, 45],
  [46, 47, 48],
  [49, 50, 51],
  [52, 53, 54],
  [55, 56, 57],
  [58, 59, 60],
  [61, 62, 63],
  [64, 65, 66],
  [67, 68, 69],
  [70, 71, 72],
  [73, 74, 75],
  [76, 77, 78],
  [79, 80, 81],
  [82, 83, 84],
  [85, 86, 87],
  [88, 89, 90],
  [91, 92, 93],
  [94, 95, 96],
  [97, 98, 99],
  [100]]]

## 95. re

In [3]:
import re
a = 'abbbccc'
re.sub(r'b+', 'b', a)

'abccc'

## 106. 进程

In [8]:
# mulitprocessing.Process
import os
from multiprocessing import Process
import time

def pro_func(name, age, **kwargs):
    for i in range(5):
        print(f'子进程正在运行中，name={name},age={age},pid={os.getpid()}')
        print(kwargs)
        time.sleep(0.2)

if __name__ == '__main__':
    p = Process(target=pro_func, args=('小明', 18), kwargs={'m':20})
    p.start()
    time.sleep(1)
    p.terminate()
    p.join()