Python 程序可以分解成模块、语句、表达式、以及对象：
- 程序由模块组成。通常来说一个有代码的文本文件就是一个模块
- 模块包含语句
- 语句包含表达式
- 表达式创建并处理对象

Python 是动态类型的：它会自动跟踪对象的类型，而不要求在代码中声明类型

In [1]:
123 + 222

345

In [2]:
1.5 * 4

6.0

In [3]:
2 ** 100

1267650600228229401496703205376

In [4]:
bigdata = 2 ** 1000000

In [5]:
len(str(bigdata))

301030

In [6]:
import sys
sys.getsizeof(bigdata) # unit: Bytes

133360

In [7]:
3.1415 * 2

6.283

In [8]:
print(3.1415 * 2)

6.283


In [9]:
# Python 3 中上面的例子不会显示浮点误差，可以用下面的表达式来观察
f = 0.1 + 0.2
f

0.30000000000000004

In [10]:
# Python 3 中 print 也不会隐藏这个误差（浮点数的 str 与 repr 结果相同）
print(0.1 + 0.2) # print底层调用的是 __str__()

0.30000000000000004


In [11]:
f.__repr__() # 给开发者看的精确格式

'0.30000000000000004'

In [12]:
f.__str__() # 给人看的友好格式（print()时调用）

'0.30000000000000004'

In [13]:
import math

math.pi

3.141592653589793

In [14]:
math.sqrt(85)

9.219544457292887

In [15]:
import random

for _ in range(10):
    print(random.random())

0.40258698427350037
0.9036928425762016
0.7617752335118757
0.3783477742186734
0.13887116988035542
0.8533281025972212
0.31048820230458996
0.7562244962715244
0.8535418496148726
0.18221676550765775


In [16]:
for _ in range(10):
    print(random.choice([1, 2, 3, 4]))

2
2
1
3
3
3
2
1
3
3


# 字符串

## 序列操作

In [17]:
S = 'spam'
len(S)

4

In [18]:
S[0]

's'

In [19]:
S[1]

'p'

In [20]:
# 不能越界
S[4]

IndexError: string index out of range

In [21]:
# 负数索引就是负数和长度相加

In [22]:
S[-1], S[len(S) - 1]

('m', 'm')

In [23]:
S[-2], S[len(S) - 2]

('a', 'a')

In [24]:
S[-4]

's'

In [25]:
# 不能超过长度
S[-5]

IndexError: string index out of range

In [26]:
# len(S) - 5 的结果是 -1，所以这里等价于 S[-1]，不会越界报错
S[len(S) - 5]

'm'

In [27]:
S[1:3]

'pa'

In [28]:
S[1:]

'pam'

In [29]:
S

'spam'

In [30]:
S[0:3]

'spa'

In [31]:
S[:3]

'spa'

In [32]:
S[:-1]

'spa'

In [33]:
S[:]

'spam'

In [34]:
S + 'xyz'

'spamxyz'

In [35]:
S

'spam'

In [36]:
S * 8

'spamspamspamspamspamspamspamspam'

## 不可变性

In [37]:
S

'spam'

In [38]:
S[0]

's'

In [39]:
S[0] = 'z'

TypeError: 'str' object does not support item assignment

In [40]:
print(id(S))
S = 'z' + S[1:]
print(id(S), S)

1404091742320
1404092467952 zpam


- 在核心类型中，数字、字符串、元组是不可变的

In [41]:
S = 'shrubbery'
L = list(S)
L

['s', 'h', 'r', 'u', 'b', 'b', 'e', 'r', 'y']

In [42]:
L[1]

'h'

In [43]:
L[1] = 'c'
L

['s', 'c', 'r', 'u', 'b', 'b', 'e', 'r', 'y']

In [44]:
''.join(L)

'scrubbery'

In [45]:
B = bytearray(b'spam')
B

bytearray(b'spam')

In [46]:
type(B)

bytearray

In [47]:
B.extend(b'eggs')
B

bytearray(b'spameggs')

In [48]:
B.decode()

'spameggs'

## 特定类型的方法

In [49]:
S = 'spam'
S.find('pa')

1

In [50]:
S.replace('pa', 'xyz')

'sxyzm'

In [51]:
S

'spam'

In [52]:
line = 'aaa,bbb,cccc,dd'
line.split(',')

['aaa', 'bbb', 'cccc', 'dd']

In [53]:
S.upper()

'SPAM'

In [54]:
S.isalpha(), 'as123'.isalpha()

(True, False)

In [55]:
'123'.isdigit(), 'as123'.isdigit(), '123dsf'.isdigit()

(True, False, False)

In [56]:
line = 'aaa,bbb,cccc,dd\n'
line

'aaa,bbb,cccc,dd\n'

In [57]:
line.rstrip()

'aaa,bbb,cccc,dd'

In [58]:
line.rstrip().split(',')

['aaa', 'bbb', 'cccc', 'dd']

In [59]:
'%s, eggs, and %s' % ('spam', 'SPAM!')

'spam, eggs, and SPAM!'

In [60]:
'{0}, eggs, and {1}'.format('spam', 'SPAM!')

'spam, eggs, and SPAM!'

In [61]:
# 多一个也不影响
'{0}, eggs, and {1}'.format('spam', 'SPAM!', 'asd')

'spam, eggs, and SPAM!'

In [62]:
'{}, eggs, and {}'.format('spam', 'SPAM!')

'spam, eggs, and SPAM!'

In [63]:
'{:.2f}'.format(296999.2567)

'296999.26'

In [64]:
# 增加千分位分隔符（逗号）
'{:,.2f}'.format(296999.2567)

'296,999.26'

In [65]:
'%.2f|%+05d'%(3.1415926, -4.2)

'3.14|-0004'

In [66]:
'%.2f|%+05f'%(3.1415926, -4.2)

'3.14|-4.200000'

## 寻求帮助

In [67]:
dir(S)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']


In [81]:
%%time
S = ''
for i in range(10**5):
    S = S + 'NI'

Wall time: 20.4 ms


In [82]:
%%time
S = ''
for i in range(10**5):
    S = S.__add__('NI')

Wall time: 399 ms


In [85]:
S = ''

In [87]:
help(S) # 会有很长的内容，最好对具体的方法寻求帮助

Help on class str in module builtins:

class str(object)
 |  str(object='') -> str
 |  str(bytes_or_buffer[, encoding[, errors]]) -> str
 |  
 |  Create a new string object from the given object. If encoding or
 |  errors is specified, then the object must expose a data buffer
 |  that will be decoded using the given encoding and error handler.
 |  Otherwise, returns the result of object.__str__() (if defined)
 |  or repr(object).
 |  encoding defaults to sys.getdefaultencoding().
 |  errors defaults to 'strict'.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __format__(self, format_spec, /)
 |      Return a formatted version of the string as described by format_spec.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  

In [88]:
help(S.replace)

Help on built-in function replace:

replace(old, new, count=-1, /) method of builtins.str instance
    Return a copy with all occurrences of substring old replaced by new.
    
      count
        Maximum number of occurrences to replace.
        -1 (the default value) means replace all occurrences.
    
    If the optional argument count is given, only the first count occurrences are
    replaced.



## 编写字符串的其他方式

In [89]:
S = 'A\nB\nC'
len(S)

5

In [90]:
ord('\n')

10

In [91]:
ord('A')

65

In [93]:
s1 = 'A\OB\OC'
s2 = 'A\oB\oC'
len(s1), len(s2)

(7, 7)

In [94]:
msg = """
aaaaa
bbbb'''bbb""bbb'bb
cccc
"""
msg

'\naaaaa\nbbbb\'\'\'bbb""bbb\'bb\ncccc\n'

In [100]:
s1 = 'C:\text'
s1

'C:\text'

In [101]:
print(s1)

C:	ext


In [102]:
s2 = r'C:\text'
s2

'C:\\text'

In [103]:
print(s2)

C:\text


In [104]:
'\xc4'

'Ä'

In [106]:
# python2
u'\u00'

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-3: truncated \uXXXX escape (<ipython-input-106-78663e4097bc>, line 2)

In [107]:
'spam'.encode('utf8')

b'spam'

In [108]:
'spam'.encode('utf16')

b'\xff\xfes\x00p\x00a\x00m\x00'

# 模式匹配

In [114]:
import re

In [123]:
match = re.match('Hello[ \t]*(.*)world', 'Hello         python world')
match

<re.Match object; span=(0, 26), match='Hello         python world'>

In [124]:
match.groups()

('python ',)

In [125]:
match.group(0)

'Hello         python world'

In [126]:
match.group(1)

'python '

In [128]:
m2 = re.match('[/:](.*)[/:](.*)[/:](.*)','/usr/home/:lumberjack')
m2

<re.Match object; span=(0, 21), match='/usr/home/:lumberjack'>

In [129]:
m2.groups()

('usr/home', '', 'lumberjack')

In [130]:
re.split('[/:]', '/usr/home/:lumberjack')

['', 'usr', 'home', '', 'lumberjack']