### list

In [1]:
# A list is an ordered sequence; indexing starts at zero.
arr = [
    "one",
    "two",
    "three",
]
arr[0]  # first element

'one'

In [2]:
# append() mutates the list in place; a list may mix element types (str and int here).
arr.append(4)
arr

['one', 'two', 'three', 4]

In [3]:
# del removes the item at index 2 ('three'); the items after it shift left.
del arr[2]
arr

['one', 'two', 4]

### tuple

In [4]:
# The commas build the tuple; the parentheses only make that explicit.
t = ('one', 'two', 3)
t

('one', 'two', 3)

**A tuple is immutable (it cannot be changed in place):**

```python
t.append(4)
```
AttributeError: 'tuple' object has no attribute 'append'

```python
del t[0]
```
TypeError: 'tuple' object doesn't support item deletion

Tuples support `+`, which builds and returns a new tuple; the original tuple is left unchanged.

In [5]:
t+(1,)

('one', 'two', 3, 1)

In [6]:
# Concatenation returns a brand-new tuple; t itself is not modified.
tcopy = t+(1,)
tcopy

('one', 'two', 3, 1)

In [7]:
id(tcopy)

4423264112

In [8]:
id(t)

4423053632

The two ids are different, so `tcopy` is a new object rather than a modified `t`.

### array.array

In [9]:
import array

# Typecode 'f' stores every element as a C single-precision float.
arr = array.array('f', [1.0, 1.5, 2.0, 2.5])
arr

array('f', [1.0, 1.5, 2.0, 2.5])

In [10]:
# 12.45 is not exactly representable in single precision, so the stored value
# reads back as 12.449999809265137.
arr[0] = 12.45
arr

array('f', [12.449999809265137, 1.5, 2.0, 2.5])

In [11]:
del arr[2]
arr

array('f', [12.449999809265137, 1.5, 2.5])

In [12]:
arr.append(4.89)
arr

array('f', [12.449999809265137, 1.5, 2.5, 4.889999866485596])

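**An array can only store values of one type, fixed by its typecode (`'f'` here is a single-precision float, which is why 12.45 is displayed as 12.449999809265137):**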

```python
arr[0] = 'hello'
```
TypeError: must be real number, not str

### str

In [13]:
s = '123abc'
s

'123abc'

In [14]:
s[0]

'1'

**str cannot be changed:**

```python
s.append(4)
```
AttributeError: 'str' object has no attribute 'append'

```python
del s[0]
```
TypeError: 'str' object doesn't support item deletion 

To modify the characters, convert the str to a list first:

In [15]:
sn = list(s)
sn

['1', '2', '3', 'a', 'b', 'c']

In [16]:
type(s[0])

str

In [17]:
s2 = s+'23'
s2

'123abc23'

In [18]:
id(s)

4423208304

In [19]:
id(s2)

4423359152

### Numpy array

In [20]:
import numpy as np

In [21]:
# Build a 1-D integer ndarray from a Python sequence.
a = np.array((2, 3, 4, 5))

In [22]:
a[0]

2

In [23]:
a[1:3]

array([3, 4])

In [24]:
a.dtype

dtype('int64')

In [25]:
a.mean()

3.5

In [26]:
a.max()

5

In [27]:
a.min()

2

In [28]:
a.std()

1.118033988749895

In [29]:
# Multiplying by a scalar is applied element-wise and returns a new array.
c = a*4
c

array([ 8, 12, 16, 20])

In [30]:
# 3x4 matrix holding 1..12, filled row by row.
a = np.arange(1, 13).reshape(3, 4)
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [31]:
a[:,0]

array([1, 5, 9])

In [32]:
a.mean()

6.5

In [33]:
a.mean(axis = 1) # mean of every row

array([ 2.5,  6.5, 10.5])

In [34]:
a.mean(axis = 0) # mean of every column

array([5., 6., 7., 8.])

### Pandas 

**One dimension: Series**

In [35]:
import pandas as pd

In [36]:
# Labelled 1-D data: the dict keys become the index, the dict values the data.
stocks = pd.Series({
    'A': 54.74,
    'B': 190.9,
    'C': 173.14,
    'D': 1050.3,
    'E': 181.86,
    'F': 1139.49,
})

In [37]:
stocks.describe()

count       6.000000
mean      465.071667
std       491.183757
min        54.740000
25%       175.320000
50%       186.380000
75%       835.450000
max      1139.490000
dtype: float64

In [38]:
stocks.iloc[0]

54.74

In [39]:
stocks.loc['A']

54.74

In [40]:
# The two Series share only the labels 'A' and 'B'.
s1 = pd.Series([1, 2, 3, 4], index=list('ABCD'))
s2 = pd.Series({'A': 10, 'B': 20, 'E': 30, 'F': 40})

# Addition aligns on the index; a label missing from either side yields NaN.
s3 = s1.add(s2)
s3

A    11.0
B    22.0
C     NaN
D     NaN
E     NaN
F     NaN
dtype: float64

In [41]:
s3.dropna()

A    11.0
B    22.0
dtype: float64

In [42]:
# fill_value=0 stands in for a label missing from one side, so no NaN is produced.
# Note: this rebinds s3, replacing the NaN-containing result computed earlier.
s3 = s1.add(s2, fill_value = 0)
s3

A    11.0
B    22.0
C     3.0
D     4.0
E    30.0
F    40.0
dtype: float64

**Two dimensions: DataFrame**

In [43]:
from collections import OrderedDict

# Column-oriented records: each key becomes a DataFrame column,
# each list holds that column's values (one entry per row).
salesDict = dict(
    Time=['2018-01-01', '2018-01-02', '2018-01-06'],
    ID=['1238938', '3828798', '2354787'],
    Name=['A', 'B', 'C'],
    Amount=[1, 2, 3],
)

# Wrap in an OrderedDict so the column order is explicit, then build the frame.
salesOrderDict = OrderedDict(salesDict)
salesDf = pd.DataFrame(data=salesOrderDict)
salesDf

Unnamed: 0,Time,ID,Name,Amount
0,2018-01-01,1238938,A,1
1,2018-01-02,3828798,B,2
2,2018-01-06,2354787,C,3


In [44]:
# Column-wise mean. numeric_only=True skips the string columns (Time, ID, Name);
# without it, pandas >= 2.0 raises TypeError instead of silently dropping them.
salesDf.mean(numeric_only=True)

Amount    2.0
dtype: float64

**iloc: Query by integer position**

In [45]:
salesDf.iloc[0,1] #Query the element in the first row and the second column

'1238938'

In [46]:
salesDf.iloc[0,:] #Query the element in the first row and all columns.

Time      2018-01-01
ID           1238938
Name               A
Amount             1
Name: 0, dtype: object

In [47]:
salesDf.iloc[:,0] #Query the element in the first column and all rows.

0    2018-01-01
1    2018-01-02
2    2018-01-06
Name: Time, dtype: object

**loc: Query by label (index and column names)**

In [48]:
salesDf.loc[0,'ID']

'1238938'

In [49]:
salesDf.loc[0,:] #Query the element in the first row and all columns.

Time      2018-01-01
ID           1238938
Name               A
Amount             1
Name: 0, dtype: object

In [50]:
salesDf.loc[:,'Time'] #Query the element in the first column and all rows.

0    2018-01-01
1    2018-01-02
2    2018-01-06
Name: Time, dtype: object

In [51]:
# Alternative simple way:
salesDf['Time']

0    2018-01-01
1    2018-01-02
2    2018-01-06
Name: Time, dtype: object

In [52]:
salesDf.Time

0    2018-01-01
1    2018-01-02
2    2018-01-06
Name: Time, dtype: object

In [53]:
salesDf[['Time','ID']]

Unnamed: 0,Time,ID
0,2018-01-01,1238938
1,2018-01-02,3828798
2,2018-01-06,2354787


In [54]:
salesDf.loc[:,'Time':'Name']

Unnamed: 0,Time,ID,Name
0,2018-01-01,1238938,A
1,2018-01-02,3828798,B
2,2018-01-06,2354787,C


In [55]:
# Boolean mask: True for each row whose Amount is greater than 1.
querySer = salesDf.loc[:,'Amount']>1

In [56]:
querySer

0    False
1     True
2     True
Name: Amount, dtype: bool

In [57]:
# Keep only the rows where the mask is True, with all columns.
salesDf.loc[querySer,:]

Unnamed: 0,Time,ID,Name,Amount
1,2018-01-02,3828798,B,2
2,2018-01-06,2354787,C,3


In [58]:
salesDf.shape

(3, 4)

In [59]:
salesDf.loc[:,'Amount'].dtype

dtype('int64')

In [60]:
salesDf.describe()

Unnamed: 0,Amount
count,3.0
mean,2.0
std,1.0
min,1.0
25%,1.5
50%,2.0
75%,2.5
max,3.0
