## Python CheatSheet

#### Basic
- [Array](#array)
- [Dictionary](#dictionary)
- [Enumerate](#enumerate)

#### Pandas
- [Dataframe](#dataframe)

#### Numpy
- [Matrix](#numpy-matrix)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Base directory for the data files, relative to the current working directory.
path = "../src/"

# Load the semicolon-separated student-mat.csv into the DataFrame that the
# pandas examples in this notebook operate on.
stud_perf = pd.read_csv(path + "data/student/student-mat.csv", sep=";")

## Array
### Array Comparison
1. Single Condition

In [None]:
A = np.array([1, 2, 3, 4])
B = np.array([2, 3, 1, 5])
# Comparison operators work element-wise and yield a boolean array.
C = A < B # -> [True  True False  True]
print(C)

2. Multiple conditions, using `np.logical_and()`, `np.logical_or()` and `np.logical_not()`

In [None]:
# Element-wise AND of two conditions; with A = [1, 2, 3, 4] this gives
# [False  True  True  True].
res = np.logical_and(A > 1, A < 5)
print(res)

### N-d Array

Use `np.nditer()` to iterate over every element of an N-d array:

In [None]:
# 3x2 array holding the values 1..6 in row-major order.
A = np.arange(1, 7).reshape(3, 2)
# np.nditer visits the elements one by one, so no nested loops are needed.
for x in np.nditer(A):
    print(x)

## Dictionary 

In [None]:
# NOTE(review): the name `dict` shadows the built-in dict type for the rest of
# the session; a different name (e.g. `d`) would avoid that.
dict = { "X": "x", "Y": "y", "Z": "z" }

### Add Element

1. `update()` method
1. Assign directly

In [None]:
dict.update({"M": "m"})  # merge in another mapping; an existing key would be overwritten
dict["W"] = "w"          # assign directly by key

### Remove Element

1. `pop()` method removes the item with the **specified key name**
1. `popitem()` method removes the last inserted item
1. `del` keyword removes the item with the **specified key name**
1. `clear()` method empties the dictionary

In [None]:
dict.pop("X") # remove key "X" and return its value; raises KeyError if the key is missing

In [None]:
# Start from a fresh dictionary, then add two more entries.
dict = {"X": "x", "Y": "y", "Z": "z"}
dict.update(M="m")
dict["W"] = "w"
# popitem() drops the most recently inserted pair, here ("W", "w").
dict.popitem()
print(dict)

In [None]:
del dict["X"]  # delete key "X"; raises KeyError if the key is missing

In [None]:
dict.clear()  # remove every item, leaving an empty dictionary

### Loop over dictionary

Use the `items()` method to loop over a dictionary:

In [None]:
dict = {"X": "x", "Y": "y", "Z": "z"}
# items() yields one (key, value) tuple per entry, in insertion order.
for item in dict.items():
    key, value = item
    print(item)

Iterate over keys

In [None]:
# keys() yields each key; the value is then looked up by that key.
for key in dict.keys():
    print(dict[key])

## Enumerate

In [None]:
seq = ['one', 'two', 'three']
# enumerate() pairs every element with its zero-based position.
for i, element in enumerate(seq):
    print(i, element)

## Dataframe

### Create

In [None]:
# Source data: three equally long columns keyed by name.
data = {
    'a': [1, 2, 3, 4, 5, 6],
    'b': [6, 5, 4, 3, 2, 1],
    'c': [6, 5, 4, 3, 2, 1],
}
# `columns` picks which keys become DataFrame columns, so 'c' is left out.
df = pd.DataFrame(data, columns=['a', 'b'])

### Name Column

In [None]:
df.columns = ['X', 'Y'] # rename positionally: the first column becomes X, the second Y
df

In [None]:
# The file is read without a header row (header=None); the column names come
# from `varnames`, and fields are split on runs of whitespace (sep is a regex).
varnames = ["Subject", "Gender", "CA1", "CA2", "HW"]
pd.read_table(path + 'data/ex_1.txt', header=None, names=varnames, sep="\\s+")

### Select columns

In [None]:
stud_perf['G3']     # single label -> pandas Series
stud_perf[['G3']]   # list of labels -> pandas DataFrame

In [None]:
stud_perf.loc[:,'G3']     # select column 'G3' by label -> Series
stud_perf.loc[:,['G3']]   # select column 'G3' by label -> DataFrame
stud_perf.iloc[:, 1]      # select the column at position 1 (the second column; iloc is 0-based) -> Series
stud_perf.iloc[:, [1]]    # select the column at position 1 (the second column) -> DataFrame

In [None]:
stud_perf.loc[:, ['G1','G2']]  # explicit list of column labels
stud_perf.loc[:, 'G1':'G3']   # label slice: both endpoints are included (G1 through G3)
stud_perf.iloc[:, [1, 3]]     # columns at positions 1 and 3
stud_perf.iloc[:, 1:3]        # position slice: columns 1 and 2, end position 3 is excluded

### Select Row

In [None]:
# loc selects by index label, iloc by integer position.
# A scalar selector returns a Series; a list selector returns a DataFrame.
stud_perf.loc[1]        # -> pandas.Series
stud_perf.iloc[4]       # -> pandas.Series

stud_perf.loc[[1]]      # -> pandas.DataFrame
stud_perf.iloc[[4]]     # -> pandas.DataFrame

stud_perf.loc[[4, 1]]   # -> pandas.DataFrame, rows in the order listed
stud_perf.iloc[[1, 2]]  # -> pandas.DataFrame

### Select Multiple Rows with Multiple Columns

In [None]:
stud_perf.loc[[1, 2], ['G1', 'G2']]   # rows labelled 1 and 2, columns G1 and G2
stud_perf.iloc[[3, 4], [0, 1]]        # rows at positions 3 and 4, columns at positions 0 and 1

### Filter

1. Filter rows: use `cond` to filter dataframe, returns all rows with `True` value

In [None]:
cond = stud_perf.G3 > 10   # boolean Series with one entry per row
stud_perf[cond]            # Returns all "True" rows

In [None]:
# stud_perf[stud_perf.G1 < 10 & stud_perf.G3 > 10] -> error: `&` binds tighter than `<` and `>`
stud_perf[(stud_perf.address == 'R') & (stud_perf.G3 > 10)] # need parentheses for each condition

In [None]:
stud_perf[(stud_perf.G1 < 10) | (stud_perf.G3 > 10)] # element-wise OR; need parentheses for each condition

### Loop over DataFrame

In [None]:
# iterrows() yields (index, row) pairs; the slice limits the loop to the first 10 rows.
for index, row in stud_perf[0:10].iterrows(): # rows at positions 0, ..., 9
    print(index) # -> index label of the row
    print(row)   # -> the row as a pd.Series

### Get Values

In [None]:
# Series -> 1-D NumPy array. to_numpy() is the accessor pandas recommends over
# the older .values attribute.
stud_perf[0:10]['G3'].to_numpy()

In [None]:
stud_perf[0:10][['G2','G3']].to_numpy()  # DataFrame -> 2-D NumPy array, one row per DataFrame row

### Map Function

```python
dataframe.apply(func, axis, raw, result_type, args, kwds)
```

In [None]:
def cond(x):
    """Label a score: 'Good' when it is above 15, otherwise 'OK'."""
    return 'Good' if x > 15 else 'OK'
    
stud_perf.G3.apply(cond) # -> pd.Series with cond applied to every G3 value

In [None]:
stud_perf[['G2','G3']].apply(max, axis=0) # axis=0: max is applied to each column -> maximum of G2 and of G3

In [None]:
stud_perf[['G2','G3']].apply(max, axis=1) # axis=1: max is applied to each row -> Series of max(G2, G3)

### Groupby

In [None]:
# Take every 20th row of the first 250 (positions 0, 20, ..., 240) and group
# them by address and famsize. as_index=False keeps the group keys as regular
# columns in aggregated results instead of moving them into the index.
group_df = stud_perf[0:250:20].groupby(['address', 'famsize'], as_index=False)
group_df

In [None]:
# Iterating a grouped object yields (group key, sub-frame) pairs;
# [['G3']] restricts the grouped selection to column G3.
for name, group in group_df[['G3']]:
    print(f"Group: {name}")
    print(group) 

In [None]:
group_df[['G3']].sum()  # total G3 per (address, famsize) group

### Divide
Divide the G3 scores into 10 bins: [0,2], (2,4], (4,6], (6,8], (8,10], (10,12], (12,14], (14,16], (16,18], (18,20]

In [None]:
# labels=False returns the integer bin index (0-9) instead of interval labels;
# include_lowest=True closes the first bin on the left so a score of 0 is included.
pd.cut(stud_perf.G3, bins=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], labels=False, include_lowest=True)

### Stack

In [None]:
# NOTE(review): `flextime` is not defined anywhere in the visible notebook — it
# must be loaded before this cell can run.
# Drop the first column, stack the remaining columns into a single long
# Series, then turn the resulting index levels back into ordinary columns.
flex2 = flextime.iloc[:,1:].stack().reset_index()

## Numpy Matrix

### Create

In [None]:
# Two 1-D arrays of five consecutive integers each.
array1 = np.arange(1, 6)     # array([1, 2, 3, 4, 5])
array2 = np.arange(6, 11)    # array([ 6,  7,  8,  9, 10])
# Using the 1-D arrays as rows gives a 2x5 matrix.
mat = np.array((array1, array2))
mat

### Select and Slicing

In [None]:
mat[0, 0::3] # row 0, every third column starting at column 0 -> array([1, 4])
mat[1, 1:3]  # row 1, columns 1 and 2 (end index 3 excluded) -> array([7, 8])

### Numerical Summary

In [None]:
mat.T                # -> transpose: rows become columns

In [None]:
mat.mean()           # -> mean of all entries
mat.mean(axis=1)     # -> mean of each row
mat.mean(axis=0)     # -> mean of each column

In [None]:
mat.sum()           # -> sum of all entries
mat.sum(axis=1)     # -> sum of each row
mat.sum(axis=0)     # -> sum of each column

In [None]:
np.argmax(mat)               # -> index into the flattened array [1, ..., 10]; the maximum (10) sits at index 9
np.argmax(mat, axis=1)       # -> column index of the maximum in each row
np.argmax(mat, axis=0)       # -> row index of the maximum in each column

### Combination

In [None]:
np.vstack([mat, array1])  # stack vertically: array1 is appended as an extra row

In [None]:
np.hstack([array1.reshape((5,1)), array2.reshape((5,1)), mat.T])  # stack horizontally: two 5x1 columns next to the 5x2 transpose

## Line diagram

In [None]:
# Sample data so the cell runs on its own: `y` is not defined anywhere else
# in this notebook.
x = np.arange(10)
y = x ** 2

# Draw y against x as a connected line and display the figure.
plt.plot(x, y)
plt.show()

## Scatter Plot

In [None]:
# Sample data so the cell runs on its own: `y` is not defined anywhere else
# in this notebook.
x = np.arange(10)
y = x ** 2

# Draw one marker per (x, y) pair and display the figure.
plt.scatter(x, y)
plt.show()